Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79 : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
80 : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
// Filter radius associated with each resampling algorithm.
// The table is indexed directly by the GDALResampleAlg value (see
// GWKGetFilterRadius()), so entries must stay in enum order and there must be
// exactly GRA_LAST_VALUE + 1 of them (enforced by a static_assert in the
// accessor).
static const int anGWKFilterRadius[] = {
    0,  // Nearest neighbour
    1,  // Bilinear
    2,  // Cubic Convolution (Catmull-Rom)
    2,  // Cubic B-Spline
    3,  // Lanczos windowed sinc
    0,  // Average
    0,  // Mode
    0,  // Reserved GRA_Gauss=7
    0,  // Max
    0,  // Min
    0,  // Med
    0,  // Q1
    0,  // Q3
    0,  // Sum
    0,  // RMS
};
101 :
102 : static double GWKBilinear(double dfX);
103 : static double GWKCubic(double dfX);
104 : static double GWKBSpline(double dfX);
105 : static double GWKLanczosSinc(double dfX);
106 :
// Single-value filter function associated with each resampling algorithm.
// Indexed directly by the GDALResampleAlg value (see GWKGetFilterFunc()), so
// entries must stay in enum order. Algorithms without such a function have a
// nullptr entry.
static const FilterFuncType apfGWKFilter[] = {
    nullptr,         // Nearest neighbour
    GWKBilinear,     // Bilinear
    GWKCubic,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline,      // Cubic B-Spline
    GWKLanczosSinc,  // Lanczos windowed sinc
    nullptr,         // Average
    nullptr,         // Mode
    nullptr,         // Reserved GRA_Gauss=7
    nullptr,         // Max
    nullptr,         // Min
    nullptr,         // Med
    nullptr,         // Q1
    nullptr,         // Q3
    nullptr,         // Sum
    nullptr,         // RMS
};
124 :
125 : // TODO(schwehr): Can we make these functions have a const * const arg?
126 : static double GWKBilinear4Values(double *padfVals);
127 : static double GWKCubic4Values(double *padfVals);
128 : static double GWKBSpline4Values(double *padfVals);
129 : static double GWKLanczosSinc4Values(double *padfVals);
130 :
// Four-values-at-a-time variant of the filter function associated with each
// resampling algorithm. Indexed directly by the GDALResampleAlg value (see
// GWKGetFilterFunc4Values()), so entries must stay in enum order. Algorithms
// without such a function have a nullptr entry.
static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
    nullptr,                // Nearest neighbour
    GWKBilinear4Values,     // Bilinear
    GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline4Values,      // Cubic B-Spline
    GWKLanczosSinc4Values,  // Lanczos windowed sinc
    nullptr,                // Average
    nullptr,                // Mode
    nullptr,                // Reserved GRA_Gauss=7
    nullptr,                // Max
    nullptr,                // Min
    nullptr,                // Med
    nullptr,                // Q1
    nullptr,                // Q3
    nullptr,                // Sum
    nullptr,                // RMS
};
148 :
149 13624 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
152 : "Bad size of anGWKFilterRadius");
153 13624 : return anGWKFilterRadius[eResampleAlg];
154 : }
155 :
156 5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
159 : "Bad size of apfGWKFilter");
160 5093 : return apfGWKFilter[eResampleAlg];
161 : }
162 :
163 5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
166 : "Bad size of apfGWKFilter4Values");
167 5093 : return apfGWKFilter4Values[eResampleAlg];
168 : }
169 :
170 : static CPLErr GWKGeneralCase(GDALWarpKernel *);
171 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
178 : #endif
179 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
186 : #endif
187 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
193 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
194 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
195 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
196 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 :
199 : /************************************************************************/
200 : /* GWKJobStruct */
201 : /************************************************************************/
202 :
// Description of one unit of warping work handed to a kernel worker function.
// The reference members are bound at construction time to the synchronization
// state of a GWKThreadData instance, which must therefore outlive the job.
struct GWKJobStruct
{
    // Mutex guarding 'counter' and 'stopFlag' (locked by GWKProgressThread()).
    std::mutex &mutex;
    // Notified by GWKProgressThread() after each counter increment.
    std::condition_variable &cv;
    // Progress counter used by GWKProgressMonoThread() instead of 'counter'.
    int counterSingleThreaded = 0;
    // Shared progress counter, incremented by GWKProgressThread().
    int &counter;
    // Shared flag asking workers to interrupt the computation.
    bool &stopFlag;
    // Kernel this job operates on.
    GDALWarpKernel *poWK = nullptr;
    // Range of destination lines assigned to this job. GWKRun() gives
    // consecutive jobs iYMax(i) == iYMin(i + 1), so the range is presumably
    // half-open [iYMin, iYMax) -- confirm against the worker functions.
    int iYMin = 0;
    int iYMax = 0;
    // Progress callback; returns TRUE if the computation must be interrupted.
    int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
    // Transformer argument to use for this job.
    void *pTransformerArg = nullptr;
    // used by GWKRun() to assign the proper pTransformerArg
    void (*pfnFunc)(void *) = nullptr;

    // Bind the job to the shared synchronization state; all other members
    // keep their default values until the caller fills them in.
    GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
                 int &counter_, bool &stopFlag_)
        : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
    {
    }
};
224 :
// State shared between GWKRun() and its worker jobs. An instance is created by
// GWKThreadsCreate() and released by GWKThreadsEnd().
struct GWKThreadData
{
    // Job queue obtained from the global thread pool. Left null when
    // multithreading is not used, in which case GWKRun() falls back to
    // GWKGenericMonoThread().
    std::unique_ptr<CPLJobQueue> poJobQueue{};
    // Pre-allocated job descriptors (one per potential thread), filled in by
    // GWKRun() on each run.
    std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
    // Upper bound on the number of threads GWKRun() may use.
    int nMaxThreads{0};
    // Progress counter incremented by GWKProgressThread().
    int counter{0};
    // Set to true to ask the workers to interrupt the computation.
    bool stopFlag{false};
    // Guards counter, stopFlag, the transformer map and the thread counts.
    std::mutex mutex{};
    // Notified by workers on progress; GWKRun() waits on it.
    std::condition_variable cv{};
    // True while a worker is borrowing pTransformerArgInput.
    bool bTransformerArgInputAssignedToThread{false};
    void *pTransformerArgInput{
        nullptr};  // owned by calling layer. Not to be destroyed
    // Transformer argument used by each thread, keyed by CPLGetPID(). Entries
    // still present at GWKThreadsEnd() time are destroyed with
    // GDALDestroyTransformer().
    std::map<GIntBig, void *> mapThreadToTransformerArg{};
    // Number of jobs submitted by the current GWKRun() call.
    int nTotalThreadCountForThisRun = 0;
    // Number of those jobs that have entered ThreadFuncAdapter() so far.
    int nCurThreadCountForThisRun = 0;
};
241 :
242 : /************************************************************************/
243 : /* GWKProgressThread() */
244 : /************************************************************************/
245 :
246 : // Return TRUE if the computation must be interrupted.
247 36 : static int GWKProgressThread(GWKJobStruct *psJob)
248 : {
249 36 : bool stop = false;
250 : {
251 36 : std::lock_guard<std::mutex> lock(psJob->mutex);
252 36 : psJob->counter++;
253 36 : stop = psJob->stopFlag;
254 : }
255 36 : psJob->cv.notify_one();
256 :
257 36 : return stop;
258 : }
259 :
260 : /************************************************************************/
261 : /* GWKProgressMonoThread() */
262 : /************************************************************************/
263 :
264 : // Return TRUE if the computation must be interrupted.
265 417779 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
266 : {
267 417779 : GDALWarpKernel *poWK = psJob->poWK;
268 417779 : if (!poWK->pfnProgress(poWK->dfProgressBase +
269 417779 : poWK->dfProgressScale *
270 417779 : (++psJob->counterSingleThreaded /
271 417779 : static_cast<double>(psJob->iYMax)),
272 : "", poWK->pProgress))
273 : {
274 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
275 1 : psJob->stopFlag = true;
276 1 : return TRUE;
277 : }
278 417778 : return FALSE;
279 : }
280 :
281 : /************************************************************************/
282 : /* GWKGenericMonoThread() */
283 : /************************************************************************/
284 :
285 3111 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
286 : void (*pfnFunc)(void *pUserData))
287 : {
288 3111 : GWKThreadData td;
289 :
290 : // NOTE: the mutex is not used.
291 3111 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
292 3111 : job.poWK = poWK;
293 3111 : job.iYMin = 0;
294 3111 : job.iYMax = poWK->nDstYSize;
295 3111 : job.pfnProgress = GWKProgressMonoThread;
296 3111 : job.pTransformerArg = poWK->pTransformerArg;
297 3111 : job.counterSingleThreaded = td.counter;
298 3111 : pfnFunc(&job);
299 3111 : td.counter = job.counterSingleThreaded;
300 :
301 6222 : return td.stopFlag ? CE_Failure : CE_None;
302 : }
303 :
304 : /************************************************************************/
305 : /* GWKThreadsCreate() */
306 : /************************************************************************/
307 :
308 1794 : void *GWKThreadsCreate(char **papszWarpOptions,
309 : GDALTransformerFunc /* pfnTransformer */,
310 : void *pTransformerArg)
311 : {
312 1794 : const int nThreads = GDALGetNumThreads(papszWarpOptions, "NUM_THREADS",
313 : GDAL_DEFAULT_MAX_THREAD_COUNT,
314 : /* bDefaultAllCPUs = */ false);
315 1794 : GWKThreadData *psThreadData = new GWKThreadData();
316 : auto poThreadPool =
317 1794 : nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
318 1794 : if (poThreadPool)
319 : {
320 24 : psThreadData->nMaxThreads = nThreads;
321 24 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
322 : nThreads,
323 24 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
324 48 : psThreadData->counter, psThreadData->stopFlag)));
325 :
326 24 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
327 24 : psThreadData->pTransformerArgInput = pTransformerArg;
328 : }
329 :
330 1794 : return psThreadData;
331 : }
332 :
333 : /************************************************************************/
334 : /* GWKThreadsEnd() */
335 : /************************************************************************/
336 :
337 1794 : void GWKThreadsEnd(void *psThreadDataIn)
338 : {
339 1794 : if (psThreadDataIn == nullptr)
340 0 : return;
341 :
342 1794 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
343 1794 : if (psThreadData->poJobQueue)
344 : {
345 : // cppcheck-suppress constVariableReference
346 34 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
347 : {
348 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput);
349 10 : GDALDestroyTransformer(pair.second);
350 : }
351 24 : psThreadData->poJobQueue.reset();
352 : }
353 1794 : delete psThreadData;
354 : }
355 :
356 : /************************************************************************/
357 : /* ThreadFuncAdapter() */
358 : /************************************************************************/
359 :
360 33 : static void ThreadFuncAdapter(void *pData)
361 : {
362 33 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
363 33 : GWKThreadData *psThreadData =
364 33 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
365 :
366 : // Look if we have already a per-thread transformer
367 33 : void *pTransformerArg = nullptr;
368 33 : const GIntBig nThreadId = CPLGetPID();
369 :
370 : {
371 66 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
372 33 : ++psThreadData->nCurThreadCountForThisRun;
373 :
374 33 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
375 33 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
376 : {
377 0 : pTransformerArg = oIter->second;
378 : }
379 33 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
380 33 : psThreadData->nCurThreadCountForThisRun ==
381 33 : psThreadData->nTotalThreadCountForThisRun)
382 : {
383 : // If we are the last thread to be started, temporarily borrow the
384 : // original transformer
385 23 : psThreadData->bTransformerArgInputAssignedToThread = true;
386 23 : pTransformerArg = psThreadData->pTransformerArgInput;
387 23 : psThreadData->mapThreadToTransformerArg[nThreadId] =
388 : pTransformerArg;
389 : }
390 :
391 33 : if (pTransformerArg == nullptr)
392 : {
393 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
394 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
395 : }
396 : }
397 :
398 : // If no transformer assigned to current thread, instantiate one
399 33 : if (pTransformerArg == nullptr)
400 : {
401 : // This somehow assumes that GDALCloneTransformer() is thread-safe
402 : // which should normally be the case.
403 : pTransformerArg =
404 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput);
405 :
406 : // Lock for the stop flag and the transformer map.
407 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
408 10 : if (!pTransformerArg)
409 : {
410 0 : psJob->stopFlag = true;
411 0 : return;
412 : }
413 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
414 : }
415 :
416 33 : psJob->pTransformerArg = pTransformerArg;
417 33 : psJob->pfnFunc(pData);
418 :
419 : // Give back original transformer, if borrowed.
420 : {
421 66 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
422 33 : if (psThreadData->bTransformerArgInputAssignedToThread &&
423 26 : pTransformerArg == psThreadData->pTransformerArgInput)
424 : {
425 : psThreadData->mapThreadToTransformerArg.erase(
426 23 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
427 23 : psThreadData->bTransformerArgInputAssignedToThread = false;
428 : }
429 : }
430 : }
431 :
432 : /************************************************************************/
433 : /* GWKRun() */
434 : /************************************************************************/
435 :
436 3134 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
437 : void (*pfnFunc)(void *pUserData))
438 :
439 : {
440 3134 : const int nDstYSize = poWK->nDstYSize;
441 :
442 3134 : CPLDebug("GDAL",
443 : "GDALWarpKernel()::%s() "
444 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
445 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
446 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
447 : poWK->nDstYSize);
448 :
449 3134 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
450 : {
451 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
452 0 : return CE_Failure;
453 : }
454 :
455 3134 : GWKThreadData *psThreadData =
456 : static_cast<GWKThreadData *>(poWK->psThreadData);
457 3134 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
458 : {
459 3111 : return GWKGenericMonoThread(poWK, pfnFunc);
460 : }
461 :
462 23 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
463 : // Config option mostly useful for tests to be able to test multithreading
464 : // with small rasters
465 : const int nWarpChunkSize =
466 23 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
467 23 : if (nWarpChunkSize > 0)
468 : {
469 21 : GIntBig nChunks =
470 21 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
471 21 : if (nThreads > nChunks)
472 16 : nThreads = static_cast<int>(nChunks);
473 : }
474 23 : if (nThreads <= 0)
475 19 : nThreads = 1;
476 :
477 23 : CPLDebug("WARP", "Using %d threads", nThreads);
478 :
479 23 : auto &jobs = *psThreadData->threadJobs;
480 23 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
481 : // Fill-in job structures.
482 56 : for (int i = 0; i < nThreads; ++i)
483 : {
484 33 : auto &job = jobs[i];
485 33 : job.poWK = poWK;
486 33 : job.iYMin =
487 33 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
488 33 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
489 33 : nThreads);
490 33 : if (poWK->pfnProgress != GDALDummyProgress)
491 2 : job.pfnProgress = GWKProgressThread;
492 33 : job.pfnFunc = pfnFunc;
493 : }
494 :
495 : bool bStopFlag;
496 : {
497 23 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
498 :
499 23 : psThreadData->nTotalThreadCountForThisRun = nThreads;
500 : // coverity[missing_lock]
501 23 : psThreadData->nCurThreadCountForThisRun = 0;
502 :
503 : // Start jobs.
504 56 : for (int i = 0; i < nThreads; ++i)
505 : {
506 33 : auto &job = jobs[i];
507 33 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
508 : static_cast<void *>(&job));
509 : }
510 :
511 : /* --------------------------------------------------------------------
512 : */
513 : /* Report progress. */
514 : /* --------------------------------------------------------------------
515 : */
516 23 : if (poWK->pfnProgress != GDALDummyProgress)
517 : {
518 4 : while (psThreadData->counter < nDstYSize)
519 : {
520 3 : psThreadData->cv.wait(lock);
521 3 : if (!poWK->pfnProgress(poWK->dfProgressBase +
522 3 : poWK->dfProgressScale *
523 3 : (psThreadData->counter /
524 3 : static_cast<double>(nDstYSize)),
525 : "", poWK->pProgress))
526 : {
527 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
528 1 : psThreadData->stopFlag = true;
529 1 : break;
530 : }
531 : }
532 : }
533 :
534 23 : bStopFlag = psThreadData->stopFlag;
535 : }
536 :
537 : /* -------------------------------------------------------------------- */
538 : /* Wait for all jobs to complete. */
539 : /* -------------------------------------------------------------------- */
540 23 : psThreadData->poJobQueue->WaitCompletion();
541 :
542 23 : return bStopFlag ? CE_Failure : CE_None;
543 : }
544 :
545 : /************************************************************************/
546 : /* ==================================================================== */
547 : /* GDALWarpKernel */
548 : /* ==================================================================== */
549 : /************************************************************************/
550 :
551 : /**
552 : * \class GDALWarpKernel "gdalwarper.h"
553 : *
554 : * Low level image warping class.
555 : *
556 : * This class is responsible for low level image warping for one
557 : * "chunk" of imagery. The class is essentially a structure with all
558 : * data members public - primarily so that new special-case functions
559 : * can be added without changing the class declaration.
560 : *
561 : * Applications are normally intended to interact with warping facilities
562 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
563 : * theory be used directly if great care is taken in setting up the
564 : * control data.
565 : *
566 : * <h3>Design Issues</h3>
567 : *
568 : * The intention is that PerformWarp() would analyze the setup in terms
569 : * of the datatype, resampling type, and validity/density mask usage and
570 : * pick one of many specific implementations of the warping algorithm over
571 : * a continuum of optimization vs. generality. At one end there will be a
572 : * reference general purpose implementation of the algorithm that supports
573 : * any data type (working internally in double precision complex), all three
574 : * resampling types, and any or all of the validity/density masks. At the
575 : * other end would be highly optimized algorithms for common cases like
576 : * nearest neighbour resampling on GDT_UInt8 data with no masks.
577 : *
578 : * The full set of optimized versions have not been decided but we should
579 : * expect to have at least:
580 : * - One for each resampling algorithm for 8bit data with no masks.
581 : * - One for each resampling algorithm for float data with no masks.
582 : * - One for each resampling algorithm for float data with any/all masks
583 : * (essentially the generic case for just float data).
584 : * - One for each resampling algorithm for 8bit data with support for
585 : * input validity masks (per band or per pixel). This handles the common
586 : * case of nodata masking.
587 : * - One for each resampling algorithm for float data with support for
588 : * input validity masks (per band or per pixel). This handles the common
589 : * case of nodata masking.
590 : *
591 : * Some of the specializations would operate on all bands in one pass
592 : * (especially the ones without masking would do this), while others might
593 : * process each band individually to reduce code complexity.
594 : *
595 : * <h3>Masking Semantics</h3>
596 : *
597 : * A detailed explanation of the semantics of the validity and density masks,
598 : * and their effects on resampling kernels is needed here.
599 : */
600 :
601 : /************************************************************************/
602 : /* GDALWarpKernel Data Members */
603 : /************************************************************************/
604 :
605 : /**
606 : * \var GDALResampleAlg GDALWarpKernel::eResample;
607 : *
608 : * Resampling algorithm.
609 : *
610 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
611 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
612 : * GRA_Mode or GRA_Sum.
613 : *
614 : * This field is required. GRA_NearestNeighbour may be used as a default
615 : * value.
616 : */
617 :
618 : /**
619 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
620 : *
621 : * Working pixel data type.
622 : *
623 : * The datatype of pixels in the source image (papabySrcImage) and
624 : * destination image (papabyDstImage) buffers. Note that operations on
625 : * some data types (such as GDT_UInt8) may be much better optimized than other
626 : * less common cases.
627 : *
628 : * This field is required. It may not be GDT_Unknown.
629 : */
630 :
631 : /**
632 : * \var int GDALWarpKernel::nBands;
633 : *
634 : * Number of bands.
635 : *
636 : * The number of bands (layers) of imagery being warped. Determines the
637 : * number of entries in the papabySrcImage, papanBandSrcValid,
638 : * and papabyDstImage arrays.
639 : *
640 : * This field is required.
641 : */
642 :
643 : /**
644 : * \var int GDALWarpKernel::nSrcXSize;
645 : *
646 : * Source image width in pixels.
647 : *
648 : * This field is required.
649 : */
650 :
651 : /**
652 : * \var int GDALWarpKernel::nSrcYSize;
653 : *
654 : * Source image height in pixels.
655 : *
656 : * This field is required.
657 : */
658 :
659 : /**
660 : * \var double GDALWarpKernel::dfSrcXExtraSize;
661 : *
662 : * Number of pixels included in nSrcXSize that are present on the edges of
663 : * the area of interest to take into account the width of the kernel.
664 : *
665 : * This field is required.
666 : */
667 :
668 : /**
669 : * \var double GDALWarpKernel::dfSrcYExtraSize;
670 : *
671 : * Number of pixels included in nSrcYSize that are present on the edges of
672 : * the area of interest to take into account the height of the kernel.
673 : *
674 : * This field is required.
675 : */
676 :
677 : /**
678 : * \var GByte **GDALWarpKernel::papabySrcImage;
679 : *
680 : * Array of source image band data.
681 : *
682 : * This is an array of pointers (of size GDALWarpKernel::nBands)
683 : * to image data. Each individual band of image data is organized as a single
684 : * block of image data in left to right, then bottom to top order. The actual
685 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
686 : *
687 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
688 : * the second band with eWorkingDataType set to GDT_Float32 use code like
689 : * this:
690 : *
691 : * \code
692 : * float dfPixelValue;
693 : * int nBand = 2-1; // Band indexes are zero based.
694 : * int nPixel = 3; // Zero based.
695 : * int nLine = 4; // Zero based.
696 : *
697 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
698 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
699 : * assert( nBand >= 0 && nBand < poKern->nBands );
700 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
701 : * [nPixel + nLine * poKern->nSrcXSize];
702 : * \endcode
703 : *
704 : * This field is required.
705 : */
706 :
707 : /**
708 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
709 : *
710 : * Per band validity mask for source pixels.
711 : *
712 : * Array of pixel validity mask layers for each source band. Each of
713 : * the mask layers is the same size (in pixels) as the source image with
714 : * one bit per pixel. Note that it is legal (and common) for this to be
715 : * NULL indicating that none of the pixels are invalidated, or for some
716 : * band validity masks to be NULL in which case all pixels of the band are
717 : * valid. The following code can be used to test the validity of a particular
718 : * pixel.
719 : *
720 : * \code
721 : * int bIsValid = TRUE;
722 : * int nBand = 2-1; // Band indexes are zero based.
723 : * int nPixel = 3; // Zero based.
724 : * int nLine = 4; // Zero based.
725 : *
726 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
727 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
728 : * assert( nBand >= 0 && nBand < poKern->nBands );
729 : *
730 : * if( poKern->papanBandSrcValid != NULL
731 : * && poKern->papanBandSrcValid[nBand] != NULL )
732 : * {
733 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
734 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
735 : *
736 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
737 : * }
738 : * \endcode
739 : */
740 :
741 : /**
742 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
743 : *
744 : * Per pixel validity mask for source pixels.
745 : *
746 : * A single validity mask layer that applies to the pixels of all source
747 : * bands. It is accessed similarly to papanBandSrcValid, but without the
748 : * extra level of band indirection.
749 : *
750 : * This pointer may be NULL indicating that all pixels are valid.
751 : *
752 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
753 : * the pixel isn't considered to be valid unless both arrays indicate it is
754 : * valid.
755 : */
756 :
757 : /**
758 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
759 : *
760 : * Per pixel density mask for source pixels.
761 : *
762 : * A single density mask layer that applies to the pixels of all source
763 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
764 : * which this pixel should be allowed to contribute to the output result.
765 : *
766 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
767 : *
768 : * The density for a pixel may be accessed like this:
769 : *
770 : * \code
771 : * float fDensity = 1.0;
772 : * int nPixel = 3; // Zero based.
773 : * int nLine = 4; // Zero based.
774 : *
775 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
776 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
777 : * if( poKern->pafUnifiedSrcDensity != NULL )
778 : * fDensity = poKern->pafUnifiedSrcDensity
779 : * [nPixel + nLine * poKern->nSrcXSize];
780 : * \endcode
781 : */
782 :
783 : /**
784 : * \var int GDALWarpKernel::nDstXSize;
785 : *
786 : * Width of destination image in pixels.
787 : *
788 : * This field is required.
789 : */
790 :
791 : /**
792 : * \var int GDALWarpKernel::nDstYSize;
793 : *
794 : * Height of destination image in pixels.
795 : *
796 : * This field is required.
797 : */
798 :
799 : /**
800 : * \var GByte **GDALWarpKernel::papabyDstImage;
801 : *
802 : * Array of destination image band data.
803 : *
804 : * This is an array of pointers (of size GDALWarpKernel::nBands)
805 : * to image data. Each individual band of image data is organized as a single
806 : * block of image data in left to right, then bottom to top order. The actual
807 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
808 : *
809 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
810 : * the second band with eWorkingDataType set to GDT_Float32 use code like
811 : * this:
812 : *
813 : * \code
814 : * float dfPixelValue;
815 : * int nBand = 2-1; // Band indexes are zero based.
816 : * int nPixel = 3; // Zero based.
817 : * int nLine = 4; // Zero based.
818 : *
819 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
820 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
821 : * assert( nBand >= 0 && nBand < poKern->nBands );
822 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
823 : * [nPixel + nLine * poKern->nDstXSize];
824 : * \endcode
825 : *
826 : * This field is required.
827 : */
828 :
829 : /**
830 : * \var GUInt32 *GDALWarpKernel::panDstValid;
831 : *
832 : * Per pixel validity mask for destination pixels.
833 : *
834 : * A single validity mask layer that applies to the pixels of all destination
835 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
836 : * on the size of the destination image.
837 : *
838 : * This pointer may be NULL indicating that all pixels are valid.
839 : */
840 :
841 : /**
842 : * \var float *GDALWarpKernel::pafDstDensity;
843 : *
844 : * Per pixel density mask for destination pixels.
845 : *
846 : * A single density mask layer that applies to the pixels of all destination
847 : * bands. It contains values between 0.0 and 1.0.
848 : *
849 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
850 : *
851 : * The density for a pixel may be accessed like this:
852 : *
853 : * \code
854 : * float fDensity = 1.0;
855 : * int nPixel = 3; // Zero based.
856 : * int nLine = 4; // Zero based.
857 : *
858 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
859 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
860 : * if( poKern->pafDstDensity != NULL )
861 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
862 : * \endcode
863 : */
864 :
865 : /**
866 : * \var int GDALWarpKernel::nSrcXOff;
867 : *
868 : * X offset to source pixel coordinates for transformation.
869 : *
870 : * See pfnTransformer.
871 : *
872 : * This field is required.
873 : */
874 :
875 : /**
876 : * \var int GDALWarpKernel::nSrcYOff;
877 : *
878 : * Y offset to source pixel coordinates for transformation.
879 : *
880 : * See pfnTransformer.
881 : *
882 : * This field is required.
883 : */
884 :
885 : /**
886 : * \var int GDALWarpKernel::nDstXOff;
887 : *
888 : * X offset to destination pixel coordinates for transformation.
889 : *
890 : * See pfnTransformer.
891 : *
892 : * This field is required.
893 : */
894 :
895 : /**
896 : * \var int GDALWarpKernel::nDstYOff;
897 : *
898 : * Y offset to destination pixel coordinates for transformation.
899 : *
900 : * See pfnTransformer.
901 : *
902 : * This field is required.
903 : */
904 :
905 : /**
906 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
907 : *
908 : * Source/destination location transformer.
909 : *
910 : * The function to call to transform coordinates between source image
911 : * pixel/line coordinates and destination image pixel/line coordinates.
912 : * See GDALTransformerFunc() for details of the semantics of this function.
913 : *
914 : * The GDALWarpKernel algorithm will only ever use this transformer in
915 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
916 : * partial or complete scanlines of points in the destination image as
917 : * input. This means, among other things, that it is safe to use the
918 : * approximating transform GDALApproxTransform() as the transformation
919 : * function.
920 : *
921 : * Source and destination images may be subsets of a larger overall image.
922 : * The transformation algorithms will expect and return pixel/line coordinates
923 : * in terms of this larger image, so coordinates need to be offset by
924 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
925 : * passing to pfnTransformer, and after return from it.
926 : *
927 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
928 : * data to this function when it is called.
929 : *
930 : * This field is required.
931 : */
932 :
933 : /**
934 : * \var void *GDALWarpKernel::pTransformerArg;
935 : *
936 : * Callback data for pfnTransformer.
937 : *
938 : * This field may be NULL if not required for the pfnTransformer being used.
939 : */
940 :
941 : /**
942 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
943 : *
944 : * The function to call to report progress of the algorithm, and to check
945 : * for a requested termination of the operation. It operates according to
946 : * GDALProgressFunc() semantics.
947 : *
948 : * Generally speaking the progress function will be invoked for each
949 : * scanline of the destination buffer that has been processed.
950 : *
951 : * This field may be NULL (internally set to GDALDummyProgress()).
952 : */
953 :
954 : /**
955 : * \var void *GDALWarpKernel::pProgress;
956 : *
957 : * Callback data for pfnProgress.
958 : *
959 : * This field may be NULL if not required for the pfnProgress being used.
960 : */
961 :
962 : /************************************************************************/
963 : /* GDALWarpKernel() */
964 : /************************************************************************/
965 :
966 3739 : GDALWarpKernel::GDALWarpKernel()
967 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
968 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
969 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
970 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
971 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
972 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
973 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
974 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
975 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
976 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
977 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
978 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
979 3739 : eTieStrategy(GWKTS_First)
980 : {
981 3739 : }
982 :
983 : /************************************************************************/
984 : /* ~GDALWarpKernel() */
985 : /************************************************************************/
986 :
987 3739 : GDALWarpKernel::~GDALWarpKernel()
988 : {
989 3739 : }
990 :
991 : /************************************************************************/
992 : /* getArea() */
993 : /************************************************************************/
994 :
typedef std::pair<double, double> XYPair;

typedef std::vector<XYPair> XYPoly;

/**
 * Compute the unsigned area of a polygon with the shoelace formula.
 *
 * @param poly Vertices as (x, y) pairs in ring order. The ring may or may
 *             not be closed (i.e. repeat its first vertex at the end).
 * @return the absolute value of the area, or 0.0 when fewer than 3 vertices
 *         are supplied.
 */
static double getArea(const XYPoly &poly)
{
    const size_t nPointCount = poly.size();

    // Fewer than 3 vertices cannot enclose any area. Returning early also
    // keeps the poly[1] and poly[nPointCount - 2] accesses below in bounds.
    if (nPointCount < 3)
        return 0.0;

    // Each vertex contributes x[i] * (y[i + 1] - y[i - 1]), with the indices
    // wrapping around at both ends of the ring.
    double dfAreaSum =
        poly[0].first * (poly[1].second - poly[nPointCount - 1].second);

    for (size_t i = 1; i < nPointCount - 1; i++)
    {
        dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    }

    dfAreaSum += poly[nPointCount - 1].first *
                 (poly[0].second - poly[nPointCount - 2].second);

    return 0.5 * std::fabs(dfAreaSum);
}
1017 :
1018 : /************************************************************************/
1019 : /* CanUse4SamplesFormula() */
1020 : /************************************************************************/
1021 :
1022 4620 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
1023 : {
1024 4620 : if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
1025 : {
1026 : // Use 4-sample formula if we are not downsampling by more than a
1027 : // factor of 1:2
1028 2637 : if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
1029 2197 : return true;
1030 440 : CPLDebugOnce("WARP",
1031 : "Not using 4-sample bilinear/bicubic formula because "
1032 : "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
1033 : poWK->dfXScale, poWK->dfYScale);
1034 : }
1035 2423 : return false;
1036 : }
1037 :
1038 : /************************************************************************/
1039 : /* PerformWarp() */
1040 : /************************************************************************/
1041 :
1042 : /**
1043 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1044 : *
1045 : * This method performs the warp described in the GDALWarpKernel.
1046 : *
1047 : * @return CE_None on success or CE_Failure if an error occurs.
1048 : */
1049 :
1050 3735 : CPLErr GDALWarpKernel::PerformWarp()
1051 :
1052 : {
1053 3735 : const CPLErr eErr = Validate();
1054 :
1055 3735 : if (eErr != CE_None)
1056 1 : return eErr;
1057 :
1058 : // See #2445 and #3079.
1059 3734 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1060 : {
1061 600 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1062 : {
1063 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1064 0 : return CE_Failure;
1065 : }
1066 600 : return CE_None;
1067 : }
1068 :
1069 : /* -------------------------------------------------------------------- */
1070 : /* Pre-calculate resampling scales and window sizes for filtering. */
1071 : /* -------------------------------------------------------------------- */
1072 :
1073 3134 : dfXScale = 0.0;
1074 3134 : dfYScale = 0.0;
1075 :
1076 : // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
1077 : // heterogeneous change in shapes.
1078 : // Best would probably be a per-pixel scale computation.
1079 3134 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1080 3134 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1081 3134 : if (!pszXScale || !pszYScale)
1082 : {
1083 : // Sample points along a grid in the destination space
1084 3133 : constexpr int MAX_POINTS_PER_DIM = 10;
1085 3133 : const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
1086 3133 : const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
1087 3133 : constexpr int CORNER_COUNT_PER_SQUARE = 4;
1088 3133 : const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
1089 6266 : std::vector<double> adfX;
1090 6266 : std::vector<double> adfY;
1091 3133 : adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1092 3133 : adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1093 6266 : std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
1094 6266 : std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
1095 30070 : for (int iY = 0; iY < nPointsY; iY++)
1096 : {
1097 26937 : const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
1098 26937 : const double dfY =
1099 26937 : dfYShift + (nPointsY == 1 ? 0.0
1100 26721 : : static_cast<double>(iY) *
1101 26721 : nDstYSize / (nPointsY - 1));
1102 :
1103 285962 : for (int iX = 0; iX < nPointsX; iX++)
1104 : {
1105 259025 : const double dfXShift =
1106 259025 : (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
1107 :
1108 259025 : const double dfX =
1109 259025 : dfXShift + (nPointsX == 1 ? 0.0
1110 258823 : : static_cast<double>(iX) *
1111 258823 : nDstXSize / (nPointsX - 1));
1112 :
1113 : // Reproject a unit square at each sample point
1114 259025 : adfX.push_back(dfX);
1115 259025 : adfY.push_back(dfY);
1116 :
1117 259025 : adfX.push_back(dfX + 1);
1118 259025 : adfY.push_back(dfY);
1119 :
1120 259025 : adfX.push_back(dfX);
1121 259025 : adfY.push_back(dfY + 1);
1122 :
1123 259025 : adfX.push_back(dfX + 1);
1124 259025 : adfY.push_back(dfY + 1);
1125 : }
1126 : }
1127 3133 : pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
1128 : adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
1129 :
1130 6266 : std::vector<XYPair> adfXYScales;
1131 3133 : adfXYScales.reserve(nPoints);
1132 262158 : for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
1133 : {
1134 516934 : if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
1135 257909 : abSuccess[i + 3])
1136 : {
1137 2063260 : const auto square = [](double x) { return x * x; };
1138 :
1139 257907 : const double vx01 = adfX[i + 1] - adfX[i + 0];
1140 257907 : const double vy01 = adfY[i + 1] - adfY[i + 0];
1141 257907 : const double len01_sq = square(vx01) + square(vy01);
1142 :
1143 257907 : const double vx23 = adfX[i + 3] - adfX[i + 2];
1144 257907 : const double vy23 = adfY[i + 3] - adfY[i + 2];
1145 257907 : const double len23_sq = square(vx23) + square(vy23);
1146 :
1147 257907 : const double vx02 = adfX[i + 2] - adfX[i + 0];
1148 257907 : const double vy02 = adfY[i + 2] - adfY[i + 0];
1149 257907 : const double len02_sq = square(vx02) + square(vy02);
1150 :
1151 257907 : const double vx13 = adfX[i + 3] - adfX[i + 1];
1152 257907 : const double vy13 = adfY[i + 3] - adfY[i + 1];
1153 257907 : const double len13_sq = square(vx13) + square(vy13);
1154 :
1155 : // ~ 20 degree, heuristic
1156 257907 : constexpr double TAN_MODEST_ANGLE = 0.35;
1157 :
1158 : // 10%, heuristic
1159 257907 : constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
1160 :
1161 : // Security margin to avoid division by zero (would only
1162 : // happen in case of degenerated coordinate transformation,
1163 : // or insane upsampling)
1164 257907 : constexpr double EPSILON = 1e-10;
1165 :
1166 : // Does the transformed square looks like an almost non-rotated
1167 : // quasi-rectangle ?
1168 257907 : if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
1169 250790 : std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
1170 250763 : std::fabs(len01_sq - len23_sq) <
1171 250763 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
1172 250650 : std::fabs(len02_sq - len13_sq) <
1173 250650 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
1174 : {
1175 : // Using a geometric average here of lenAB_sq and lenCD_sq,
1176 : // hence a sqrt(), and as this is still a squared value,
1177 : // we need another sqrt() to get a distance.
1178 : const double dfXLength =
1179 250635 : std::sqrt(std::sqrt(len01_sq * len23_sq));
1180 : const double dfYLength =
1181 250635 : std::sqrt(std::sqrt(len02_sq * len13_sq));
1182 250635 : if (dfXLength > EPSILON && dfYLength > EPSILON)
1183 : {
1184 250635 : const double dfThisXScale = 1.0 / dfXLength;
1185 250635 : const double dfThisYScale = 1.0 / dfYLength;
1186 250635 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1187 250635 : }
1188 : }
1189 : else
1190 : {
1191 : // If not, then consider the area of the transformed unit
1192 : // square to determine the X/Y scales.
1193 7272 : const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
1194 7272 : {adfX[i + 1], adfY[i + 1]},
1195 7272 : {adfX[i + 3], adfY[i + 3]},
1196 29088 : {adfX[i + 2], adfY[i + 2]}};
1197 7272 : const double dfSrcArea = getArea(poly);
1198 7272 : const double dfFactor = std::sqrt(dfSrcArea);
1199 7272 : if (dfFactor > EPSILON)
1200 : {
1201 7272 : const double dfThisXScale = 1.0 / dfFactor;
1202 7272 : const double dfThisYScale = dfThisXScale;
1203 7272 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1204 : }
1205 : }
1206 : }
1207 : }
1208 :
1209 3133 : if (!adfXYScales.empty())
1210 : {
1211 : // Sort by increasing xscale * yscale
1212 3133 : std::sort(adfXYScales.begin(), adfXYScales.end(),
1213 1401590 : [](const XYPair &a, const XYPair &b)
1214 1401590 : { return a.first * a.second < b.first * b.second; });
1215 :
1216 : // Compute the per-axis maximum of scale
1217 3133 : double dfXMax = 0;
1218 3133 : double dfYMax = 0;
1219 261040 : for (const auto &[dfX, dfY] : adfXYScales)
1220 : {
1221 257907 : dfXMax = std::max(dfXMax, dfX);
1222 257907 : dfYMax = std::max(dfYMax, dfY);
1223 : }
1224 :
1225 : // Now eliminate outliers, defined as ones whose value is < 10% of
1226 : // the maximum value, typically found at a polar discontinuity, and
1227 : // compute the average of non-outlier values.
1228 3133 : dfXScale = 0;
1229 3133 : dfYScale = 0;
1230 3133 : int i = 0;
1231 3133 : constexpr double THRESHOLD = 0.1; // 10%, rather arbitrary
1232 261040 : for (const auto &[dfX, dfY] : adfXYScales)
1233 : {
1234 257907 : if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
1235 : {
1236 255719 : ++i;
1237 255719 : const double dfXDelta = dfX - dfXScale;
1238 255719 : const double dfYDelta = dfY - dfYScale;
1239 255719 : const double dfInvI = 1.0 / i;
1240 255719 : dfXScale += dfXDelta * dfInvI;
1241 255719 : dfYScale += dfYDelta * dfInvI;
1242 : }
1243 : }
1244 : }
1245 : }
1246 :
1247 : // Round to closest integer reciprocal scale if we are very close to it
1248 : const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
1249 6268 : [](double dfScale)
1250 : {
1251 6268 : if (dfScale < 1.0)
1252 : {
1253 2521 : double dfReciprocalScale = 1.0 / dfScale;
1254 2521 : const int nReciprocalScale =
1255 2521 : static_cast<int>(dfReciprocalScale + 0.5);
1256 2521 : if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
1257 2152 : dfScale = 1.0 / nReciprocalScale;
1258 : }
1259 6268 : return dfScale;
1260 : };
1261 :
1262 3134 : if (dfXScale <= 0)
1263 1 : dfXScale = 1.0;
1264 3134 : if (dfYScale <= 0)
1265 1 : dfYScale = 1.0;
1266 :
1267 3134 : dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
1268 3134 : dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
1269 :
1270 3134 : if (pszXScale != nullptr)
1271 1 : dfXScale = CPLAtof(pszXScale);
1272 3134 : if (pszYScale != nullptr)
1273 1 : dfYScale = CPLAtof(pszYScale);
1274 :
1275 3134 : if (!pszXScale || !pszYScale)
1276 3133 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1277 :
1278 3134 : const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
1279 :
1280 : // Safety check for callers that would use GDALWarpKernel without using
1281 : // GDALWarpOperation.
1282 3071 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1283 3006 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1284 6268 : !bUse4SamplesFormula)) &&
1285 348 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1286 : WARP_EXTRA_ELTS)
1287 : {
1288 0 : CPLError(CE_Failure, CPLE_AppDefined,
1289 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1290 : "their end. "
1291 : "See GDALWarpKernel class definition. If this condition is "
1292 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1293 : WARP_EXTRA_ELTS);
1294 0 : return CE_Failure;
1295 : }
1296 :
1297 3134 : dfXFilter = anGWKFilterRadius[eResample];
1298 3134 : dfYFilter = anGWKFilterRadius[eResample];
1299 :
1300 3134 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1301 2581 : : static_cast<int>(dfXFilter);
1302 3134 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1303 2585 : : static_cast<int>(dfYFilter);
1304 :
1305 : // Filter window offset depends on the parity of the kernel radius.
1306 3134 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1307 3134 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1308 :
1309 3134 : bApplyVerticalShift =
1310 3134 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1311 3134 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1312 3134 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1313 :
1314 : /* -------------------------------------------------------------------- */
1315 : /* Set up resampling functions. */
1316 : /* -------------------------------------------------------------------- */
1317 3134 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1318 12 : return GWKGeneralCase(this);
1319 :
1320 3122 : const bool bNoMasksOrDstDensityOnly =
1321 3115 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1322 6237 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1323 :
1324 3122 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
1325 : bNoMasksOrDstDensityOnly)
1326 953 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1327 :
1328 2169 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
1329 : bNoMasksOrDstDensityOnly)
1330 128 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1331 :
1332 2041 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
1333 : bNoMasksOrDstDensityOnly)
1334 850 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1335 :
1336 1191 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
1337 : bNoMasksOrDstDensityOnly)
1338 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1339 :
1340 1179 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
1341 360 : return GWKNearestByte(this);
1342 :
1343 819 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1344 168 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1345 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1346 :
1347 805 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1348 : bNoMasksOrDstDensityOnly)
1349 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1350 :
1351 800 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1352 : bNoMasksOrDstDensityOnly)
1353 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1354 :
1355 794 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1356 : bNoMasksOrDstDensityOnly)
1357 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1358 :
1359 789 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1360 : bNoMasksOrDstDensityOnly)
1361 14 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1362 :
1363 775 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1364 : bNoMasksOrDstDensityOnly)
1365 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1366 :
1367 770 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1368 : bNoMasksOrDstDensityOnly)
1369 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1370 :
1371 764 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1372 48 : return GWKNearestShort(this);
1373 :
1374 716 : if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1375 10 : return GWKNearestUnsignedShort(this);
1376 :
1377 706 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1378 : bNoMasksOrDstDensityOnly)
1379 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1380 :
1381 695 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1382 50 : return GWKNearestFloat(this);
1383 :
1384 645 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1385 : bNoMasksOrDstDensityOnly)
1386 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1387 :
1388 641 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1389 : bNoMasksOrDstDensityOnly)
1390 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1391 :
1392 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1393 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1394 : bNoMasksOrDstDensityOnly)
1395 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1396 :
1397 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1398 : bNoMasksOrDstDensityOnly)
1399 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1400 : #endif
1401 :
1402 632 : if (eResample == GRA_Average)
1403 77 : return GWKAverageOrMode(this);
1404 :
1405 555 : if (eResample == GRA_RMS)
1406 9 : return GWKAverageOrMode(this);
1407 :
1408 546 : if (eResample == GRA_Mode)
1409 45 : return GWKAverageOrMode(this);
1410 :
1411 501 : if (eResample == GRA_Max)
1412 6 : return GWKAverageOrMode(this);
1413 :
1414 495 : if (eResample == GRA_Min)
1415 5 : return GWKAverageOrMode(this);
1416 :
1417 490 : if (eResample == GRA_Med)
1418 6 : return GWKAverageOrMode(this);
1419 :
1420 484 : if (eResample == GRA_Q1)
1421 10 : return GWKAverageOrMode(this);
1422 :
1423 474 : if (eResample == GRA_Q3)
1424 5 : return GWKAverageOrMode(this);
1425 :
1426 469 : if (eResample == GRA_Sum)
1427 19 : return GWKSumPreserving(this);
1428 :
1429 450 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1430 : {
1431 223 : return GWKRealCase(this);
1432 : }
1433 :
1434 227 : return GWKGeneralCase(this);
1435 : }
1436 :
1437 : /************************************************************************/
1438 : /* Validate() */
1439 : /************************************************************************/
1440 :
1441 : /**
1442 : * \fn CPLErr GDALWarpKernel::Validate()
1443 : *
1444 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1445 : * (and return CE_Failure) if the configuration is considered to be
1446 : * invalid for some reason.
1447 : *
1448 : * This method will also do some standard defaulting such as setting
1449 : * pfnProgress to GDALDummyProgress() if it is NULL.
1450 : *
1451 : * @return CE_None on success or CE_Failure if an error is detected.
1452 : */
1453 :
1454 3735 : CPLErr GDALWarpKernel::Validate()
1455 :
1456 : {
1457 3735 : if (static_cast<size_t>(eResample) >=
1458 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1459 : {
1460 0 : CPLError(CE_Failure, CPLE_AppDefined,
1461 : "Unsupported resampling method %d.",
1462 0 : static_cast<int>(eResample));
1463 0 : return CE_Failure;
1464 : }
1465 :
1466 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1467 : // be ignored as contributing source pixels during resampling. Only taken into account by
1468 : // Average currently
1469 : const char *pszExcludedValues =
1470 3735 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1471 3735 : if (pszExcludedValues)
1472 : {
1473 : const CPLStringList aosTokens(
1474 14 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1475 14 : if ((aosTokens.size() % nBands) != 0)
1476 : {
1477 1 : CPLError(CE_Failure, CPLE_AppDefined,
1478 : "EXCLUDED_VALUES should contain one or several tuples of "
1479 : "%d values formatted like <R>,<G>,<B> or "
1480 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1481 : "tuples",
1482 : nBands);
1483 1 : return CE_Failure;
1484 : }
1485 26 : std::vector<double> adfTuple;
1486 52 : for (int i = 0; i < aosTokens.size(); ++i)
1487 : {
1488 39 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1489 39 : if (((i + 1) % nBands) == 0)
1490 : {
1491 13 : m_aadfExcludedValues.push_back(adfTuple);
1492 13 : adfTuple.clear();
1493 : }
1494 : }
1495 : }
1496 :
1497 3734 : return CE_None;
1498 : }
1499 :
1500 : /************************************************************************/
1501 : /* GWKOverlayDensity() */
1502 : /* */
1503 : /* Compute the final density for the destination pixel. This */
1504 : /* is a function of the overlay density (passed in) and the */
1505 : /* original density. */
1506 : /************************************************************************/
1507 :
1508 14990200 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1509 : double dfDensity)
1510 : {
1511 14990200 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1512 13024100 : return;
1513 :
1514 1966070 : poWK->pafDstDensity[iDstOffset] =
1515 1966070 : 1.0f -
1516 1966070 : (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1517 : }
1518 :
1519 : /************************************************************************/
1520 : /* GWKRoundValueT() */
1521 : /************************************************************************/
1522 :
1523 : template <class T, class U, bool is_signed> struct sGWKRoundValueT
1524 : {
1525 : static T eval(U);
1526 : };
1527 :
1528 : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
1529 : {
1530 791525 : static T eval(U value)
1531 : {
1532 791525 : return static_cast<T>(floor(value + U(0.5)));
1533 : }
1534 : };
1535 :
1536 : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
1537 : {
1538 152028497 : static T eval(U value)
1539 : {
1540 152028497 : return static_cast<T>(value + U(0.5));
1541 : }
1542 : };
1543 :
1544 152820022 : template <class T, class U> static T GWKRoundValueT(U value)
1545 : {
1546 152820022 : return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1547 : }
1548 :
1549 268974 : template <> float GWKRoundValueT<float, double>(double value)
1550 : {
1551 268974 : return static_cast<float>(value);
1552 : }
1553 :
1554 : #ifdef notused
1555 : template <> double GWKRoundValueT<double, double>(double value)
1556 : {
1557 : return value;
1558 : }
1559 : #endif
1560 :
1561 : /************************************************************************/
1562 : /* GWKClampValueT() */
1563 : /************************************************************************/
1564 :
1565 145649582 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
1566 : {
1567 145649582 : if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
1568 568589 : return cpl::NumericLimits<T>::min();
1569 145080984 : else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
1570 773151 : return cpl::NumericLimits<T>::max();
1571 : else
1572 144308044 : return GWKRoundValueT<T, U>(value);
1573 : }
1574 :
1575 718914 : template <> float GWKClampValueT<float, double>(double dfValue)
1576 : {
1577 718914 : return static_cast<float>(dfValue);
1578 : }
1579 :
1580 : #ifdef notused
1581 : template <> double GWKClampValueT<double, double>(double dfValue)
1582 : {
1583 : return dfValue;
1584 : }
1585 : #endif
1586 :
1587 : /************************************************************************/
1588 : /* AvoidNoData() */
1589 : /************************************************************************/
1590 :
1591 1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
1592 : {
1593 : if constexpr (cpl::NumericLimits<T>::is_integer)
1594 : {
1595 1027 : if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
1596 : {
1597 515 : pDst[iDstOffset] =
1598 515 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1599 : }
1600 : else
1601 512 : pDst[iDstOffset]--;
1602 : }
1603 : else
1604 : {
1605 256 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1606 : {
1607 : using std::nextafter;
1608 0 : pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
1609 : }
1610 : else
1611 : {
1612 : using std::nextafter;
1613 256 : pDst[iDstOffset] =
1614 256 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1615 : }
1616 : }
1617 1283 : }
1618 :
1619 : /************************************************************************/
1620 : /* AvoidNoData() */
1621 : /************************************************************************/
1622 :
1623 : template <class T>
1624 18505930 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1625 : GPtrDiff_t iDstOffset)
1626 : {
1627 18505930 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1628 18505930 : T *pDst = reinterpret_cast<T *>(pabyDst);
1629 :
1630 18505930 : if (poWK->padfDstNoDataReal != nullptr &&
1631 11380638 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1632 : {
1633 640 : AvoidNoData(pDst, iDstOffset);
1634 :
1635 640 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1636 : {
1637 40 : const_cast<GDALWarpKernel *>(poWK)
1638 : ->bWarnedAboutDstNoDataReplacement = true;
1639 40 : CPLError(CE_Warning, CPLE_AppDefined,
1640 : "Value %g in the source dataset has been changed to %g "
1641 : "in the destination dataset to avoid being treated as "
1642 : "NoData. To avoid this, select a different NoData value "
1643 : "for the destination dataset.",
1644 40 : poWK->padfDstNoDataReal[iBand],
1645 40 : static_cast<double>(pDst[iDstOffset]));
1646 : }
1647 : }
1648 18505930 : }
1649 :
1650 : /************************************************************************/
1651 : /* GWKAvoidNoDataMultiBand() */
1652 : /************************************************************************/
1653 :
1654 : template <class T>
1655 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1656 : GPtrDiff_t iDstOffset)
1657 : {
1658 524573 : T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
1659 524573 : if (poWK->padfDstNoDataReal != nullptr)
1660 : {
1661 208615 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1662 : {
1663 208294 : if (poWK->padfDstNoDataReal[iBand] !=
1664 208294 : static_cast<double>(ppDst[iBand][iDstOffset]))
1665 205830 : return;
1666 : }
1667 964 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1668 : {
1669 643 : AvoidNoData(ppDst[iBand], iDstOffset);
1670 : }
1671 :
1672 321 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1673 : {
1674 21 : const_cast<GDALWarpKernel *>(poWK)
1675 : ->bWarnedAboutDstNoDataReplacement = true;
1676 42 : std::string valueSrc, valueDst;
1677 64 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1678 : {
1679 43 : if (!valueSrc.empty())
1680 : {
1681 22 : valueSrc += ',';
1682 22 : valueDst += ',';
1683 : }
1684 43 : valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
1685 43 : valueDst += CPLSPrintf(
1686 43 : "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
1687 : }
1688 21 : CPLError(CE_Warning, CPLE_AppDefined,
1689 : "Value %s in the source dataset has been changed to %s "
1690 : "in the destination dataset to avoid being treated as "
1691 : "NoData. To avoid this, select a different NoData value "
1692 : "for the destination dataset.",
1693 : valueSrc.c_str(), valueDst.c_str());
1694 : }
1695 : }
1696 : }
1697 :
1698 : /************************************************************************/
1699 : /* GWKAvoidNoDataMultiBand() */
1700 : /************************************************************************/
1701 :
1702 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1703 : GPtrDiff_t iDstOffset)
1704 : {
1705 524573 : switch (poWK->eWorkingDataType)
1706 : {
1707 523997 : case GDT_UInt8:
1708 523997 : GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
1709 523997 : break;
1710 :
1711 0 : case GDT_Int8:
1712 0 : GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
1713 0 : break;
1714 :
1715 128 : case GDT_Int16:
1716 128 : GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
1717 128 : break;
1718 :
1719 64 : case GDT_UInt16:
1720 64 : GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
1721 64 : break;
1722 :
1723 64 : case GDT_Int32:
1724 64 : GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
1725 64 : break;
1726 :
1727 64 : case GDT_UInt32:
1728 64 : GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
1729 64 : break;
1730 :
1731 64 : case GDT_Int64:
1732 64 : GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
1733 64 : break;
1734 :
1735 64 : case GDT_UInt64:
1736 64 : GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
1737 64 : break;
1738 :
1739 0 : case GDT_Float16:
1740 0 : GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
1741 0 : break;
1742 :
1743 64 : case GDT_Float32:
1744 64 : GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
1745 64 : break;
1746 :
1747 64 : case GDT_Float64:
1748 64 : GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
1749 64 : break;
1750 :
1751 0 : case GDT_CInt16:
1752 : case GDT_CInt32:
1753 : case GDT_CFloat16:
1754 : case GDT_CFloat32:
1755 : case GDT_CFloat64:
1756 : case GDT_Unknown:
1757 : case GDT_TypeCount:
1758 0 : break;
1759 : }
1760 524573 : }
1761 :
1762 : /************************************************************************/
1763 : /* GWKSetPixelValueRealT() */
1764 : /************************************************************************/
1765 :
1766 : template <class T>
1767 14953877 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1768 : GPtrDiff_t iDstOffset, double dfDensity,
1769 : T value, bool bAvoidNoDataSingleBand)
1770 : {
1771 14953877 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1772 :
1773 : /* -------------------------------------------------------------------- */
1774 : /* If the source density is less than 100% we need to fetch the */
1775 : /* existing destination value, and mix it with the source to */
1776 : /* get the new "to apply" value. Also compute composite */
1777 : /* density. */
1778 : /* */
1779 : /* We avoid mixing if density is very near one or risk mixing */
1780 : /* in very extreme nodata values and causing odd results (#1610) */
1781 : /* -------------------------------------------------------------------- */
1782 14953877 : if (dfDensity < 0.9999)
1783 : {
1784 945508 : if (dfDensity < 0.0001)
1785 0 : return true;
1786 :
1787 945508 : double dfDstDensity = 1.0;
1788 :
1789 945508 : if (poWK->pafDstDensity != nullptr)
1790 944036 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1791 1472 : else if (poWK->panDstValid != nullptr &&
1792 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1793 0 : dfDstDensity = 0.0;
1794 :
1795 : // It seems like we also ought to be testing panDstValid[] here!
1796 :
1797 945508 : const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1798 :
1799 : // The destination density is really only relative to the portion
1800 : // not occluded by the overlay.
1801 945508 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1802 :
1803 945508 : const double dfReal =
1804 945508 : (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1805 945508 : (dfDensity + dfDstInfluence);
1806 :
1807 : /* --------------------------------------------------------------------
1808 : */
1809 : /* Actually apply the destination value. */
1810 : /* */
1811 : /* Avoid using the destination nodata value for integer datatypes
1812 : */
1813 : /* if by chance it is equal to the computed pixel value. */
1814 : /* --------------------------------------------------------------------
1815 : */
1816 945508 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1817 : }
1818 : else
1819 : {
1820 14008336 : pDst[iDstOffset] = value;
1821 : }
1822 :
1823 14953877 : if (bAvoidNoDataSingleBand)
1824 13681221 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1825 :
1826 14953877 : return true;
1827 : }
1828 :
1829 : /************************************************************************/
1830 : /* ClampRoundAndAvoidNoData() */
1831 : /************************************************************************/
1832 :
1833 : template <class T>
1834 5125155 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1835 : GPtrDiff_t iDstOffset, double dfReal,
1836 : bool bAvoidNoDataSingleBand)
1837 : {
1838 5125155 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1839 5125155 : T *pDst = reinterpret_cast<T *>(pabyDst);
1840 :
1841 : if constexpr (cpl::NumericLimits<T>::is_integer)
1842 : {
1843 : using std::floor;
1844 4628025 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1845 5298 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1846 4622725 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1847 23635 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1848 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1849 10410 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1850 : else
1851 4588685 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1852 : }
1853 : else
1854 : {
1855 497130 : pDst[iDstOffset] = static_cast<T>(dfReal);
1856 : }
1857 :
1858 5125155 : if (bAvoidNoDataSingleBand)
1859 4824749 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1860 5125155 : }
1861 :
1862 : /************************************************************************/
1863 : /* GWKSetPixelValue() */
1864 : /************************************************************************/
1865 :
1866 4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1867 : GPtrDiff_t iDstOffset, double dfDensity,
1868 : double dfReal, double dfImag,
1869 : bool bAvoidNoDataSingleBand)
1870 :
1871 : {
1872 4012410 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1873 :
1874 : /* -------------------------------------------------------------------- */
1875 : /* If the source density is less than 100% we need to fetch the */
1876 : /* existing destination value, and mix it with the source to */
1877 : /* get the new "to apply" value. Also compute composite */
1878 : /* density. */
1879 : /* */
1880 : /* We avoid mixing if density is very near one or risk mixing */
1881 : /* in very extreme nodata values and causing odd results (#1610) */
1882 : /* -------------------------------------------------------------------- */
1883 4012410 : if (dfDensity < 0.9999)
1884 : {
1885 800 : if (dfDensity < 0.0001)
1886 0 : return true;
1887 :
1888 800 : double dfDstDensity = 1.0;
1889 800 : if (poWK->pafDstDensity != nullptr)
1890 800 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1891 0 : else if (poWK->panDstValid != nullptr &&
1892 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1893 0 : dfDstDensity = 0.0;
1894 :
1895 800 : double dfDstReal = 0.0;
1896 800 : double dfDstImag = 0.0;
1897 : // It seems like we also ought to be testing panDstValid[] here!
1898 :
1899 : // TODO(schwehr): Factor out this repreated type of set.
1900 800 : switch (poWK->eWorkingDataType)
1901 : {
1902 0 : case GDT_UInt8:
1903 0 : dfDstReal = pabyDst[iDstOffset];
1904 0 : dfDstImag = 0.0;
1905 0 : break;
1906 :
1907 0 : case GDT_Int8:
1908 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1909 0 : dfDstImag = 0.0;
1910 0 : break;
1911 :
1912 400 : case GDT_Int16:
1913 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1914 400 : dfDstImag = 0.0;
1915 400 : break;
1916 :
1917 400 : case GDT_UInt16:
1918 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1919 400 : dfDstImag = 0.0;
1920 400 : break;
1921 :
1922 0 : case GDT_Int32:
1923 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1924 0 : dfDstImag = 0.0;
1925 0 : break;
1926 :
1927 0 : case GDT_UInt32:
1928 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1929 0 : dfDstImag = 0.0;
1930 0 : break;
1931 :
1932 0 : case GDT_Int64:
1933 0 : dfDstReal = static_cast<double>(
1934 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1935 0 : dfDstImag = 0.0;
1936 0 : break;
1937 :
1938 0 : case GDT_UInt64:
1939 0 : dfDstReal = static_cast<double>(
1940 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1941 0 : dfDstImag = 0.0;
1942 0 : break;
1943 :
1944 0 : case GDT_Float16:
1945 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1946 0 : dfDstImag = 0.0;
1947 0 : break;
1948 :
1949 0 : case GDT_Float32:
1950 0 : dfDstReal =
1951 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1952 0 : dfDstImag = 0.0;
1953 0 : break;
1954 :
1955 0 : case GDT_Float64:
1956 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1957 0 : dfDstImag = 0.0;
1958 0 : break;
1959 :
1960 0 : case GDT_CInt16:
1961 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1962 0 : dfDstImag =
1963 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1964 0 : break;
1965 :
1966 0 : case GDT_CInt32:
1967 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1968 0 : dfDstImag =
1969 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1970 0 : break;
1971 :
1972 0 : case GDT_CFloat16:
1973 : dfDstReal =
1974 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1975 : dfDstImag =
1976 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1977 0 : break;
1978 :
1979 0 : case GDT_CFloat32:
1980 0 : dfDstReal =
1981 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
1982 0 : dfDstImag = double(
1983 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
1984 0 : break;
1985 :
1986 0 : case GDT_CFloat64:
1987 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1988 0 : dfDstImag =
1989 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1990 0 : break;
1991 :
1992 0 : case GDT_Unknown:
1993 : case GDT_TypeCount:
1994 0 : CPLAssert(false);
1995 : return false;
1996 : }
1997 :
1998 : // The destination density is really only relative to the portion
1999 : // not occluded by the overlay.
2000 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2001 :
2002 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2003 800 : (dfDensity + dfDstInfluence);
2004 :
2005 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
2006 800 : (dfDensity + dfDstInfluence);
2007 : }
2008 :
2009 : /* -------------------------------------------------------------------- */
2010 : /* Actually apply the destination value. */
2011 : /* */
2012 : /* Avoid using the destination nodata value for integer datatypes */
2013 : /* if by chance it is equal to the computed pixel value. */
2014 : /* -------------------------------------------------------------------- */
2015 :
2016 4012410 : switch (poWK->eWorkingDataType)
2017 : {
2018 3290010 : case GDT_UInt8:
2019 3290010 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2020 : bAvoidNoDataSingleBand);
2021 3290010 : break;
2022 :
2023 0 : case GDT_Int8:
2024 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2025 : bAvoidNoDataSingleBand);
2026 0 : break;
2027 :
2028 7472 : case GDT_Int16:
2029 7472 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2030 : bAvoidNoDataSingleBand);
2031 7472 : break;
2032 :
2033 464 : case GDT_UInt16:
2034 464 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2035 : bAvoidNoDataSingleBand);
2036 464 : break;
2037 :
2038 63 : case GDT_UInt32:
2039 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2040 : bAvoidNoDataSingleBand);
2041 63 : break;
2042 :
2043 63 : case GDT_Int32:
2044 63 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2045 : bAvoidNoDataSingleBand);
2046 63 : break;
2047 :
2048 0 : case GDT_UInt64:
2049 0 : ClampRoundAndAvoidNoData<std::uint64_t>(
2050 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2051 0 : break;
2052 :
2053 0 : case GDT_Int64:
2054 0 : ClampRoundAndAvoidNoData<std::int64_t>(
2055 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2056 0 : break;
2057 :
2058 0 : case GDT_Float16:
2059 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2060 : bAvoidNoDataSingleBand);
2061 0 : break;
2062 :
2063 478957 : case GDT_Float32:
2064 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2065 : bAvoidNoDataSingleBand);
2066 478957 : break;
2067 :
2068 149 : case GDT_Float64:
2069 149 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2070 : bAvoidNoDataSingleBand);
2071 149 : break;
2072 :
2073 234079 : case GDT_CInt16:
2074 : {
2075 : typedef GInt16 T;
2076 234079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2077 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2078 0 : cpl::NumericLimits<T>::min();
2079 234079 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2080 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2081 0 : cpl::NumericLimits<T>::max();
2082 : else
2083 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2084 234079 : static_cast<T>(floor(dfReal + 0.5));
2085 234079 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2086 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2087 0 : cpl::NumericLimits<T>::min();
2088 234079 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2089 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2090 0 : cpl::NumericLimits<T>::max();
2091 : else
2092 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2093 234079 : static_cast<T>(floor(dfImag + 0.5));
2094 234079 : break;
2095 : }
2096 :
2097 379 : case GDT_CInt32:
2098 : {
2099 : typedef GInt32 T;
2100 379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2101 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2102 0 : cpl::NumericLimits<T>::min();
2103 379 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2104 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2105 0 : cpl::NumericLimits<T>::max();
2106 : else
2107 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2108 379 : static_cast<T>(floor(dfReal + 0.5));
2109 379 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2110 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2111 0 : cpl::NumericLimits<T>::min();
2112 379 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2113 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2114 0 : cpl::NumericLimits<T>::max();
2115 : else
2116 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2117 379 : static_cast<T>(floor(dfImag + 0.5));
2118 379 : break;
2119 : }
2120 :
2121 0 : case GDT_CFloat16:
2122 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
2123 0 : static_cast<GFloat16>(dfReal);
2124 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
2125 0 : static_cast<GFloat16>(dfImag);
2126 0 : break;
2127 :
2128 394 : case GDT_CFloat32:
2129 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
2130 394 : static_cast<float>(dfReal);
2131 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
2132 394 : static_cast<float>(dfImag);
2133 394 : break;
2134 :
2135 380 : case GDT_CFloat64:
2136 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
2137 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
2138 380 : break;
2139 :
2140 0 : case GDT_Unknown:
2141 : case GDT_TypeCount:
2142 0 : return false;
2143 : }
2144 :
2145 4012410 : return true;
2146 : }
2147 :
2148 : /************************************************************************/
2149 : /* GWKSetPixelValueReal() */
2150 : /************************************************************************/
2151 :
2152 1347980 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2153 : GPtrDiff_t iDstOffset, double dfDensity,
2154 : double dfReal, bool bAvoidNoDataSingleBand)
2155 :
2156 : {
2157 1347980 : GByte *pabyDst = poWK->papabyDstImage[iBand];
2158 :
2159 : /* -------------------------------------------------------------------- */
2160 : /* If the source density is less than 100% we need to fetch the */
2161 : /* existing destination value, and mix it with the source to */
2162 : /* get the new "to apply" value. Also compute composite */
2163 : /* density. */
2164 : /* */
2165 : /* We avoid mixing if density is very near one or risk mixing */
2166 : /* in very extreme nodata values and causing odd results (#1610) */
2167 : /* -------------------------------------------------------------------- */
2168 1347980 : if (dfDensity < 0.9999)
2169 : {
2170 600 : if (dfDensity < 0.0001)
2171 0 : return true;
2172 :
2173 600 : double dfDstReal = 0.0;
2174 600 : double dfDstDensity = 1.0;
2175 :
2176 600 : if (poWK->pafDstDensity != nullptr)
2177 600 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
2178 0 : else if (poWK->panDstValid != nullptr &&
2179 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
2180 0 : dfDstDensity = 0.0;
2181 :
2182 : // It seems like we also ought to be testing panDstValid[] here!
2183 :
2184 600 : switch (poWK->eWorkingDataType)
2185 : {
2186 0 : case GDT_UInt8:
2187 0 : dfDstReal = pabyDst[iDstOffset];
2188 0 : break;
2189 :
2190 0 : case GDT_Int8:
2191 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
2192 0 : break;
2193 :
2194 300 : case GDT_Int16:
2195 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
2196 300 : break;
2197 :
2198 300 : case GDT_UInt16:
2199 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
2200 300 : break;
2201 :
2202 0 : case GDT_Int32:
2203 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
2204 0 : break;
2205 :
2206 0 : case GDT_UInt32:
2207 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
2208 0 : break;
2209 :
2210 0 : case GDT_Int64:
2211 0 : dfDstReal = static_cast<double>(
2212 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
2213 0 : break;
2214 :
2215 0 : case GDT_UInt64:
2216 0 : dfDstReal = static_cast<double>(
2217 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
2218 0 : break;
2219 :
2220 0 : case GDT_Float16:
2221 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
2222 0 : break;
2223 :
2224 0 : case GDT_Float32:
2225 0 : dfDstReal =
2226 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
2227 0 : break;
2228 :
2229 0 : case GDT_Float64:
2230 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
2231 0 : break;
2232 :
2233 0 : case GDT_CInt16:
2234 : case GDT_CInt32:
2235 : case GDT_CFloat16:
2236 : case GDT_CFloat32:
2237 : case GDT_CFloat64:
2238 : case GDT_Unknown:
2239 : case GDT_TypeCount:
2240 0 : CPLAssert(false);
2241 : return false;
2242 : }
2243 :
2244 : // The destination density is really only relative to the portion
2245 : // not occluded by the overlay.
2246 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2247 :
2248 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2249 600 : (dfDensity + dfDstInfluence);
2250 : }
2251 :
2252 : /* -------------------------------------------------------------------- */
2253 : /* Actually apply the destination value. */
2254 : /* */
2255 : /* Avoid using the destination nodata value for integer datatypes */
2256 : /* if by chance it is equal to the computed pixel value. */
2257 : /* -------------------------------------------------------------------- */
2258 :
2259 1347980 : switch (poWK->eWorkingDataType)
2260 : {
2261 1325840 : case GDT_UInt8:
2262 1325840 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2263 : bAvoidNoDataSingleBand);
2264 1325840 : break;
2265 :
2266 0 : case GDT_Int8:
2267 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2268 : bAvoidNoDataSingleBand);
2269 0 : break;
2270 :
2271 1309 : case GDT_Int16:
2272 1309 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2273 : bAvoidNoDataSingleBand);
2274 1309 : break;
2275 :
2276 475 : case GDT_UInt16:
2277 475 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2278 : bAvoidNoDataSingleBand);
2279 475 : break;
2280 :
2281 539 : case GDT_UInt32:
2282 539 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2283 : bAvoidNoDataSingleBand);
2284 539 : break;
2285 :
2286 1342 : case GDT_Int32:
2287 1342 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2288 : bAvoidNoDataSingleBand);
2289 1342 : break;
2290 :
2291 224 : case GDT_UInt64:
2292 224 : ClampRoundAndAvoidNoData<std::uint64_t>(
2293 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2294 224 : break;
2295 :
2296 224 : case GDT_Int64:
2297 224 : ClampRoundAndAvoidNoData<std::int64_t>(
2298 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2299 224 : break;
2300 :
2301 0 : case GDT_Float16:
2302 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2303 : bAvoidNoDataSingleBand);
2304 0 : break;
2305 :
2306 3538 : case GDT_Float32:
2307 3538 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2308 : bAvoidNoDataSingleBand);
2309 3538 : break;
2310 :
2311 14486 : case GDT_Float64:
2312 14486 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2313 : bAvoidNoDataSingleBand);
2314 14486 : break;
2315 :
2316 0 : case GDT_CInt16:
2317 : case GDT_CInt32:
2318 : case GDT_CFloat16:
2319 : case GDT_CFloat32:
2320 : case GDT_CFloat64:
2321 0 : return false;
2322 :
2323 0 : case GDT_Unknown:
2324 : case GDT_TypeCount:
2325 0 : CPLAssert(false);
2326 : return false;
2327 : }
2328 :
2329 1347980 : return true;
2330 : }
2331 :
2332 : /************************************************************************/
2333 : /* GWKGetPixelValue() */
2334 : /************************************************************************/
2335 :
2336 : /* It is assumed that panUnifiedSrcValid has been checked before */
2337 :
2338 30268000 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2339 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2340 : double *pdfReal, double *pdfImag)
2341 :
2342 : {
2343 30268000 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2344 :
2345 60536000 : if (poWK->papanBandSrcValid != nullptr &&
2346 30268000 : poWK->papanBandSrcValid[iBand] != nullptr &&
2347 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2348 : {
2349 0 : *pdfDensity = 0.0;
2350 0 : return false;
2351 : }
2352 :
2353 30268000 : *pdfReal = 0.0;
2354 30268000 : *pdfImag = 0.0;
2355 :
2356 : // TODO(schwehr): Fix casting.
2357 30268000 : switch (poWK->eWorkingDataType)
2358 : {
2359 29191100 : case GDT_UInt8:
2360 29191100 : *pdfReal = pabySrc[iSrcOffset];
2361 29191100 : *pdfImag = 0.0;
2362 29191100 : break;
2363 :
2364 0 : case GDT_Int8:
2365 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2366 0 : *pdfImag = 0.0;
2367 0 : break;
2368 :
2369 28232 : case GDT_Int16:
2370 28232 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2371 28232 : *pdfImag = 0.0;
2372 28232 : break;
2373 :
2374 166 : case GDT_UInt16:
2375 166 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2376 166 : *pdfImag = 0.0;
2377 166 : break;
2378 :
2379 63 : case GDT_Int32:
2380 63 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2381 63 : *pdfImag = 0.0;
2382 63 : break;
2383 :
2384 63 : case GDT_UInt32:
2385 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2386 63 : *pdfImag = 0.0;
2387 63 : break;
2388 :
2389 0 : case GDT_Int64:
2390 0 : *pdfReal = static_cast<double>(
2391 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2392 0 : *pdfImag = 0.0;
2393 0 : break;
2394 :
2395 0 : case GDT_UInt64:
2396 0 : *pdfReal = static_cast<double>(
2397 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2398 0 : *pdfImag = 0.0;
2399 0 : break;
2400 :
2401 0 : case GDT_Float16:
2402 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2403 0 : *pdfImag = 0.0;
2404 0 : break;
2405 :
2406 1047220 : case GDT_Float32:
2407 1047220 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2408 1047220 : *pdfImag = 0.0;
2409 1047220 : break;
2410 :
2411 587 : case GDT_Float64:
2412 587 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2413 587 : *pdfImag = 0.0;
2414 587 : break;
2415 :
2416 133 : case GDT_CInt16:
2417 133 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2418 133 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2419 133 : break;
2420 :
2421 133 : case GDT_CInt32:
2422 133 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2423 133 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2424 133 : break;
2425 :
2426 0 : case GDT_CFloat16:
2427 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2428 0 : *pdfImag =
2429 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2430 0 : break;
2431 :
2432 194 : case GDT_CFloat32:
2433 194 : *pdfReal =
2434 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2435 194 : *pdfImag =
2436 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2437 194 : break;
2438 :
2439 138 : case GDT_CFloat64:
2440 138 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2441 138 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2442 138 : break;
2443 :
2444 0 : case GDT_Unknown:
2445 : case GDT_TypeCount:
2446 0 : CPLAssert(false);
2447 : *pdfDensity = 0.0;
2448 : return false;
2449 : }
2450 :
2451 30268000 : if (poWK->pafUnifiedSrcDensity != nullptr)
2452 4194800 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2453 : else
2454 26073200 : *pdfDensity = 1.0;
2455 :
2456 30268000 : return *pdfDensity != 0.0;
2457 : }
2458 :
2459 : /************************************************************************/
2460 : /* GWKGetPixelValueReal() */
2461 : /************************************************************************/
2462 :
2463 15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2464 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2465 : double *pdfReal)
2466 :
2467 : {
2468 15516 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2469 :
2470 31034 : if (poWK->papanBandSrcValid != nullptr &&
2471 15518 : poWK->papanBandSrcValid[iBand] != nullptr &&
2472 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2473 : {
2474 0 : *pdfDensity = 0.0;
2475 0 : return false;
2476 : }
2477 :
2478 15516 : switch (poWK->eWorkingDataType)
2479 : {
2480 1 : case GDT_UInt8:
2481 1 : *pdfReal = pabySrc[iSrcOffset];
2482 1 : break;
2483 :
2484 0 : case GDT_Int8:
2485 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2486 0 : break;
2487 :
2488 1 : case GDT_Int16:
2489 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2490 1 : break;
2491 :
2492 1 : case GDT_UInt16:
2493 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2494 1 : break;
2495 :
2496 982 : case GDT_Int32:
2497 982 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2498 982 : break;
2499 :
2500 179 : case GDT_UInt32:
2501 179 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2502 179 : break;
2503 :
2504 112 : case GDT_Int64:
2505 112 : *pdfReal = static_cast<double>(
2506 112 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2507 112 : break;
2508 :
2509 112 : case GDT_UInt64:
2510 112 : *pdfReal = static_cast<double>(
2511 112 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2512 112 : break;
2513 :
2514 0 : case GDT_Float16:
2515 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2516 0 : break;
2517 :
2518 2 : case GDT_Float32:
2519 2 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2520 2 : break;
2521 :
2522 14126 : case GDT_Float64:
2523 14126 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2524 14126 : break;
2525 :
2526 0 : case GDT_CInt16:
2527 : case GDT_CInt32:
2528 : case GDT_CFloat16:
2529 : case GDT_CFloat32:
2530 : case GDT_CFloat64:
2531 : case GDT_Unknown:
2532 : case GDT_TypeCount:
2533 0 : CPLAssert(false);
2534 : return false;
2535 : }
2536 :
2537 15516 : if (poWK->pafUnifiedSrcDensity != nullptr)
2538 0 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2539 : else
2540 15516 : *pdfDensity = 1.0;
2541 :
2542 15516 : return *pdfDensity != 0.0;
2543 : }
2544 :
2545 : /************************************************************************/
2546 : /* GWKGetPixelRow() */
2547 : /************************************************************************/
2548 :
2549 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2550 : /* data-types. */
2551 :
2552 2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2553 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2554 : double *padfDensity, double adfReal[],
2555 : double *padfImag)
2556 : {
2557 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2558 2369710 : const int nSrcLen = nHalfSrcLen * 2;
2559 2369710 : bool bHasValid = false;
2560 :
2561 2369710 : if (padfDensity != nullptr)
2562 : {
2563 : // Init the density.
2564 3384030 : for (int i = 0; i < nSrcLen; i += 2)
2565 : {
2566 2211910 : padfDensity[i] = 1.0;
2567 2211910 : padfDensity[i + 1] = 1.0;
2568 : }
2569 :
2570 1172120 : if (poWK->panUnifiedSrcValid != nullptr)
2571 : {
2572 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2573 : {
2574 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2575 2067740 : bHasValid = true;
2576 : else
2577 74323 : padfDensity[i] = 0.0;
2578 :
2579 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2580 2068400 : bHasValid = true;
2581 : else
2582 73668 : padfDensity[i + 1] = 0.0;
2583 : }
2584 :
2585 : // Reset or fail as needed.
2586 1139400 : if (bHasValid)
2587 1116590 : bHasValid = false;
2588 : else
2589 22806 : return false;
2590 : }
2591 :
2592 1149320 : if (poWK->papanBandSrcValid != nullptr &&
2593 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2594 : {
2595 0 : for (int i = 0; i < nSrcLen; i += 2)
2596 : {
2597 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2598 0 : bHasValid = true;
2599 : else
2600 0 : padfDensity[i] = 0.0;
2601 :
2602 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2603 0 : iSrcOffset + i + 1))
2604 0 : bHasValid = true;
2605 : else
2606 0 : padfDensity[i + 1] = 0.0;
2607 : }
2608 :
2609 : // Reset or fail as needed.
2610 0 : if (bHasValid)
2611 0 : bHasValid = false;
2612 : else
2613 0 : return false;
2614 : }
2615 : }
2616 :
2617 : // TODO(schwehr): Fix casting.
2618 : // Fetch data.
2619 2346910 : switch (poWK->eWorkingDataType)
2620 : {
2621 1136680 : case GDT_UInt8:
2622 : {
2623 1136680 : GByte *pSrc =
2624 1136680 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2625 1136680 : pSrc += iSrcOffset;
2626 3281570 : for (int i = 0; i < nSrcLen; i += 2)
2627 : {
2628 2144890 : adfReal[i] = pSrc[i];
2629 2144890 : adfReal[i + 1] = pSrc[i + 1];
2630 : }
2631 1136680 : break;
2632 : }
2633 :
2634 0 : case GDT_Int8:
2635 : {
2636 0 : GInt8 *pSrc =
2637 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2638 0 : pSrc += iSrcOffset;
2639 0 : for (int i = 0; i < nSrcLen; i += 2)
2640 : {
2641 0 : adfReal[i] = pSrc[i];
2642 0 : adfReal[i + 1] = pSrc[i + 1];
2643 : }
2644 0 : break;
2645 : }
2646 :
2647 5950 : case GDT_Int16:
2648 : {
2649 5950 : GInt16 *pSrc =
2650 5950 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2651 5950 : pSrc += iSrcOffset;
2652 22164 : for (int i = 0; i < nSrcLen; i += 2)
2653 : {
2654 16214 : adfReal[i] = pSrc[i];
2655 16214 : adfReal[i + 1] = pSrc[i + 1];
2656 : }
2657 5950 : break;
2658 : }
2659 :
2660 4310 : case GDT_UInt16:
2661 : {
2662 4310 : GUInt16 *pSrc =
2663 4310 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2664 4310 : pSrc += iSrcOffset;
2665 18884 : for (int i = 0; i < nSrcLen; i += 2)
2666 : {
2667 14574 : adfReal[i] = pSrc[i];
2668 14574 : adfReal[i + 1] = pSrc[i + 1];
2669 : }
2670 4310 : break;
2671 : }
2672 :
2673 946 : case GDT_Int32:
2674 : {
2675 946 : GInt32 *pSrc =
2676 946 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2677 946 : pSrc += iSrcOffset;
2678 2624 : for (int i = 0; i < nSrcLen; i += 2)
2679 : {
2680 1678 : adfReal[i] = pSrc[i];
2681 1678 : adfReal[i + 1] = pSrc[i + 1];
2682 : }
2683 946 : break;
2684 : }
2685 :
2686 946 : case GDT_UInt32:
2687 : {
2688 946 : GUInt32 *pSrc =
2689 946 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2690 946 : pSrc += iSrcOffset;
2691 2624 : for (int i = 0; i < nSrcLen; i += 2)
2692 : {
2693 1678 : adfReal[i] = pSrc[i];
2694 1678 : adfReal[i + 1] = pSrc[i + 1];
2695 : }
2696 946 : break;
2697 : }
2698 :
2699 196 : case GDT_Int64:
2700 : {
2701 196 : auto pSrc =
2702 196 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2703 196 : pSrc += iSrcOffset;
2704 392 : for (int i = 0; i < nSrcLen; i += 2)
2705 : {
2706 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2707 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2708 : }
2709 196 : break;
2710 : }
2711 :
2712 196 : case GDT_UInt64:
2713 : {
2714 196 : auto pSrc =
2715 196 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2716 196 : pSrc += iSrcOffset;
2717 392 : for (int i = 0; i < nSrcLen; i += 2)
2718 : {
2719 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2720 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2721 : }
2722 196 : break;
2723 : }
2724 :
2725 0 : case GDT_Float16:
2726 : {
2727 0 : GFloat16 *pSrc =
2728 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2729 0 : pSrc += iSrcOffset;
2730 0 : for (int i = 0; i < nSrcLen; i += 2)
2731 : {
2732 0 : adfReal[i] = pSrc[i];
2733 0 : adfReal[i + 1] = pSrc[i + 1];
2734 : }
2735 0 : break;
2736 : }
2737 :
2738 25270 : case GDT_Float32:
2739 : {
2740 25270 : float *pSrc =
2741 25270 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2742 25270 : pSrc += iSrcOffset;
2743 121739 : for (int i = 0; i < nSrcLen; i += 2)
2744 : {
2745 96469 : adfReal[i] = double(pSrc[i]);
2746 96469 : adfReal[i + 1] = double(pSrc[i + 1]);
2747 : }
2748 25270 : break;
2749 : }
2750 :
2751 946 : case GDT_Float64:
2752 : {
2753 946 : double *pSrc =
2754 946 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2755 946 : pSrc += iSrcOffset;
2756 2624 : for (int i = 0; i < nSrcLen; i += 2)
2757 : {
2758 1678 : adfReal[i] = pSrc[i];
2759 1678 : adfReal[i + 1] = pSrc[i + 1];
2760 : }
2761 946 : break;
2762 : }
2763 :
2764 1169220 : case GDT_CInt16:
2765 : {
2766 1169220 : GInt16 *pSrc =
2767 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2768 1169220 : pSrc += 2 * iSrcOffset;
2769 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2770 : {
2771 3506800 : adfReal[i] = pSrc[2 * i];
2772 3506800 : padfImag[i] = pSrc[2 * i + 1];
2773 :
2774 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2775 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2776 : }
2777 1169220 : break;
2778 : }
2779 :
2780 750 : case GDT_CInt32:
2781 : {
2782 750 : GInt32 *pSrc =
2783 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2784 750 : pSrc += 2 * iSrcOffset;
2785 2232 : for (int i = 0; i < nSrcLen; i += 2)
2786 : {
2787 1482 : adfReal[i] = pSrc[2 * i];
2788 1482 : padfImag[i] = pSrc[2 * i + 1];
2789 :
2790 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2791 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2792 : }
2793 750 : break;
2794 : }
2795 :
2796 0 : case GDT_CFloat16:
2797 : {
2798 0 : GFloat16 *pSrc =
2799 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2800 0 : pSrc += 2 * iSrcOffset;
2801 0 : for (int i = 0; i < nSrcLen; i += 2)
2802 : {
2803 0 : adfReal[i] = pSrc[2 * i];
2804 0 : padfImag[i] = pSrc[2 * i + 1];
2805 :
2806 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2807 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2808 : }
2809 0 : break;
2810 : }
2811 :
2812 750 : case GDT_CFloat32:
2813 : {
2814 750 : float *pSrc =
2815 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2816 750 : pSrc += 2 * iSrcOffset;
2817 2232 : for (int i = 0; i < nSrcLen; i += 2)
2818 : {
2819 1482 : adfReal[i] = double(pSrc[2 * i]);
2820 1482 : padfImag[i] = double(pSrc[2 * i + 1]);
2821 :
2822 1482 : adfReal[i + 1] = double(pSrc[2 * i + 2]);
2823 1482 : padfImag[i + 1] = double(pSrc[2 * i + 3]);
2824 : }
2825 750 : break;
2826 : }
2827 :
2828 750 : case GDT_CFloat64:
2829 : {
2830 750 : double *pSrc =
2831 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2832 750 : pSrc += 2 * iSrcOffset;
2833 2232 : for (int i = 0; i < nSrcLen; i += 2)
2834 : {
2835 1482 : adfReal[i] = pSrc[2 * i];
2836 1482 : padfImag[i] = pSrc[2 * i + 1];
2837 :
2838 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2839 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2840 : }
2841 750 : break;
2842 : }
2843 :
2844 0 : case GDT_Unknown:
2845 : case GDT_TypeCount:
2846 0 : CPLAssert(false);
2847 : if (padfDensity)
2848 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2849 : return false;
2850 : }
2851 :
2852 2346910 : if (padfDensity == nullptr)
2853 1197590 : return true;
2854 :
2855 1149320 : if (poWK->pafUnifiedSrcDensity == nullptr)
2856 : {
2857 3256740 : for (int i = 0; i < nSrcLen; i += 2)
2858 : {
2859 : // Take into account earlier calcs.
2860 2127390 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2861 : {
2862 2087480 : padfDensity[i] = 1.0;
2863 2087480 : bHasValid = true;
2864 : }
2865 :
2866 2127390 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2867 : {
2868 2088140 : padfDensity[i + 1] = 1.0;
2869 2088140 : bHasValid = true;
2870 : }
2871 : }
2872 : }
2873 : else
2874 : {
2875 70068 : for (int i = 0; i < nSrcLen; i += 2)
2876 : {
2877 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2878 50103 : padfDensity[i] =
2879 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2880 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2881 49252 : bHasValid = true;
2882 :
2883 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2884 50103 : padfDensity[i + 1] =
2885 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2886 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2887 49170 : bHasValid = true;
2888 : }
2889 : }
2890 :
2891 1149320 : return bHasValid;
2892 : }
2893 :
2894 : /************************************************************************/
2895 : /* GWKGetPixelT() */
2896 : /************************************************************************/
2897 :
2898 : template <class T>
2899 14964159 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2900 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2901 :
2902 : {
2903 14964159 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2904 :
2905 33171143 : if ((poWK->panUnifiedSrcValid != nullptr &&
2906 29928318 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2907 14964159 : (poWK->papanBandSrcValid != nullptr &&
2908 589836 : poWK->papanBandSrcValid[iBand] != nullptr &&
2909 589836 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2910 : {
2911 9 : *pdfDensity = 0.0;
2912 9 : return false;
2913 : }
2914 :
2915 14964159 : *pValue = pSrc[iSrcOffset];
2916 :
2917 14964159 : if (poWK->pafUnifiedSrcDensity == nullptr)
2918 13841766 : *pdfDensity = 1.0;
2919 : else
2920 1122362 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2921 :
2922 14964159 : return *pdfDensity != 0.0;
2923 : }
2924 :
2925 : /************************************************************************/
2926 : /* GWKBilinearResample() */
2927 : /* Set of bilinear interpolators */
2928 : /************************************************************************/
2929 :
2930 77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2931 : double dfSrcX, double dfSrcY,
2932 : double *pdfDensity, double *pdfReal,
2933 : double *pdfImag)
2934 :
2935 : {
2936 : // Save as local variables to avoid following pointers.
2937 77448 : const int nSrcXSize = poWK->nSrcXSize;
2938 77448 : const int nSrcYSize = poWK->nSrcYSize;
2939 :
2940 77448 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2941 77448 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2942 77448 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2943 77448 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2944 77448 : bool bShifted = false;
2945 :
2946 77448 : if (iSrcX == -1)
2947 : {
2948 1534 : iSrcX = 0;
2949 1534 : dfRatioX = 1;
2950 : }
2951 77448 : if (iSrcY == -1)
2952 : {
2953 7734 : iSrcY = 0;
2954 7734 : dfRatioY = 1;
2955 : }
2956 77448 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2957 :
2958 : // Shift so we don't overrun the array.
2959 77448 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2960 77330 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2961 77330 : iSrcOffset + nSrcXSize + 1)
2962 : {
2963 230 : bShifted = true;
2964 230 : --iSrcOffset;
2965 : }
2966 :
2967 77448 : double adfDensity[2] = {0.0, 0.0};
2968 77448 : double adfReal[2] = {0.0, 0.0};
2969 77448 : double adfImag[2] = {0.0, 0.0};
2970 77448 : double dfAccumulatorReal = 0.0;
2971 77448 : double dfAccumulatorImag = 0.0;
2972 77448 : double dfAccumulatorDensity = 0.0;
2973 77448 : double dfAccumulatorDivisor = 0.0;
2974 :
2975 77448 : const GPtrDiff_t nSrcPixels =
2976 77448 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2977 : // Get pixel row.
2978 77448 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2979 154896 : iSrcOffset < nSrcPixels &&
2980 77448 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2981 : adfImag))
2982 : {
2983 71504 : double dfMult1 = dfRatioX * dfRatioY;
2984 71504 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2985 :
2986 : // Shifting corrected.
2987 71504 : if (bShifted)
2988 : {
2989 230 : adfReal[0] = adfReal[1];
2990 230 : adfImag[0] = adfImag[1];
2991 230 : adfDensity[0] = adfDensity[1];
2992 : }
2993 :
2994 : // Upper Left Pixel.
2995 71504 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2996 71504 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
2997 : {
2998 66050 : dfAccumulatorDivisor += dfMult1;
2999 :
3000 66050 : dfAccumulatorReal += adfReal[0] * dfMult1;
3001 66050 : dfAccumulatorImag += adfImag[0] * dfMult1;
3002 66050 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3003 : }
3004 :
3005 : // Upper Right Pixel.
3006 71504 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3007 70609 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3008 : {
3009 65335 : dfAccumulatorDivisor += dfMult2;
3010 :
3011 65335 : dfAccumulatorReal += adfReal[1] * dfMult2;
3012 65335 : dfAccumulatorImag += adfImag[1] * dfMult2;
3013 65335 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3014 : }
3015 : }
3016 :
3017 : // Get pixel row.
3018 77448 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
3019 228032 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
3020 73136 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
3021 : adfReal, adfImag))
3022 : {
3023 67577 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
3024 67577 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3025 :
3026 : // Shifting corrected
3027 67577 : if (bShifted)
3028 : {
3029 112 : adfReal[0] = adfReal[1];
3030 112 : adfImag[0] = adfImag[1];
3031 112 : adfDensity[0] = adfDensity[1];
3032 : }
3033 :
3034 : // Lower Left Pixel
3035 67577 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3036 67577 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3037 : {
3038 62298 : dfAccumulatorDivisor += dfMult1;
3039 :
3040 62298 : dfAccumulatorReal += adfReal[0] * dfMult1;
3041 62298 : dfAccumulatorImag += adfImag[0] * dfMult1;
3042 62298 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3043 : }
3044 :
3045 : // Lower Right Pixel.
3046 67577 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3047 66800 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3048 : {
3049 61823 : dfAccumulatorDivisor += dfMult2;
3050 :
3051 61823 : dfAccumulatorReal += adfReal[1] * dfMult2;
3052 61823 : dfAccumulatorImag += adfImag[1] * dfMult2;
3053 61823 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3054 : }
3055 : }
3056 :
3057 : /* -------------------------------------------------------------------- */
3058 : /* Return result. */
3059 : /* -------------------------------------------------------------------- */
3060 77448 : if (dfAccumulatorDivisor == 1.0)
3061 : {
3062 45929 : *pdfReal = dfAccumulatorReal;
3063 45929 : *pdfImag = dfAccumulatorImag;
3064 45929 : *pdfDensity = dfAccumulatorDensity;
3065 45929 : return false;
3066 : }
3067 31519 : else if (dfAccumulatorDivisor < 0.00001)
3068 : {
3069 0 : *pdfReal = 0.0;
3070 0 : *pdfImag = 0.0;
3071 0 : *pdfDensity = 0.0;
3072 0 : return false;
3073 : }
3074 : else
3075 : {
3076 31519 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
3077 31519 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
3078 31519 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
3079 31519 : return true;
3080 : }
3081 : }
3082 :
3083 : template <class T>
3084 8780972 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3085 : int iBand, double dfSrcX,
3086 : double dfSrcY, T *pValue)
3087 :
3088 : {
3089 :
3090 8780972 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3091 8780972 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3092 8780972 : GPtrDiff_t iSrcOffset =
3093 8780972 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3094 8780972 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
3095 8780972 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
3096 :
3097 8780972 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
3098 :
3099 8780972 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3100 6025939 : iSrcY + 1 < poWK->nSrcYSize)
3101 : {
3102 5834442 : const double dfAccumulator =
3103 5834442 : (double(pSrc[iSrcOffset]) * dfRatioX +
3104 5834442 : double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
3105 : dfRatioY +
3106 5834442 : (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
3107 5834442 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
3108 5834442 : (1.0 - dfRatioX)) *
3109 5834442 : (1.0 - dfRatioY);
3110 :
3111 5834442 : *pValue = GWKRoundValueT<T>(dfAccumulator);
3112 :
3113 5834442 : return true;
3114 : }
3115 :
3116 2946530 : double dfAccumulatorDivisor = 0.0;
3117 2946530 : double dfAccumulator = 0.0;
3118 :
3119 : // Upper Left Pixel.
3120 2946530 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
3121 564876 : iSrcY < poWK->nSrcYSize)
3122 : {
3123 564876 : const double dfMult = dfRatioX * dfRatioY;
3124 :
3125 564876 : dfAccumulatorDivisor += dfMult;
3126 :
3127 564876 : dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
3128 : }
3129 :
3130 : // Upper Right Pixel.
3131 2946530 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3132 2261926 : iSrcY < poWK->nSrcYSize)
3133 : {
3134 2261926 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
3135 :
3136 2261926 : dfAccumulatorDivisor += dfMult;
3137 :
3138 2261926 : dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
3139 : }
3140 :
3141 : // Lower Right Pixel.
3142 2946530 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3143 2512924 : iSrcY + 1 < poWK->nSrcYSize)
3144 : {
3145 2261243 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3146 :
3147 2261243 : dfAccumulatorDivisor += dfMult;
3148 :
3149 2261243 : dfAccumulator +=
3150 2261243 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
3151 : }
3152 :
3153 : // Lower Left Pixel.
3154 2946530 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3155 815601 : iSrcY + 1 < poWK->nSrcYSize)
3156 : {
3157 563917 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
3158 :
3159 563917 : dfAccumulatorDivisor += dfMult;
3160 :
3161 563917 : dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
3162 : }
3163 :
3164 : /* -------------------------------------------------------------------- */
3165 : /* Return result. */
3166 : /* -------------------------------------------------------------------- */
3167 2946530 : double dfValue = 0.0;
3168 :
3169 2946530 : if (dfAccumulatorDivisor < 0.00001)
3170 : {
3171 0 : *pValue = 0;
3172 0 : return false;
3173 : }
3174 2946530 : else if (dfAccumulatorDivisor == 1.0)
3175 : {
3176 22176 : dfValue = dfAccumulator;
3177 : }
3178 : else
3179 : {
3180 2924358 : dfValue = dfAccumulator / dfAccumulatorDivisor;
3181 : }
3182 :
3183 2946530 : *pValue = GWKRoundValueT<T>(dfValue);
3184 :
3185 2946530 : return true;
3186 : }
3187 :
3188 : /************************************************************************/
3189 : /* GWKCubicResample() */
3190 : /* Set of bicubic interpolators using cubic convolution. */
3191 : /************************************************************************/
3192 :
3193 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
3194 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
3195 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
3196 :
// Cubic convolution of four equally spaced samples f0..f3, evaluated between
// f1 and f2. distance1, distance2 and distance3 are the first, second and
// third powers of the fractional distance from f1, supplied by the caller.
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearPart = distance1 * (f2 - f0);
    const T quadraticPart = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicPart = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (linearPart + quadraticPart + cubicPart));
}
3205 :
3206 : /************************************************************************/
3207 : /* GWKCubicComputeWeights() */
3208 : /************************************************************************/
3209 :
3210 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
3211 :
// Compute the four cubic convolution weights for a fractional offset x
// measured from the second of four consecutive samples. coeffs[k] is the
// weight of sample k (x == 0 gives {0, 1, 0, 0}; x == 1 gives {0, 0, 1, 0}).
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    // Shared sub-expressions: x/2, 3x and x^2/2.
    const T xOver2 = T(0.5) * x;
    const T xTimes3 = T(3.0) * x;
    const T xSquaredOver2 = xOver2 * x;

    coeffs[0] = xOver2 * (-1 + x * (2 - x));
    coeffs[1] = 1 + xSquaredOver2 * (-5 + xTimes3);
    coeffs[2] = xOver2 * (1 + x * (4 - xTimes3));
    coeffs[3] = xSquaredOver2 * (-1 + x);
}
3224 :
// Dot product of four double weights (v1) with four samples of type T (v2),
// each sample being widened to double before the multiplication.
template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
{
    // Seed with the first product so the summation order matches
    // ((p0 + p1) + p2) + p3.
    double dfSum = v1[0] * double(v2[0]);
    for (int k = 1; k < 4; ++k)
        dfSum += v1[k] * double(v2[k]);
    return dfSum;
}
3230 :
3231 : #if 0
3232 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
3233 : // instead of 17.
3234 : // TODO(schwehr): Use an inline function.
3235 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
3236 : { \
3237 : const double dfX = dfX_; \
3238 : dfHalfX = 0.5 * dfX; \
3239 : const double dfThreeX = 3.0 * dfX; \
3240 : const double dfXMinus1 = dfX - 1; \
3241 : \
3242 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
3243 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
3244 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
3245 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
3246 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
3247 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
3248 : }
3249 :
3250 : // TODO(schwehr): Use an inline function.
3251 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
3252 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
3253 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
3254 : #endif
3255 :
3256 302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3257 : double dfSrcX, double dfSrcY,
3258 : double *pdfDensity, double *pdfReal,
3259 : double *pdfImag)
3260 :
3261 : {
3262 302045 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3263 302045 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3264 302045 : GPtrDiff_t iSrcOffset =
3265 302045 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3266 302045 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3267 302045 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3268 302045 : double adfDensity[4] = {};
3269 302045 : double adfReal[4] = {};
3270 302045 : double adfImag[4] = {};
3271 :
3272 : // Get the bilinear interpolation at the image borders.
3273 302045 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3274 286140 : iSrcY + 2 >= poWK->nSrcYSize)
3275 24670 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3276 24670 : pdfDensity, pdfReal, pdfImag);
3277 :
3278 277375 : double adfValueDens[4] = {};
3279 277375 : double adfValueReal[4] = {};
3280 277375 : double adfValueImag[4] = {};
3281 :
3282 277375 : double adfCoeffsX[4] = {};
3283 277375 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3284 :
3285 1240570 : for (GPtrDiff_t i = -1; i < 3; i++)
3286 : {
3287 1009640 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3288 998035 : 2, adfDensity, adfReal, adfImag) ||
3289 998035 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3290 980395 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3291 2979770 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3292 972094 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3293 : {
3294 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3295 46449 : pdfDensity, pdfReal, pdfImag);
3296 : }
3297 :
3298 963196 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3299 963196 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3300 963196 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3301 : }
3302 :
3303 : /* -------------------------------------------------------------------- */
3304 : /* For now, if we have any pixels missing in the kernel area, */
3305 : /* we fallback on using bilinear interpolation. Ideally we */
3306 : /* should do "weight adjustment" of our results similarly to */
3307 : /* what is done for the cubic spline and lanc. interpolators. */
3308 : /* -------------------------------------------------------------------- */
3309 :
3310 230926 : double adfCoeffsY[4] = {};
3311 230926 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3312 :
3313 230926 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3314 230926 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3315 230926 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3316 :
3317 230926 : return true;
3318 : }
3319 :
3320 : #ifdef USE_SSE2
3321 :
3322 : /************************************************************************/
3323 : /* XMMLoad4Values() */
3324 : /* */
3325 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3326 : /* m128 register. */
3327 : /************************************************************************/
3328 :
3329 566236000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3330 : {
3331 : unsigned int i;
3332 566236000 : memcpy(&i, ptr, 4);
3333 1132470000 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3334 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3335 : // 32-bit integers.
3336 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3337 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3338 : #else
3339 1132470000 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3340 1132470000 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3341 : #endif
3342 1132470000 : return _mm_cvtepi32_ps(xmm_i);
3343 : }
3344 :
3345 1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3346 : {
3347 : GUInt64 i;
3348 1108340 : memcpy(&i, ptr, 8);
3349 2216690 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3350 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3351 : // 32-bit integers.
3352 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3353 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3354 : #else
3355 2216690 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3356 : #endif
3357 2216690 : return _mm_cvtepi32_ps(xmm_i);
3358 : }
3359 :
3360 : /************************************************************************/
3361 : /* XMMHorizontalAdd() */
3362 : /* */
3363 : /* Return the sum of the 4 floating points of the register. */
3364 : /************************************************************************/
3365 :
3366 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3367 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3368 : {
3369 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3370 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3371 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3372 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3373 : return _mm_cvtss_f32(sums);
3374 : }
3375 : #else
3376 141836000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3377 : {
3378 141836000 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3379 141836000 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3380 141836000 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3381 141836000 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3382 141836000 : return _mm_cvtss_f32(sums);
3383 : }
3384 : #endif
3385 :
3386 : #endif // define USE_SSE2
3387 :
3388 : /************************************************************************/
3389 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3390 : /************************************************************************/
3391 :
// Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
// because there are a few assumptions about those types.
// We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
// perf benefit.
3396 :
3397 : template <class T>
3398 389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3399 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3400 : double *pdfDensity, double *pdfReal)
3401 : {
3402 389755 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3403 389755 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3404 389755 : const GPtrDiff_t iSrcOffset =
3405 389755 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3406 :
3407 : // Get the bilinear interpolation at the image borders.
3408 389755 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3409 387271 : iSrcY + 2 >= poWK->nSrcYSize)
3410 : {
3411 2484 : double adfImagIgnored[4] = {};
3412 2484 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3413 2484 : pdfDensity, pdfReal, adfImagIgnored);
3414 : }
3415 :
3416 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3417 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3418 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3419 :
3420 : // TODO(schwehr): Explain the magic numbers.
3421 : float afTemp[4 + 4 + 4 + 1];
3422 : float *pafAligned =
3423 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3424 : float *pafCoeffs = pafAligned;
3425 : float *pafDensity = pafAligned + 4;
3426 : float *pafValue = pafAligned + 8;
3427 :
3428 : const float fHalfDeltaX = 0.5f * fDeltaX;
3429 : const float fThreeDeltaX = 3.0f * fDeltaX;
3430 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3431 :
3432 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3433 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3434 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3435 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3436 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3437 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3438 :
3439 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3440 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3441 : i++, iOffset += poWK->nSrcXSize)
3442 : {
3443 : const __m128 xmmDensity =
3444 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3445 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3446 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3447 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3448 :
3449 : const __m128 xmmValues =
3450 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3451 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3452 : }
3453 : if (_mm_movemask_ps(xmmMaskLowDensity))
3454 : {
3455 : double adfImagIgnored[4] = {};
3456 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3457 : pdfDensity, pdfReal, adfImagIgnored);
3458 : }
3459 :
3460 : const float fHalfDeltaY = 0.5f * fDeltaY;
3461 : const float fThreeDeltaY = 3.0f * fDeltaY;
3462 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3463 :
3464 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3465 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3466 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3467 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3468 :
3469 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3470 :
3471 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3472 : const __m128 xmmValue = _mm_load_ps(pafValue);
3473 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3474 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3475 :
3476 : // We did all above computations on float32 whereas the general case is
3477 : // float64. Not sure if one is fundamentally more correct than the other
3478 : // one, but we want our optimization to give the same result as the
3479 : // general case as much as possible, so if the resulting value is
3480 : // close to some_int_value + 0.5, redo the computation with the general
3481 : // case.
3482 : // Note: If other types than Byte or UInt16, will need changes.
3483 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3484 : return true;
3485 :
3486 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3487 :
3488 387271 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3489 387271 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3490 :
3491 387271 : double adfValueDens[4] = {};
3492 387271 : double adfValueReal[4] = {};
3493 :
3494 387271 : double adfCoeffsX[4] = {};
3495 387271 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3496 :
3497 387271 : double adfCoeffsY[4] = {};
3498 387271 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3499 :
3500 1930200 : for (GPtrDiff_t i = -1; i < 3; i++)
3501 : {
3502 1544480 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3503 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3504 1544480 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3505 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3506 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 1] <
3507 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3508 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 2] <
3509 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3510 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 3] <
3511 : SRC_DENSITY_THRESHOLD_FLOAT)
3512 : {
3513 1551 : double adfImagIgnored[4] = {};
3514 1551 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3515 : pdfDensity, pdfReal,
3516 1551 : adfImagIgnored);
3517 : }
3518 : #endif
3519 :
3520 3085860 : adfValueDens[i + 1] =
3521 1542930 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3522 :
3523 1542930 : adfValueReal[i + 1] = CONVOL4(
3524 : adfCoeffsX,
3525 1542930 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3526 : }
3527 :
3528 385720 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3529 385720 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3530 :
3531 385720 : return true;
3532 : }
3533 :
3534 : /************************************************************************/
3535 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3536 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3537 : /************************************************************************/
3538 :
3539 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3540 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3541 : double *pdfDensity, double *pdfReal)
3542 :
3543 : {
3544 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3545 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3546 0 : const GPtrDiff_t iSrcOffset =
3547 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3548 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3549 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3550 :
3551 : // Get the bilinear interpolation at the image borders.
3552 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3553 0 : iSrcY + 2 >= poWK->nSrcYSize)
3554 : {
3555 0 : double adfImagIgnored[4] = {};
3556 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3557 0 : pdfDensity, pdfReal, adfImagIgnored);
3558 : }
3559 :
3560 0 : double adfCoeffsX[4] = {};
3561 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3562 :
3563 0 : double adfCoeffsY[4] = {};
3564 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3565 :
3566 0 : double adfValueDens[4] = {};
3567 0 : double adfValueReal[4] = {};
3568 0 : double adfDensity[4] = {};
3569 0 : double adfReal[4] = {};
3570 0 : double adfImagIgnored[4] = {};
3571 :
3572 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3573 : {
3574 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3575 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3576 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3577 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3578 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3579 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3580 : {
3581 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3582 : pdfDensity, pdfReal,
3583 0 : adfImagIgnored);
3584 : }
3585 :
3586 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3587 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3588 : }
3589 :
3590 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3591 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3592 :
3593 0 : return true;
3594 : }
3595 :
3596 : template <class T>
3597 2300964 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3598 : int iBand, double dfSrcX,
3599 : double dfSrcY, T *pValue)
3600 :
3601 : {
3602 2300964 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3603 2300964 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3604 2300964 : const GPtrDiff_t iSrcOffset =
3605 2300964 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3606 2300964 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3607 2300964 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3608 2300964 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3609 2300964 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3610 :
3611 : // Get the bilinear interpolation at the image borders.
3612 2300964 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3613 1883033 : iSrcY + 2 >= poWK->nSrcYSize)
3614 490244 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3615 490244 : pValue);
3616 :
3617 1810720 : double adfCoeffs[4] = {};
3618 1810720 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3619 :
3620 1810720 : double adfValue[4] = {};
3621 :
3622 9053590 : for (GPtrDiff_t i = -1; i < 3; i++)
3623 : {
3624 7242876 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3625 :
3626 7242876 : adfValue[i + 1] = CONVOL4(
3627 : adfCoeffs,
3628 7242876 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3629 : }
3630 :
3631 : const double dfValue =
3632 1810720 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3633 : adfValue[1], adfValue[2], adfValue[3]);
3634 :
3635 1810720 : *pValue = GWKClampValueT<T>(dfValue);
3636 :
3637 1810720 : return true;
3638 : }
3639 :
3640 : /************************************************************************/
3641 : /* GWKLanczosSinc() */
3642 : /************************************************************************/
3643 :
3644 : /*
3645 : * Lanczos windowed sinc interpolation kernel with radius r.
3646 : * /
3647 : * | sinc(x) * sinc(x/r), if |x| < r
3648 : * L(x) = | 1, if x = 0 ,
3649 : * | 0, otherwise
3650 : * \
3651 : *
3652 : * where sinc(x) = sin(PI * x) / (PI * x).
3653 : */
3654 :
// Evaluates sinc(x) * sinc(x / 3), i.e. the Lanczos kernel of radius 3, at
// dfX. Returns 1.0 for dfX == 0. No clamping to |x| < 3 is done here.
static double GWKLanczosSinc(double dfX)
{
    if (dfX != 0.0)
    {
        const double dfPiX = M_PI * dfX;
        const double dfPiXOver3 = dfPiX / 3;

        // Only one sin() call is needed: since
        //   sin(3y) = 3 sin(y) - 4 sin^3(y) = (3 - 4 sin^2(y)) * sin(y),
        // sin(dfPiX) * sin(dfPiXOver3) == (3 - 4 s^2) * s^2
        // with s = sin(dfPiXOver3).
        const double dfSinOver3 = sin(dfPiXOver3);
        const double dfSinOver3Sq = dfSinOver3 * dfSinOver3;
        const double dfNumerator = (3 - 4 * dfSinOver3Sq) * dfSinOver3Sq;

        // Denominator is (PI x) * (PI x / 3).
        const double dfDenominator = dfPiX * dfPiXOver3;
        return dfNumerator / dfDenominator;
    }

    return 1.0;
}
3671 :
// In-place variant of the radius-3 Lanczos kernel for four abscissas:
// each padfValues[k] is replaced by sinc(x) * sinc(x / 3) (1.0 when x == 0).
// Returns the sum of the four resulting weights.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfCur = padfValues; pdfCur != padfValues + 4; ++pdfCur)
    {
        const double dfX = *pdfCur;
        if (dfX == 0.0)
        {
            *pdfCur = 1.0;
            continue;
        }

        const double dfPiX = M_PI * dfX;
        const double dfPiXOver3 = dfPiX / 3;

        // sin(3y) = (3 - 4 sin^2(y)) * sin(y), hence
        // sin(dfPiX) * sin(dfPiXOver3) == (3 - 4 s^2) * s^2
        // with s = sin(dfPiXOver3): a single sin() call suffices.
        const double dfSinOver3 = sin(dfPiXOver3);
        const double dfSinOver3Sq = dfSinOver3 * dfSinOver3;
        const double dfNumerator = (3 - 4 * dfSinOver3Sq) * dfSinOver3Sq;
        const double dfDenominator = dfPiX * dfPiXOver3;

        *pdfCur = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3696 :
3697 : /************************************************************************/
3698 : /* GWKBilinear() */
3699 : /************************************************************************/
3700 :
// Triangle (bilinear) filter: 1 - |x| for |x| <= 1, and 0 elsewhere.
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3709 :
// In-place triangle (bilinear) filter for four abscissas: each padfValues[k]
// becomes 1 - |x| when |x| <= 1 and 0 otherwise. Returns the sum of the four
// resulting weights.
static double GWKBilinear4Values(double *padfValues)
{
    for (int k = 0; k < 4; ++k)
    {
        const double dfDistance = fabs(padfValues[k]);
        padfValues[k] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3734 :
3735 : /************************************************************************/
3736 : /* GWKCubic() */
3737 : /************************************************************************/
3738 :
3739 86934 : static double GWKCubic(double dfX)
3740 : {
3741 86934 : return CubicKernel(dfX);
3742 : }
3743 :
// In-place cubic convolution filter for four abscissas. Each padfValues[k]
// is replaced by
//    1.5|x|^3 - 2.5 x^2 + 1             for |x| <= 1
//   -0.5|x|^3 + 2.5 x^2 - 4|x| + 2      for 1 < |x| <= 2
//    0                                  otherwise
// and the sum of the four weights is returned.
static double GWKCubic4Values(double *padfValues)
{
    for (int k = 0; k < 4; ++k)
    {
        const double dfX = padfValues[k];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        double dfWeight = 0.0;
        if (dfAbsX <= 1.0)
        {
            dfWeight = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            dfWeight = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }

        padfValues[k] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3785 :
3786 : /************************************************************************/
3787 : /* GWKBSpline() */
3788 : /************************************************************************/
3789 :
3790 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3791 : // Equation 8 with (B,C)=(1,0)
3792 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3793 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3794 :
// Cubic B-spline filter weight at x, WITHOUT the 1/6 normalisation factor
// (so the value at 0 is 4 and the value at +/-1 is 1).
//
// The kernel is evaluated as a sum of truncated cubes:
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes when its base is > 0. That sum is only
// the B-spline for x < 2: beyond that point the missing (x-2)^3 term would be
// required to cancel it, and the sum would evaluate to -(x-2)^3 instead of 0.
// The kernel support is therefore enforced explicitly, so that 0 is returned
// for any x outside (-2, 2), consistently with the other filter functions.
static double GWKBSpline(double x)
{
    const double xp2 = x + 2.0;

    // Outside the support (also catches NaN, for which the test on xp2 fails).
    if (!(xp2 > 0.0) || x >= 2.0)
        return 0.0;

    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;

    // Accumulate the terms from the innermost (largest threshold) outwards;
    // the order of the additions is significant for floating point results.
    double dfAcc = 0.0;
    if (xp1 > 0.0)
    {
        if (x > 0.0)
        {
            if (xm1 > 0.0)
                dfAcc = -4.0 * xm1 * xm1 * xm1;
            dfAcc += 6.0 * x * x * x;
        }
        dfAcc += -4.0 * xp1 * xp1 * xp1;
    }

    return dfAcc + xp2 * xp2 * xp2;  // * 0.166666666666666666666
}
3818 :
// In-place cubic B-spline filter for four abscissas, WITHOUT the 1/6
// normalisation factor. Each padfValues[k] is replaced by its weight and the
// sum of the four weights is returned.
//
// Each weight is the sum of truncated cubes
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// (a term only contributes when its base is > 0). That sum is only valid for
// x < 2; past that point it would evaluate to -(x-2)^3 rather than 0, so the
// kernel support is enforced explicitly and 0 is produced for any x outside
// (-2, 2).
static double GWKBSpline4Values(double *padfValues)
{
    for (int i = 0; i < 4; i++)
    {
        const double x = padfValues[i];
        const double xp2 = x + 2.0;

        // Outside the support (also catches NaN, for which the test on xp2
        // fails).
        if (!(xp2 > 0.0) || x >= 2.0)
        {
            padfValues[i] = 0.0;
            continue;
        }

        const double xp1 = x + 1.0;
        const double xm1 = x - 1.0;

        // Accumulate the terms from the innermost (largest threshold)
        // outwards; the order of the additions is significant for floating
        // point results.
        double dfAcc = 0.0;
        if (xp1 > 0.0)
        {
            if (x > 0.0)
            {
                if (xm1 > 0.0)
                    dfAcc = -4.0 * xm1 * xm1 * xm1;
                dfAcc += 6.0 * x * x * x;
            }
            dfAcc += -4.0 * xp1 * xp1 * xp1;
        }

        padfValues[i] = dfAcc + xp2 * xp2 * xp2;  // * 0.166666666666666666666
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3848 : /************************************************************************/
3849 : /* GWKResampleWrkStruct */
3850 : /************************************************************************/
3851 :
3852 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3853 :
3854 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3855 : double dfSrcX, double dfSrcY,
3856 : double *pdfDensity, double *pdfReal,
3857 : double *pdfImag,
3858 : GWKResampleWrkStruct *psWrkStruct);
3859 :
3860 : struct _GWKResampleWrkStruct
3861 : {
3862 : pfnGWKResampleType pfnGWKResample;
3863 :
3864 : // Space for saved X weights.
3865 : double *padfWeightsX;
3866 : bool *pabCalcX;
3867 :
3868 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3869 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3870 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3871 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3872 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3873 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3874 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3875 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3876 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3877 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3878 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3879 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3880 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3881 :
3882 : // Space for saving a row of pixels.
3883 : double *padfRowDensity;
3884 : double *padfRowReal;
3885 : double *padfRowImag;
3886 : };
3887 :
3888 : /************************************************************************/
3889 : /* GWKResampleCreateWrkStruct() */
3890 : /************************************************************************/
3891 :
3892 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3893 : double dfSrcY, double *pdfDensity, double *pdfReal,
3894 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3895 :
3896 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3897 : double dfSrcX, double dfSrcY,
3898 : double *pdfDensity, double *pdfReal,
3899 : double *pdfImag,
3900 : GWKResampleWrkStruct *psWrkStruct);
3901 :
3902 401 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3903 : {
3904 401 : const int nXDist = (poWK->nXRadius + 1) * 2;
3905 401 : const int nYDist = (poWK->nYRadius + 1) * 2;
3906 :
3907 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3908 401 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3909 :
3910 : // Alloc space for saved X weights.
3911 401 : psWrkStruct->padfWeightsX =
3912 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3913 401 : psWrkStruct->pabCalcX =
3914 401 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3915 :
3916 401 : psWrkStruct->padfWeightsY =
3917 401 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3918 401 : psWrkStruct->iLastSrcX = -10;
3919 401 : psWrkStruct->iLastSrcY = -10;
3920 401 : psWrkStruct->dfLastDeltaX = -10;
3921 401 : psWrkStruct->dfLastDeltaY = -10;
3922 :
3923 : // Alloc space for saving a row of pixels.
3924 401 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3925 365 : poWK->panUnifiedSrcValid == nullptr &&
3926 342 : poWK->papanBandSrcValid == nullptr)
3927 : {
3928 342 : psWrkStruct->padfRowDensity = nullptr;
3929 : }
3930 : else
3931 : {
3932 59 : psWrkStruct->padfRowDensity =
3933 59 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3934 : }
3935 401 : psWrkStruct->padfRowReal =
3936 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3937 401 : psWrkStruct->padfRowImag =
3938 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3939 :
3940 401 : if (poWK->eResample == GRA_Lanczos)
3941 : {
3942 65 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3943 :
3944 65 : if (poWK->dfXScale < 1)
3945 : {
3946 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3947 4 : psWrkStruct->dfSinPiXScaleOver3 =
3948 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3949 4 : psWrkStruct->dfCosPiXScaleOver3);
3950 : // "Naive":
3951 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3952 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3953 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3954 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3955 4 : psWrkStruct->dfCosPiXScaleOver3 -
3956 4 : 3) *
3957 4 : psWrkStruct->dfCosPiXScaleOver3;
3958 4 : psWrkStruct->dfSinPiXScale = sqrt(
3959 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3960 : }
3961 :
3962 65 : if (poWK->dfYScale < 1)
3963 : {
3964 13 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3965 13 : psWrkStruct->dfSinPiYScaleOver3 =
3966 13 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3967 13 : psWrkStruct->dfCosPiYScaleOver3);
3968 : // "Naive":
3969 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3970 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3971 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3972 13 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3973 13 : psWrkStruct->dfCosPiYScaleOver3 -
3974 13 : 3) *
3975 13 : psWrkStruct->dfCosPiYScaleOver3;
3976 13 : psWrkStruct->dfSinPiYScale = sqrt(
3977 13 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3978 : }
3979 : }
3980 : else
3981 336 : psWrkStruct->pfnGWKResample = GWKResample;
3982 :
3983 401 : return psWrkStruct;
3984 : }
3985 :
3986 : /************************************************************************/
3987 : /* GWKResampleDeleteWrkStruct() */
3988 : /************************************************************************/
3989 :
3990 401 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3991 : {
3992 401 : CPLFree(psWrkStruct->padfWeightsX);
3993 401 : CPLFree(psWrkStruct->padfWeightsY);
3994 401 : CPLFree(psWrkStruct->pabCalcX);
3995 401 : CPLFree(psWrkStruct->padfRowDensity);
3996 401 : CPLFree(psWrkStruct->padfRowReal);
3997 401 : CPLFree(psWrkStruct->padfRowImag);
3998 401 : CPLFree(psWrkStruct);
3999 401 : }
4000 :
4001 : /************************************************************************/
4002 : /* GWKResample() */
4003 : /************************************************************************/
4004 :
4005 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4006 : double dfSrcY, double *pdfDensity, double *pdfReal,
4007 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
4008 :
4009 : {
4010 : // Save as local variables to avoid following pointers in loops.
4011 239383 : const int nSrcXSize = poWK->nSrcXSize;
4012 239383 : const int nSrcYSize = poWK->nSrcYSize;
4013 :
4014 239383 : double dfAccumulatorReal = 0.0;
4015 239383 : double dfAccumulatorImag = 0.0;
4016 239383 : double dfAccumulatorDensity = 0.0;
4017 239383 : double dfAccumulatorWeight = 0.0;
4018 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4019 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4020 239383 : const GPtrDiff_t iSrcOffset =
4021 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4022 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4023 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4024 :
4025 239383 : const double dfXScale = poWK->dfXScale;
4026 239383 : const double dfYScale = poWK->dfYScale;
4027 :
4028 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
4029 :
4030 : // Space for saved X weights.
4031 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
4032 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
4033 :
4034 : // Space for saving a row of pixels.
4035 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
4036 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
4037 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
4038 :
4039 : // Mark as needing calculation (don't calculate the weights yet,
4040 : // because a mask may render it unnecessary).
4041 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
4042 :
4043 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
4044 239383 : CPLAssert(pfnGetWeight);
4045 :
4046 : // Skip sampling over edge of image.
4047 239383 : int j = poWK->nFiltInitY;
4048 239383 : int jMax = poWK->nYRadius;
4049 239383 : if (iSrcY + j < 0)
4050 566 : j = -iSrcY;
4051 239383 : if (iSrcY + jMax >= nSrcYSize)
4052 662 : jMax = nSrcYSize - iSrcY - 1;
4053 :
4054 239383 : int iMin = poWK->nFiltInitX;
4055 239383 : int iMax = poWK->nXRadius;
4056 239383 : if (iSrcX + iMin < 0)
4057 566 : iMin = -iSrcX;
4058 239383 : if (iSrcX + iMax >= nSrcXSize)
4059 659 : iMax = nSrcXSize - iSrcX - 1;
4060 :
4061 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
4062 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
4063 :
4064 239383 : GPtrDiff_t iRowOffset =
4065 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
4066 :
4067 : // Loop over pixel rows in the kernel.
4068 1445930 : for (; j <= jMax; ++j)
4069 : {
4070 1206540 : iRowOffset += nSrcXSize;
4071 :
4072 : // Get pixel values.
4073 : // We can potentially read extra elements after the "normal" end of the
4074 : // source arrays, but the contract of papabySrcImage[iBand],
4075 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4076 : // is to have WARP_EXTRA_ELTS reserved at their end.
4077 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4078 : padfRowDensity, padfRowReal, padfRowImag))
4079 72 : continue;
4080 :
4081 : // Calculate the Y weight.
4082 : double dfWeight1 = (bYScaleBelow1)
4083 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
4084 1600 : : pfnGetWeight(j - dfDeltaY);
4085 :
4086 : // Iterate over pixels in row.
4087 1206470 : double dfAccumulatorRealLocal = 0.0;
4088 1206470 : double dfAccumulatorImagLocal = 0.0;
4089 1206470 : double dfAccumulatorDensityLocal = 0.0;
4090 1206470 : double dfAccumulatorWeightLocal = 0.0;
4091 :
4092 7317420 : for (int i = iMin; i <= iMax; ++i)
4093 : {
4094 : // Skip sampling if pixel has zero density.
4095 6110940 : if (padfRowDensity != nullptr &&
4096 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4097 546 : continue;
4098 :
4099 6110400 : double dfWeight2 = 0.0;
4100 :
4101 : // Make or use a cached set of weights for this row.
4102 6110400 : if (pabCalcX[i - iMin])
4103 : {
4104 : // Use saved weight value instead of recomputing it.
4105 4903920 : dfWeight2 = padfWeightsX[i - iMin];
4106 : }
4107 : else
4108 : {
4109 : // Calculate & save the X weight.
4110 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
4111 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
4112 1600 : : pfnGetWeight(i - dfDeltaX);
4113 :
4114 1206480 : pabCalcX[i - iMin] = true;
4115 : }
4116 :
4117 : // Accumulate!
4118 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
4119 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
4120 6110400 : if (padfRowDensity != nullptr)
4121 76731 : dfAccumulatorDensityLocal +=
4122 76731 : padfRowDensity[i - iMin] * dfWeight2;
4123 6110400 : dfAccumulatorWeightLocal += dfWeight2;
4124 : }
4125 :
4126 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
4127 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
4128 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
4129 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
4130 : }
4131 :
4132 239383 : if (dfAccumulatorWeight < 0.000001 ||
4133 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
4134 : {
4135 0 : *pdfDensity = 0.0;
4136 0 : return false;
4137 : }
4138 :
4139 : // Calculate the output taking into account weighting.
4140 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4141 : {
4142 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
4143 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
4144 239380 : if (padfRowDensity != nullptr)
4145 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
4146 : else
4147 237496 : *pdfDensity = 1.0;
4148 : }
4149 : else
4150 : {
4151 3 : *pdfReal = dfAccumulatorReal;
4152 3 : *pdfImag = dfAccumulatorImag;
4153 3 : if (padfRowDensity != nullptr)
4154 3 : *pdfDensity = dfAccumulatorDensity;
4155 : else
4156 0 : *pdfDensity = 1.0;
4157 : }
4158 :
4159 239383 : return true;
4160 : }
4161 :
4162 : /************************************************************************/
4163 : /* GWKResampleOptimizedLanczos() */
4164 : /************************************************************************/
4165 :
4166 634574 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
4167 : double dfSrcX, double dfSrcY,
4168 : double *pdfDensity, double *pdfReal,
4169 : double *pdfImag,
4170 : GWKResampleWrkStruct *psWrkStruct)
4171 :
4172 : {
4173 : // Save as local variables to avoid following pointers in loops.
4174 634574 : const int nSrcXSize = poWK->nSrcXSize;
4175 634574 : const int nSrcYSize = poWK->nSrcYSize;
4176 :
4177 634574 : double dfAccumulatorReal = 0.0;
4178 634574 : double dfAccumulatorImag = 0.0;
4179 634574 : double dfAccumulatorDensity = 0.0;
4180 634574 : double dfAccumulatorWeight = 0.0;
4181 634574 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4182 634574 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4183 634574 : const GPtrDiff_t iSrcOffset =
4184 634574 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4185 634574 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4186 634574 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4187 :
4188 634574 : const double dfXScale = poWK->dfXScale;
4189 634574 : const double dfYScale = poWK->dfYScale;
4190 :
4191 : // Space for saved X weights.
4192 634574 : double *const padfWeightsXShifted =
4193 634574 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
4194 634574 : double *const padfWeightsYShifted =
4195 634574 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
4196 :
4197 : // Space for saving a row of pixels.
4198 634574 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
4199 634574 : double *const padfRowReal = psWrkStruct->padfRowReal;
4200 634574 : double *const padfRowImag = psWrkStruct->padfRowImag;
4201 :
4202 : // Skip sampling over edge of image.
4203 634574 : int jMin = poWK->nFiltInitY;
4204 634574 : int jMax = poWK->nYRadius;
4205 634574 : if (iSrcY + jMin < 0)
4206 19638 : jMin = -iSrcY;
4207 634574 : if (iSrcY + jMax >= nSrcYSize)
4208 7942 : jMax = nSrcYSize - iSrcY - 1;
4209 :
4210 634574 : int iMin = poWK->nFiltInitX;
4211 634574 : int iMax = poWK->nXRadius;
4212 634574 : if (iSrcX + iMin < 0)
4213 18827 : iMin = -iSrcX;
4214 634574 : if (iSrcX + iMax >= nSrcXSize)
4215 6817 : iMax = nSrcXSize - iSrcX - 1;
4216 :
4217 634574 : if (dfXScale < 1.0)
4218 : {
4219 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
4220 200179 : iMin++;
4221 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
4222 0 : iMax--;
4223 :
4224 : // clang-format off
4225 : /*
4226 : Naive version:
4227 : for (int i = iMin; i <= iMax; ++i)
4228 : {
4229 : psWrkStruct->padfWeightsXShifted[i] =
4230 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
4231 : }
4232 :
4233 : but given that:
4234 :
4235 : GWKLanczosSinc(x):
4236 : if (dfX == 0.0)
4237 : return 1.0;
4238 :
4239 : const double dfPIX = M_PI * dfX;
4240 : const double dfPIXoverR = dfPIX / 3;
4241 : const double dfPIX2overR = dfPIX * dfPIXoverR;
4242 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
4243 :
4244 : and
4245 : sin (a + b) = sin a cos b + cos a sin b.
4246 : cos (a + b) = cos a cos b - sin a sin b.
4247 :
4248 : we can skip any sin() computation within the loop
4249 : */
4250 : // clang-format on
4251 :
4252 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
4253 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4254 : {
4255 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4256 :
4257 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4258 71790 : double dfCosOver3 = cos(dfPIXover3);
4259 71790 : double dfSinOver3 = sin(dfPIXover3);
4260 :
4261 : // "Naive":
4262 : // double dfSin = sin( M_PI * dfX );
4263 : // double dfCos = cos( M_PI * dfX );
4264 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4265 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4266 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4267 :
4268 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4269 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4270 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4271 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4272 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4273 71790 : padfWeightsXShifted[iMin] =
4274 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4275 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
4276 : {
4277 1564690 : dfX += dfXScale;
4278 1564690 : const double dfNewSin =
4279 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4280 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4281 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
4282 1564690 : padfWeightsXShifted[i] =
4283 : dfX == 0
4284 1564690 : ? 1.0
4285 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4286 1564690 : const double dfNewCos =
4287 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4288 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4289 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
4290 1564690 : dfSin = dfNewSin;
4291 1564690 : dfCos = dfNewCos;
4292 1564690 : dfSinOver3 = dfNewSinOver3;
4293 1564690 : dfCosOver3 = dfNewCosOver3;
4294 : }
4295 :
4296 71790 : psWrkStruct->iLastSrcX = iSrcX;
4297 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4298 : }
4299 : }
4300 : else
4301 : {
4302 789372 : while (iMin - dfDeltaX < -3.0)
4303 357660 : iMin++;
4304 431712 : while (iMax - dfDeltaX > 3.0)
4305 0 : iMax--;
4306 :
4307 431712 : if (iSrcX != psWrkStruct->iLastSrcX ||
4308 225330 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4309 : {
4310 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4311 : // following trigonometric formulas.
4312 :
4313 : // TODO(schwehr): Move this somewhere where it can be rendered at
4314 : // LaTeX.
4315 : // clang-format off
4316 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4317 : // cos(M_PI * dfBase) * sin(M_PI * k)
4318 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4319 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4320 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4321 :
4322 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4323 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4324 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4325 : // clang-format on
4326 :
4327 420092 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4328 420092 : const double dfSin2PIDeltaXOver3 =
4329 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4330 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4331 420092 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4332 420092 : const double dfSinPIDeltaX =
4333 420092 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4334 420092 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4335 420092 : const double dfInvPI2Over3xSinPIDeltaX =
4336 : dfInvPI2Over3 * dfSinPIDeltaX;
4337 420092 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4338 420092 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4339 420092 : const double dfSinPIOver3 = 0.8660254037844386;
4340 420092 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4341 420092 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4342 : const double padfCst[] = {
4343 420092 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4344 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4345 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4346 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4347 420092 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4348 :
4349 2974940 : for (int i = iMin; i <= iMax; ++i)
4350 : {
4351 2554850 : const double dfX = i - dfDeltaX;
4352 2554850 : if (dfX == 0.0)
4353 58282 : padfWeightsXShifted[i] = 1.0;
4354 : else
4355 2496570 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4356 : #if DEBUG_VERBOSE
4357 : // TODO(schwehr): AlmostEqual.
4358 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4359 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4360 : #endif
4361 : }
4362 :
4363 420092 : psWrkStruct->iLastSrcX = iSrcX;
4364 420092 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4365 : }
4366 : }
4367 :
4368 634574 : if (dfYScale < 1.0)
4369 : {
4370 406666 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4371 203804 : jMin++;
4372 206462 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4373 3600 : jMax--;
4374 :
4375 : // clang-format off
4376 : /*
4377 : Naive version:
4378 : for (int j = jMin; j <= jMax; ++j)
4379 : {
4380 : padfWeightsYShifted[j] =
4381 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4382 : }
4383 : */
4384 : // clang-format on
4385 :
4386 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4387 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4388 : {
4389 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4390 :
4391 383 : double dfPIYover3 = M_PI / 3 * dfY;
4392 383 : double dfCosOver3 = cos(dfPIYover3);
4393 383 : double dfSinOver3 = sin(dfPIYover3);
4394 :
4395 : // "Naive":
4396 : // double dfSin = sin( M_PI * dfY );
4397 : // double dfCos = cos( M_PI * dfY );
4398 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4399 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4400 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4401 :
4402 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4403 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4404 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4405 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4406 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4407 383 : padfWeightsYShifted[jMin] =
4408 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4409 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4410 : {
4411 6935 : dfY += dfYScale;
4412 6935 : const double dfNewSin =
4413 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4414 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4415 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4416 6935 : padfWeightsYShifted[j] =
4417 : dfY == 0
4418 6935 : ? 1.0
4419 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4420 6935 : const double dfNewCos =
4421 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4422 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4423 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4424 6935 : dfSin = dfNewSin;
4425 6935 : dfCos = dfNewCos;
4426 6935 : dfSinOver3 = dfNewSinOver3;
4427 6935 : dfCosOver3 = dfNewCosOver3;
4428 : }
4429 :
4430 383 : psWrkStruct->iLastSrcY = iSrcY;
4431 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4432 : }
4433 : }
4434 : else
4435 : {
4436 713336 : while (jMin - dfDeltaY < -3.0)
4437 281624 : jMin++;
4438 431712 : while (jMax - dfDeltaY > 3.0)
4439 0 : jMax--;
4440 :
4441 431712 : if (iSrcY != psWrkStruct->iLastSrcY ||
4442 431040 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4443 : {
4444 6942 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4445 6942 : const double dfSin2PIDeltaYOver3 =
4446 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4447 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4448 6942 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4449 6942 : const double dfSinPIDeltaY =
4450 6942 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4451 6942 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4452 6942 : const double dfInvPI2Over3xSinPIDeltaY =
4453 : dfInvPI2Over3 * dfSinPIDeltaY;
4454 6942 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4455 6942 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4456 6942 : const double dfSinPIOver3 = 0.8660254037844386;
4457 6942 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4458 6942 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4459 : const double padfCst[] = {
4460 6942 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4461 6942 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4462 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4463 6942 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4464 6942 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4465 :
4466 45985 : for (int j = jMin; j <= jMax; ++j)
4467 : {
4468 39043 : const double dfY = j - dfDeltaY;
4469 39043 : if (dfY == 0.0)
4470 468 : padfWeightsYShifted[j] = 1.0;
4471 : else
4472 38575 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4473 : #if DEBUG_VERBOSE
4474 : // TODO(schwehr): AlmostEqual.
4475 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4476 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4477 : #endif
4478 : }
4479 :
4480 6942 : psWrkStruct->iLastSrcY = iSrcY;
4481 6942 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4482 : }
4483 : }
4484 :
4485 : // If we have no density information, we can simply compute the
4486 : // accumulated weight.
4487 634574 : if (padfRowDensity == nullptr)
4488 : {
4489 634574 : double dfRowAccWeight = 0.0;
4490 8017750 : for (int i = iMin; i <= iMax; ++i)
4491 : {
4492 7383170 : dfRowAccWeight += padfWeightsXShifted[i];
4493 : }
4494 634574 : double dfColAccWeight = 0.0;
4495 8075420 : for (int j = jMin; j <= jMax; ++j)
4496 : {
4497 7440850 : dfColAccWeight += padfWeightsYShifted[j];
4498 : }
4499 634574 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4500 : }
4501 :
4502 : // Loop over pixel rows in the kernel.
4503 :
4504 634574 : if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
4505 633954 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4506 : !padfRowDensity)
4507 : {
4508 : // Optimization for Byte case without any masking/alpha
4509 :
4510 633954 : if (dfAccumulatorWeight < 0.000001)
4511 : {
4512 0 : *pdfDensity = 0.0;
4513 0 : return false;
4514 : }
4515 :
4516 633954 : const GByte *pSrc =
4517 633954 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4518 633954 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4519 :
4520 : #if defined(USE_SSE2)
4521 633954 : if (iMax - iMin + 1 == 6)
4522 : {
4523 : // This is just an optimized version of the general case in
4524 : // the else clause.
4525 :
4526 359916 : pSrc += iMin;
4527 359916 : int j = jMin;
4528 : const auto fourXWeights =
4529 359916 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4530 :
4531 : // Process 2 lines at the same time.
4532 1424180 : for (; j < jMax; j += 2)
4533 : {
4534 : const XMMReg4Double v_acc =
4535 1064270 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4536 : const XMMReg4Double v_acc2 =
4537 1064270 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4538 1064270 : const double dfRowAcc = v_acc.GetHorizSum();
4539 1064270 : const double dfRowAccEnd =
4540 1064270 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4541 1064270 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4542 1064270 : dfAccumulatorReal +=
4543 1064270 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4544 1064270 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4545 1064270 : const double dfRowAcc2End =
4546 1064270 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4547 1064270 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4548 1064270 : dfAccumulatorReal +=
4549 1064270 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4550 1064270 : pSrc += 2 * nSrcXSize;
4551 : }
4552 359916 : if (j == jMax)
4553 : {
4554 : // Process last line if there's an odd number of them.
4555 :
4556 : const XMMReg4Double v_acc =
4557 90039 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4558 90039 : const double dfRowAcc = v_acc.GetHorizSum();
4559 90039 : const double dfRowAccEnd =
4560 90039 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4561 90039 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4562 90039 : dfAccumulatorReal +=
4563 90039 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4564 : }
4565 : }
4566 : else
4567 : #endif
4568 : {
4569 5493380 : for (int j = jMin; j <= jMax; ++j)
4570 : {
4571 5219340 : int i = iMin;
4572 5219340 : double dfRowAcc1 = 0.0;
4573 5219340 : double dfRowAcc2 = 0.0;
4574 : // A bit of loop unrolling
4575 62823300 : for (; i < iMax; i += 2)
4576 : {
4577 57604000 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4578 57604000 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4579 : }
4580 5219340 : if (i == iMax)
4581 : {
4582 : // Process last column if there's an odd number of them.
4583 442077 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4584 : }
4585 :
4586 5219340 : dfAccumulatorReal +=
4587 5219340 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4588 5219340 : pSrc += nSrcXSize;
4589 : }
4590 : }
4591 :
4592 : // Calculate the output taking into account weighting.
4593 633954 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4594 : {
4595 579748 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4596 579748 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4597 579748 : *pdfDensity = 1.0;
4598 : }
4599 : else
4600 : {
4601 54206 : *pdfReal = dfAccumulatorReal;
4602 54206 : *pdfDensity = 1.0;
4603 : }
4604 :
4605 633954 : return true;
4606 : }
4607 :
4608 620 : GPtrDiff_t iRowOffset =
4609 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4610 :
4611 620 : int nCountValid = 0;
4612 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4613 :
4614 3560 : for (int j = jMin; j <= jMax; ++j)
4615 : {
4616 2940 : iRowOffset += nSrcXSize;
4617 :
4618 : // Get pixel values.
4619 : // We can potentially read extra elements after the "normal" end of the
4620 : // source arrays, but the contract of papabySrcImage[iBand],
4621 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4622 : // is to have WARP_EXTRA_ELTS reserved at their end.
4623 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4624 : padfRowDensity, padfRowReal, padfRowImag))
4625 0 : continue;
4626 :
4627 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4628 :
4629 : // Iterate over pixels in row.
4630 2940 : if (padfRowDensity != nullptr)
4631 : {
4632 0 : for (int i = iMin; i <= iMax; ++i)
4633 : {
4634 : // Skip sampling if pixel has zero density.
4635 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4636 0 : continue;
4637 :
4638 0 : nCountValid++;
4639 :
4640 : // Use a cached set of weights for this row.
4641 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4642 :
4643 : // Accumulate!
4644 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4645 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4646 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4647 0 : dfAccumulatorWeight += dfWeight2;
4648 : }
4649 : }
4650 2940 : else if (bIsNonComplex)
4651 : {
4652 1764 : double dfRowAccReal = 0.0;
4653 10560 : for (int i = iMin; i <= iMax; ++i)
4654 : {
4655 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4656 :
4657 : // Accumulate!
4658 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4659 : }
4660 :
4661 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4662 : }
4663 : else
4664 : {
4665 1176 : double dfRowAccReal = 0.0;
4666 1176 : double dfRowAccImag = 0.0;
4667 7040 : for (int i = iMin; i <= iMax; ++i)
4668 : {
4669 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4670 :
4671 : // Accumulate!
4672 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4673 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4674 : }
4675 :
4676 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4677 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4678 : }
4679 : }
4680 :
4681 620 : if (dfAccumulatorWeight < 0.000001 ||
4682 0 : (padfRowDensity != nullptr &&
4683 0 : (dfAccumulatorDensity < 0.000001 ||
4684 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4685 : {
4686 0 : *pdfDensity = 0.0;
4687 0 : return false;
4688 : }
4689 :
4690 : // Calculate the output taking into account weighting.
4691 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4692 : {
4693 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4694 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4695 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4696 0 : if (padfRowDensity != nullptr)
4697 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4698 : else
4699 0 : *pdfDensity = 1.0;
4700 : }
4701 : else
4702 : {
4703 620 : *pdfReal = dfAccumulatorReal;
4704 620 : *pdfImag = dfAccumulatorImag;
4705 620 : if (padfRowDensity != nullptr)
4706 0 : *pdfDensity = dfAccumulatorDensity;
4707 : else
4708 620 : *pdfDensity = 1.0;
4709 : }
4710 :
4711 620 : return true;
4712 : }
4713 :
4714 : /************************************************************************/
4715 : /* GWKComputeWeights() */
4716 : /************************************************************************/
4717 :
4718 1222150 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4719 : double dfDeltaX, double dfXScale, int jMin,
4720 : int jMax, double dfDeltaY, double dfYScale,
4721 : double *padfWeightsHorizontal,
4722 : double *padfWeightsVertical, double &dfInvWeights)
4723 : {
4724 :
4725 1222150 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4726 1222150 : CPLAssert(pfnGetWeight);
4727 1222150 : const FilterFunc4ValuesType pfnGetWeight4Values =
4728 1222150 : apfGWKFilter4Values[eResample];
4729 1222150 : CPLAssert(pfnGetWeight4Values);
4730 :
4731 1222150 : int i = iMin; // Used after for.
4732 1222150 : int iC = 0; // Used after for.
4733 : // Not zero, but as close as possible to it, to avoid potential division by
4734 : // zero at end of function
4735 1222150 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
4736 2990960 : for (; i + 2 < iMax; i += 4, iC += 4)
4737 : {
4738 1768820 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4739 1768820 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4740 1768820 : padfWeightsHorizontal[iC + 2] =
4741 1768820 : padfWeightsHorizontal[iC + 1] + dfXScale;
4742 1768820 : padfWeightsHorizontal[iC + 3] =
4743 1768820 : padfWeightsHorizontal[iC + 2] + dfXScale;
4744 1768820 : dfAccumulatorWeightHorizontal +=
4745 1768820 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4746 : }
4747 1280860 : for (; i <= iMax; ++i, ++iC)
4748 : {
4749 58719 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4750 58719 : padfWeightsHorizontal[iC] = dfWeight;
4751 58719 : dfAccumulatorWeightHorizontal += dfWeight;
4752 : }
4753 :
4754 1222150 : int j = jMin; // Used after for.
4755 1222150 : int jC = 0; // Used after for.
4756 : // Not zero, but as close as possible to it, to avoid potential division by
4757 : // zero at end of function
4758 1222150 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4759 2984620 : for (; j + 2 < jMax; j += 4, jC += 4)
4760 : {
4761 1762470 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4762 1762470 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4763 1762470 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4764 1762470 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4765 1762470 : dfAccumulatorWeightVertical +=
4766 1762470 : pfnGetWeight4Values(padfWeightsVertical + jC);
4767 : }
4768 1288930 : for (; j <= jMax; ++j, ++jC)
4769 : {
4770 66786 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4771 66786 : padfWeightsVertical[jC] = dfWeight;
4772 66786 : dfAccumulatorWeightVertical += dfWeight;
4773 : }
4774 :
4775 1222150 : dfInvWeights =
4776 1222150 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4777 1222150 : }
4778 :
4779 : /************************************************************************/
4780 : /* GWKResampleNoMasksT() */
4781 : /************************************************************************/
4782 :
4783 : template <class T>
4784 : static bool
4785 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4786 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4787 : double *padfWeightsVertical, double &dfInvWeights)
4788 :
4789 : {
4790 : // Commonly used; save locally.
4791 : const int nSrcXSize = poWK->nSrcXSize;
4792 : const int nSrcYSize = poWK->nSrcYSize;
4793 :
4794 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4795 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4796 : const GPtrDiff_t iSrcOffset =
4797 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4798 :
4799 : const int nXRadius = poWK->nXRadius;
4800 : const int nYRadius = poWK->nYRadius;
4801 :
4802 : // Politely refuse to process invalid coordinates or obscenely small image.
4803 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4804 : nYRadius > nSrcYSize)
4805 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4806 : pValue);
4807 :
4808 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4809 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4810 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4811 :
4812 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4813 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4814 :
4815 : int iMin = 1 - nXRadius;
4816 : if (iSrcX + iMin < 0)
4817 : iMin = -iSrcX;
4818 : int iMax = nXRadius;
4819 : if (iSrcX + iMax >= nSrcXSize - 1)
4820 : iMax = nSrcXSize - 1 - iSrcX;
4821 :
4822 : int jMin = 1 - nYRadius;
4823 : if (iSrcY + jMin < 0)
4824 : jMin = -iSrcY;
4825 : int jMax = nYRadius;
4826 : if (iSrcY + jMax >= nSrcYSize - 1)
4827 : jMax = nSrcYSize - 1 - iSrcY;
4828 :
4829 : if (iBand == 0)
4830 : {
4831 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4832 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4833 : padfWeightsVertical, dfInvWeights);
4834 : }
4835 :
4836 : // Loop over all rows in the kernel.
4837 : double dfAccumulator = 0.0;
4838 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4839 : {
4840 : const GPtrDiff_t iSampJ =
4841 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4842 :
4843 : // Loop over all pixels in the row.
4844 : double dfAccumulatorLocal = 0.0;
4845 : double dfAccumulatorLocal2 = 0.0;
4846 : int iC = 0;
4847 : int i = iMin;
4848 : // Process by chunk of 4 cols.
4849 : for (; i + 2 < iMax; i += 4, iC += 4)
4850 : {
4851 : // Retrieve the pixel & accumulate.
4852 : dfAccumulatorLocal +=
4853 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4854 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4855 : padfWeightsHorizontal[iC + 1];
4856 : dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4857 : padfWeightsHorizontal[iC + 2];
4858 : dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4859 : padfWeightsHorizontal[iC + 3];
4860 : }
4861 : dfAccumulatorLocal += dfAccumulatorLocal2;
4862 : if (i < iMax)
4863 : {
4864 : dfAccumulatorLocal +=
4865 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4866 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4867 : padfWeightsHorizontal[iC + 1];
4868 : i += 2;
4869 : iC += 2;
4870 : }
4871 : if (i == iMax)
4872 : {
4873 : dfAccumulatorLocal +=
4874 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4875 : }
4876 :
4877 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4878 : }
4879 :
4880 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4881 :
4882 : return true;
4883 : }
4884 :
4885 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4886 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4887 : #if defined(USE_SSE2)
4888 :
4889 : /************************************************************************/
4890 : /* GWKResampleNoMasks_SSE2_T() */
4891 : /************************************************************************/
4892 :
4893 : template <class T>
4894 1775366 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4895 : double dfSrcX, double dfSrcY, T *pValue,
4896 : double *padfWeightsHorizontal,
4897 : double *padfWeightsVertical,
4898 : double &dfInvWeights)
4899 : {
4900 : // Commonly used; save locally.
4901 1775366 : const int nSrcXSize = poWK->nSrcXSize;
4902 1775366 : const int nSrcYSize = poWK->nSrcYSize;
4903 :
4904 1775366 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4905 1775366 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4906 1775366 : const GPtrDiff_t iSrcOffset =
4907 1775366 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4908 1775366 : const int nXRadius = poWK->nXRadius;
4909 1775366 : const int nYRadius = poWK->nYRadius;
4910 :
4911 : // Politely refuse to process invalid coordinates or obscenely small image.
4912 1775366 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4913 : nYRadius > nSrcYSize)
4914 3 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4915 3 : pValue);
4916 :
4917 1775364 : const T *pSrcBand =
4918 1775364 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4919 :
4920 1775364 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4921 1775364 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4922 1775364 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4923 1775364 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4924 :
4925 1775364 : int iMin = 1 - nXRadius;
4926 1775364 : if (iSrcX + iMin < 0)
4927 22616 : iMin = -iSrcX;
4928 1775364 : int iMax = nXRadius;
4929 1775364 : if (iSrcX + iMax >= nSrcXSize - 1)
4930 9506 : iMax = nSrcXSize - 1 - iSrcX;
4931 :
4932 1775364 : int jMin = 1 - nYRadius;
4933 1775364 : if (iSrcY + jMin < 0)
4934 26049 : jMin = -iSrcY;
4935 1775364 : int jMax = nYRadius;
4936 1775364 : if (iSrcY + jMax >= nSrcYSize - 1)
4937 13135 : jMax = nSrcYSize - 1 - iSrcY;
4938 :
4939 1775364 : if (iBand == 0)
4940 : {
4941 1222146 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4942 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4943 : padfWeightsVertical, dfInvWeights);
4944 : }
4945 :
4946 1775364 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4947 : // Process by chunk of 4 rows.
4948 1775364 : int jC = 0;
4949 1775364 : int j = jMin;
4950 1775364 : double dfAccumulator = 0.0;
4951 5023910 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4952 : {
4953 : // Loop over all pixels in the row.
4954 3248546 : int iC = 0;
4955 3248546 : int i = iMin;
4956 : // Process by chunk of 4 cols.
4957 3248546 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4958 3248546 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4959 3248546 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4960 3248546 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4961 11835082 : for (; i + 2 < iMax; i += 4, iC += 4)
4962 : {
4963 : // Retrieve the pixel & accumulate.
4964 8586546 : XMMReg4Double v_pixels_1 =
4965 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4966 8586546 : XMMReg4Double v_pixels_2 =
4967 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4968 8586546 : XMMReg4Double v_pixels_3 =
4969 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4970 8586546 : XMMReg4Double v_pixels_4 =
4971 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4972 :
4973 8586546 : XMMReg4Double v_padfWeight =
4974 8586546 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4975 :
4976 8586546 : v_acc_1 += v_pixels_1 * v_padfWeight;
4977 8586546 : v_acc_2 += v_pixels_2 * v_padfWeight;
4978 8586546 : v_acc_3 += v_pixels_3 * v_padfWeight;
4979 8586546 : v_acc_4 += v_pixels_4 * v_padfWeight;
4980 : }
4981 :
4982 3248546 : if (i < iMax)
4983 : {
4984 49932 : XMMReg2Double v_pixels_1 =
4985 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4986 49932 : XMMReg2Double v_pixels_2 =
4987 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4988 49932 : XMMReg2Double v_pixels_3 =
4989 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4990 49932 : XMMReg2Double v_pixels_4 =
4991 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4992 :
4993 49932 : XMMReg2Double v_padfWeight =
4994 49932 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4995 :
4996 49932 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4997 49932 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4998 49932 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4999 49932 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
5000 :
5001 49932 : i += 2;
5002 49932 : iC += 2;
5003 : }
5004 :
5005 3248546 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
5006 3248546 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
5007 3248546 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
5008 3248546 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
5009 :
5010 3248546 : if (i == iMax)
5011 : {
5012 27545 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
5013 27545 : padfWeightsHorizontal[iC];
5014 27545 : dfAccumulatorLocal_2 +=
5015 27545 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
5016 27545 : padfWeightsHorizontal[iC];
5017 27545 : dfAccumulatorLocal_3 +=
5018 27545 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
5019 27545 : padfWeightsHorizontal[iC];
5020 27545 : dfAccumulatorLocal_4 +=
5021 27545 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
5022 27545 : padfWeightsHorizontal[iC];
5023 : }
5024 :
5025 3248546 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
5026 3248546 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
5027 3248546 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
5028 3248546 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
5029 : }
5030 1866210 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
5031 : {
5032 : // Loop over all pixels in the row.
5033 90850 : int iC = 0;
5034 90850 : int i = iMin;
5035 : // Process by chunk of 4 cols.
5036 90850 : XMMReg4Double v_acc = XMMReg4Double::Zero();
5037 243258 : for (; i + 2 < iMax; i += 4, iC += 4)
5038 : {
5039 : // Retrieve the pixel & accumulate.
5040 152408 : XMMReg4Double v_pixels =
5041 152408 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
5042 152408 : XMMReg4Double v_padfWeight =
5043 152408 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
5044 :
5045 152408 : v_acc += v_pixels * v_padfWeight;
5046 : }
5047 :
5048 90850 : double dfAccumulatorLocal = v_acc.GetHorizSum();
5049 :
5050 90850 : if (i < iMax)
5051 : {
5052 2090 : dfAccumulatorLocal +=
5053 2090 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
5054 2090 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
5055 2090 : padfWeightsHorizontal[iC + 1];
5056 2090 : i += 2;
5057 2090 : iC += 2;
5058 : }
5059 90850 : if (i == iMax)
5060 : {
5061 1839 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
5062 1839 : padfWeightsHorizontal[iC];
5063 : }
5064 :
5065 90850 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
5066 : }
5067 :
5068 1775364 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
5069 :
5070 1775364 : return true;
5071 : }
5072 :
5073 : /************************************************************************/
5074 : /* GWKResampleNoMasksT<GByte>() */
5075 : /************************************************************************/
5076 :
5077 : template <>
5078 1270240 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
5079 : double dfSrcX, double dfSrcY, GByte *pValue,
5080 : double *padfWeightsHorizontal,
5081 : double *padfWeightsVertical,
5082 : double &dfInvWeights)
5083 : {
5084 1270240 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5085 : padfWeightsHorizontal, padfWeightsVertical,
5086 1270240 : dfInvWeights);
5087 : }
5088 :
5089 : /************************************************************************/
5090 : /* GWKResampleNoMasksT<GInt16>() */
5091 : /************************************************************************/
5092 :
5093 : template <>
5094 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
5095 : double dfSrcX, double dfSrcY, GInt16 *pValue,
5096 : double *padfWeightsHorizontal,
5097 : double *padfWeightsVertical,
5098 : double &dfInvWeights)
5099 : {
5100 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5101 : padfWeightsHorizontal, padfWeightsVertical,
5102 252563 : dfInvWeights);
5103 : }
5104 :
5105 : /************************************************************************/
5106 : /* GWKResampleNoMasksT<GUInt16>() */
5107 : /************************************************************************/
5108 :
5109 : template <>
5110 250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
5111 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
5112 : double *padfWeightsHorizontal,
5113 : double *padfWeightsVertical,
5114 : double &dfInvWeights)
5115 : {
5116 250063 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5117 : padfWeightsHorizontal, padfWeightsVertical,
5118 250063 : dfInvWeights);
5119 : }
5120 :
5121 : /************************************************************************/
5122 : /* GWKResampleNoMasksT<float>() */
5123 : /************************************************************************/
5124 :
5125 : template <>
5126 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
5127 : double dfSrcX, double dfSrcY, float *pValue,
5128 : double *padfWeightsHorizontal,
5129 : double *padfWeightsVertical,
5130 : double &dfInvWeights)
5131 : {
5132 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5133 : padfWeightsHorizontal, padfWeightsVertical,
5134 2500 : dfInvWeights);
5135 : }
5136 :
5137 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5138 :
5139 : /************************************************************************/
5140 : /* GWKResampleNoMasksT<double>() */
5141 : /************************************************************************/
5142 :
5143 : template <>
5144 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
5145 : double dfSrcX, double dfSrcY, double *pValue,
5146 : double *padfWeightsHorizontal,
5147 : double *padfWeightsVertical,
5148 : double &dfInvWeights)
5149 : {
5150 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5151 : padfWeightsHorizontal, padfWeightsVertical,
5152 : dfInvWeights);
5153 : }
5154 :
5155 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
5156 :
5157 : #endif /* defined(USE_SSE2) */
5158 :
5159 : /************************************************************************/
5160 : /* GWKRoundSourceCoordinates() */
5161 : /************************************************************************/
5162 :
5163 1000 : static void GWKRoundSourceCoordinates(
5164 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
5165 : double dfSrcCoordPrecision, double dfErrorThreshold,
5166 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
5167 : double dfDstY)
5168 : {
5169 1000 : double dfPct = 0.8;
5170 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
5171 : {
5172 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
5173 : }
5174 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
5175 :
5176 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5177 : {
5178 500000 : const double dfXBefore = padfX[iDstX];
5179 500000 : const double dfYBefore = padfY[iDstX];
5180 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5181 : dfSrcCoordPrecision;
5182 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5183 : dfSrcCoordPrecision;
5184 :
5185 : // If we are in an uncertainty zone, go to non-approximated
5186 : // transformation.
5187 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
5188 : // be at least 10 times greater than the approximation error.
5189 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
5190 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
5191 : {
5192 180090 : padfX[iDstX] = iDstX + dfDstXOff;
5193 180090 : padfY[iDstX] = dfDstY;
5194 180090 : padfZ[iDstX] = 0.0;
5195 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
5196 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
5197 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5198 : dfSrcCoordPrecision;
5199 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5200 : dfSrcCoordPrecision;
5201 : }
5202 : }
5203 1000 : }
5204 :
5205 : /************************************************************************/
5206 : /* GWKCheckAndComputeSrcOffsets() */
5207 : /************************************************************************/
5208 : static CPL_INLINE bool
5209 188281000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5210 : int _iDstY, double *_padfX, double *_padfY,
5211 : int _nSrcXSize, int _nSrcYSize,
5212 : GPtrDiff_t &iSrcOffset)
5213 : {
5214 188281000 : const GDALWarpKernel *_poWK = psJob->poWK;
5215 194887000 : for (int iTry = 0; iTry < 2; ++iTry)
5216 : {
5217 194887000 : if (iTry == 1)
5218 : {
5219 : // If the source coordinate is slightly outside of the source raster
5220 : // retry to transform it alone, so that the exact coordinate
5221 : // transformer is used.
5222 :
5223 6605570 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5224 6605570 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5225 6605570 : double dfZ = 0;
5226 6605570 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5227 6605570 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5228 6605570 : _pabSuccess + _iDstX);
5229 : }
5230 194887000 : if (!_pabSuccess[_iDstX])
5231 3615020 : return false;
5232 :
5233 : // If this happens this is likely the symptom of a bug somewhere.
5234 191272000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5235 : {
5236 : static bool bNanCoordFound = false;
5237 0 : if (!bNanCoordFound)
5238 : {
5239 0 : CPLDebug("WARP",
5240 : "GWKCheckAndComputeSrcOffsets(): "
5241 : "NaN coordinate found on point %d.",
5242 : _iDstX);
5243 0 : bNanCoordFound = true;
5244 : }
5245 0 : return false;
5246 : }
5247 :
5248 : /* --------------------------------------------------------------------
5249 : */
5250 : /* Figure out what pixel we want in our source raster, and skip */
5251 : /* further processing if it is well off the source image. */
5252 : /* --------------------------------------------------------------------
5253 : */
5254 : /* We test against the value before casting to avoid the */
5255 : /* problem of asymmetric truncation effects around zero. That is */
5256 : /* -0.5 will be 0 when cast to an int. */
5257 191272000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5258 : {
5259 : // If the source coordinate is slightly outside of the source raster
5260 : // retry to transform it alone, so that the exact coordinate
5261 : // transformer is used.
5262 16862100 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5263 2889880 : continue;
5264 13972200 : return false;
5265 : }
5266 :
5267 174410000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5268 : {
5269 : // If the source coordinate is slightly outside of the source raster
5270 : // retry to transform it alone, so that the exact coordinate
5271 : // transformer is used.
5272 7904380 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5273 636226 : continue;
5274 7268150 : return false;
5275 : }
5276 :
5277 : // Check for potential overflow when casting from float to int, (if
5278 : // operating outside natural projection area, padfX/Y can be a very huge
5279 : // positive number before doing the actual conversion), as such cast is
5280 : // undefined behavior that can trigger exception with some compilers
5281 : // (see #6753)
5282 166505000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5283 : {
5284 : // If the source coordinate is slightly outside of the source raster
5285 : // retry to transform it alone, so that the exact coordinate
5286 : // transformer is used.
5287 13197100 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5288 2712810 : continue;
5289 10484300 : return false;
5290 : }
5291 153308000 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5292 : {
5293 : // If the source coordinate is slightly outside of the source raster
5294 : // retry to transform it alone, so that the exact coordinate
5295 : // transformer is used.
5296 5693610 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5297 366653 : continue;
5298 5326950 : return false;
5299 : }
5300 :
5301 147614000 : break;
5302 : }
5303 :
5304 147614000 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5305 147614000 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5306 147614000 : if (iSrcX == _nSrcXSize)
5307 0 : iSrcX--;
5308 147614000 : if (iSrcY == _nSrcYSize)
5309 0 : iSrcY--;
5310 :
5311 : // Those checks should normally be OK given the previous ones.
5312 147614000 : CPLAssert(iSrcX >= 0);
5313 147614000 : CPLAssert(iSrcY >= 0);
5314 147614000 : CPLAssert(iSrcX < _nSrcXSize);
5315 147614000 : CPLAssert(iSrcY < _nSrcYSize);
5316 :
5317 147614000 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5318 :
5319 147614000 : return true;
5320 : }
5321 :
5322 : /************************************************************************/
5323 : /* GWKOneSourceCornerFailsToReproject() */
5324 : /************************************************************************/
5325 :
5326 934 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5327 : {
5328 934 : GDALWarpKernel *poWK = psJob->poWK;
5329 2792 : for (int iY = 0; iY <= 1; ++iY)
5330 : {
5331 5580 : for (int iX = 0; iX <= 1; ++iX)
5332 : {
5333 3722 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5334 3722 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5335 3722 : double dfZTmp = 0;
5336 3722 : int nSuccess = FALSE;
5337 3722 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5338 : &dfYTmp, &dfZTmp, &nSuccess);
5339 3722 : if (!nSuccess)
5340 6 : return true;
5341 : }
5342 : }
5343 928 : return false;
5344 : }
5345 :
5346 : /************************************************************************/
5347 : /* GWKAdjustSrcOffsetOnEdge() */
5348 : /************************************************************************/
5349 :
5350 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5351 : GPtrDiff_t &iSrcOffset)
5352 : {
5353 9714 : GDALWarpKernel *poWK = psJob->poWK;
5354 9714 : const int nSrcXSize = poWK->nSrcXSize;
5355 9714 : const int nSrcYSize = poWK->nSrcYSize;
5356 :
5357 : // Check if the computed source position slightly altered
5358 : // fails to reproject. If so, then we are at the edge of
5359 : // the validity area, and it is worth checking neighbour
5360 : // source pixels for validity.
5361 9714 : int nSuccess = FALSE;
5362 : {
5363 9714 : double dfXTmp =
5364 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5365 9714 : double dfYTmp =
5366 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5367 9714 : double dfZTmp = 0;
5368 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5369 : &dfZTmp, &nSuccess);
5370 : }
5371 9714 : if (nSuccess)
5372 : {
5373 6996 : double dfXTmp =
5374 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5375 6996 : double dfYTmp =
5376 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5377 6996 : double dfZTmp = 0;
5378 6996 : nSuccess = FALSE;
5379 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5380 : &dfZTmp, &nSuccess);
5381 : }
5382 9714 : if (nSuccess)
5383 : {
5384 5624 : double dfXTmp =
5385 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5386 5624 : double dfYTmp =
5387 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5388 5624 : double dfZTmp = 0;
5389 5624 : nSuccess = FALSE;
5390 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5391 : &dfZTmp, &nSuccess);
5392 : }
5393 :
5394 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5395 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5396 : {
5397 1860 : iSrcOffset++;
5398 1860 : return true;
5399 : }
5400 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5401 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5402 : {
5403 1334 : iSrcOffset += nSrcXSize;
5404 1334 : return true;
5405 : }
5406 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5407 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5408 : {
5409 956 : iSrcOffset--;
5410 956 : return true;
5411 : }
5412 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5413 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5414 : {
5415 340 : iSrcOffset -= nSrcXSize;
5416 340 : return true;
5417 : }
5418 :
5419 5224 : return false;
5420 : }
5421 :
5422 : /************************************************************************/
5423 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5424 : /************************************************************************/
5425 :
5426 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5427 : GPtrDiff_t &iSrcOffset)
5428 : {
5429 0 : GDALWarpKernel *poWK = psJob->poWK;
5430 0 : const int nSrcXSize = poWK->nSrcXSize;
5431 0 : const int nSrcYSize = poWK->nSrcYSize;
5432 :
5433 : // Check if the computed source position slightly altered
5434 : // fails to reproject. If so, then we are at the edge of
5435 : // the validity area, and it is worth checking neighbour
5436 : // source pixels for validity.
5437 0 : int nSuccess = FALSE;
5438 : {
5439 0 : double dfXTmp =
5440 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5441 0 : double dfYTmp =
5442 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5443 0 : double dfZTmp = 0;
5444 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5445 : &dfZTmp, &nSuccess);
5446 : }
5447 0 : if (nSuccess)
5448 : {
5449 0 : double dfXTmp =
5450 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5451 0 : double dfYTmp =
5452 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5453 0 : double dfZTmp = 0;
5454 0 : nSuccess = FALSE;
5455 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5456 : &dfZTmp, &nSuccess);
5457 : }
5458 0 : if (nSuccess)
5459 : {
5460 0 : double dfXTmp =
5461 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5462 0 : double dfYTmp =
5463 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5464 0 : double dfZTmp = 0;
5465 0 : nSuccess = FALSE;
5466 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5467 : &dfZTmp, &nSuccess);
5468 : }
5469 :
5470 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5471 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5472 : SRC_DENSITY_THRESHOLD_FLOAT)
5473 : {
5474 0 : iSrcOffset++;
5475 0 : return true;
5476 : }
5477 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5478 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5479 : SRC_DENSITY_THRESHOLD_FLOAT)
5480 : {
5481 0 : iSrcOffset += nSrcXSize;
5482 0 : return true;
5483 : }
5484 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5485 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5486 : SRC_DENSITY_THRESHOLD_FLOAT)
5487 : {
5488 0 : iSrcOffset--;
5489 0 : return true;
5490 : }
5491 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5492 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5493 : SRC_DENSITY_THRESHOLD_FLOAT)
5494 : {
5495 0 : iSrcOffset -= nSrcXSize;
5496 0 : return true;
5497 : }
5498 :
5499 0 : return false;
5500 : }
5501 :
5502 : /************************************************************************/
5503 : /* GWKGeneralCase() */
5504 : /* */
5505 : /* This is the most general case. It attempts to handle all */
5506 : /* possible features with relatively little concern for */
5507 : /* efficiency. */
5508 : /************************************************************************/
5509 :
5510 239 : static void GWKGeneralCaseThread(void *pData)
5511 : {
5512 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5513 239 : GDALWarpKernel *poWK = psJob->poWK;
5514 239 : const int iYMin = psJob->iYMin;
5515 239 : const int iYMax = psJob->iYMax;
5516 : const double dfMultFactorVerticalShiftPipeline =
5517 239 : poWK->bApplyVerticalShift
5518 239 : ? CPLAtof(CSLFetchNameValueDef(
5519 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5520 : "1.0"))
5521 239 : : 0.0;
5522 : const bool bAvoidNoDataSingleBand =
5523 239 : poWK->nBands == 1 ||
5524 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5525 239 : "UNIFIED_SRC_NODATA", "FALSE"));
5526 :
5527 239 : int nDstXSize = poWK->nDstXSize;
5528 239 : int nSrcXSize = poWK->nSrcXSize;
5529 239 : int nSrcYSize = poWK->nSrcYSize;
5530 :
5531 : /* -------------------------------------------------------------------- */
5532 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5533 : /* scanlines worth of positions. */
5534 : /* -------------------------------------------------------------------- */
5535 : // For x, 2 *, because we cache the precomputed values at the end.
5536 : double *padfX =
5537 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5538 : double *padfY =
5539 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5540 : double *padfZ =
5541 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5542 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5543 :
5544 239 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5545 :
5546 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5547 239 : if (poWK->eResample != GRA_NearestNeighbour)
5548 : {
5549 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5550 : }
5551 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5552 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5553 239 : const double dfErrorThreshold = CPLAtof(
5554 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5555 :
5556 : const bool bOneSourceCornerFailsToReproject =
5557 239 : GWKOneSourceCornerFailsToReproject(psJob);
5558 :
5559 : // Precompute values.
5560 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5561 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5562 :
5563 : /* ==================================================================== */
5564 : /* Loop over output lines. */
5565 : /* ==================================================================== */
5566 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5567 : {
5568 : /* --------------------------------------------------------------------
5569 : */
5570 : /* Setup points to transform to source image space. */
5571 : /* --------------------------------------------------------------------
5572 : */
5573 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5574 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5575 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5576 236160 : padfY[iDstX] = dfY;
5577 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5578 :
5579 : /* --------------------------------------------------------------------
5580 : */
5581 : /* Transform the points from destination pixel/line coordinates */
5582 : /* to source pixel/line coordinates. */
5583 : /* --------------------------------------------------------------------
5584 : */
5585 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5586 : padfY, padfZ, pabSuccess);
5587 6230 : if (dfSrcCoordPrecision > 0.0)
5588 : {
5589 0 : GWKRoundSourceCoordinates(
5590 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5591 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5592 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5593 : }
5594 :
5595 : /* ====================================================================
5596 : */
5597 : /* Loop over pixels in output scanline. */
5598 : /* ====================================================================
5599 : */
5600 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5601 : {
5602 236160 : GPtrDiff_t iSrcOffset = 0;
5603 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5604 : padfX, padfY, nSrcXSize,
5605 : nSrcYSize, iSrcOffset))
5606 0 : continue;
5607 :
5608 : /* --------------------------------------------------------------------
5609 : */
5610 : /* Do not try to apply transparent/invalid source pixels to the
5611 : */
5612 : /* destination. This currently ignores the multi-pixel input
5613 : */
5614 : /* of bilinear and cubic resamples. */
5615 : /* --------------------------------------------------------------------
5616 : */
5617 236160 : double dfDensity = 1.0;
5618 :
5619 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5620 : {
5621 1200 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5622 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5623 : {
5624 0 : if (!bOneSourceCornerFailsToReproject)
5625 : {
5626 0 : continue;
5627 : }
5628 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5629 : psJob, iSrcOffset))
5630 : {
5631 0 : dfDensity =
5632 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5633 : }
5634 : else
5635 : {
5636 0 : continue;
5637 : }
5638 : }
5639 : }
5640 :
5641 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5642 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5643 : {
5644 0 : if (!bOneSourceCornerFailsToReproject)
5645 : {
5646 0 : continue;
5647 : }
5648 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5649 : {
5650 0 : continue;
5651 : }
5652 : }
5653 :
5654 : /* ====================================================================
5655 : */
5656 : /* Loop processing each band. */
5657 : /* ====================================================================
5658 : */
5659 236160 : bool bHasFoundDensity = false;
5660 :
5661 236160 : const GPtrDiff_t iDstOffset =
5662 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5663 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5664 : {
5665 236160 : double dfBandDensity = 0.0;
5666 236160 : double dfValueReal = 0.0;
5667 236160 : double dfValueImag = 0.0;
5668 :
5669 : /* --------------------------------------------------------------------
5670 : */
5671 : /* Collect the source value. */
5672 : /* --------------------------------------------------------------------
5673 : */
5674 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5675 : nSrcYSize == 1)
5676 : {
5677 : // FALSE is returned if dfBandDensity == 0, which is
5678 : // checked below.
5679 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5680 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5681 : &dfValueImag));
5682 : }
5683 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5684 : {
5685 248 : GWKBilinearResample4Sample(
5686 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5687 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5688 : &dfValueReal, &dfValueImag);
5689 : }
5690 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5691 : {
5692 248 : GWKCubicResample4Sample(
5693 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5694 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5695 : &dfValueReal, &dfValueImag);
5696 : }
5697 : else
5698 : #ifdef DEBUG
5699 : // Only useful for clang static analyzer.
5700 235096 : if (psWrkStruct != nullptr)
5701 : #endif
5702 : {
5703 235096 : psWrkStruct->pfnGWKResample(
5704 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5705 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5706 : &dfValueReal, &dfValueImag, psWrkStruct);
5707 : }
5708 :
5709 : // If we didn't find any valid inputs skip to next band.
5710 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5711 0 : continue;
5712 :
5713 236160 : if (poWK->bApplyVerticalShift)
5714 : {
5715 0 : if (!std::isfinite(padfZ[iDstX]))
5716 0 : continue;
5717 : // Subtract padfZ[] since the coordinate transformation is
5718 : // from target to source
5719 0 : dfValueReal =
5720 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5721 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5722 : }
5723 :
5724 236160 : bHasFoundDensity = true;
5725 :
5726 : /* --------------------------------------------------------------------
5727 : */
5728 : /* We have a computed value from the source. Now apply it
5729 : * to */
5730 : /* the destination pixel. */
5731 : /* --------------------------------------------------------------------
5732 : */
5733 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5734 : dfValueReal, dfValueImag,
5735 : bAvoidNoDataSingleBand);
5736 : }
5737 :
5738 236160 : if (!bHasFoundDensity)
5739 0 : continue;
5740 :
5741 236160 : if (!bAvoidNoDataSingleBand)
5742 : {
5743 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
5744 : }
5745 :
5746 : /* --------------------------------------------------------------------
5747 : */
5748 : /* Update destination density/validity masks. */
5749 : /* --------------------------------------------------------------------
5750 : */
5751 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5752 :
5753 236160 : if (poWK->panDstValid != nullptr)
5754 : {
5755 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5756 : }
5757 : } /* Next iDstX */
5758 :
5759 : /* --------------------------------------------------------------------
5760 : */
5761 : /* Report progress to the user, and optionally cancel out. */
5762 : /* --------------------------------------------------------------------
5763 : */
5764 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5765 0 : break;
5766 : }
5767 :
5768 : /* -------------------------------------------------------------------- */
5769 : /* Cleanup and return. */
5770 : /* -------------------------------------------------------------------- */
5771 239 : CPLFree(padfX);
5772 239 : CPLFree(padfY);
5773 239 : CPLFree(padfZ);
5774 239 : CPLFree(pabSuccess);
5775 239 : if (psWrkStruct)
5776 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5777 239 : }
5778 :
5779 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5780 : {
5781 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5782 : }
5783 :
5784 : /************************************************************************/
5785 : /* GWKRealCase() */
5786 : /* */
5787 : /* General case for non-complex data types. */
5788 : /************************************************************************/
5789 :
5790 223 : static void GWKRealCaseThread(void *pData)
5791 :
5792 : {
5793 223 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5794 223 : GDALWarpKernel *poWK = psJob->poWK;
5795 223 : const int iYMin = psJob->iYMin;
5796 223 : const int iYMax = psJob->iYMax;
5797 :
5798 223 : const int nDstXSize = poWK->nDstXSize;
5799 223 : const int nSrcXSize = poWK->nSrcXSize;
5800 223 : const int nSrcYSize = poWK->nSrcYSize;
5801 : const double dfMultFactorVerticalShiftPipeline =
5802 223 : poWK->bApplyVerticalShift
5803 223 : ? CPLAtof(CSLFetchNameValueDef(
5804 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5805 : "1.0"))
5806 223 : : 0.0;
5807 : const bool bAvoidNoDataSingleBand =
5808 305 : poWK->nBands == 1 ||
5809 82 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5810 223 : "UNIFIED_SRC_NODATA", "FALSE"));
5811 :
5812 : /* -------------------------------------------------------------------- */
5813 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5814 : /* scanlines worth of positions. */
5815 : /* -------------------------------------------------------------------- */
5816 :
5817 : // For x, 2 *, because we cache the precomputed values at the end.
5818 : double *padfX =
5819 223 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5820 : double *padfY =
5821 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5822 : double *padfZ =
5823 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5824 223 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5825 :
5826 223 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5827 :
5828 223 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5829 223 : if (poWK->eResample != GRA_NearestNeighbour)
5830 : {
5831 181 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5832 : }
5833 223 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5834 223 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5835 223 : const double dfErrorThreshold = CPLAtof(
5836 223 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5837 :
5838 638 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5839 415 : poWK->papanBandSrcValid == nullptr &&
5840 192 : poWK->pafUnifiedSrcDensity != nullptr;
5841 :
5842 : const bool bOneSourceCornerFailsToReproject =
5843 223 : GWKOneSourceCornerFailsToReproject(psJob);
5844 :
5845 : // Precompute values.
5846 24657 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5847 24434 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5848 :
5849 : /* ==================================================================== */
5850 : /* Loop over output lines. */
5851 : /* ==================================================================== */
5852 25909 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5853 : {
5854 : /* --------------------------------------------------------------------
5855 : */
5856 : /* Setup points to transform to source image space. */
5857 : /* --------------------------------------------------------------------
5858 : */
5859 25686 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5860 25686 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5861 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5862 44568500 : padfY[iDstX] = dfY;
5863 25686 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5864 :
5865 : /* --------------------------------------------------------------------
5866 : */
5867 : /* Transform the points from destination pixel/line coordinates */
5868 : /* to source pixel/line coordinates. */
5869 : /* --------------------------------------------------------------------
5870 : */
5871 25686 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5872 : padfY, padfZ, pabSuccess);
5873 25686 : if (dfSrcCoordPrecision > 0.0)
5874 : {
5875 0 : GWKRoundSourceCoordinates(
5876 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5877 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5878 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5879 : }
5880 :
5881 : /* ====================================================================
5882 : */
5883 : /* Loop over pixels in output scanline. */
5884 : /* ====================================================================
5885 : */
5886 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5887 : {
5888 44568500 : GPtrDiff_t iSrcOffset = 0;
5889 44568500 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5890 : padfX, padfY, nSrcXSize,
5891 : nSrcYSize, iSrcOffset))
5892 43823900 : continue;
5893 :
5894 : /* --------------------------------------------------------------------
5895 : */
5896 : /* Do not try to apply transparent/invalid source pixels to the
5897 : */
5898 : /* destination. This currently ignores the multi-pixel input
5899 : */
5900 : /* of bilinear and cubic resamples. */
5901 : /* --------------------------------------------------------------------
5902 : */
5903 31812400 : double dfDensity = 1.0;
5904 :
5905 31812400 : if (poWK->pafUnifiedSrcDensity != nullptr)
5906 : {
5907 1669560 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5908 1669560 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5909 : {
5910 1538480 : if (!bOneSourceCornerFailsToReproject)
5911 : {
5912 1538480 : continue;
5913 : }
5914 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5915 : psJob, iSrcOffset))
5916 : {
5917 0 : dfDensity =
5918 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5919 : }
5920 : else
5921 : {
5922 0 : continue;
5923 : }
5924 : }
5925 : }
5926 :
5927 59903100 : if (poWK->panUnifiedSrcValid != nullptr &&
5928 29629200 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5929 : {
5930 29531600 : if (!bOneSourceCornerFailsToReproject)
5931 : {
5932 29529300 : continue;
5933 : }
5934 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5935 : {
5936 0 : continue;
5937 : }
5938 : }
5939 :
5940 : /* ====================================================================
5941 : */
5942 : /* Loop processing each band. */
5943 : /* ====================================================================
5944 : */
5945 744578 : bool bHasFoundDensity = false;
5946 :
5947 744578 : const GPtrDiff_t iDstOffset =
5948 744578 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5949 2092550 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5950 : {
5951 1347980 : double dfBandDensity = 0.0;
5952 1347980 : double dfValueReal = 0.0;
5953 :
5954 : /* --------------------------------------------------------------------
5955 : */
5956 : /* Collect the source value. */
5957 : /* --------------------------------------------------------------------
5958 : */
5959 1347980 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5960 : nSrcYSize == 1)
5961 : {
5962 : // FALSE is returned if dfBandDensity == 0, which is
5963 : // checked below.
5964 15516 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5965 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5966 : }
5967 1332460 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5968 : {
5969 2046 : double dfValueImagIgnored = 0.0;
5970 2046 : GWKBilinearResample4Sample(
5971 2046 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5972 2046 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5973 2046 : &dfValueReal, &dfValueImagIgnored);
5974 : }
5975 1330410 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5976 : {
5977 691552 : if (bSrcMaskIsDensity)
5978 : {
5979 389755 : if (poWK->eWorkingDataType == GDT_UInt8)
5980 : {
5981 389755 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5982 389755 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5983 389755 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5984 : &dfValueReal);
5985 : }
5986 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
5987 : {
5988 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
5989 0 : GUInt16>(poWK, iBand,
5990 0 : padfX[iDstX] - poWK->nSrcXOff,
5991 0 : padfY[iDstX] - poWK->nSrcYOff,
5992 : &dfBandDensity, &dfValueReal);
5993 : }
5994 : else
5995 : {
5996 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
5997 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5998 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5999 : &dfValueReal);
6000 : }
6001 : }
6002 : else
6003 : {
6004 301797 : double dfValueImagIgnored = 0.0;
6005 301797 : GWKCubicResample4Sample(
6006 301797 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6007 301797 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6008 : &dfValueReal, &dfValueImagIgnored);
6009 691552 : }
6010 : }
6011 : else
6012 : #ifdef DEBUG
6013 : // Only useful for clang static analyzer.
6014 638861 : if (psWrkStruct != nullptr)
6015 : #endif
6016 : {
6017 638861 : double dfValueImagIgnored = 0.0;
6018 638861 : psWrkStruct->pfnGWKResample(
6019 638861 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6020 638861 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6021 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6022 : }
6023 :
6024 : // If we didn't find any valid inputs skip to next band.
6025 1347980 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6026 0 : continue;
6027 :
6028 1347980 : if (poWK->bApplyVerticalShift)
6029 : {
6030 0 : if (!std::isfinite(padfZ[iDstX]))
6031 0 : continue;
6032 : // Subtract padfZ[] since the coordinate transformation is
6033 : // from target to source
6034 0 : dfValueReal =
6035 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6036 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6037 : }
6038 :
6039 1347980 : bHasFoundDensity = true;
6040 :
6041 : /* --------------------------------------------------------------------
6042 : */
6043 : /* We have a computed value from the source. Now apply it
6044 : * to */
6045 : /* the destination pixel. */
6046 : /* --------------------------------------------------------------------
6047 : */
6048 1347980 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6049 : dfValueReal, bAvoidNoDataSingleBand);
6050 : }
6051 :
6052 744578 : if (!bHasFoundDensity)
6053 0 : continue;
6054 :
6055 744578 : if (!bAvoidNoDataSingleBand)
6056 : {
6057 100295 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6058 : }
6059 :
6060 : /* --------------------------------------------------------------------
6061 : */
6062 : /* Update destination density/validity masks. */
6063 : /* --------------------------------------------------------------------
6064 : */
6065 744578 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6066 :
6067 744578 : if (poWK->panDstValid != nullptr)
6068 : {
6069 104586 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6070 : }
6071 : } // Next iDstX.
6072 :
6073 : /* --------------------------------------------------------------------
6074 : */
6075 : /* Report progress to the user, and optionally cancel out. */
6076 : /* --------------------------------------------------------------------
6077 : */
6078 25686 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6079 0 : break;
6080 : }
6081 :
6082 : /* -------------------------------------------------------------------- */
6083 : /* Cleanup and return. */
6084 : /* -------------------------------------------------------------------- */
6085 223 : CPLFree(padfX);
6086 223 : CPLFree(padfY);
6087 223 : CPLFree(padfZ);
6088 223 : CPLFree(pabSuccess);
6089 223 : if (psWrkStruct)
6090 181 : GWKResampleDeleteWrkStruct(psWrkStruct);
6091 223 : }
6092 :
6093 223 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6094 : {
6095 223 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6096 : }
6097 :
6098 : /************************************************************************/
6099 : /* GWKCubicResampleNoMasks4MultiBandT() */
6100 : /************************************************************************/
6101 :
/* We restrict to 64bit processors because they are guaranteed to have SSE2 */
/* and enough SSE registers. */
6104 : #if defined(USE_SSE2)
6105 :
6106 141836000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6107 : const __m128 row2, const __m128 row3,
6108 : const __m128 weightsXY0,
6109 : const __m128 weightsXY1,
6110 : const __m128 weightsXY2,
6111 : const __m128 weightsXY3)
6112 : {
6113 992853000 : return XMMHorizontalAdd(_mm_add_ps(
6114 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6115 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6116 141836000 : _mm_mul_ps(row3, weightsXY3))));
6117 : }
6118 :
6119 : template <class T>
6120 48760542 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6121 : double dfSrcX, double dfSrcY,
6122 : const GPtrDiff_t iDstOffset)
6123 : {
6124 48760542 : const double dfSrcXShifted = dfSrcX - 0.5;
6125 48760542 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6126 48760542 : const double dfSrcYShifted = dfSrcY - 0.5;
6127 48760542 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6128 48760542 : const GPtrDiff_t iSrcOffset =
6129 48760542 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6130 :
6131 : // Get the bilinear interpolation at the image borders.
6132 48760542 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6133 47414062 : iSrcY + 2 >= poWK->nSrcYSize)
6134 : {
6135 5927540 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6136 : {
6137 : T value;
6138 4445650 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6139 : &value);
6140 4445650 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6141 : value;
6142 1481880 : }
6143 : }
6144 : else
6145 : {
6146 47278662 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6147 47278662 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6148 :
6149 : float afCoeffsX[4];
6150 : float afCoeffsY[4];
6151 47278662 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6152 47278662 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
6153 47278662 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6154 : const auto weightsXY0 =
6155 94557424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6156 : const auto weightsXY1 =
6157 94557424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6158 : const auto weightsXY2 =
6159 94557424 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6160 : const auto weightsXY3 =
6161 47278662 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6162 :
6163 47278662 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6164 :
6165 47278662 : int iBand = 0;
6166 : // Process 2 bands at a time
6167 94557424 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6168 : {
6169 47278662 : const T *CPL_RESTRICT pBand0 =
6170 47278662 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6171 47278662 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6172 : const auto row1_0 =
6173 47278662 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6174 : const auto row2_0 =
6175 47278662 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6176 : const auto row3_0 =
6177 47278662 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6178 :
6179 47278662 : const T *CPL_RESTRICT pBand1 =
6180 47278662 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6181 47278662 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6182 : const auto row1_1 =
6183 47278662 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6184 : const auto row2_1 =
6185 47278662 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6186 : const auto row3_1 =
6187 47278662 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6188 :
6189 : const float fValue_0 =
6190 47278662 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6191 : weightsXY1, weightsXY2, weightsXY3);
6192 :
6193 : const float fValue_1 =
6194 47278662 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6195 : weightsXY1, weightsXY2, weightsXY3);
6196 :
6197 47278662 : T *CPL_RESTRICT pDstBand0 =
6198 47278662 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6199 47278662 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6200 :
6201 47278662 : T *CPL_RESTRICT pDstBand1 =
6202 47278662 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6203 47278662 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6204 : }
6205 47278662 : if (iBand < poWK->nBands)
6206 : {
6207 47278662 : const T *CPL_RESTRICT pBand0 =
6208 47278662 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6209 47278662 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6210 : const auto row1 =
6211 47278662 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6212 : const auto row2 =
6213 47278662 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6214 : const auto row3 =
6215 47278662 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6216 :
6217 : const float fValue =
6218 47278662 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6219 : weightsXY2, weightsXY3);
6220 :
6221 47278662 : T *CPL_RESTRICT pDstBand =
6222 47278662 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6223 47278662 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6224 : }
6225 : }
6226 :
6227 48760542 : if (poWK->pafDstDensity)
6228 46606601 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6229 48760542 : }
6230 :
6231 : #endif // defined(USE_SSE2)
6232 :
6233 : /************************************************************************/
6234 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6235 : /************************************************************************/
6236 :
6237 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6238 2028 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6239 :
6240 : {
6241 2028 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6242 2028 : GDALWarpKernel *poWK = psJob->poWK;
6243 2028 : const int iYMin = psJob->iYMin;
6244 2028 : const int iYMax = psJob->iYMax;
6245 2010 : const double dfMultFactorVerticalShiftPipeline =
6246 2028 : poWK->bApplyVerticalShift
6247 18 : ? CPLAtof(CSLFetchNameValueDef(
6248 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6249 : "1.0"))
6250 : : 0.0;
6251 :
6252 2028 : const int nDstXSize = poWK->nDstXSize;
6253 2028 : const int nSrcXSize = poWK->nSrcXSize;
6254 2028 : const int nSrcYSize = poWK->nSrcYSize;
6255 :
6256 : /* -------------------------------------------------------------------- */
6257 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6258 : /* scanlines worth of positions. */
6259 : /* -------------------------------------------------------------------- */
6260 :
6261 : // For x, 2 *, because we cache the precomputed values at the end.
6262 : double *padfX =
6263 2028 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6264 : double *padfY =
6265 2028 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6266 : double *padfZ =
6267 2028 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6268 2028 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6269 :
6270 2028 : const int nXRadius = poWK->nXRadius;
6271 : double *padfWeightsX =
6272 2028 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6273 : double *padfWeightsY = static_cast<double *>(
6274 2028 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6275 2028 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6276 2028 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6277 2028 : const double dfErrorThreshold = CPLAtof(
6278 2028 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6279 :
6280 : // Precompute values.
6281 509839 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6282 507811 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6283 :
6284 : /* ==================================================================== */
6285 : /* Loop over output lines. */
6286 : /* ==================================================================== */
6287 316415 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6288 : {
6289 : /* --------------------------------------------------------------------
6290 : */
6291 : /* Setup points to transform to source image space. */
6292 : /* --------------------------------------------------------------------
6293 : */
6294 314388 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6295 314388 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6296 110215489 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6297 109901005 : padfY[iDstX] = dfY;
6298 314388 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6299 :
6300 : /* --------------------------------------------------------------------
6301 : */
6302 : /* Transform the points from destination pixel/line coordinates */
6303 : /* to source pixel/line coordinates. */
6304 : /* --------------------------------------------------------------------
6305 : */
6306 314388 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6307 : padfY, padfZ, pabSuccess);
6308 314388 : if (dfSrcCoordPrecision > 0.0)
6309 : {
6310 1000 : GWKRoundSourceCoordinates(
6311 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6312 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6313 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6314 : }
6315 :
6316 : /* ====================================================================
6317 : */
6318 : /* Loop over pixels in output scanline. */
6319 : /* ====================================================================
6320 : */
6321 110215489 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6322 : {
6323 109901005 : GPtrDiff_t iSrcOffset = 0;
6324 109901005 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6325 : padfX, padfY, nSrcXSize,
6326 : nSrcYSize, iSrcOffset))
6327 61411278 : continue;
6328 :
6329 : /* ====================================================================
6330 : */
6331 : /* Loop processing each band. */
6332 : /* ====================================================================
6333 : */
6334 97250319 : const GPtrDiff_t iDstOffset =
6335 97250319 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6336 :
6337 : #if defined(USE_SSE2)
6338 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6339 : (std::is_same<T, GByte>::value ||
6340 : std::is_same<T, GUInt16>::value))
6341 : {
6342 49826241 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6343 : {
6344 48760542 : GWKCubicResampleNoMasks4MultiBandT<T>(
6345 48760542 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6346 48760542 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6347 :
6348 48760542 : continue;
6349 : }
6350 : }
6351 : #endif // defined(USE_SSE2)
6352 :
6353 48489690 : [[maybe_unused]] double dfInvWeights = 0;
6354 134905636 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6355 : {
6356 86415430 : T value = 0;
6357 : if constexpr (eResample == GRA_NearestNeighbour)
6358 : {
6359 78494030 : value = reinterpret_cast<T *>(
6360 78494030 : poWK->papabySrcImage[iBand])[iSrcOffset];
6361 : }
6362 : else if constexpr (bUse4SamplesFormula)
6363 : {
6364 : if constexpr (eResample == GRA_Bilinear)
6365 3845071 : GWKBilinearResampleNoMasks4SampleT(
6366 3845071 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6367 3845071 : padfY[iDstX] - poWK->nSrcYOff, &value);
6368 : else
6369 2300964 : GWKCubicResampleNoMasks4SampleT(
6370 2300964 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6371 2300964 : padfY[iDstX] - poWK->nSrcYOff, &value);
6372 : }
6373 : else
6374 : {
6375 1775365 : GWKResampleNoMasksT(
6376 1775365 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6377 1775365 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6378 : padfWeightsY, dfInvWeights);
6379 : }
6380 :
6381 86415430 : if (poWK->bApplyVerticalShift)
6382 : {
6383 818 : if (!std::isfinite(padfZ[iDstX]))
6384 0 : continue;
6385 : // Subtract padfZ[] since the coordinate transformation is
6386 : // from target to source
6387 818 : value = GWKClampValueT<T>(
6388 818 : double(value) * poWK->dfMultFactorVerticalShift -
6389 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6390 : }
6391 :
6392 86415430 : if (poWK->pafDstDensity)
6393 13020199 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6394 :
6395 86415430 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6396 : value;
6397 : }
6398 : }
6399 :
6400 : /* --------------------------------------------------------------------
6401 : */
6402 : /* Report progress to the user, and optionally cancel out. */
6403 : /* --------------------------------------------------------------------
6404 : */
6405 314388 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6406 1 : break;
6407 : }
6408 :
6409 : /* -------------------------------------------------------------------- */
6410 : /* Cleanup and return. */
6411 : /* -------------------------------------------------------------------- */
6412 2028 : CPLFree(padfX);
6413 2028 : CPLFree(padfY);
6414 2028 : CPLFree(padfZ);
6415 2028 : CPLFree(pabSuccess);
6416 2028 : CPLFree(padfWeightsX);
6417 2028 : CPLFree(padfWeightsY);
6418 2028 : }
6419 :
6420 : template <class T, GDALResampleAlg eResample>
6421 1004 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6422 : {
6423 1004 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6424 : pData);
6425 1004 : }
6426 :
6427 : template <class T, GDALResampleAlg eResample>
6428 1024 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6429 :
6430 : {
6431 1024 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6432 1024 : GDALWarpKernel *poWK = psJob->poWK;
6433 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6434 1024 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
6435 1024 : if (bUse4SamplesFormula)
6436 967 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6437 : pData);
6438 : else
6439 57 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6440 : pData);
6441 1024 : }
6442 :
6443 953 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6444 : {
6445 953 : return GWKRun(
6446 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6447 953 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6448 : }
6449 :
6450 128 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6451 : {
6452 128 : return GWKRun(
6453 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6454 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6455 128 : GRA_Bilinear>);
6456 : }
6457 :
6458 850 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6459 : {
6460 850 : return GWKRun(
6461 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6462 850 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6463 : }
6464 :
6465 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6466 : {
6467 9 : return GWKRun(
6468 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6469 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6470 : }
6471 :
6472 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6473 :
6474 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6475 : {
6476 : return GWKRun(
6477 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6478 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6479 : }
6480 : #endif
6481 :
6482 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6483 : {
6484 12 : return GWKRun(
6485 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6486 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6487 : }
6488 :
/************************************************************************/
/*                         GWKNearestThread()                           */
/*                                                                      */
/*      Case for nearest neighbour resampling using valid flags,        */
/*      templated on the pixel type (instantiated in this file for      */
/*      8bit, 16bit and float data). Should be as fast as possible      */
/*      for this particular transformation type.                        */
/************************************************************************/
6496 :
6497 472 : template <class T> static void GWKNearestThread(void *pData)
6498 :
6499 : {
6500 472 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6501 472 : GDALWarpKernel *poWK = psJob->poWK;
6502 472 : const int iYMin = psJob->iYMin;
6503 472 : const int iYMax = psJob->iYMax;
6504 472 : const double dfMultFactorVerticalShiftPipeline =
6505 472 : poWK->bApplyVerticalShift
6506 0 : ? CPLAtof(CSLFetchNameValueDef(
6507 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6508 : "1.0"))
6509 : : 0.0;
6510 472 : const bool bAvoidNoDataSingleBand =
6511 538 : poWK->nBands == 1 ||
6512 66 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
6513 : "UNIFIED_SRC_NODATA", "FALSE"));
6514 :
6515 472 : const int nDstXSize = poWK->nDstXSize;
6516 472 : const int nSrcXSize = poWK->nSrcXSize;
6517 472 : const int nSrcYSize = poWK->nSrcYSize;
6518 :
6519 : /* -------------------------------------------------------------------- */
6520 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6521 : /* scanlines worth of positions. */
6522 : /* -------------------------------------------------------------------- */
6523 :
6524 : // For x, 2 *, because we cache the precomputed values at the end.
6525 : double *padfX =
6526 472 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6527 : double *padfY =
6528 472 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6529 : double *padfZ =
6530 472 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6531 472 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6532 :
6533 472 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6534 472 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6535 472 : const double dfErrorThreshold = CPLAtof(
6536 472 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6537 :
6538 : const bool bOneSourceCornerFailsToReproject =
6539 472 : GWKOneSourceCornerFailsToReproject(psJob);
6540 :
6541 : // Precompute values.
6542 79763 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6543 79291 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6544 :
6545 : /* ==================================================================== */
6546 : /* Loop over output lines. */
6547 : /* ==================================================================== */
6548 63919 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6549 : {
6550 :
6551 : /* --------------------------------------------------------------------
6552 : */
6553 : /* Setup points to transform to source image space. */
6554 : /* --------------------------------------------------------------------
6555 : */
6556 63447 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6557 63447 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6558 33638877 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6559 33575441 : padfY[iDstX] = dfY;
6560 63447 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6561 :
6562 : /* --------------------------------------------------------------------
6563 : */
6564 : /* Transform the points from destination pixel/line coordinates */
6565 : /* to source pixel/line coordinates. */
6566 : /* --------------------------------------------------------------------
6567 : */
6568 63447 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6569 : padfY, padfZ, pabSuccess);
6570 63447 : if (dfSrcCoordPrecision > 0.0)
6571 : {
6572 0 : GWKRoundSourceCoordinates(
6573 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6574 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6575 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6576 : }
6577 : /* ====================================================================
6578 : */
6579 : /* Loop over pixels in output scanline. */
6580 : /* ====================================================================
6581 : */
6582 33638877 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6583 : {
6584 33575441 : GPtrDiff_t iSrcOffset = 0;
6585 33575441 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6586 : padfX, padfY, nSrcXSize,
6587 : nSrcYSize, iSrcOffset))
6588 21187043 : continue;
6589 :
6590 : /* --------------------------------------------------------------------
6591 : */
6592 : /* Do not try to apply invalid source pixels to the dest. */
6593 : /* --------------------------------------------------------------------
6594 : */
6595 24833405 : if (poWK->panUnifiedSrcValid != nullptr &&
6596 6517835 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6597 : {
6598 4924382 : if (!bOneSourceCornerFailsToReproject)
6599 : {
6600 4916896 : continue;
6601 : }
6602 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6603 : {
6604 5224 : continue;
6605 : }
6606 : }
6607 :
6608 : /* --------------------------------------------------------------------
6609 : */
6610 : /* Do not try to apply transparent source pixels to the
6611 : * destination.*/
6612 : /* --------------------------------------------------------------------
6613 : */
6614 13393380 : double dfDensity = 1.0;
6615 :
6616 13393380 : if (poWK->pafUnifiedSrcDensity != nullptr)
6617 : {
6618 1557335 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6619 1557335 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6620 1005075 : continue;
6621 : }
6622 :
6623 : /* ====================================================================
6624 : */
6625 : /* Loop processing each band. */
6626 : /* ====================================================================
6627 : */
6628 :
6629 12388398 : const GPtrDiff_t iDstOffset =
6630 12388398 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6631 :
6632 27338858 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6633 : {
6634 14950460 : T value = 0;
6635 14950460 : double dfBandDensity = 0.0;
6636 :
6637 : /* --------------------------------------------------------------------
6638 : */
6639 : /* Collect the source value. */
6640 : /* --------------------------------------------------------------------
6641 : */
6642 14950460 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6643 : &value))
6644 : {
6645 :
6646 14950460 : if (poWK->bApplyVerticalShift)
6647 : {
6648 0 : if (!std::isfinite(padfZ[iDstX]))
6649 0 : continue;
6650 : // Subtract padfZ[] since the coordinate transformation
6651 : // is from target to source
6652 0 : value = GWKClampValueT<T>(
6653 0 : double(value) * poWK->dfMultFactorVerticalShift -
6654 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6655 : }
6656 :
6657 14950460 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6658 : dfBandDensity, value,
6659 : bAvoidNoDataSingleBand);
6660 : }
6661 : }
6662 :
6663 : /* --------------------------------------------------------------------
6664 : */
6665 : /* Mark this pixel valid/opaque in the output. */
6666 : /* --------------------------------------------------------------------
6667 : */
6668 :
6669 12388398 : if (!bAvoidNoDataSingleBand)
6670 : {
6671 424278 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6672 : }
6673 :
6674 12388398 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6675 :
6676 12388398 : if (poWK->panDstValid != nullptr)
6677 : {
6678 11118345 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6679 : }
6680 : } /* Next iDstX */
6681 :
6682 : /* --------------------------------------------------------------------
6683 : */
6684 : /* Report progress to the user, and optionally cancel out. */
6685 : /* --------------------------------------------------------------------
6686 : */
6687 63447 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6688 0 : break;
6689 : }
6690 :
6691 : /* -------------------------------------------------------------------- */
6692 : /* Cleanup and return. */
6693 : /* -------------------------------------------------------------------- */
6694 472 : CPLFree(padfX);
6695 472 : CPLFree(padfY);
6696 472 : CPLFree(padfZ);
6697 472 : CPLFree(pabSuccess);
6698 472 : }
6699 :
6700 360 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6701 : {
6702 360 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6703 : }
6704 :
6705 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6706 : {
6707 14 : return GWKRun(
6708 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6709 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6710 : }
6711 :
6712 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6713 : {
6714 5 : return GWKRun(
6715 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6716 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6717 5 : GRA_Bilinear>);
6718 : }
6719 :
6720 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6721 : {
6722 6 : return GWKRun(
6723 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6724 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6725 6 : GRA_Bilinear>);
6726 : }
6727 :
6728 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6729 : {
6730 4 : return GWKRun(
6731 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6732 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6733 4 : GRA_Bilinear>);
6734 : }
6735 :
6736 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6737 :
6738 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6739 : {
6740 : return GWKRun(
6741 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6742 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6743 : GRA_Bilinear>);
6744 : }
6745 : #endif
6746 :
6747 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6748 : {
6749 5 : return GWKRun(
6750 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6751 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6752 : }
6753 :
6754 14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6755 : {
6756 14 : return GWKRun(
6757 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6758 14 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6759 : }
6760 :
6761 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6762 : {
6763 6 : return GWKRun(
6764 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6765 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6766 : }
6767 :
6768 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6769 : {
6770 5 : return GWKRun(
6771 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6772 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6773 : }
6774 :
6775 48 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6776 : {
6777 48 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6778 : }
6779 :
6780 10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6781 : {
6782 10 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6783 : }
6784 :
6785 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6786 : {
6787 11 : return GWKRun(
6788 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6789 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6790 : }
6791 :
6792 50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6793 : {
6794 50 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6795 : }
6796 :
6797 : /************************************************************************/
6798 : /* GWKAverageOrMode() */
6799 : /* */
6800 : /************************************************************************/
6801 :
// Vertical weight of source row iSrcY within the footprint of a destination
// pixel. Relies on the following names being visible at the expansion site:
//   iSrcYMin, iSrcYMax : first row and one-past-last row of the footprint
//   dfYMin, dfYMax     : fractional vertical extent of the footprint
// Result:
//   - first row : 1.0 when the footprint is a single row, otherwise the
//                 fraction of that row lying below dfYMin
//   - last row  : the fraction of that row lying above dfYMax
//   - otherwise : 1.0
// The macro argument is parenthesized so that any expression (not only a
// plain identifier) can be passed safely.
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    (((iSrcY) == iSrcYMin)                                                     \
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
     : ((iSrcY) + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                     \
                                 : 1.0)
6807 :
// Combined weight of source column iSrcX, given the already computed
// vertical weight dfWeightY of its row. Relies on the following names being
// visible at the expansion site:
//   iSrcXMin, iSrcXMax : first column and one-past-last column of the
//                        footprint
//   dfXMin, dfXMax     : fractional horizontal extent of the footprint
// Result:
//   - first column : dfWeightY when the footprint is a single column,
//                    otherwise dfWeightY scaled by the fraction of that
//                    column lying right of dfXMin
//   - last column  : dfWeightY scaled by the fraction of that column lying
//                    left of dfXMax
//   - otherwise    : dfWeightY
// Both macro arguments are parenthesized so that expressions such as
// "a + b" keep their intended grouping once expanded.
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    (((iSrcX) == iSrcXMin)                                                     \
         ? ((iSrcXMin + 1 == iSrcXMax)                                         \
                ? (dfWeightY)                                                  \
                : (dfWeightY) * (1 - (dfXMin - iSrcXMin)))                     \
     : ((iSrcX) + 1 == iSrcXMax) ? (dfWeightY) * (1 - (iSrcXMax - dfXMax))     \
                                 : (dfWeightY))
6814 :
6815 : static void GWKAverageOrModeThread(void *pData);
6816 :
6817 163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6818 : {
6819 163 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6820 : }
6821 :
6822 : /************************************************************************/
6823 : /* GWKAverageOrModeComputeLineCoords() */
6824 : /************************************************************************/
6825 :
6826 8183 : static void GWKAverageOrModeComputeLineCoords(
6827 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6828 : double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6829 : int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6830 : double dfErrorThreshold)
6831 : {
6832 8183 : const GDALWarpKernel *poWK = psJob->poWK;
6833 8183 : const int nDstXSize = poWK->nDstXSize;
6834 :
6835 : // Setup points to transform to source image space.
6836 2097530 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6837 : {
6838 2089340 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6839 2089340 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6840 2089340 : padfZ[iDstX] = 0.0;
6841 2089340 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6842 2089340 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6843 2089340 : padfZ2[iDstX] = 0.0;
6844 : }
6845 :
6846 : /* ----------------------------------------------------------------- */
6847 : /* Transform the points from destination pixel/line coordinates */
6848 : /* to source pixel/line coordinates. */
6849 : /* ----------------------------------------------------------------- */
6850 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6851 : padfZ, pabSuccess);
6852 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6853 : padfY2, padfZ2, pabSuccess2);
6854 :
6855 8183 : if (dfSrcCoordPrecision > 0.0)
6856 : {
6857 0 : GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6858 : dfSrcCoordPrecision, dfErrorThreshold,
6859 0 : poWK->pfnTransformer, psJob->pTransformerArg,
6860 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6861 0 : GWKRoundSourceCoordinates(
6862 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6863 0 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6864 0 : 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6865 : }
6866 8183 : }
6867 :
6868 : /************************************************************************/
6869 : /* GWKAverageOrModeComputeSourceCoords() */
6870 : /************************************************************************/
6871 :
6872 2089340 : static bool GWKAverageOrModeComputeSourceCoords(
6873 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6874 : double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
6875 : // Output:
6876 : bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
6877 : double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
6878 : {
6879 2089340 : const GDALWarpKernel *poWK = psJob->poWK;
6880 2089340 : const int nSrcXSize = poWK->nSrcXSize;
6881 2089340 : const int nSrcYSize = poWK->nSrcYSize;
6882 :
6883 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6884 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6885 2089340 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6886 1992640 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6887 1992640 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6888 1965720 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6889 1965720 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6890 1912820 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6891 1912310 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6892 1910810 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6893 : {
6894 178602 : return false;
6895 : }
6896 :
6897 : // Compute corners in source crs.
6898 :
6899 : // The transformation might not have preserved ordering of
6900 : // coordinates so do the necessary swapping (#5433).
6901 : // NOTE: this is really an approximative fix. To do something
6902 : // more precise we would for example need to compute the
6903 : // transformation of coordinates in the
6904 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6905 : // coordinates, and take the bounding box of the got source
6906 : // coordinates.
6907 :
6908 1910740 : if (padfX[iDstX] > padfX2[iDstX])
6909 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
6910 :
6911 : // Detect situations where the target pixel is close to the
6912 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6913 : // close to the left-most and right-most columns of the source
6914 : // raster. The 2 value below was experimentally determined to
6915 : // avoid false-positives and false-negatives.
6916 : // Addresses https://github.com/OSGeo/gdal/issues/6478
6917 1910740 : bWrapOverX = false;
6918 1910740 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6919 1910740 : if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
6920 1903470 : 2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
6921 17795 : nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
6922 : {
6923 : // Check there is a discontinuity by checking at mid-pixel.
6924 : // NOTE: all this remains fragile. To confidently
6925 : // detect antimeridian warping we should probably try to access
6926 : // georeferenced coordinates, and not rely only on tests on
6927 : // image space coordinates. But accessing georeferenced
6928 : // coordinates from here is not trivial, and we would for example
6929 : // have to handle both geographic, Mercator, etc.
6930 : // Let's hope this heuristics is good enough for now.
6931 1200 : double x = iDstX + 0.5 + poWK->nDstXOff;
6932 1200 : double y = iDstY + poWK->nDstYOff;
6933 1200 : double z = 0;
6934 1200 : int bSuccess = FALSE;
6935 1200 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
6936 : &bSuccess);
6937 1200 : if (bSuccess && x < padfX[iDstX])
6938 : {
6939 1192 : bWrapOverX = true;
6940 1192 : std::swap(padfX[iDstX], padfX2[iDstX]);
6941 1192 : padfX2[iDstX] += nSrcXSize;
6942 : }
6943 : }
6944 :
6945 1910740 : dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6946 1910740 : dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6947 1910740 : constexpr double EPSILON = 1e-10;
6948 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6949 1910740 : if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
6950 372 : return false;
6951 1910370 : iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
6952 1910370 : iSrcXMax = static_cast<int>(
6953 1910370 : std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
6954 1910370 : if (!bWrapOverX)
6955 1909180 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6956 1910370 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6957 472 : iSrcXMax++;
6958 :
6959 1910370 : if (padfY[iDstX] > padfY2[iDstX])
6960 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
6961 1910370 : dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6962 1910370 : dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6963 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6964 1910370 : if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
6965 238 : return false;
6966 1910130 : iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
6967 1910130 : iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
6968 1910130 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6969 0 : iSrcYMax++;
6970 :
6971 1910130 : return true;
6972 : }
6973 :
6974 : /************************************************************************/
6975 : /* GWKModeRealType() */
6976 : /************************************************************************/
6977 :
6978 17780 : template <class T> static inline bool IsSame(T a, T b)
6979 : {
6980 17780 : return a == b;
6981 : }
6982 :
6983 0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
6984 : {
6985 0 : return a == b || (CPLIsNan(a) && CPLIsNan(b));
6986 : }
6987 :
6988 18 : template <> bool IsSame<float>(float a, float b)
6989 : {
6990 18 : return a == b || (std::isnan(a) && std::isnan(b));
6991 : }
6992 :
6993 56 : template <> bool IsSame<double>(double a, double b)
6994 : {
6995 56 : return a == b || (std::isnan(a) && std::isnan(b));
6996 : }
6997 :
6998 19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
6999 : {
7000 19 : const GDALWarpKernel *poWK = psJob->poWK;
7001 19 : const int iYMin = psJob->iYMin;
7002 19 : const int iYMax = psJob->iYMax;
7003 19 : const int nDstXSize = poWK->nDstXSize;
7004 19 : const int nSrcXSize = poWK->nSrcXSize;
7005 19 : const int nSrcYSize = poWK->nSrcYSize;
7006 19 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7007 :
7008 19 : T *pVals = nullptr;
7009 19 : float *pafCounts = nullptr;
7010 :
7011 19 : if (nSrcXSize > 0 && nSrcYSize > 0)
7012 : {
7013 : pVals = static_cast<T *>(
7014 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
7015 : pafCounts = static_cast<float *>(
7016 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7017 19 : if (pVals == nullptr || pafCounts == nullptr)
7018 : {
7019 0 : VSIFree(pVals);
7020 0 : VSIFree(pafCounts);
7021 0 : return;
7022 : }
7023 : }
7024 :
7025 : /* -------------------------------------------------------------------- */
7026 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7027 : /* scanlines worth of positions. */
7028 : /* -------------------------------------------------------------------- */
7029 :
7030 : double *padfX =
7031 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7032 : double *padfY =
7033 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7034 : double *padfZ =
7035 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7036 : double *padfX2 =
7037 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7038 : double *padfY2 =
7039 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7040 : double *padfZ2 =
7041 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7042 19 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7043 19 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7044 :
7045 19 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7046 19 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7047 19 : const double dfErrorThreshold = CPLAtof(
7048 19 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7049 19 : const bool bAvoidNoDataSingleBand =
7050 19 : poWK->nBands == 1 ||
7051 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7052 : "UNIFIED_SRC_NODATA", "FALSE"));
7053 :
7054 19 : const int nXMargin =
7055 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7056 19 : const int nYMargin =
7057 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7058 :
7059 : /* ==================================================================== */
7060 : /* Loop over output lines. */
7061 : /* ==================================================================== */
7062 116 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7063 : {
7064 97 : GWKAverageOrModeComputeLineCoords(
7065 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7066 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7067 :
7068 : // Loop over pixels in output scanline.
7069 3514 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7070 : {
7071 3417 : GPtrDiff_t iSrcOffset = 0;
7072 3417 : double dfDensity = 1.0;
7073 3417 : bool bHasFoundDensity = false;
7074 :
7075 3417 : bool bWrapOverX = false;
7076 3417 : double dfXMin = 0;
7077 3417 : double dfYMin = 0;
7078 3417 : double dfXMax = 0;
7079 3417 : double dfYMax = 0;
7080 3417 : int iSrcXMin = 0;
7081 3417 : int iSrcYMin = 0;
7082 3417 : int iSrcXMax = 0;
7083 3417 : int iSrcYMax = 0;
7084 3417 : if (!GWKAverageOrModeComputeSourceCoords(
7085 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7086 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7087 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7088 : {
7089 0 : continue;
7090 : }
7091 :
7092 3417 : const GPtrDiff_t iDstOffset =
7093 3417 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7094 :
7095 : // Loop processing each band.
7096 6834 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7097 : {
7098 3417 : double dfBandDensity = 0.0;
7099 :
7100 3417 : int nBins = 0;
7101 3417 : int iModeIndex = -1;
7102 3417 : T nVal{};
7103 :
7104 10248 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7105 : {
7106 6831 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7107 6831 : iSrcOffset =
7108 6831 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7109 20530 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7110 : iSrcX++, iSrcOffset++)
7111 : {
7112 13699 : if (bWrapOverX)
7113 0 : iSrcOffset =
7114 0 : (iSrcX % nSrcXSize) +
7115 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7116 :
7117 13699 : if (poWK->panUnifiedSrcValid != nullptr &&
7118 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7119 0 : continue;
7120 :
7121 13699 : if (GWKGetPixelT(poWK, iBand, iSrcOffset,
7122 27398 : &dfBandDensity, &nVal) &&
7123 13699 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7124 : {
7125 13699 : const double dfWeight =
7126 13699 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7127 :
7128 : // Check array for existing entry.
7129 13699 : int i = 0;
7130 29194 : for (i = 0; i < nBins; ++i)
7131 : {
7132 17807 : if (IsSame(pVals[i], nVal))
7133 : {
7134 :
7135 2312 : pafCounts[i] +=
7136 2312 : static_cast<float>(dfWeight);
7137 2312 : bool bValIsMaxCount =
7138 2312 : (pafCounts[i] > pafCounts[iModeIndex]);
7139 :
7140 2312 : if (!bValIsMaxCount &&
7141 1498 : pafCounts[i] == pafCounts[iModeIndex])
7142 : {
7143 1490 : switch (eTieStrategy)
7144 : {
7145 1477 : case GWKTS_First:
7146 1477 : break;
7147 6 : case GWKTS_Min:
7148 6 : bValIsMaxCount =
7149 6 : nVal < pVals[iModeIndex];
7150 6 : break;
7151 7 : case GWKTS_Max:
7152 7 : bValIsMaxCount =
7153 7 : nVal > pVals[iModeIndex];
7154 7 : break;
7155 : }
7156 : }
7157 :
7158 2312 : if (bValIsMaxCount)
7159 : {
7160 817 : iModeIndex = i;
7161 : }
7162 :
7163 2312 : break;
7164 : }
7165 : }
7166 :
7167 : // Add to arr if entry not already there.
7168 13699 : if (i == nBins)
7169 : {
7170 11387 : pVals[i] = nVal;
7171 11387 : pafCounts[i] = static_cast<float>(dfWeight);
7172 :
7173 11387 : if (iModeIndex < 0)
7174 3417 : iModeIndex = i;
7175 :
7176 11387 : ++nBins;
7177 : }
7178 : }
7179 : }
7180 : }
7181 :
7182 3417 : if (iModeIndex != -1)
7183 : {
7184 3417 : nVal = pVals[iModeIndex];
7185 3417 : dfBandDensity = 1;
7186 3417 : bHasFoundDensity = true;
7187 : }
7188 :
7189 : // We have a computed value from the source. Now apply it
7190 : // to the destination pixel
7191 3417 : if (bHasFoundDensity)
7192 : {
7193 3417 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
7194 : dfBandDensity, nVal,
7195 : bAvoidNoDataSingleBand);
7196 : }
7197 : }
7198 :
7199 3417 : if (!bHasFoundDensity)
7200 0 : continue;
7201 :
7202 3417 : if (!bAvoidNoDataSingleBand)
7203 : {
7204 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7205 : }
7206 :
7207 : /* --------------------------------------------------------------------
7208 : */
7209 : /* Update destination density/validity masks. */
7210 : /* --------------------------------------------------------------------
7211 : */
7212 3417 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7213 :
7214 3417 : if (poWK->panDstValid != nullptr)
7215 : {
7216 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7217 : }
7218 : } /* Next iDstX */
7219 :
7220 : /* --------------------------------------------------------------------
7221 : */
7222 : /* Report progress to the user, and optionally cancel out. */
7223 : /* --------------------------------------------------------------------
7224 : */
7225 97 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7226 0 : break;
7227 : }
7228 :
7229 : /* -------------------------------------------------------------------- */
7230 : /* Cleanup and return. */
7231 : /* -------------------------------------------------------------------- */
7232 19 : CPLFree(padfX);
7233 19 : CPLFree(padfY);
7234 19 : CPLFree(padfZ);
7235 19 : CPLFree(padfX2);
7236 19 : CPLFree(padfY2);
7237 19 : CPLFree(padfZ2);
7238 19 : CPLFree(pabSuccess);
7239 19 : CPLFree(pabSuccess2);
7240 19 : VSIFree(pVals);
7241 19 : VSIFree(pafCounts);
7242 : }
7243 :
7244 : /************************************************************************/
7245 : /* GWKModeComplexType() */
7246 : /************************************************************************/
7247 :
7248 8 : static void GWKModeComplexType(GWKJobStruct *psJob)
7249 : {
7250 8 : const GDALWarpKernel *poWK = psJob->poWK;
7251 8 : const int iYMin = psJob->iYMin;
7252 8 : const int iYMax = psJob->iYMax;
7253 8 : const int nDstXSize = poWK->nDstXSize;
7254 8 : const int nSrcXSize = poWK->nSrcXSize;
7255 8 : const int nSrcYSize = poWK->nSrcYSize;
7256 8 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7257 : const double dfMultFactorVerticalShiftPipeline =
7258 8 : poWK->bApplyVerticalShift
7259 8 : ? CPLAtof(CSLFetchNameValueDef(
7260 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7261 : "1.0"))
7262 8 : : 0.0;
7263 : const bool bAvoidNoDataSingleBand =
7264 8 : poWK->nBands == 1 ||
7265 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7266 8 : "UNIFIED_SRC_NODATA", "FALSE"));
7267 :
7268 8 : double *padfRealVals = nullptr;
7269 8 : double *padfImagVals = nullptr;
7270 8 : float *pafCounts = nullptr;
7271 :
7272 8 : if (nSrcXSize > 0 && nSrcYSize > 0)
7273 : {
7274 : padfRealVals = static_cast<double *>(
7275 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7276 : padfImagVals = static_cast<double *>(
7277 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7278 : pafCounts = static_cast<float *>(
7279 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7280 8 : if (padfRealVals == nullptr || padfImagVals == nullptr ||
7281 : pafCounts == nullptr)
7282 : {
7283 0 : VSIFree(padfRealVals);
7284 0 : VSIFree(padfImagVals);
7285 0 : VSIFree(pafCounts);
7286 0 : return;
7287 : }
7288 : }
7289 :
7290 : /* -------------------------------------------------------------------- */
7291 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7292 : /* scanlines worth of positions. */
7293 : /* -------------------------------------------------------------------- */
7294 :
7295 : double *padfX =
7296 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7297 : double *padfY =
7298 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7299 : double *padfZ =
7300 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7301 : double *padfX2 =
7302 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7303 : double *padfY2 =
7304 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7305 : double *padfZ2 =
7306 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7307 8 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7308 8 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7309 :
7310 8 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7311 8 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7312 8 : const double dfErrorThreshold = CPLAtof(
7313 8 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7314 :
7315 : const int nXMargin =
7316 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7317 : const int nYMargin =
7318 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7319 :
7320 : /* ==================================================================== */
7321 : /* Loop over output lines. */
7322 : /* ==================================================================== */
7323 16 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7324 : {
7325 8 : GWKAverageOrModeComputeLineCoords(
7326 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7327 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7328 :
7329 : // Loop over pixels in output scanline.
7330 16 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7331 : {
7332 8 : GPtrDiff_t iSrcOffset = 0;
7333 8 : double dfDensity = 1.0;
7334 8 : bool bHasFoundDensity = false;
7335 :
7336 8 : bool bWrapOverX = false;
7337 8 : double dfXMin = 0;
7338 8 : double dfYMin = 0;
7339 8 : double dfXMax = 0;
7340 8 : double dfYMax = 0;
7341 8 : int iSrcXMin = 0;
7342 8 : int iSrcYMin = 0;
7343 8 : int iSrcXMax = 0;
7344 8 : int iSrcYMax = 0;
7345 8 : if (!GWKAverageOrModeComputeSourceCoords(
7346 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7347 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7348 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7349 : {
7350 0 : continue;
7351 : }
7352 :
7353 8 : const GPtrDiff_t iDstOffset =
7354 8 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7355 :
7356 : // Loop processing each band.
7357 16 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7358 : {
7359 8 : double dfBandDensity = 0.0;
7360 :
7361 8 : int nBins = 0;
7362 8 : int iModeIndex = -1;
7363 8 : double dfValueReal = 0;
7364 8 : double dfValueImag = 0;
7365 :
7366 16 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7367 : {
7368 8 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7369 8 : iSrcOffset =
7370 8 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7371 38 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7372 : iSrcX++, iSrcOffset++)
7373 : {
7374 30 : if (bWrapOverX)
7375 0 : iSrcOffset =
7376 0 : (iSrcX % nSrcXSize) +
7377 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7378 :
7379 30 : if (poWK->panUnifiedSrcValid != nullptr &&
7380 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7381 0 : continue;
7382 :
7383 30 : if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
7384 : &dfBandDensity, &dfValueReal,
7385 60 : &dfValueImag) &&
7386 30 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7387 : {
7388 30 : const double dfWeight =
7389 30 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7390 :
7391 : // Check array for existing entry.
7392 30 : int i = 0;
7393 49 : for (i = 0; i < nBins; ++i)
7394 : {
7395 47 : if (IsSame(padfRealVals[i], dfValueReal) &&
7396 14 : IsSame(padfImagVals[i], dfValueImag))
7397 : {
7398 :
7399 14 : pafCounts[i] +=
7400 14 : static_cast<float>(dfWeight);
7401 14 : bool bValIsMaxCount =
7402 14 : (pafCounts[i] > pafCounts[iModeIndex]);
7403 :
7404 14 : if (!bValIsMaxCount &&
7405 6 : pafCounts[i] == pafCounts[iModeIndex])
7406 : {
7407 3 : switch (eTieStrategy)
7408 : {
7409 3 : case GWKTS_First:
7410 3 : break;
7411 0 : case GWKTS_Min:
7412 0 : bValIsMaxCount =
7413 0 : dfValueReal <
7414 0 : padfRealVals[iModeIndex];
7415 0 : break;
7416 0 : case GWKTS_Max:
7417 0 : bValIsMaxCount =
7418 0 : dfValueReal >
7419 0 : padfRealVals[iModeIndex];
7420 0 : break;
7421 : }
7422 : }
7423 :
7424 14 : if (bValIsMaxCount)
7425 : {
7426 8 : iModeIndex = i;
7427 : }
7428 :
7429 14 : break;
7430 : }
7431 : }
7432 :
7433 : // Add to arr if entry not already there.
7434 30 : if (i == nBins)
7435 : {
7436 16 : padfRealVals[i] = dfValueReal;
7437 16 : padfImagVals[i] = dfValueImag;
7438 16 : pafCounts[i] = static_cast<float>(dfWeight);
7439 :
7440 16 : if (iModeIndex < 0)
7441 8 : iModeIndex = i;
7442 :
7443 16 : ++nBins;
7444 : }
7445 : }
7446 : }
7447 : }
7448 :
7449 8 : if (iModeIndex != -1)
7450 : {
7451 8 : dfValueReal = padfRealVals[iModeIndex];
7452 8 : dfValueImag = padfImagVals[iModeIndex];
7453 8 : dfBandDensity = 1;
7454 :
7455 8 : if (poWK->bApplyVerticalShift)
7456 : {
7457 0 : if (!std::isfinite(padfZ[iDstX]))
7458 0 : continue;
7459 : // Subtract padfZ[] since the coordinate
7460 : // transformation is from target to source
7461 0 : dfValueReal =
7462 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7463 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
7464 : }
7465 :
7466 8 : bHasFoundDensity = true;
7467 : }
7468 :
7469 : // We have a computed value from the source. Now apply it
7470 : // to the destination pixel
7471 8 : if (bHasFoundDensity)
7472 : {
7473 8 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7474 : dfValueReal, dfValueImag,
7475 : bAvoidNoDataSingleBand);
7476 : }
7477 : }
7478 :
7479 8 : if (!bHasFoundDensity)
7480 0 : continue;
7481 :
7482 8 : if (!bAvoidNoDataSingleBand)
7483 : {
7484 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7485 : }
7486 :
7487 : /* --------------------------------------------------------------------
7488 : */
7489 : /* Update destination density/validity masks. */
7490 : /* --------------------------------------------------------------------
7491 : */
7492 8 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7493 :
7494 8 : if (poWK->panDstValid != nullptr)
7495 : {
7496 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7497 : }
7498 : } /* Next iDstX */
7499 :
7500 : /* --------------------------------------------------------------------
7501 : */
7502 : /* Report progress to the user, and optionally cancel out. */
7503 : /* --------------------------------------------------------------------
7504 : */
7505 8 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7506 0 : break;
7507 : }
7508 :
7509 : /* -------------------------------------------------------------------- */
7510 : /* Cleanup and return. */
7511 : /* -------------------------------------------------------------------- */
7512 8 : CPLFree(padfX);
7513 8 : CPLFree(padfY);
7514 8 : CPLFree(padfZ);
7515 8 : CPLFree(padfX2);
7516 8 : CPLFree(padfY2);
7517 8 : CPLFree(padfZ2);
7518 8 : CPLFree(pabSuccess);
7519 8 : CPLFree(pabSuccess2);
7520 8 : VSIFree(padfRealVals);
7521 8 : VSIFree(padfImagVals);
7522 8 : VSIFree(pafCounts);
7523 : }
7524 :
7525 : /************************************************************************/
7526 : /* GWKAverageOrModeThread() */
7527 : /************************************************************************/
7528 :
7529 : // Overall logic based on GWKGeneralCaseThread().
7530 163 : static void GWKAverageOrModeThread(void *pData)
7531 : {
7532 163 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7533 163 : const GDALWarpKernel *poWK = psJob->poWK;
7534 163 : const int iYMin = psJob->iYMin;
7535 163 : const int iYMax = psJob->iYMax;
7536 : const double dfMultFactorVerticalShiftPipeline =
7537 163 : poWK->bApplyVerticalShift
7538 163 : ? CPLAtof(CSLFetchNameValueDef(
7539 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7540 : "1.0"))
7541 163 : : 0.0;
7542 : const bool bAvoidNoDataSingleBand =
7543 194 : poWK->nBands == 1 ||
7544 31 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7545 163 : "UNIFIED_SRC_NODATA", "FALSE"));
7546 :
7547 163 : const int nDstXSize = poWK->nDstXSize;
7548 163 : const int nSrcXSize = poWK->nSrcXSize;
7549 :
7550 : /* -------------------------------------------------------------------- */
7551 : /* Find out which algorithm to use (small optim.) */
7552 : /* -------------------------------------------------------------------- */
7553 :
7554 : // Only used for GRA_Mode
7555 163 : float *pafCounts = nullptr;
7556 163 : int nBins = 0;
7557 163 : int nBinsOffset = 0;
7558 163 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7559 :
7560 : // Only used with Q1, Med and Q3
7561 163 : float quant = 0.0f;
7562 :
7563 : // To control array allocation only when data type is complex
7564 163 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7565 :
7566 163 : if (poWK->eResample == GRA_Mode)
7567 : {
7568 45 : if (poWK->bApplyVerticalShift)
7569 : {
7570 0 : return GWKModeComplexType(psJob);
7571 : }
7572 :
7573 45 : switch (poWK->eWorkingDataType)
7574 : {
7575 7 : case GDT_UInt8:
7576 7 : nBins = 256;
7577 7 : break;
7578 :
7579 0 : case GDT_Int8:
7580 0 : nBins = 256;
7581 0 : nBinsOffset = nBins / 2;
7582 0 : break;
7583 :
7584 1 : case GDT_UInt16:
7585 1 : nBins = 65536;
7586 1 : break;
7587 :
7588 10 : case GDT_Int16:
7589 10 : nBins = 65536;
7590 10 : nBinsOffset = nBins / 2;
7591 10 : break;
7592 :
7593 10 : case GDT_Int32:
7594 10 : return GWKModeRealType<int32_t>(psJob);
7595 :
7596 1 : case GDT_UInt32:
7597 1 : return GWKModeRealType<uint32_t>(psJob);
7598 :
7599 1 : case GDT_Int64:
7600 1 : return GWKModeRealType<int64_t>(psJob);
7601 :
7602 1 : case GDT_UInt64:
7603 1 : return GWKModeRealType<uint64_t>(psJob);
7604 :
7605 0 : case GDT_Float16:
7606 0 : return GWKModeRealType<GFloat16>(psJob);
7607 :
7608 4 : case GDT_Float32:
7609 4 : return GWKModeRealType<float>(psJob);
7610 :
7611 2 : case GDT_Float64:
7612 2 : return GWKModeRealType<double>(psJob);
7613 :
7614 8 : case GDT_CInt16:
7615 : case GDT_CInt32:
7616 : case GDT_CFloat16:
7617 : case GDT_CFloat32:
7618 : case GDT_CFloat64:
7619 8 : return GWKModeComplexType(psJob);
7620 :
7621 0 : case GDT_Unknown:
7622 : case GDT_TypeCount:
7623 0 : CPLAssert(false);
7624 : return;
7625 : }
7626 :
7627 18 : if (nBins)
7628 : {
7629 : pafCounts =
7630 18 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7631 18 : if (pafCounts == nullptr)
7632 0 : return;
7633 : }
7634 : }
7635 118 : else if (poWK->eResample == GRA_Med)
7636 : {
7637 6 : quant = 0.5f;
7638 : }
7639 112 : else if (poWK->eResample == GRA_Q1)
7640 : {
7641 10 : quant = 0.25f;
7642 : }
7643 102 : else if (poWK->eResample == GRA_Q3)
7644 : {
7645 5 : quant = 0.75f;
7646 : }
7647 97 : else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7648 11 : poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7649 : {
7650 : // Other resample algorithms not permitted here.
7651 0 : CPLError(CE_Fatal, CPLE_AppDefined,
7652 : "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7653 : "illegal resample");
7654 : }
7655 :
7656 136 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7657 :
7658 : /* -------------------------------------------------------------------- */
7659 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7660 : /* scanlines worth of positions. */
7661 : /* -------------------------------------------------------------------- */
7662 :
7663 : double *padfX =
7664 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7665 : double *padfY =
7666 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7667 : double *padfZ =
7668 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7669 : double *padfX2 =
7670 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7671 : double *padfY2 =
7672 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7673 : double *padfZ2 =
7674 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7675 136 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7676 136 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7677 :
7678 136 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7679 136 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7680 136 : const double dfErrorThreshold = CPLAtof(
7681 136 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7682 :
7683 : const double dfExcludedValuesThreshold =
7684 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7685 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7686 136 : 100.0;
7687 : const double dfNodataValuesThreshold =
7688 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7689 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7690 136 : 100.0;
7691 :
7692 : const int nXMargin =
7693 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7694 : const int nYMargin =
7695 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7696 :
7697 : /* ==================================================================== */
7698 : /* Loop over output lines. */
7699 : /* ==================================================================== */
7700 8214 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7701 : {
7702 8078 : GWKAverageOrModeComputeLineCoords(
7703 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7704 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7705 :
7706 : /* ====================================================================
7707 : */
7708 : /* Loop over pixels in output scanline. */
7709 : /* ====================================================================
7710 : */
7711 2094000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7712 : {
7713 2085920 : GPtrDiff_t iSrcOffset = 0;
7714 2085920 : double dfDensity = 1.0;
7715 2085920 : bool bHasFoundDensity = false;
7716 :
7717 2085920 : bool bWrapOverX = false;
7718 2085920 : double dfXMin = 0;
7719 2085920 : double dfYMin = 0;
7720 2085920 : double dfXMax = 0;
7721 2085920 : double dfYMax = 0;
7722 2085920 : int iSrcXMin = 0;
7723 2085920 : int iSrcYMin = 0;
7724 2085920 : int iSrcXMax = 0;
7725 2085920 : int iSrcYMax = 0;
7726 2085920 : if (!GWKAverageOrModeComputeSourceCoords(
7727 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7728 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7729 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7730 : {
7731 687183 : continue;
7732 : }
7733 :
7734 1906710 : const GPtrDiff_t iDstOffset =
7735 1906710 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7736 :
7737 1906710 : bool bDone = false;
7738 :
7739 : // Special Average mode where we process all bands together,
7740 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7741 1906710 : constexpr double EPSILON = 1e-10;
7742 4614100 : if (poWK->eResample == GRA_Average &&
7743 800681 : (!poWK->m_aadfExcludedValues.empty() ||
7744 589832 : dfNodataValuesThreshold < 1 - EPSILON) &&
7745 2707390 : !poWK->bApplyVerticalShift && !bIsComplex)
7746 : {
7747 589832 : double dfTotalWeightInvalid = 0.0;
7748 589832 : double dfTotalWeightExcluded = 0.0;
7749 589832 : double dfTotalWeightRegular = 0.0;
7750 1179660 : std::vector<double> adfValueReal(poWK->nBands, 0);
7751 1179660 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7752 : std::vector<int> anCountExcludedValues(
7753 589832 : poWK->m_aadfExcludedValues.size(), 0);
7754 :
7755 2162710 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7756 : {
7757 1572880 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7758 1572880 : iSrcOffset =
7759 1572880 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7760 6291500 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7761 : iSrcX++, iSrcOffset++)
7762 : {
7763 4718620 : if (bWrapOverX)
7764 0 : iSrcOffset =
7765 0 : (iSrcX % nSrcXSize) +
7766 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7767 :
7768 4718620 : const double dfWeight =
7769 4718620 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7770 4718620 : if (dfWeight <= 0)
7771 0 : continue;
7772 :
7773 4718640 : if (poWK->panUnifiedSrcValid != nullptr &&
7774 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7775 : {
7776 3 : dfTotalWeightInvalid += dfWeight;
7777 3 : continue;
7778 : }
7779 :
7780 4718620 : bool bAllValid = true;
7781 8651150 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7782 : {
7783 7340300 : double dfBandDensity = 0;
7784 7340300 : double dfValueImagTmp = 0;
7785 11272800 : if (!(GWKGetPixelValue(
7786 : poWK, iBand, iSrcOffset, &dfBandDensity,
7787 7340300 : &adfValueReal[iBand], &dfValueImagTmp) &&
7788 3932530 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7789 : {
7790 3407770 : bAllValid = false;
7791 3407770 : break;
7792 : }
7793 : }
7794 :
7795 4718620 : if (!bAllValid)
7796 : {
7797 3407770 : dfTotalWeightInvalid += dfWeight;
7798 3407770 : continue;
7799 : }
7800 :
7801 1310850 : bool bExcludedValueFound = false;
7802 2490500 : for (size_t i = 0;
7803 2490500 : i < poWK->m_aadfExcludedValues.size(); ++i)
7804 : {
7805 1179670 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7806 : {
7807 22 : bExcludedValueFound = true;
7808 22 : ++anCountExcludedValues[i];
7809 22 : dfTotalWeightExcluded += dfWeight;
7810 22 : break;
7811 : }
7812 : }
7813 1310850 : if (!bExcludedValueFound)
7814 : {
7815 : // Weighted incremental algorithm mean
7816 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7817 1310830 : dfTotalWeightRegular += dfWeight;
7818 5243290 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7819 : {
7820 3932460 : adfValueAveraged[iBand] +=
7821 7864930 : (dfWeight / dfTotalWeightRegular) *
7822 7864930 : (adfValueReal[iBand] -
7823 3932460 : adfValueAveraged[iBand]);
7824 : }
7825 : }
7826 : }
7827 : }
7828 :
7829 589832 : const double dfTotalWeight = dfTotalWeightInvalid +
7830 : dfTotalWeightExcluded +
7831 : dfTotalWeightRegular;
7832 589832 : if (dfTotalWeightInvalid > 0 &&
7833 : dfTotalWeightInvalid >=
7834 458751 : dfNodataValuesThreshold * dfTotalWeight)
7835 : {
7836 : // Do nothing. Let bHasFoundDensity to false.
7837 : }
7838 131085 : else if (dfTotalWeightExcluded > 0 &&
7839 : dfTotalWeightExcluded >=
7840 7 : dfExcludedValuesThreshold * dfTotalWeight)
7841 : {
7842 : // Find the most represented excluded value tuple
7843 3 : size_t iExcludedValue = 0;
7844 3 : int nExcludedValueCount = 0;
7845 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7846 : ++i)
7847 : {
7848 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
7849 : {
7850 3 : iExcludedValue = i;
7851 3 : nExcludedValueCount = anCountExcludedValues[i];
7852 : }
7853 : }
7854 :
7855 3 : bHasFoundDensity = true;
7856 :
7857 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7858 : {
7859 9 : GWKSetPixelValue(
7860 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7861 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7862 : 0, bAvoidNoDataSingleBand);
7863 : }
7864 :
7865 3 : if (!bAvoidNoDataSingleBand)
7866 : {
7867 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7868 3 : }
7869 : }
7870 131082 : else if (dfTotalWeightRegular > 0)
7871 : {
7872 131082 : bHasFoundDensity = true;
7873 :
7874 524324 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7875 : {
7876 393242 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7877 : /* dfBandDensity = */ 1.0,
7878 393242 : adfValueAveraged[iBand], 0,
7879 : bAvoidNoDataSingleBand);
7880 : }
7881 :
7882 131082 : if (!bAvoidNoDataSingleBand)
7883 : {
7884 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7885 : }
7886 : }
7887 :
7888 : // Skip below loop on bands
7889 589832 : bDone = true;
7890 : }
7891 :
7892 : /* ====================================================================
7893 : */
7894 : /* Loop processing each band. */
7895 : /* ====================================================================
7896 : */
7897 :
7898 4730010 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7899 : {
7900 2823310 : double dfBandDensity = 0.0;
7901 2823310 : double dfValueReal = 0.0;
7902 2823310 : double dfValueImag = 0.0;
7903 2823310 : double dfValueRealTmp = 0.0;
7904 2823310 : double dfValueImagTmp = 0.0;
7905 :
7906 : /* --------------------------------------------------------------------
7907 : */
7908 : /* Collect the source value. */
7909 : /* --------------------------------------------------------------------
7910 : */
7911 :
7912 : // Loop over source lines and pixels - 3 possible algorithms.
7913 :
7914 2823310 : if (poWK->eResample == GRA_Average)
7915 : {
7916 300849 : double dfTotalWeight = 0.0;
7917 :
7918 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7919 : // in gcore/overview.cpp.
7920 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7921 : {
7922 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7923 330459 : iSrcOffset = iSrcXMin +
7924 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7925 773407 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7926 : iSrcX++, iSrcOffset++)
7927 : {
7928 442948 : if (bWrapOverX)
7929 1371 : iSrcOffset =
7930 1371 : (iSrcX % nSrcXSize) +
7931 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7932 :
7933 442952 : if (poWK->panUnifiedSrcValid != nullptr &&
7934 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7935 : iSrcOffset))
7936 : {
7937 1 : continue;
7938 : }
7939 :
7940 442947 : if (GWKGetPixelValue(
7941 : poWK, iBand, iSrcOffset, &dfBandDensity,
7942 885894 : &dfValueRealTmp, &dfValueImagTmp) &&
7943 442947 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7944 : {
7945 442947 : const double dfWeight =
7946 442947 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7947 442947 : if (dfWeight > 0)
7948 : {
7949 : // Weighted incremental algorithm mean
7950 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7951 442947 : dfTotalWeight += dfWeight;
7952 442947 : dfValueReal +=
7953 442947 : (dfWeight / dfTotalWeight) *
7954 442947 : (dfValueRealTmp - dfValueReal);
7955 442947 : if (bIsComplex)
7956 : {
7957 252 : dfValueImag +=
7958 252 : (dfWeight / dfTotalWeight) *
7959 252 : (dfValueImagTmp - dfValueImag);
7960 : }
7961 : }
7962 : }
7963 : }
7964 : }
7965 :
7966 300849 : if (dfTotalWeight > 0)
7967 : {
7968 300849 : if (poWK->bApplyVerticalShift)
7969 : {
7970 0 : if (!std::isfinite(padfZ[iDstX]))
7971 0 : continue;
7972 : // Subtract padfZ[] since the coordinate
7973 : // transformation is from target to source
7974 0 : dfValueReal =
7975 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7976 0 : padfZ[iDstX] *
7977 : dfMultFactorVerticalShiftPipeline;
7978 : }
7979 :
7980 300849 : dfBandDensity = 1;
7981 300849 : bHasFoundDensity = true;
7982 : }
7983 : } // GRA_Average.
7984 :
7985 2522460 : else if (poWK->eResample == GRA_RMS)
7986 : {
7987 300416 : double dfTotalReal = 0.0;
7988 300416 : double dfTotalImag = 0.0;
7989 300416 : double dfTotalWeight = 0.0;
7990 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7991 : // in gcore/overview.cpp.
7992 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7993 : {
7994 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7995 330162 : iSrcOffset = iSrcXMin +
7996 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7997 772930 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7998 : iSrcX++, iSrcOffset++)
7999 : {
8000 442768 : if (bWrapOverX)
8001 1371 : iSrcOffset =
8002 1371 : (iSrcX % nSrcXSize) +
8003 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8004 :
8005 442768 : if (poWK->panUnifiedSrcValid != nullptr &&
8006 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8007 : iSrcOffset))
8008 : {
8009 0 : continue;
8010 : }
8011 :
8012 442768 : if (GWKGetPixelValue(
8013 : poWK, iBand, iSrcOffset, &dfBandDensity,
8014 885536 : &dfValueRealTmp, &dfValueImagTmp) &&
8015 442768 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8016 : {
8017 442768 : const double dfWeight =
8018 442768 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8019 442768 : dfTotalWeight += dfWeight;
8020 442768 : dfTotalReal +=
8021 442768 : dfValueRealTmp * dfValueRealTmp * dfWeight;
8022 442768 : if (bIsComplex)
8023 48 : dfTotalImag += dfValueImagTmp *
8024 48 : dfValueImagTmp * dfWeight;
8025 : }
8026 : }
8027 : }
8028 :
8029 300416 : if (dfTotalWeight > 0)
8030 : {
8031 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
8032 :
8033 300416 : if (poWK->bApplyVerticalShift)
8034 : {
8035 0 : if (!std::isfinite(padfZ[iDstX]))
8036 0 : continue;
8037 : // Subtract padfZ[] since the coordinate
8038 : // transformation is from target to source
8039 0 : dfValueReal =
8040 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8041 0 : padfZ[iDstX] *
8042 : dfMultFactorVerticalShiftPipeline;
8043 : }
8044 :
8045 300416 : if (bIsComplex)
8046 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
8047 :
8048 300416 : dfBandDensity = 1;
8049 300416 : bHasFoundDensity = true;
8050 : }
8051 : } // GRA_RMS.
8052 :
8053 2222040 : else if (poWK->eResample == GRA_Mode)
8054 : {
8055 496623 : float fMaxCount = 0.0f;
8056 496623 : int nMode = -1;
8057 496623 : bool bHasSourceValues = false;
8058 :
8059 496623 : memset(pafCounts, 0, nBins * sizeof(float));
8060 :
8061 1612560 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8062 : {
8063 1115940 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8064 1115940 : iSrcOffset = iSrcXMin +
8065 1115940 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8066 4703370 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8067 : iSrcX++, iSrcOffset++)
8068 : {
8069 3587430 : if (bWrapOverX)
8070 1371 : iSrcOffset =
8071 1371 : (iSrcX % nSrcXSize) +
8072 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8073 :
8074 3587430 : if (poWK->panUnifiedSrcValid != nullptr &&
8075 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8076 : iSrcOffset))
8077 0 : continue;
8078 :
8079 3587430 : if (GWKGetPixelValue(
8080 : poWK, iBand, iSrcOffset, &dfBandDensity,
8081 7174870 : &dfValueRealTmp, &dfValueImagTmp) &&
8082 3587430 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8083 : {
8084 3587430 : bHasSourceValues = true;
8085 3587430 : const int nVal =
8086 3587430 : static_cast<int>(dfValueRealTmp);
8087 3587430 : const int iBin = nVal + nBinsOffset;
8088 3587430 : const double dfWeight =
8089 3587430 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8090 :
8091 : // Sum the density.
8092 3587430 : pafCounts[iBin] += static_cast<float>(dfWeight);
8093 : // Is it the most common value so far?
8094 3587430 : bool bUpdateMode = pafCounts[iBin] > fMaxCount;
8095 3587430 : if (!bUpdateMode &&
8096 750293 : pafCounts[iBin] == fMaxCount)
8097 : {
8098 217592 : switch (eTieStrategy)
8099 : {
8100 217584 : case GWKTS_First:
8101 217584 : break;
8102 4 : case GWKTS_Min:
8103 4 : bUpdateMode = nVal < nMode;
8104 4 : break;
8105 4 : case GWKTS_Max:
8106 4 : bUpdateMode = nVal > nMode;
8107 4 : break;
8108 : }
8109 : }
8110 3587430 : if (bUpdateMode)
8111 : {
8112 2837140 : nMode = nVal;
8113 2837140 : fMaxCount = pafCounts[iBin];
8114 : }
8115 : }
8116 : }
8117 : }
8118 :
8119 496623 : if (bHasSourceValues)
8120 : {
8121 496623 : dfValueReal = nMode;
8122 496623 : dfBandDensity = 1;
8123 496623 : bHasFoundDensity = true;
8124 : }
8125 : } // GRA_Mode.
8126 :
8127 1725420 : else if (poWK->eResample == GRA_Max)
8128 : {
8129 335037 : bool bFoundValid = false;
8130 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
8131 : // This code adapted from nAlgo 1 method, GRA_Average.
8132 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8133 : {
8134 952975 : iSrcOffset = iSrcXMin +
8135 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8136 4376740 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8137 : iSrcX++, iSrcOffset++)
8138 : {
8139 3423770 : if (bWrapOverX)
8140 1371 : iSrcOffset =
8141 1371 : (iSrcX % nSrcXSize) +
8142 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8143 :
8144 3426580 : if (poWK->panUnifiedSrcValid != nullptr &&
8145 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8146 : iSrcOffset))
8147 : {
8148 2446 : continue;
8149 : }
8150 :
8151 : // Returns pixel value if it is not no data.
8152 3421320 : if (GWKGetPixelValue(
8153 : poWK, iBand, iSrcOffset, &dfBandDensity,
8154 6842640 : &dfValueRealTmp, &dfValueImagTmp) &&
8155 3421320 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8156 : {
8157 3421320 : bFoundValid = true;
8158 3421320 : if (dfTotalReal < dfValueRealTmp)
8159 : {
8160 442234 : dfTotalReal = dfValueRealTmp;
8161 : }
8162 : }
8163 : }
8164 : }
8165 :
8166 335037 : if (bFoundValid)
8167 : {
8168 335037 : dfValueReal = dfTotalReal;
8169 :
8170 335037 : if (poWK->bApplyVerticalShift)
8171 : {
8172 0 : if (!std::isfinite(padfZ[iDstX]))
8173 0 : continue;
8174 : // Subtract padfZ[] since the coordinate
8175 : // transformation is from target to source
8176 0 : dfValueReal =
8177 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8178 0 : padfZ[iDstX] *
8179 : dfMultFactorVerticalShiftPipeline;
8180 : }
8181 :
8182 335037 : dfBandDensity = 1;
8183 335037 : bHasFoundDensity = true;
8184 : }
8185 : }
8186 :
8187 1390380 : else if (poWK->eResample == GRA_Min)
8188 : {
8189 335012 : bool bFoundValid = false;
8190 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
8191 : // This code adapted from nAlgo 1 method, GRA_Average.
8192 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8193 : {
8194 952710 : iSrcOffset = iSrcXMin +
8195 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8196 4373670 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8197 : iSrcX++, iSrcOffset++)
8198 : {
8199 3420960 : if (bWrapOverX)
8200 1371 : iSrcOffset =
8201 1371 : (iSrcX % nSrcXSize) +
8202 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8203 :
8204 3420960 : if (poWK->panUnifiedSrcValid != nullptr &&
8205 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8206 : iSrcOffset))
8207 : {
8208 0 : continue;
8209 : }
8210 :
8211 : // Returns pixel value if it is not no data.
8212 3420960 : if (GWKGetPixelValue(
8213 : poWK, iBand, iSrcOffset, &dfBandDensity,
8214 6841920 : &dfValueRealTmp, &dfValueImagTmp) &&
8215 3420960 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8216 : {
8217 3420960 : bFoundValid = true;
8218 3420960 : if (dfTotalReal > dfValueRealTmp)
8219 : {
8220 442628 : dfTotalReal = dfValueRealTmp;
8221 : }
8222 : }
8223 : }
8224 : }
8225 :
8226 335012 : if (bFoundValid)
8227 : {
8228 335012 : dfValueReal = dfTotalReal;
8229 :
8230 335012 : if (poWK->bApplyVerticalShift)
8231 : {
8232 0 : if (!std::isfinite(padfZ[iDstX]))
8233 0 : continue;
8234 : // Subtract padfZ[] since the coordinate
8235 : // transformation is from target to source
8236 0 : dfValueReal =
8237 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8238 0 : padfZ[iDstX] *
8239 : dfMultFactorVerticalShiftPipeline;
8240 : }
8241 :
8242 335012 : dfBandDensity = 1;
8243 335012 : bHasFoundDensity = true;
8244 : }
8245 : } // GRA_Min.
8246 :
8247 : else
8248 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
8249 : {
8250 1055370 : CPLAssert(quant > 0.0f);
8251 :
8252 1055370 : bool bFoundValid = false;
8253 1055370 : std::vector<double> dfRealValuesTmp;
8254 :
8255 : // This code adapted from nAlgo 1 method, GRA_Average.
8256 4014130 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8257 : {
8258 2958760 : iSrcOffset = iSrcXMin +
8259 2958760 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8260 13421300 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8261 : iSrcX++, iSrcOffset++)
8262 : {
8263 10462500 : if (bWrapOverX)
8264 4113 : iSrcOffset =
8265 4113 : (iSrcX % nSrcXSize) +
8266 4113 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8267 :
8268 10659100 : if (poWK->panUnifiedSrcValid != nullptr &&
8269 196608 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8270 : iSrcOffset))
8271 : {
8272 195449 : continue;
8273 : }
8274 :
8275 : // Returns pixel value if it is not no data.
8276 10267100 : if (GWKGetPixelValue(
8277 : poWK, iBand, iSrcOffset, &dfBandDensity,
8278 20534100 : &dfValueRealTmp, &dfValueImagTmp) &&
8279 10267100 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8280 : {
8281 10267100 : bFoundValid = true;
8282 10267100 : dfRealValuesTmp.push_back(dfValueRealTmp);
8283 : }
8284 : }
8285 : }
8286 :
8287 1055370 : if (bFoundValid)
8288 : {
8289 1006150 : std::sort(dfRealValuesTmp.begin(),
8290 : dfRealValuesTmp.end());
8291 : int quantIdx = static_cast<int>(
8292 1006150 : std::ceil(quant * dfRealValuesTmp.size() - 1));
8293 1006150 : dfValueReal = dfRealValuesTmp[quantIdx];
8294 :
8295 1006150 : if (poWK->bApplyVerticalShift)
8296 : {
8297 0 : if (!std::isfinite(padfZ[iDstX]))
8298 0 : continue;
8299 : // Subtract padfZ[] since the coordinate
8300 : // transformation is from target to source
8301 0 : dfValueReal =
8302 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8303 0 : padfZ[iDstX] *
8304 : dfMultFactorVerticalShiftPipeline;
8305 : }
8306 :
8307 1006150 : dfBandDensity = 1;
8308 1006150 : bHasFoundDensity = true;
8309 1006150 : dfRealValuesTmp.clear();
8310 : }
8311 : } // Quantile.
8312 :
8313 : /* --------------------------------------------------------------------
8314 : */
8315 : /* We have a computed value from the source. Now apply it
8316 : * to */
8317 : /* the destination pixel. */
8318 : /* --------------------------------------------------------------------
8319 : */
8320 2823310 : if (bHasFoundDensity)
8321 : {
8322 : // TODO: Should we compute dfBandDensity in fct of
8323 : // nCount/nCount2, or use as a threshold to set the dest
8324 : // value?
8325 : // dfBandDensity = (float) nCount / nCount2;
8326 : // if( (float) nCount / nCount2 > 0.1 )
8327 : // or fix gdalwarp crop_to_cutline to crop partially
8328 : // overlapping pixels.
8329 2774080 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8330 : dfValueReal, dfValueImag,
8331 : bAvoidNoDataSingleBand);
8332 : }
8333 : }
8334 :
8335 1906710 : if (!bHasFoundDensity)
8336 507971 : continue;
8337 :
8338 1398740 : if (!bAvoidNoDataSingleBand)
8339 : {
8340 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
8341 : }
8342 :
8343 : /* --------------------------------------------------------------------
8344 : */
8345 : /* Update destination density/validity masks. */
8346 : /* --------------------------------------------------------------------
8347 : */
8348 1398740 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8349 :
8350 1398740 : if (poWK->panDstValid != nullptr)
8351 : {
8352 1184 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8353 : }
8354 : } /* Next iDstX */
8355 :
8356 : /* --------------------------------------------------------------------
8357 : */
8358 : /* Report progress to the user, and optionally cancel out. */
8359 : /* --------------------------------------------------------------------
8360 : */
8361 8078 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8362 0 : break;
8363 : }
8364 :
8365 : /* -------------------------------------------------------------------- */
8366 : /* Cleanup and return. */
8367 : /* -------------------------------------------------------------------- */
8368 136 : CPLFree(padfX);
8369 136 : CPLFree(padfY);
8370 136 : CPLFree(padfZ);
8371 136 : CPLFree(padfX2);
8372 136 : CPLFree(padfY2);
8373 136 : CPLFree(padfZ2);
8374 136 : CPLFree(pabSuccess);
8375 136 : CPLFree(pabSuccess2);
8376 136 : VSIFree(pafCounts);
8377 : }
8378 :
8379 : /************************************************************************/
8380 : /* getOrientation() */
8381 : /************************************************************************/
8382 :
8383 : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
8384 : // -1 if it is counter-clockwise oriented,
8385 : // or 0 if it is colinear.
8386 2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
8387 : {
8388 2355910 : const double p1x = p1.first;
8389 2355910 : const double p1y = p1.second;
8390 2355910 : const double p2x = p2.first;
8391 2355910 : const double p2y = p2.second;
8392 2355910 : const double p3x = p3.first;
8393 2355910 : const double p3y = p3.second;
8394 2355910 : const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
8395 2355910 : if (std::abs(val) < 1e-20)
8396 2690 : return 0;
8397 2353220 : else if (val > 0)
8398 0 : return 1;
8399 : else
8400 2353220 : return -1;
8401 : }
8402 :
8403 : /************************************************************************/
8404 : /* isConvex() */
8405 : /************************************************************************/
8406 :
8407 : // poly must be closed
8408 785302 : static bool isConvex(const XYPoly &poly)
8409 : {
8410 785302 : const size_t n = poly.size();
8411 785302 : size_t i = 0;
8412 785302 : int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8413 785302 : ++i;
8414 2355910 : for (; i < n - 2; ++i)
8415 : {
8416 : const int orientation =
8417 1570600 : getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8418 1570600 : if (orientation != 0)
8419 : {
8420 1567910 : if (last_orientation == 0)
8421 0 : last_orientation = orientation;
8422 1567910 : else if (orientation != last_orientation)
8423 0 : return false;
8424 : }
8425 : }
8426 785302 : return true;
8427 : }
8428 :
8429 : /************************************************************************/
8430 : /* pointIntersectsConvexPoly() */
8431 : /************************************************************************/
8432 :
8433 : // Returns whether xy intersects poly, that must be closed and convex.
8434 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8435 : {
8436 6049100 : const size_t n = poly.size();
8437 6049100 : double dx1 = xy.first - poly[0].first;
8438 6049100 : double dy1 = xy.second - poly[0].second;
8439 6049100 : double dx2 = poly[1].first - poly[0].first;
8440 6049100 : double dy2 = poly[1].second - poly[0].second;
8441 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8442 :
8443 : // Check if the point remains on the same side (left/right) of all edges
8444 14556400 : for (size_t i = 2; i < n; i++)
8445 : {
8446 12793100 : dx1 = xy.first - poly[i - 1].first;
8447 12793100 : dy1 = xy.second - poly[i - 1].second;
8448 :
8449 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8450 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8451 :
8452 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8453 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8454 725558 : prevCrossProduct = crossProduct;
8455 12067500 : else if (prevCrossProduct * crossProduct < 0)
8456 4285760 : return false;
8457 : }
8458 :
8459 1763340 : return true;
8460 : }
8461 :
8462 : /************************************************************************/
8463 : /* getIntersection() */
8464 : /************************************************************************/
8465 :
8466 : /* Returns intersection of [p1,p2] with [p3,p4], if
8467 : * it is a single point, and the 2 segments are not colinear.
8468 : */
8469 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8470 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8471 : {
8472 11811000 : const double x1 = p1.first;
8473 11811000 : const double y1 = p1.second;
8474 11811000 : const double x2 = p2.first;
8475 11811000 : const double y2 = p2.second;
8476 11811000 : const double x3 = p3.first;
8477 11811000 : const double y3 = p3.second;
8478 11811000 : const double x4 = p4.first;
8479 11811000 : const double y4 = p4.second;
8480 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8481 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8482 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8483 9260780 : return false;
8484 :
8485 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8486 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8487 973924 : return false;
8488 :
8489 1576340 : const double t = t_num / denom;
8490 1576340 : xy.first = x1 + t * (x2 - x1);
8491 1576340 : xy.second = y1 + t * (y2 - y1);
8492 1576340 : return true;
8493 : }
8494 :
8495 : /************************************************************************/
8496 : /* getConvexPolyIntersection() */
8497 : /************************************************************************/
8498 :
8499 : // poly1 and poly2 must be closed and convex.
8500 : // The returned intersection will not necessary be closed.
8501 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8502 : XYPoly &intersection)
8503 : {
8504 785302 : intersection.clear();
8505 :
8506 : // Add all points of poly1 inside poly2
8507 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8508 : {
8509 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8510 1187430 : intersection.push_back(poly1[i]);
8511 : }
8512 785302 : if (intersection.size() == poly1.size() - 1)
8513 : {
8514 : // poly1 is inside poly2
8515 119100 : return;
8516 : }
8517 :
8518 : // Add all points of poly2 inside poly1
8519 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8520 : {
8521 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8522 575904 : intersection.push_back(poly2[i]);
8523 : }
8524 :
8525 : // Compute the intersection of all edges of both polygons
8526 726972 : XYPair xy;
8527 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8528 : {
8529 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8530 : {
8531 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8532 11631600 : poly2[i2 + 1], xy))
8533 : {
8534 1576230 : intersection.push_back(xy);
8535 : }
8536 : }
8537 : }
8538 :
8539 726972 : if (intersection.empty())
8540 60770 : return;
8541 :
8542 : // Find lowest-left point in intersection set
8543 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8544 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8545 3772450 : for (const auto &pair : intersection)
8546 : {
8547 3106240 : const double x = pair.first;
8548 3106240 : const double y = pair.second;
8549 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8550 : {
8551 1096040 : lowest_x = x;
8552 1096040 : lowest_y = y;
8553 : }
8554 : }
8555 :
8556 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8557 : {
8558 5737980 : const double p1x_diff = p1.first - lowest_x;
8559 5737980 : const double p1y_diff = p1.second - lowest_y;
8560 5737980 : const double p2x_diff = p2.first - lowest_x;
8561 5737980 : const double p2y_diff = p2.second - lowest_y;
8562 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8563 : {
8564 2655420 : if (p1x_diff >= 0)
8565 : {
8566 2655420 : if (p2x_diff >= 0)
8567 2655420 : return p1.first < p2.first;
8568 0 : return true;
8569 : }
8570 : else
8571 : {
8572 0 : if (p2x_diff >= 0)
8573 0 : return false;
8574 0 : return p1.first < p2.first;
8575 : }
8576 : }
8577 :
8578 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8579 1046960 : return p1.second < p2.second;
8580 :
8581 : double tan_p1;
8582 2035600 : if (p1x_diff == 0.0)
8583 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8584 : else
8585 1570980 : tan_p1 = p1y_diff / p1x_diff;
8586 :
8587 : double tan_p2;
8588 2035600 : if (p2x_diff == 0.0)
8589 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8590 : else
8591 1196080 : tan_p2 = p2y_diff / p2x_diff;
8592 :
8593 2035600 : if (tan_p1 >= 0)
8594 : {
8595 1904790 : if (tan_p2 >= 0)
8596 1881590 : return tan_p1 < tan_p2;
8597 : else
8598 23199 : return true;
8599 : }
8600 : else
8601 : {
8602 130806 : if (tan_p2 >= 0)
8603 103900 : return false;
8604 : else
8605 26906 : return tan_p1 < tan_p2;
8606 : }
8607 666202 : };
8608 :
8609 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8610 : // hull
8611 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8612 :
8613 : // Remove duplicated points
8614 666202 : size_t j = 1;
8615 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8616 : {
8617 2440040 : if (intersection[i] != intersection[i - 1])
8618 : {
8619 1452560 : if (j < i)
8620 545275 : intersection[j] = intersection[i];
8621 1452560 : ++j;
8622 : }
8623 : }
8624 666202 : intersection.resize(j);
8625 : }
8626 :
8627 : /************************************************************************/
8628 : /* GWKSumPreserving() */
8629 : /************************************************************************/
8630 :
8631 : static void GWKSumPreservingThread(void *pData);
8632 :
8633 19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8634 : {
8635 19 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8636 : }
8637 :
8638 19 : static void GWKSumPreservingThread(void *pData)
8639 : {
8640 19 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8641 19 : GDALWarpKernel *poWK = psJob->poWK;
8642 19 : const int iYMin = psJob->iYMin;
8643 19 : const int iYMax = psJob->iYMax;
8644 : const bool bIsAffineNoRotation =
8645 19 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8646 28 : poWK->pTransformerArg) &&
8647 : // for debug/testing purposes
8648 9 : CPLTestBool(
8649 19 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8650 : const bool bAvoidNoDataSingleBand =
8651 21 : poWK->nBands == 1 ||
8652 2 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
8653 19 : "UNIFIED_SRC_NODATA", "FALSE"));
8654 :
8655 19 : const int nDstXSize = poWK->nDstXSize;
8656 19 : const int nSrcXSize = poWK->nSrcXSize;
8657 19 : const int nSrcYSize = poWK->nSrcYSize;
8658 :
8659 38 : std::vector<double> adfX0(nSrcXSize + 1);
8660 38 : std::vector<double> adfY0(nSrcXSize + 1);
8661 38 : std::vector<double> adfZ0(nSrcXSize + 1);
8662 38 : std::vector<double> adfX1(nSrcXSize + 1);
8663 38 : std::vector<double> adfY1(nSrcXSize + 1);
8664 38 : std::vector<double> adfZ1(nSrcXSize + 1);
8665 38 : std::vector<int> abSuccess0(nSrcXSize + 1);
8666 38 : std::vector<int> abSuccess1(nSrcXSize + 1);
8667 :
8668 : CPLRectObj sGlobalBounds;
8669 19 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8670 19 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8671 19 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8672 19 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8673 19 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8674 :
8675 : struct SourcePixel
8676 : {
8677 : int iSrcX;
8678 : int iSrcY;
8679 :
8680 : // Coordinates of source pixel in target pixel coordinates
8681 : double dfDstX0;
8682 : double dfDstY0;
8683 : double dfDstX1;
8684 : double dfDstY1;
8685 : double dfDstX2;
8686 : double dfDstY2;
8687 : double dfDstX3;
8688 : double dfDstY3;
8689 :
8690 : // Source pixel total area (might be larger than the one described
8691 : // by above coordinates, if the pixel was crossing the antimeridian
8692 : // and split)
8693 : double dfArea;
8694 : };
8695 :
8696 38 : std::vector<SourcePixel> sourcePixels;
8697 :
8698 38 : XYPoly discontinuityLeft(5);
8699 38 : XYPoly discontinuityRight(5);
8700 :
8701 : /* ==================================================================== */
8702 : /* First pass: transform the 4 corners of each potential */
8703 : /* contributing source pixel to target pixel coordinates. */
8704 : /* ==================================================================== */
8705 :
8706 : // Special case for top line
8707 : {
8708 19 : int iY = 0;
8709 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8710 : {
8711 3345 : adfX1[iX] = iX + poWK->nSrcXOff;
8712 3345 : adfY1[iX] = iY + poWK->nSrcYOff;
8713 3345 : adfZ1[iX] = 0;
8714 : }
8715 :
8716 19 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8717 : adfX1.data(), adfY1.data(), adfZ1.data(),
8718 : abSuccess1.data());
8719 :
8720 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8721 : {
8722 3345 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8723 0 : abSuccess1[iX] = FALSE;
8724 : else
8725 : {
8726 3345 : adfX1[iX] -= poWK->nDstXOff;
8727 3345 : adfY1[iX] -= poWK->nDstYOff;
8728 : }
8729 : }
8730 : }
8731 :
8732 2032 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8733 : {
8734 2032 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8735 872 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8736 2032 : ? 1
8737 1160 : : -1;
8738 19 : };
8739 :
8740 : const auto FindDiscontinuity =
8741 80 : [poWK, psJob, getInsideXSign](
8742 : double dfXLeft, double dfXRight, double dfY,
8743 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8744 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8745 : {
8746 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8747 : {
8748 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8749 800 : double dfXMidReprojected = dfXMid;
8750 800 : dfYMidReprojected = dfY;
8751 800 : double dfZ = 0;
8752 800 : int nSuccess = 0;
8753 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8754 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8755 : &nSuccess);
8756 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8757 : {
8758 456 : dfXRight = dfXMid;
8759 456 : dfXMidReprojectedRight = dfXMidReprojected;
8760 : }
8761 : else
8762 : {
8763 344 : dfXLeft = dfXMid;
8764 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8765 : }
8766 : }
8767 80 : };
8768 :
8769 2685 : for (int iY = 0; iY < nSrcYSize; ++iY)
8770 : {
8771 2666 : std::swap(adfX0, adfX1);
8772 2666 : std::swap(adfY0, adfY1);
8773 2666 : std::swap(adfZ0, adfZ1);
8774 2666 : std::swap(abSuccess0, abSuccess1);
8775 :
8776 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8777 : {
8778 4833460 : adfX1[iX] = iX + poWK->nSrcXOff;
8779 4833460 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8780 4833460 : adfZ1[iX] = 0;
8781 : }
8782 :
8783 2666 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8784 : adfX1.data(), adfY1.data(), adfZ1.data(),
8785 : abSuccess1.data());
8786 :
8787 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8788 : {
8789 4833460 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8790 0 : abSuccess1[iX] = FALSE;
8791 : else
8792 : {
8793 4833460 : adfX1[iX] -= poWK->nDstXOff;
8794 4833460 : adfY1[iX] -= poWK->nDstYOff;
8795 : }
8796 : }
8797 :
8798 4833460 : for (int iX = 0; iX < nSrcXSize; ++iX)
8799 : {
8800 9661580 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8801 4830790 : abSuccess1[iX + 1])
8802 : {
8803 : /* --------------------------------------------------------------------
8804 : */
8805 : /* Do not try to apply transparent source pixels to the
8806 : * destination.*/
8807 : /* --------------------------------------------------------------------
8808 : */
8809 4830790 : const auto iSrcOffset =
8810 4830790 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8811 9560570 : if (poWK->panUnifiedSrcValid != nullptr &&
8812 4729780 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8813 : {
8814 4738340 : continue;
8815 : }
8816 :
8817 103415 : if (poWK->pafUnifiedSrcDensity != nullptr)
8818 : {
8819 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8820 : SRC_DENSITY_THRESHOLD_FLOAT)
8821 0 : continue;
8822 : }
8823 :
8824 : SourcePixel sp;
8825 103415 : sp.dfArea = 0;
8826 103415 : sp.dfDstX0 = adfX0[iX];
8827 103415 : sp.dfDstY0 = adfY0[iX];
8828 103415 : sp.dfDstX1 = adfX0[iX + 1];
8829 103415 : sp.dfDstY1 = adfY0[iX + 1];
8830 103415 : sp.dfDstX2 = adfX1[iX + 1];
8831 103415 : sp.dfDstY2 = adfY1[iX + 1];
8832 103415 : sp.dfDstX3 = adfX1[iX];
8833 103415 : sp.dfDstY3 = adfY1[iX];
8834 :
8835 : // Detect pixel that likely cross the anti-meridian and
8836 : // introduce a discontinuity when reprojected.
8837 :
8838 103415 : if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
8839 80 : std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
8840 40 : getInsideXSign(adfX0[iX]) !=
8841 80 : getInsideXSign(adfX0[iX + 1]) &&
8842 80 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8843 40 : getInsideXSign(adfX0[iX + 1]) ==
8844 103495 : getInsideXSign(adfX1[iX + 1]) &&
8845 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8846 : 0)
8847 : {
8848 : #ifdef DEBUG_VERBOSE
8849 : CPLDebug(
8850 : "WARP",
8851 : "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
8852 : "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
8853 : "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
8854 : iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
8855 : adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
8856 : adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
8857 : #endif
8858 40 : double dfXMidReprojectedLeftTop = 0;
8859 40 : double dfXMidReprojectedRightTop = 0;
8860 40 : double dfYMidReprojectedTop = 0;
8861 40 : FindDiscontinuity(
8862 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8863 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8864 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8865 : dfYMidReprojectedTop);
8866 40 : double dfXMidReprojectedLeftBottom = 0;
8867 40 : double dfXMidReprojectedRightBottom = 0;
8868 40 : double dfYMidReprojectedBottom = 0;
8869 40 : FindDiscontinuity(
8870 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8871 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8872 : dfXMidReprojectedLeftBottom,
8873 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8874 :
8875 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8876 40 : discontinuityLeft[1] =
8877 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8878 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8879 40 : dfYMidReprojectedBottom);
8880 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8881 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8882 :
8883 40 : discontinuityRight[0] =
8884 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8885 40 : discontinuityRight[1] =
8886 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8887 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8888 40 : dfYMidReprojectedBottom);
8889 40 : discontinuityRight[3] =
8890 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8891 40 : discontinuityRight[4] =
8892 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8893 :
8894 40 : sp.dfArea = getArea(discontinuityLeft) +
8895 40 : getArea(discontinuityRight);
8896 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8897 : {
8898 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8899 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8900 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8901 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8902 : }
8903 : else
8904 : {
8905 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8906 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8907 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8908 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8909 : }
8910 : }
8911 :
8912 : // Bounding box of source pixel (expressed in target pixel
8913 : // coordinates)
8914 : CPLRectObj sRect;
8915 103415 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8916 103415 : std::min(sp.dfDstX2, sp.dfDstX3));
8917 103415 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8918 103415 : std::min(sp.dfDstY2, sp.dfDstY3));
8919 103415 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8920 103415 : std::max(sp.dfDstX2, sp.dfDstX3));
8921 103415 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8922 103415 : std::max(sp.dfDstY2, sp.dfDstY3));
8923 103415 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8924 101355 : sRect.miny < iYMax && sRect.maxy > iYMin))
8925 : {
8926 10852 : continue;
8927 : }
8928 :
8929 92563 : sp.iSrcX = iX;
8930 92563 : sp.iSrcY = iY;
8931 :
8932 92563 : if (!bIsAffineNoRotation)
8933 : {
8934 : // Check polygon validity (no self-crossing)
8935 89745 : XYPair xy;
8936 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8937 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8938 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8939 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8940 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8941 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8942 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8943 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8944 : {
8945 113 : continue;
8946 : }
8947 : }
8948 :
8949 92450 : CPLQuadTreeInsertWithBounds(
8950 : hQuadTree,
8951 : reinterpret_cast<void *>(
8952 92450 : static_cast<uintptr_t>(sourcePixels.size())),
8953 : &sRect);
8954 :
8955 92450 : sourcePixels.push_back(sp);
8956 : }
8957 : }
8958 : }
8959 :
8960 38 : std::vector<double> adfRealValue(poWK->nBands);
8961 38 : std::vector<double> adfImagValue(poWK->nBands);
8962 38 : std::vector<double> adfBandDensity(poWK->nBands);
8963 38 : std::vector<double> adfWeight(poWK->nBands);
8964 :
8965 : #ifdef CHECK_SUM_WITH_GEOS
8966 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8967 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8968 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8969 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8970 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8971 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8972 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8973 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8974 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8975 :
8976 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8977 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8978 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8979 : #endif
8980 :
8981 : const XYPoly xy1{
8982 38 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8983 38 : XYPoly xy2(5);
8984 38 : XYPoly xy2_triangle(4);
8985 38 : XYPoly intersection;
8986 :
8987 : /* ==================================================================== */
8988 : /* Loop over output lines. */
8989 : /* ==================================================================== */
8990 1951 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8991 : {
8992 : CPLRectObj sRect;
8993 1932 : sRect.miny = iDstY;
8994 1932 : sRect.maxy = iDstY + 1;
8995 :
8996 : /* ====================================================================
8997 : */
8998 : /* Loop over pixels in output scanline. */
8999 : /* ====================================================================
9000 : */
9001 1403940 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
9002 : {
9003 1402010 : sRect.minx = iDstX;
9004 1402010 : sRect.maxx = iDstX + 1;
9005 1402010 : int nSourcePixels = 0;
9006 : void **pahSourcePixel =
9007 1402010 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
9008 1402010 : if (nSourcePixels == 0)
9009 : {
9010 1183090 : CPLFree(pahSourcePixel);
9011 1183100 : continue;
9012 : }
9013 :
9014 218919 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
9015 218919 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
9016 218919 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
9017 218919 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
9018 218919 : double dfDensity = 0;
9019 : // Just above zero to please Coveriy Scan
9020 218919 : double dfTotalWeight = std::numeric_limits<double>::min();
9021 :
9022 : /* ====================================================================
9023 : */
9024 : /* Iterate over each contributing source pixel to add its
9025 : */
9026 : /* value weighed by the ratio of the area of its
9027 : * intersection */
9028 : /* with the target pixel divided by the area of the source
9029 : */
9030 : /* pixel. */
9031 : /* ====================================================================
9032 : */
9033 1020550 : for (int i = 0; i < nSourcePixels; ++i)
9034 : {
9035 801628 : const int iSourcePixel = static_cast<int>(
9036 801628 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
9037 801628 : auto &sp = sourcePixels[iSourcePixel];
9038 :
9039 801628 : double dfWeight = 0.0;
9040 801628 : if (bIsAffineNoRotation)
9041 : {
9042 : // Optimization since the source pixel is a rectangle in
9043 : // target pixel coordinates
9044 16326 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
9045 16326 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
9046 16326 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
9047 16326 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
9048 16326 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
9049 16326 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
9050 16326 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
9051 16326 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
9052 16326 : dfWeight =
9053 16326 : ((dfIntersMaxX - dfIntersMinX) *
9054 16326 : (dfIntersMaxY - dfIntersMinY)) /
9055 16326 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
9056 : }
9057 : else
9058 : {
9059 : // Compute the polygon of the source pixel in target pixel
9060 : // coordinates, and shifted to the target pixel (unit square
9061 : // coordinates)
9062 :
9063 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9064 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
9065 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
9066 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
9067 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9068 :
9069 785302 : if (isConvex(xy2))
9070 : {
9071 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
9072 785302 : if (intersection.size() >= 3)
9073 : {
9074 468849 : dfWeight = getArea(intersection);
9075 : }
9076 : }
9077 : else
9078 : {
9079 : // Split xy2 into 2 triangles.
9080 0 : xy2_triangle[0] = xy2[0];
9081 0 : xy2_triangle[1] = xy2[1];
9082 0 : xy2_triangle[2] = xy2[2];
9083 0 : xy2_triangle[3] = xy2[0];
9084 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9085 : intersection);
9086 0 : if (intersection.size() >= 3)
9087 : {
9088 0 : dfWeight = getArea(intersection);
9089 : }
9090 :
9091 0 : xy2_triangle[1] = xy2[2];
9092 0 : xy2_triangle[2] = xy2[3];
9093 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9094 : intersection);
9095 0 : if (intersection.size() >= 3)
9096 : {
9097 0 : dfWeight += getArea(intersection);
9098 : }
9099 : }
9100 785302 : if (dfWeight > 0.0)
9101 : {
9102 468828 : if (sp.dfArea == 0)
9103 89592 : sp.dfArea = getArea(xy2);
9104 468828 : dfWeight /= sp.dfArea;
9105 : }
9106 :
9107 : #ifdef CHECK_SUM_WITH_GEOS
9108 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
9109 : sp.dfDstX0 - iDstX,
9110 : sp.dfDstY0 - iDstY);
9111 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
9112 : sp.dfDstX1 - iDstX,
9113 : sp.dfDstY1 - iDstY);
9114 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
9115 : sp.dfDstX2 - iDstX,
9116 : sp.dfDstY2 - iDstY);
9117 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
9118 : sp.dfDstX3 - iDstX,
9119 : sp.dfDstY3 - iDstY);
9120 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
9121 : sp.dfDstX0 - iDstX,
9122 : sp.dfDstY0 - iDstY);
9123 :
9124 : double dfWeightGEOS = 0.0;
9125 : auto hIntersection =
9126 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
9127 : if (hIntersection)
9128 : {
9129 : double dfIntersArea = 0.0;
9130 : if (GEOSArea_r(hGEOSContext, hIntersection,
9131 : &dfIntersArea) &&
9132 : dfIntersArea > 0)
9133 : {
9134 : double dfSourceArea = 0.0;
9135 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
9136 : {
9137 : dfWeightGEOS = dfIntersArea / dfSourceArea;
9138 : }
9139 : }
9140 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
9141 : }
9142 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
9143 : {
9144 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
9145 : dfWeight, dfWeightGEOS);
9146 : printf("xy2: "); // ok
9147 : for (const auto &xy : xy2)
9148 : printf("[%f, %f], ", xy.first, xy.second); // ok
9149 : printf("\n"); // ok
9150 : printf("intersection: "); // ok
9151 : for (const auto &xy : intersection)
9152 : printf("[%f, %f], ", xy.first, xy.second); // ok
9153 : printf("\n"); // ok
9154 : }
9155 : #endif
9156 : }
9157 801628 : if (dfWeight > 0.0)
9158 : {
9159 : #ifdef DEBUG_VERBOSE
9160 : #if defined(DST_X) && defined(DST_Y)
9161 : if (iDstX + poWK->nDstXOff == DST_X &&
9162 : iDstY + poWK->nDstYOff == DST_Y)
9163 : {
9164 : CPLDebug("WARP",
9165 : "iSrcX = %d, iSrcY = %d, weight =%.17g",
9166 : sp.iSrcX + poWK->nSrcXOff,
9167 : sp.iSrcY + poWK->nSrcYOff, dfWeight);
9168 : }
9169 : #endif
9170 : #endif
9171 :
9172 474104 : const GPtrDiff_t iSrcOffset =
9173 474104 : sp.iSrcX +
9174 474104 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
9175 474104 : dfTotalWeight += dfWeight;
9176 :
9177 474104 : if (poWK->pafUnifiedSrcDensity != nullptr)
9178 : {
9179 0 : dfDensity +=
9180 0 : dfWeight *
9181 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
9182 : }
9183 : else
9184 : {
9185 474104 : dfDensity += dfWeight;
9186 : }
9187 :
9188 1818730 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9189 : {
9190 : // Returns pixel value if it is not no data.
9191 : double dfBandDensity;
9192 : double dfRealValue;
9193 : double dfImagValue;
9194 2689250 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
9195 : &dfBandDensity, &dfRealValue,
9196 : &dfImagValue) &&
9197 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
9198 : {
9199 0 : continue;
9200 : }
9201 : #ifdef DEBUG_VERBOSE
9202 : #if defined(DST_X) && defined(DST_Y)
9203 : if (iDstX + poWK->nDstXOff == DST_X &&
9204 : iDstY + poWK->nDstYOff == DST_Y)
9205 : {
9206 : CPLDebug("WARP", "value * weight = %.17g",
9207 : dfRealValue * dfWeight);
9208 : }
9209 : #endif
9210 : #endif
9211 :
9212 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
9213 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
9214 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
9215 1344620 : adfWeight[iBand] += dfWeight;
9216 : }
9217 : }
9218 : }
9219 :
9220 218919 : CPLFree(pahSourcePixel);
9221 :
9222 : /* --------------------------------------------------------------------
9223 : */
9224 : /* Update destination pixel value. */
9225 : /* --------------------------------------------------------------------
9226 : */
9227 218919 : bool bHasFoundDensity = false;
9228 218919 : const GPtrDiff_t iDstOffset =
9229 218919 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
9230 827838 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9231 : {
9232 608919 : if (adfWeight[iBand] > 0)
9233 : {
9234 : const double dfBandDensity =
9235 608909 : adfBandDensity[iBand] / adfWeight[iBand];
9236 608909 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
9237 : {
9238 608909 : bHasFoundDensity = true;
9239 608909 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
9240 608909 : adfRealValue[iBand],
9241 608909 : adfImagValue[iBand],
9242 : bAvoidNoDataSingleBand);
9243 : }
9244 : }
9245 : }
9246 :
9247 218919 : if (!bHasFoundDensity)
9248 10 : continue;
9249 :
9250 218909 : if (!bAvoidNoDataSingleBand)
9251 : {
9252 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
9253 : }
9254 :
9255 : /* --------------------------------------------------------------------
9256 : */
9257 : /* Update destination density/validity masks. */
9258 : /* --------------------------------------------------------------------
9259 : */
9260 218909 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
9261 :
9262 218909 : if (poWK->panDstValid != nullptr)
9263 : {
9264 11752 : CPLMaskSet(poWK->panDstValid, iDstOffset);
9265 : }
9266 : }
9267 :
9268 : /* --------------------------------------------------------------------
9269 : */
9270 : /* Report progress to the user, and optionally cancel out. */
9271 : /* --------------------------------------------------------------------
9272 : */
9273 1932 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
9274 0 : break;
9275 : }
9276 :
9277 : #ifdef CHECK_SUM_WITH_GEOS
9278 : GEOSGeom_destroy_r(hGEOSContext, hP1);
9279 : GEOSGeom_destroy_r(hGEOSContext, hP2);
9280 : OGRGeometry::freeGEOSContext(hGEOSContext);
9281 : #endif
9282 19 : CPLQuadTreeDestroy(hQuadTree);
9283 19 : }
|