Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79 : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
80 : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
84 : static const int anGWKFilterRadius[] = {
85 : 0, // Nearest neighbour
86 : 1, // Bilinear
87 : 2, // Cubic Convolution (Catmull-Rom)
88 : 2, // Cubic B-Spline
89 : 3, // Lanczos windowed sinc
90 : 0, // Average
91 : 0, // Mode
92 : 0, // Reserved GRA_Gauss=7
93 : 0, // Max
94 : 0, // Min
95 : 0, // Med
96 : 0, // Q1
97 : 0, // Q3
98 : 0, // Sum
99 : 0, // RMS
100 : };
101 :
102 : static double GWKBilinear(double dfX);
103 : static double GWKCubic(double dfX);
104 : static double GWKBSpline(double dfX);
105 : static double GWKLanczosSinc(double dfX);
106 :
107 : static const FilterFuncType apfGWKFilter[] = {
108 : nullptr, // Nearest neighbour
109 : GWKBilinear, // Bilinear
110 : GWKCubic, // Cubic Convolution (Catmull-Rom)
111 : GWKBSpline, // Cubic B-Spline
112 : GWKLanczosSinc, // Lanczos windowed sinc
113 : nullptr, // Average
114 : nullptr, // Mode
115 : nullptr, // Reserved GRA_Gauss=7
116 : nullptr, // Max
117 : nullptr, // Min
118 : nullptr, // Med
119 : nullptr, // Q1
120 : nullptr, // Q3
121 : nullptr, // Sum
122 : nullptr, // RMS
123 : };
124 :
125 : // TODO(schwehr): Can we make these functions have a const * const arg?
126 : static double GWKBilinear4Values(double *padfVals);
127 : static double GWKCubic4Values(double *padfVals);
128 : static double GWKBSpline4Values(double *padfVals);
129 : static double GWKLanczosSinc4Values(double *padfVals);
130 :
131 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
132 : nullptr, // Nearest neighbour
133 : GWKBilinear4Values, // Bilinear
134 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
135 : GWKBSpline4Values, // Cubic B-Spline
136 : GWKLanczosSinc4Values, // Lanczos windowed sinc
137 : nullptr, // Average
138 : nullptr, // Mode
139 : nullptr, // Reserved GRA_Gauss=7
140 : nullptr, // Max
141 : nullptr, // Min
142 : nullptr, // Med
143 : nullptr, // Q1
144 : nullptr, // Q3
145 : nullptr, // Sum
146 : nullptr, // RMS
147 : };
148 :
149 13424 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
152 : "Bad size of anGWKFilterRadius");
153 13424 : return anGWKFilterRadius[eResampleAlg];
154 : }
155 :
156 5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
159 : "Bad size of apfGWKFilter");
160 5093 : return apfGWKFilter[eResampleAlg];
161 : }
162 :
163 5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
166 : "Bad size of apfGWKFilter4Values");
167 5093 : return apfGWKFilter4Values[eResampleAlg];
168 : }
169 :
170 : static CPLErr GWKGeneralCase(GDALWarpKernel *);
171 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
178 : #endif
179 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
186 : #endif
187 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
193 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
194 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
195 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
196 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 :
199 : /************************************************************************/
200 : /* GWKJobStruct */
201 : /************************************************************************/
202 :
203 : struct GWKJobStruct
204 : {
205 : std::mutex &mutex;
206 : std::condition_variable &cv;
207 : int counterSingleThreaded = 0;
208 : int &counter;
209 : bool &stopFlag;
210 : GDALWarpKernel *poWK = nullptr;
211 : int iYMin = 0;
212 : int iYMax = 0;
213 : int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
214 : void *pTransformerArg = nullptr;
215 : // used by GWKRun() to assign the proper pTransformerArg
216 : void (*pfnFunc)(void *) = nullptr;
217 :
218 2934 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
219 : int &counter_, bool &stopFlag_)
220 2934 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
221 : {
222 2934 : }
223 : };
224 :
225 : struct GWKThreadData
226 : {
227 : std::unique_ptr<CPLJobQueue> poJobQueue{};
228 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
229 : int nMaxThreads{0};
230 : int counter{0};
231 : bool stopFlag{false};
232 : std::mutex mutex{};
233 : std::condition_variable cv{};
234 : bool bTransformerArgInputAssignedToThread{false};
235 : void *pTransformerArgInput{
236 : nullptr}; // owned by calling layer. Not to be destroyed
237 : std::map<GIntBig, void *> mapThreadToTransformerArg{};
238 : int nTotalThreadCountForThisRun = 0;
239 : int nCurThreadCountForThisRun = 0;
240 : };
241 :
242 : /************************************************************************/
243 : /* GWKProgressThread() */
244 : /************************************************************************/
245 :
246 : // Return TRUE if the computation must be interrupted.
247 36 : static int GWKProgressThread(GWKJobStruct *psJob)
248 : {
249 36 : bool stop = false;
250 : {
251 36 : std::lock_guard<std::mutex> lock(psJob->mutex);
252 36 : psJob->counter++;
253 36 : stop = psJob->stopFlag;
254 : }
255 36 : psJob->cv.notify_one();
256 :
257 36 : return stop;
258 : }
259 :
260 : /************************************************************************/
261 : /* GWKProgressMonoThread() */
262 : /************************************************************************/
263 :
264 : // Return TRUE if the computation must be interrupted.
265 378762 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
266 : {
267 378762 : GDALWarpKernel *poWK = psJob->poWK;
268 378762 : if (!poWK->pfnProgress(poWK->dfProgressBase +
269 378762 : poWK->dfProgressScale *
270 378762 : (++psJob->counterSingleThreaded /
271 378762 : static_cast<double>(psJob->iYMax)),
272 : "", poWK->pProgress))
273 : {
274 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
275 1 : psJob->stopFlag = true;
276 1 : return TRUE;
277 : }
278 378761 : return FALSE;
279 : }
280 :
281 : /************************************************************************/
282 : /* GWKGenericMonoThread() */
283 : /************************************************************************/
284 :
285 2912 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
286 : void (*pfnFunc)(void *pUserData))
287 : {
288 2912 : GWKThreadData td;
289 :
290 : // NOTE: the mutex is not used.
291 2912 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
292 2912 : job.poWK = poWK;
293 2912 : job.iYMin = 0;
294 2912 : job.iYMax = poWK->nDstYSize;
295 2912 : job.pfnProgress = GWKProgressMonoThread;
296 2912 : job.pTransformerArg = poWK->pTransformerArg;
297 2912 : job.counterSingleThreaded = td.counter;
298 2912 : pfnFunc(&job);
299 2912 : td.counter = job.counterSingleThreaded;
300 :
301 5824 : return td.stopFlag ? CE_Failure : CE_None;
302 : }
303 :
304 : /************************************************************************/
305 : /* GWKThreadsCreate() */
306 : /************************************************************************/
307 :
308 1746 : void *GWKThreadsCreate(char **papszWarpOptions,
309 : GDALTransformerFunc /* pfnTransformer */,
310 : void *pTransformerArg)
311 : {
312 : const char *pszWarpThreads =
313 1746 : CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
314 1746 : if (pszWarpThreads == nullptr)
315 1729 : pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
316 :
317 1746 : int nThreads = 0;
318 1746 : if (EQUAL(pszWarpThreads, "ALL_CPUS"))
319 3 : nThreads = CPLGetNumCPUs();
320 : else
321 1743 : nThreads = atoi(pszWarpThreads);
322 1746 : if (nThreads <= 1)
323 1724 : nThreads = 0;
324 1746 : if (nThreads > 128)
325 0 : nThreads = 128;
326 :
327 1746 : GWKThreadData *psThreadData = new GWKThreadData();
328 : auto poThreadPool =
329 1746 : nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
330 1746 : if (nThreads && poThreadPool)
331 : {
332 22 : psThreadData->nMaxThreads = nThreads;
333 22 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
334 : nThreads,
335 22 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
336 44 : psThreadData->counter, psThreadData->stopFlag)));
337 :
338 22 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
339 22 : psThreadData->pTransformerArgInput = pTransformerArg;
340 : }
341 :
342 1746 : return psThreadData;
343 : }
344 :
345 : /************************************************************************/
346 : /* GWKThreadsEnd() */
347 : /************************************************************************/
348 :
349 1746 : void GWKThreadsEnd(void *psThreadDataIn)
350 : {
351 1746 : if (psThreadDataIn == nullptr)
352 0 : return;
353 :
354 1746 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
355 1746 : if (psThreadData->poJobQueue)
356 : {
357 : // cppcheck-suppress constVariableReference
358 32 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
359 : {
360 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput);
361 10 : GDALDestroyTransformer(pair.second);
362 : }
363 22 : psThreadData->poJobQueue.reset();
364 : }
365 1746 : delete psThreadData;
366 : }
367 :
368 : /************************************************************************/
369 : /* ThreadFuncAdapter() */
370 : /************************************************************************/
371 :
372 31 : static void ThreadFuncAdapter(void *pData)
373 : {
374 31 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
375 31 : GWKThreadData *psThreadData =
376 31 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
377 :
378 : // Look if we have already a per-thread transformer
379 31 : void *pTransformerArg = nullptr;
380 31 : const GIntBig nThreadId = CPLGetPID();
381 :
382 : {
383 62 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
384 31 : ++psThreadData->nCurThreadCountForThisRun;
385 :
386 31 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
387 31 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
388 : {
389 0 : pTransformerArg = oIter->second;
390 : }
391 31 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
392 31 : psThreadData->nCurThreadCountForThisRun ==
393 31 : psThreadData->nTotalThreadCountForThisRun)
394 : {
395 : // If we are the last thread to be started, temporarily borrow the
396 : // original transformer
397 21 : psThreadData->bTransformerArgInputAssignedToThread = true;
398 21 : pTransformerArg = psThreadData->pTransformerArgInput;
399 21 : psThreadData->mapThreadToTransformerArg[nThreadId] =
400 : pTransformerArg;
401 : }
402 :
403 31 : if (pTransformerArg == nullptr)
404 : {
405 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
406 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
407 : }
408 : }
409 :
410 : // If no transformer assigned to current thread, instantiate one
411 31 : if (pTransformerArg == nullptr)
412 : {
413 : // This somehow assumes that GDALCloneTransformer() is thread-safe
414 : // which should normally be the case.
415 : pTransformerArg =
416 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput);
417 :
418 : // Lock for the stop flag and the transformer map.
419 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
420 10 : if (!pTransformerArg)
421 : {
422 0 : psJob->stopFlag = true;
423 0 : return;
424 : }
425 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
426 : }
427 :
428 31 : psJob->pTransformerArg = pTransformerArg;
429 31 : psJob->pfnFunc(pData);
430 :
431 : // Give back original transformer, if borrowed.
432 : {
433 62 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
434 31 : if (psThreadData->bTransformerArgInputAssignedToThread &&
435 21 : pTransformerArg == psThreadData->pTransformerArgInput)
436 : {
437 : psThreadData->mapThreadToTransformerArg.erase(
438 21 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
439 21 : psThreadData->bTransformerArgInputAssignedToThread = false;
440 : }
441 : }
442 : }
443 :
444 : /************************************************************************/
445 : /* GWKRun() */
446 : /************************************************************************/
447 :
448 2933 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
449 : void (*pfnFunc)(void *pUserData))
450 :
451 : {
452 2933 : const int nDstYSize = poWK->nDstYSize;
453 :
454 2933 : CPLDebug("GDAL",
455 : "GDALWarpKernel()::%s() "
456 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
457 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
458 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
459 : poWK->nDstYSize);
460 :
461 2933 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
462 : {
463 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
464 0 : return CE_Failure;
465 : }
466 :
467 2933 : GWKThreadData *psThreadData =
468 : static_cast<GWKThreadData *>(poWK->psThreadData);
469 2933 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
470 : {
471 2912 : return GWKGenericMonoThread(poWK, pfnFunc);
472 : }
473 :
474 21 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
475 : // Config option mostly useful for tests to be able to test multithreading
476 : // with small rasters
477 : const int nWarpChunkSize =
478 21 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
479 21 : if (nWarpChunkSize > 0)
480 : {
481 19 : GIntBig nChunks =
482 19 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
483 19 : if (nThreads > nChunks)
484 14 : nThreads = static_cast<int>(nChunks);
485 : }
486 21 : if (nThreads <= 0)
487 17 : nThreads = 1;
488 :
489 21 : CPLDebug("WARP", "Using %d threads", nThreads);
490 :
491 21 : auto &jobs = *psThreadData->threadJobs;
492 21 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
493 : // Fill-in job structures.
494 52 : for (int i = 0; i < nThreads; ++i)
495 : {
496 31 : auto &job = jobs[i];
497 31 : job.poWK = poWK;
498 31 : job.iYMin =
499 31 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
500 31 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
501 31 : nThreads);
502 31 : if (poWK->pfnProgress != GDALDummyProgress)
503 2 : job.pfnProgress = GWKProgressThread;
504 31 : job.pfnFunc = pfnFunc;
505 : }
506 :
507 : bool bStopFlag;
508 : {
509 21 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
510 :
511 21 : psThreadData->nTotalThreadCountForThisRun = nThreads;
512 : // coverity[missing_lock]
513 21 : psThreadData->nCurThreadCountForThisRun = 0;
514 :
515 : // Start jobs.
516 52 : for (int i = 0; i < nThreads; ++i)
517 : {
518 31 : auto &job = jobs[i];
519 31 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
520 : static_cast<void *>(&job));
521 : }
522 :
523 : /* --------------------------------------------------------------------
524 : */
525 : /* Report progress. */
526 : /* --------------------------------------------------------------------
527 : */
528 21 : if (poWK->pfnProgress != GDALDummyProgress)
529 : {
530 12 : while (psThreadData->counter < nDstYSize)
531 : {
532 11 : psThreadData->cv.wait(lock);
533 11 : if (!poWK->pfnProgress(poWK->dfProgressBase +
534 11 : poWK->dfProgressScale *
535 11 : (psThreadData->counter /
536 11 : static_cast<double>(nDstYSize)),
537 : "", poWK->pProgress))
538 : {
539 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
540 1 : psThreadData->stopFlag = true;
541 1 : break;
542 : }
543 : }
544 : }
545 :
546 21 : bStopFlag = psThreadData->stopFlag;
547 : }
548 :
549 : /* -------------------------------------------------------------------- */
550 : /* Wait for all jobs to complete. */
551 : /* -------------------------------------------------------------------- */
552 21 : psThreadData->poJobQueue->WaitCompletion();
553 :
554 21 : return bStopFlag ? CE_Failure : CE_None;
555 : }
556 :
557 : /************************************************************************/
558 : /* ==================================================================== */
559 : /* GDALWarpKernel */
560 : /* ==================================================================== */
561 : /************************************************************************/
562 :
563 : /**
564 : * \class GDALWarpKernel "gdalwarper.h"
565 : *
566 : * Low level image warping class.
567 : *
568 : * This class is responsible for low level image warping for one
569 : * "chunk" of imagery. The class is essentially a structure with all
570 : * data members public - primarily so that new special-case functions
571 : * can be added without changing the class declaration.
572 : *
573 : * Applications are normally intended to interact with warping facilities
574 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
575 : * theory be used directly if great care is taken in setting up the
576 : * control data.
577 : *
578 : * <h3>Design Issues</h3>
579 : *
580 : * The intention is that PerformWarp() would analyze the setup in terms
581 : * of the datatype, resampling type, and validity/density mask usage and
582 : * pick one of many specific implementations of the warping algorithm over
583 : * a continuum of optimization vs. generality. At one end there will be a
584 : * reference general purpose implementation of the algorithm that supports
585 : * any data type (working internally in double precision complex), all three
586 : * resampling types, and any or all of the validity/density masks. At the
587 : * other end would be highly optimized algorithms for common cases like
588 : * nearest neighbour resampling on GDT_UInt8 data with no masks.
589 : *
590 : * The full set of optimized versions have not been decided but we should
591 : * expect to have at least:
592 : * - One for each resampling algorithm for 8bit data with no masks.
593 : * - One for each resampling algorithm for float data with no masks.
594 : * - One for each resampling algorithm for float data with any/all masks
595 : * (essentially the generic case for just float data).
596 : * - One for each resampling algorithm for 8bit data with support for
597 : * input validity masks (per band or per pixel). This handles the common
598 : * case of nodata masking.
599 : * - One for each resampling algorithm for float data with support for
600 : * input validity masks (per band or per pixel). This handles the common
601 : * case of nodata masking.
602 : *
603 : * Some of the specializations would operate on all bands in one pass
604 : * (especially the ones without masking would do this), while others might
605 : * process each band individually to reduce code complexity.
606 : *
607 : * <h3>Masking Semantics</h3>
608 : *
609 : * A detailed explanation of the semantics of the validity and density masks,
610 : * and their effects on resampling kernels is needed here.
611 : */
612 :
613 : /************************************************************************/
614 : /* GDALWarpKernel Data Members */
615 : /************************************************************************/
616 :
617 : /**
618 : * \var GDALResampleAlg GDALWarpKernel::eResample;
619 : *
620 : * Resampling algorithm.
621 : *
622 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
623 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
624 : * GRA_Mode or GRA_Sum.
625 : *
626 : * This field is required. GRA_NearestNeighbour may be used as a default
627 : * value.
628 : */
629 :
630 : /**
631 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
632 : *
633 : * Working pixel data type.
634 : *
635 : * The datatype of pixels in the source image (papabySrcImage) and
636 : * destination image (papabyDstImage) buffers. Note that operations on
637 : * some data types (such as GDT_UInt8) may be much better optimized than other
638 : * less common cases.
639 : *
640 : * This field is required. It may not be GDT_Unknown.
641 : */
642 :
643 : /**
644 : * \var int GDALWarpKernel::nBands;
645 : *
646 : * Number of bands.
647 : *
648 : * The number of bands (layers) of imagery being warped. Determines the
649 : * number of entries in the papabySrcImage, papanBandSrcValid,
650 : * and papabyDstImage arrays.
651 : *
652 : * This field is required.
653 : */
654 :
655 : /**
656 : * \var int GDALWarpKernel::nSrcXSize;
657 : *
658 : * Source image width in pixels.
659 : *
660 : * This field is required.
661 : */
662 :
663 : /**
664 : * \var int GDALWarpKernel::nSrcYSize;
665 : *
666 : * Source image height in pixels.
667 : *
668 : * This field is required.
669 : */
670 :
671 : /**
672 : * \var double GDALWarpKernel::dfSrcXExtraSize;
673 : *
674 : * Number of pixels included in nSrcXSize that are present on the edges of
675 : * the area of interest to take into account the width of the kernel.
676 : *
677 : * This field is required.
678 : */
679 :
680 : /**
681 : * \var double GDALWarpKernel::dfSrcYExtraSize;
682 : *
683 : * Number of pixels included in nSrcYSize that are present on the edges of
684 : * the area of interest to take into account the height of the kernel.
685 : *
686 : * This field is required.
687 : */
688 :
689 : /**
690 : * \var GByte **GDALWarpKernel::papabySrcImage;
691 : *
692 : * Array of source image band data.
693 : *
694 : * This is an array (of size GDALWarpKernel::nBands) of pointers
695 : * to image data. Each individual band of image data is organized as a single
696 : * block of image data in left to right, then bottom to top order. The actual
697 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
698 : *
699 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
700 : * the second band with eWorkingDataType set to GDT_Float32 use code like
701 : * this:
702 : *
703 : * \code
704 : * float dfPixelValue;
705 : * int nBand = 2-1; // Band indexes are zero based.
706 : * int nPixel = 3; // Zero based.
707 : * int nLine = 4; // Zero based.
708 : *
709 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
710 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
711 : * assert( nBand >= 0 && nBand < poKern->nBands );
712 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
713 : * [nPixel + nLine * poKern->nSrcXSize];
714 : * \endcode
715 : *
716 : * This field is required.
717 : */
718 :
719 : /**
720 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
721 : *
722 : * Per band validity mask for source pixels.
723 : *
724 : * Array of pixel validity mask layers for each source band. Each of
725 : * the mask layers is the same size (in pixels) as the source image with
726 : * one bit per pixel. Note that it is legal (and common) for this to be
727 : * NULL indicating that none of the pixels are invalidated, or for some
728 : * band validity masks to be NULL in which case all pixels of the band are
729 : * valid. The following code can be used to test the validity of a particular
730 : * pixel.
731 : *
732 : * \code
733 : * int bIsValid = TRUE;
734 : * int nBand = 2-1; // Band indexes are zero based.
735 : * int nPixel = 3; // Zero based.
736 : * int nLine = 4; // Zero based.
737 : *
738 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
739 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
740 : * assert( nBand >= 0 && nBand < poKern->nBands );
741 : *
742 : * if( poKern->papanBandSrcValid != NULL
743 : * && poKern->papanBandSrcValid[nBand] != NULL )
744 : * {
745 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
746 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
747 : *
748 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
749 : * }
750 : * \endcode
751 : */
752 :
753 : /**
754 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
755 : *
756 : * Per pixel validity mask for source pixels.
757 : *
758 : * A single validity mask layer that applies to the pixels of all source
759 : * bands. It is accessed similarly to papanBandSrcValid, but without the
760 : * extra level of band indirection.
761 : *
762 : * This pointer may be NULL indicating that all pixels are valid.
763 : *
764 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
765 : * the pixel isn't considered to be valid unless both arrays indicate it is
766 : * valid.
767 : */
768 :
769 : /**
770 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
771 : *
772 : * Per pixel density mask for source pixels.
773 : *
774 : * A single density mask layer that applies to the pixels of all source
775 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
776 : * which this pixel should be allowed to contribute to the output result.
777 : *
778 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
779 : *
780 : * The density for a pixel may be accessed like this:
781 : *
782 : * \code
783 : * float fDensity = 1.0;
784 : * int nPixel = 3; // Zero based.
785 : * int nLine = 4; // Zero based.
786 : *
787 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
788 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
789 : * if( poKern->pafUnifiedSrcDensity != NULL )
790 : * fDensity = poKern->pafUnifiedSrcDensity
791 : * [nPixel + nLine * poKern->nSrcXSize];
792 : * \endcode
793 : */
794 :
795 : /**
796 : * \var int GDALWarpKernel::nDstXSize;
797 : *
798 : * Width of destination image in pixels.
799 : *
800 : * This field is required.
801 : */
802 :
803 : /**
804 : * \var int GDALWarpKernel::nDstYSize;
805 : *
806 : * Height of destination image in pixels.
807 : *
808 : * This field is required.
809 : */
810 :
811 : /**
812 : * \var GByte **GDALWarpKernel::papabyDstImage;
813 : *
814 : * Array of destination image band data.
815 : *
816 : * This is an array (of size GDALWarpKernel::nBands) of pointers
817 : * to image data. Each individual band of image data is organized as a single
818 : * block of image data in left to right, then bottom to top order. The actual
819 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
820 : *
821 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
822 : * the second band with eWorkingDataType set to GDT_Float32 use code like
823 : * this:
824 : *
825 : * \code
826 : * float dfPixelValue;
827 : * int nBand = 2-1; // Band indexes are zero based.
828 : * int nPixel = 3; // Zero based.
829 : * int nLine = 4; // Zero based.
830 : *
831 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
832 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
833 : * assert( nBand >= 0 && nBand < poKern->nBands );
834 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
835 : * [nPixel + nLine * poKern->nDstXSize];
836 : * \endcode
837 : *
838 : * This field is required.
839 : */
840 :
841 : /**
842 : * \var GUInt32 *GDALWarpKernel::panDstValid;
843 : *
844 : * Per pixel validity mask for destination pixels.
845 : *
846 : * A single validity mask layer that applies to the pixels of all destination
847 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
848 : * on the size of the destination image.
849 : *
850 : * This pointer may be NULL indicating that all pixels are valid.
851 : */
852 :
853 : /**
854 : * \var float *GDALWarpKernel::pafDstDensity;
855 : *
856 : * Per pixel density mask for destination pixels.
857 : *
858 : * A single density mask layer that applies to the pixels of all destination
859 : * bands. It contains values between 0.0 and 1.0.
860 : *
861 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
862 : *
863 : * The density for a pixel may be accessed like this:
864 : *
865 : * \code
866 : * float fDensity = 1.0;
867 : * int nPixel = 3; // Zero based.
868 : * int nLine = 4; // Zero based.
869 : *
870 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
871 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
872 : * if( poKern->pafDstDensity != NULL )
873 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
874 : * \endcode
875 : */
876 :
877 : /**
878 : * \var int GDALWarpKernel::nSrcXOff;
879 : *
880 : * X offset to source pixel coordinates for transformation.
881 : *
882 : * See pfnTransformer.
883 : *
884 : * This field is required.
885 : */
886 :
887 : /**
888 : * \var int GDALWarpKernel::nSrcYOff;
889 : *
890 : * Y offset to source pixel coordinates for transformation.
891 : *
892 : * See pfnTransformer.
893 : *
894 : * This field is required.
895 : */
896 :
897 : /**
898 : * \var int GDALWarpKernel::nDstXOff;
899 : *
900 : * X offset to destination pixel coordinates for transformation.
901 : *
902 : * See pfnTransformer.
903 : *
904 : * This field is required.
905 : */
906 :
907 : /**
908 : * \var int GDALWarpKernel::nDstYOff;
909 : *
910 : * Y offset to destination pixel coordinates for transformation.
911 : *
912 : * See pfnTransformer.
913 : *
914 : * This field is required.
915 : */
916 :
917 : /**
918 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
919 : *
920 : * Source/destination location transformer.
921 : *
922 : * The function to call to transform coordinates between source image
923 : * pixel/line coordinates and destination image pixel/line coordinates.
924 : * See GDALTransformerFunc() for details of the semantics of this function.
925 : *
926 : * The GDALWarpKernel algorithm will only ever use this transformer in
927 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
928 : * partial or complete scanlines of points in the destination image as
929 : * input. This means, among other things, that it is safe to use the
930 : * approximating transform GDALApproxTransform() as the transformation
931 : * function.
932 : *
933 : * Source and destination images may be subsets of a larger overall image.
934 : * The transformation algorithms will expect and return pixel/line coordinates
935 : * in terms of this larger image, so coordinates need to be offset by
936 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
937 : * passing to pfnTransformer, and after return from it.
938 : *
939 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
940 : * data to this function when it is called.
941 : *
942 : * This field is required.
943 : */
944 :
945 : /**
946 : * \var void *GDALWarpKernel::pTransformerArg;
947 : *
948 : * Callback data for pfnTransformer.
949 : *
950 : * This field may be NULL if not required for the pfnTransformer being used.
951 : */
952 :
953 : /**
954 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
955 : *
956 : * The function to call to report progress of the algorithm, and to check
957 : * for a requested termination of the operation. It operates according to
958 : * GDALProgressFunc() semantics.
959 : *
960 : * Generally speaking the progress function will be invoked for each
961 : * scanline of the destination buffer that has been processed.
962 : *
963 : * This field may be NULL (internally set to GDALDummyProgress()).
964 : */
965 :
966 : /**
967 : * \var void *GDALWarpKernel::pProgress;
968 : *
969 : * Callback data for pfnProgress.
970 : *
971 : * This field may be NULL if not required for the pfnProgress being used.
972 : */
973 :
974 : /************************************************************************/
975 : /* GDALWarpKernel() */
976 : /************************************************************************/
977 :
978 2963 : GDALWarpKernel::GDALWarpKernel()
979 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
980 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
981 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
982 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
983 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
984 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
985 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
986 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
987 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
988 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
989 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
990 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
991 2963 : eTieStrategy(GWKTS_First)
992 : {
993 2963 : }
994 :
995 : /************************************************************************/
996 : /* ~GDALWarpKernel() */
997 : /************************************************************************/
998 :
999 2963 : GDALWarpKernel::~GDALWarpKernel()
1000 : {
1001 2963 : }
1002 :
1003 : /************************************************************************/
1004 : /* getArea() */
1005 : /************************************************************************/
1006 :
typedef std::pair<double, double> XYPair;

typedef std::vector<XYPair> XYPoly;

/**
 * Return the (unsigned) area enclosed by a polygon.
 *
 * The area is computed by summing, for every vertex, its X coordinate
 * multiplied by the difference between the Y coordinates of the next and
 * previous vertices (with wrap-around at both ends), and halving the
 * absolute value of that sum.
 *
 * @param poly polygon vertices as (x, y) pairs. The polygon may or may not
 *             be closed.
 * @return the area, or 0.0 when fewer than 3 vertices are supplied (such a
 *         polygon encloses no area, and the general formula would otherwise
 *         index outside of the vector).
 */
static double getArea(const XYPoly &poly)
{
    const size_t nPointCount = poly.size();

    // Degenerate input: avoid reading poly[1] / poly[nPointCount - 2] when
    // they do not exist. For exactly 2 points the general formula yields 0
    // as well, so this only changes behavior for 0 or 1 point.
    if (nPointCount < 3)
        return 0.0;

    const size_t iLast = nPointCount - 1;

    // First vertex: its "previous" neighbour is the last vertex.
    double dfAreaSum = poly[0].first * (poly[1].second - poly[iLast].second);

    // Interior vertices.
    for (size_t i = 1; i < iLast; i++)
    {
        dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
    }

    // Last vertex: its "next" neighbour is the first vertex.
    dfAreaSum +=
        poly[iLast].first * (poly[0].second - poly[iLast - 1].second);

    return 0.5 * std::fabs(dfAreaSum);
}
1029 :
1030 : /************************************************************************/
1031 : /* CanUse4SamplesFormula() */
1032 : /************************************************************************/
1033 :
1034 4240 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
1035 : {
1036 4240 : if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
1037 : {
1038 : // Use 4-sample formula if we are not downsampling by more than a
1039 : // factor of 1:2
1040 2283 : if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
1041 1843 : return true;
1042 440 : CPLDebugOnce("WARP",
1043 : "Not using 4-sample bilinear/bicubic formula because "
1044 : "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
1045 : poWK->dfXScale, poWK->dfYScale);
1046 : }
1047 2397 : return false;
1048 : }
1049 :
1050 : /************************************************************************/
1051 : /* PerformWarp() */
1052 : /************************************************************************/
1053 :
1054 : /**
1055 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1056 : *
1057 : * This method performs the warp described in the GDALWarpKernel.
1058 : *
1059 : * @return CE_None on success or CE_Failure if an error occurs.
1060 : */
1061 :
1062 2959 : CPLErr GDALWarpKernel::PerformWarp()
1063 :
1064 : {
1065 2959 : const CPLErr eErr = Validate();
1066 :
1067 2959 : if (eErr != CE_None)
1068 1 : return eErr;
1069 :
1070 : // See #2445 and #3079.
1071 2958 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1072 : {
1073 25 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1074 : {
1075 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1076 0 : return CE_Failure;
1077 : }
1078 25 : return CE_None;
1079 : }
1080 :
1081 : /* -------------------------------------------------------------------- */
1082 : /* Pre-calculate resampling scales and window sizes for filtering. */
1083 : /* -------------------------------------------------------------------- */
1084 :
1085 2933 : dfXScale = 0.0;
1086 2933 : dfYScale = 0.0;
1087 :
1088 : // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
1089 : // heterogeneous change in shapes.
1090 : // Best would probably be a per-pixel scale computation.
1091 2933 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1092 2933 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1093 2933 : if (!pszXScale || !pszYScale)
1094 : {
1095 : // Sample points along a grid in the destination space
1096 2932 : constexpr int MAX_POINTS_PER_DIM = 10;
1097 2932 : const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
1098 2932 : const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
1099 2932 : constexpr int CORNER_COUNT_PER_SQUARE = 4;
1100 2932 : const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
1101 5864 : std::vector<double> adfX;
1102 5864 : std::vector<double> adfY;
1103 2932 : adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1104 2932 : adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1105 5864 : std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
1106 5864 : std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
1107 27878 : for (int iY = 0; iY < nPointsY; iY++)
1108 : {
1109 24946 : const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
1110 24946 : const double dfY =
1111 24946 : dfYShift + (nPointsY == 1 ? 0.0
1112 24730 : : static_cast<double>(iY) *
1113 24730 : nDstYSize / (nPointsY - 1));
1114 :
1115 264122 : for (int iX = 0; iX < nPointsX; iX++)
1116 : {
1117 239176 : const double dfXShift =
1118 239176 : (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
1119 :
1120 239176 : const double dfX =
1121 239176 : dfXShift + (nPointsX == 1 ? 0.0
1122 238974 : : static_cast<double>(iX) *
1123 238974 : nDstXSize / (nPointsX - 1));
1124 :
1125 : // Reproject a unit square at each sample point
1126 239176 : adfX.push_back(dfX);
1127 239176 : adfY.push_back(dfY);
1128 :
1129 239176 : adfX.push_back(dfX + 1);
1130 239176 : adfY.push_back(dfY);
1131 :
1132 239176 : adfX.push_back(dfX);
1133 239176 : adfY.push_back(dfY + 1);
1134 :
1135 239176 : adfX.push_back(dfX + 1);
1136 239176 : adfY.push_back(dfY + 1);
1137 : }
1138 : }
1139 2932 : pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
1140 : adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
1141 :
1142 5864 : std::vector<XYPair> adfXYScales;
1143 2932 : adfXYScales.reserve(nPoints);
1144 242108 : for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
1145 : {
1146 477304 : if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
1147 238128 : abSuccess[i + 3])
1148 : {
1149 1905020 : const auto square = [](double x) { return x * x; };
1150 :
1151 238127 : const double vx01 = adfX[i + 1] - adfX[i + 0];
1152 238127 : const double vy01 = adfY[i + 1] - adfY[i + 0];
1153 238127 : const double len01_sq = square(vx01) + square(vy01);
1154 :
1155 238127 : const double vx23 = adfX[i + 3] - adfX[i + 2];
1156 238127 : const double vy23 = adfY[i + 3] - adfY[i + 2];
1157 238127 : const double len23_sq = square(vx23) + square(vy23);
1158 :
1159 238127 : const double vx02 = adfX[i + 2] - adfX[i + 0];
1160 238127 : const double vy02 = adfY[i + 2] - adfY[i + 0];
1161 238127 : const double len02_sq = square(vx02) + square(vy02);
1162 :
1163 238127 : const double vx13 = adfX[i + 3] - adfX[i + 1];
1164 238127 : const double vy13 = adfY[i + 3] - adfY[i + 1];
1165 238127 : const double len13_sq = square(vx13) + square(vy13);
1166 :
1167 : // ~ 20 degree, heuristic
1168 238127 : constexpr double TAN_MODEST_ANGLE = 0.35;
1169 :
1170 : // 10%, heuristic
1171 238127 : constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
1172 :
1173 : // Security margin to avoid division by zero (would only
1174 : // happen in case of degenerated coordinate transformation,
1175 : // or insane upsampling)
1176 238127 : constexpr double EPSILON = 1e-10;
1177 :
1178 : // Does the transformed square looks like an almost non-rotated
1179 : // quasi-rectangle ?
1180 238127 : if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
1181 231741 : std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
1182 231714 : std::fabs(len01_sq - len23_sq) <
1183 231714 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
1184 231601 : std::fabs(len02_sq - len13_sq) <
1185 231601 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
1186 : {
1187 : // Using a geometric average here of lenAB_sq and lenCD_sq,
1188 : // hence a sqrt(), and as this is still a squared value,
1189 : // we need another sqrt() to get a distance.
1190 : const double dfXLength =
1191 231586 : std::sqrt(std::sqrt(len01_sq * len23_sq));
1192 : const double dfYLength =
1193 231586 : std::sqrt(std::sqrt(len02_sq * len13_sq));
1194 231586 : if (dfXLength > EPSILON && dfYLength > EPSILON)
1195 : {
1196 231586 : const double dfThisXScale = 1.0 / dfXLength;
1197 231586 : const double dfThisYScale = 1.0 / dfYLength;
1198 231586 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1199 231586 : }
1200 : }
1201 : else
1202 : {
1203 : // If not, then consider the area of the transformed unit
1204 : // square to determine the X/Y scales.
1205 6541 : const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
1206 6541 : {adfX[i + 1], adfY[i + 1]},
1207 6541 : {adfX[i + 3], adfY[i + 3]},
1208 26164 : {adfX[i + 2], adfY[i + 2]}};
1209 6541 : const double dfSrcArea = getArea(poly);
1210 6541 : const double dfFactor = std::sqrt(dfSrcArea);
1211 6541 : if (dfFactor > EPSILON)
1212 : {
1213 6541 : const double dfThisXScale = 1.0 / dfFactor;
1214 6541 : const double dfThisYScale = dfThisXScale;
1215 6541 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1216 : }
1217 : }
1218 : }
1219 : }
1220 :
1221 2932 : if (!adfXYScales.empty())
1222 : {
1223 : // Sort by increasing xscale * yscale
1224 2932 : std::sort(adfXYScales.begin(), adfXYScales.end(),
1225 1281170 : [](const XYPair &a, const XYPair &b)
1226 1281170 : { return a.first * a.second < b.first * b.second; });
1227 :
1228 : // Compute the per-axis maximum of scale
1229 2932 : double dfXMax = 0;
1230 2932 : double dfYMax = 0;
1231 241059 : for (const auto &[dfX, dfY] : adfXYScales)
1232 : {
1233 238127 : dfXMax = std::max(dfXMax, dfX);
1234 238127 : dfYMax = std::max(dfYMax, dfY);
1235 : }
1236 :
1237 : // Now eliminate outliers, defined as ones whose value is < 10% of
1238 : // the maximum value, typically found at a polar discontinuity, and
1239 : // compute the average of non-outlier values.
1240 2932 : dfXScale = 0;
1241 2932 : dfYScale = 0;
1242 2932 : int i = 0;
1243 2932 : constexpr double THRESHOLD = 0.1; // 10%, rather arbitrary
1244 241059 : for (const auto &[dfX, dfY] : adfXYScales)
1245 : {
1246 238127 : if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
1247 : {
1248 235939 : ++i;
1249 235939 : const double dfXDelta = dfX - dfXScale;
1250 235939 : const double dfYDelta = dfY - dfYScale;
1251 235939 : const double dfInvI = 1.0 / i;
1252 235939 : dfXScale += dfXDelta * dfInvI;
1253 235939 : dfYScale += dfYDelta * dfInvI;
1254 : }
1255 : }
1256 : }
1257 : }
1258 :
1259 : // Round to closest integer reciprocal scale if we are very close to it
1260 : const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
1261 5866 : [](double dfScale)
1262 : {
1263 5866 : if (dfScale < 1.0)
1264 : {
1265 2513 : double dfReciprocalScale = 1.0 / dfScale;
1266 2513 : const int nReciprocalScale =
1267 2513 : static_cast<int>(dfReciprocalScale + 0.5);
1268 2513 : if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
1269 2144 : dfScale = 1.0 / nReciprocalScale;
1270 : }
1271 5866 : return dfScale;
1272 : };
1273 :
1274 2933 : if (dfXScale <= 0)
1275 1 : dfXScale = 1.0;
1276 2933 : if (dfYScale <= 0)
1277 1 : dfYScale = 1.0;
1278 :
1279 2933 : dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
1280 2933 : dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
1281 :
1282 2933 : if (pszXScale != nullptr)
1283 1 : dfXScale = CPLAtof(pszXScale);
1284 2933 : if (pszYScale != nullptr)
1285 1 : dfYScale = CPLAtof(pszYScale);
1286 :
1287 2933 : if (!pszXScale || !pszYScale)
1288 2932 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1289 :
1290 2933 : const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
1291 :
1292 : // Safety check for callers that would use GDALWarpKernel without using
1293 : // GDALWarpOperation.
1294 2870 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1295 2807 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1296 5866 : !bUse4SamplesFormula)) &&
1297 346 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1298 : WARP_EXTRA_ELTS)
1299 : {
1300 0 : CPLError(CE_Failure, CPLE_AppDefined,
1301 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1302 : "their end. "
1303 : "See GDALWarpKernel class definition. If this condition is "
1304 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1305 : WARP_EXTRA_ELTS);
1306 0 : return CE_Failure;
1307 : }
1308 :
1309 2933 : dfXFilter = anGWKFilterRadius[eResample];
1310 2933 : dfYFilter = anGWKFilterRadius[eResample];
1311 :
1312 2933 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1313 2380 : : static_cast<int>(dfXFilter);
1314 2933 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1315 2384 : : static_cast<int>(dfYFilter);
1316 :
1317 : // Filter window offset depends on the parity of the kernel radius.
1318 2933 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1319 2933 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1320 :
1321 2933 : bApplyVerticalShift =
1322 2933 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1323 2933 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1324 2933 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1325 :
1326 : /* -------------------------------------------------------------------- */
1327 : /* Set up resampling functions. */
1328 : /* -------------------------------------------------------------------- */
1329 2933 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1330 12 : return GWKGeneralCase(this);
1331 :
1332 2921 : const bool bNoMasksOrDstDensityOnly =
1333 2914 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1334 5835 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1335 :
1336 2921 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
1337 : bNoMasksOrDstDensityOnly)
1338 944 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1339 :
1340 1977 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
1341 : bNoMasksOrDstDensityOnly)
1342 126 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1343 :
1344 1851 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
1345 : bNoMasksOrDstDensityOnly)
1346 677 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1347 :
1348 1174 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
1349 : bNoMasksOrDstDensityOnly)
1350 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1351 :
1352 1162 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
1353 350 : return GWKNearestByte(this);
1354 :
1355 812 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1356 165 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1357 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1358 :
1359 798 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1360 : bNoMasksOrDstDensityOnly)
1361 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1362 :
1363 793 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1364 : bNoMasksOrDstDensityOnly)
1365 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1366 :
1367 787 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1368 : bNoMasksOrDstDensityOnly)
1369 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1370 :
1371 782 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1372 : bNoMasksOrDstDensityOnly)
1373 14 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1374 :
1375 768 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1376 : bNoMasksOrDstDensityOnly)
1377 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1378 :
1379 763 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1380 : bNoMasksOrDstDensityOnly)
1381 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1382 :
1383 757 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1384 45 : return GWKNearestShort(this);
1385 :
1386 712 : if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1387 10 : return GWKNearestUnsignedShort(this);
1388 :
1389 702 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1390 : bNoMasksOrDstDensityOnly)
1391 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1392 :
1393 691 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1394 50 : return GWKNearestFloat(this);
1395 :
1396 641 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1397 : bNoMasksOrDstDensityOnly)
1398 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1399 :
1400 637 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1401 : bNoMasksOrDstDensityOnly)
1402 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1403 :
1404 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1405 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1406 : bNoMasksOrDstDensityOnly)
1407 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1408 :
1409 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1410 : bNoMasksOrDstDensityOnly)
1411 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1412 : #endif
1413 :
1414 628 : if (eResample == GRA_Average)
1415 77 : return GWKAverageOrMode(this);
1416 :
1417 551 : if (eResample == GRA_RMS)
1418 9 : return GWKAverageOrMode(this);
1419 :
1420 542 : if (eResample == GRA_Mode)
1421 45 : return GWKAverageOrMode(this);
1422 :
1423 497 : if (eResample == GRA_Max)
1424 6 : return GWKAverageOrMode(this);
1425 :
1426 491 : if (eResample == GRA_Min)
1427 5 : return GWKAverageOrMode(this);
1428 :
1429 486 : if (eResample == GRA_Med)
1430 6 : return GWKAverageOrMode(this);
1431 :
1432 480 : if (eResample == GRA_Q1)
1433 10 : return GWKAverageOrMode(this);
1434 :
1435 470 : if (eResample == GRA_Q3)
1436 5 : return GWKAverageOrMode(this);
1437 :
1438 465 : if (eResample == GRA_Sum)
1439 19 : return GWKSumPreserving(this);
1440 :
1441 446 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1442 : {
1443 219 : return GWKRealCase(this);
1444 : }
1445 :
1446 227 : return GWKGeneralCase(this);
1447 : }
1448 :
1449 : /************************************************************************/
1450 : /* Validate() */
1451 : /************************************************************************/
1452 :
1453 : /**
1454 : * \fn CPLErr GDALWarpKernel::Validate()
1455 : *
1456 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1457 : * (and return CE_Failure) if the configuration is considered to be
1458 : * invalid for some reason.
1459 : *
1460 : * This method will also do some standard defaulting such as setting
1461 : * pfnProgress to GDALDummyProgress() if it is NULL.
1462 : *
1463 : * @return CE_None on success or CE_Failure if an error is detected.
1464 : */
1465 :
1466 2959 : CPLErr GDALWarpKernel::Validate()
1467 :
1468 : {
1469 2959 : if (static_cast<size_t>(eResample) >=
1470 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1471 : {
1472 0 : CPLError(CE_Failure, CPLE_AppDefined,
1473 : "Unsupported resampling method %d.",
1474 0 : static_cast<int>(eResample));
1475 0 : return CE_Failure;
1476 : }
1477 :
1478 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1479 : // be ignored as contributing source pixels during resampling. Only taken into account by
1480 : // Average currently
1481 : const char *pszExcludedValues =
1482 2959 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1483 2959 : if (pszExcludedValues)
1484 : {
1485 : const CPLStringList aosTokens(
1486 14 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1487 14 : if ((aosTokens.size() % nBands) != 0)
1488 : {
1489 1 : CPLError(CE_Failure, CPLE_AppDefined,
1490 : "EXCLUDED_VALUES should contain one or several tuples of "
1491 : "%d values formatted like <R>,<G>,<B> or "
1492 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1493 : "tuples",
1494 : nBands);
1495 1 : return CE_Failure;
1496 : }
1497 26 : std::vector<double> adfTuple;
1498 52 : for (int i = 0; i < aosTokens.size(); ++i)
1499 : {
1500 39 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1501 39 : if (((i + 1) % nBands) == 0)
1502 : {
1503 13 : m_aadfExcludedValues.push_back(adfTuple);
1504 13 : adfTuple.clear();
1505 : }
1506 : }
1507 : }
1508 :
1509 2958 : return CE_None;
1510 : }
1511 :
1512 : /************************************************************************/
1513 : /* GWKOverlayDensity() */
1514 : /* */
1515 : /* Compute the final density for the destination pixel. This */
1516 : /* is a function of the overlay density (passed in) and the */
1517 : /* original density. */
1518 : /************************************************************************/
1519 :
1520 10022900 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1521 : double dfDensity)
1522 : {
1523 10022900 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1524 8062630 : return;
1525 :
1526 1960260 : poWK->pafDstDensity[iDstOffset] =
1527 1960260 : 1.0f -
1528 1960260 : (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1529 : }
1530 :
1531 : /************************************************************************/
1532 : /* GWKRoundValueT() */
1533 : /************************************************************************/
1534 :
/**
 * Helper converting a value of type U to type T with rounding.
 *
 * The third template argument selects the implementation: the primary
 * template only declares eval(); the two partial specializations below
 * define it for signed and unsigned targets.
 */
template <class T, class U, bool is_signed> struct sGWKRoundValueT
{
    static T eval(U);
};

/** Signed target: add one half, then take the floor before converting. */
template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
{
    static T eval(U value)
    {
        const U shifted = value + U(0.5);
        return static_cast<T>(floor(shifted));
    }
};

/** Unsigned target: add one half and let the conversion truncate. */
template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
{
    static T eval(U value)
    {
        const U shifted = value + U(0.5);
        return static_cast<T>(shifted);
    }
};
1555 :
1556 125351412 : template <class T, class U> static T GWKRoundValueT(U value)
1557 : {
1558 125351412 : return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1559 : }
1560 :
1561 268974 : template <> float GWKRoundValueT<float, double>(double value)
1562 : {
1563 268974 : return static_cast<float>(value);
1564 : }
1565 :
1566 : #ifdef notused
1567 : template <> double GWKRoundValueT<double, double>(double value)
1568 : {
1569 : return value;
1570 : }
1571 : #endif
1572 :
1573 : /************************************************************************/
1574 : /* GWKClampValueT() */
1575 : /************************************************************************/
1576 :
1577 119660582 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
1578 : {
1579 119660582 : if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
1580 545257 : return cpl::NumericLimits<T>::min();
1581 119114984 : else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
1582 772808 : return cpl::NumericLimits<T>::max();
1583 : else
1584 118342044 : return GWKRoundValueT<T, U>(value);
1585 : }
1586 :
1587 718914 : template <> float GWKClampValueT<float, double>(double dfValue)
1588 : {
1589 718914 : return static_cast<float>(dfValue);
1590 : }
1591 :
1592 : #ifdef notused
1593 : template <> double GWKClampValueT<double, double>(double dfValue)
1594 : {
1595 : return dfValue;
1596 : }
1597 : #endif
1598 :
1599 : /************************************************************************/
1600 : /* AvoidNoData() */
1601 : /************************************************************************/
1602 :
1603 1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
1604 : {
1605 : if constexpr (cpl::NumericLimits<T>::is_integer)
1606 : {
1607 1027 : if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
1608 : {
1609 515 : pDst[iDstOffset] =
1610 515 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1611 : }
1612 : else
1613 512 : pDst[iDstOffset]--;
1614 : }
1615 : else
1616 : {
1617 256 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1618 : {
1619 : using std::nextafter;
1620 0 : pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
1621 : }
1622 : else
1623 : {
1624 : using std::nextafter;
1625 256 : pDst[iDstOffset] =
1626 256 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1627 : }
1628 : }
1629 1283 : }
1630 :
1631 : /************************************************************************/
1632 : /* AvoidNoData() */
1633 : /************************************************************************/
1634 :
1635 : template <class T>
1636 13527030 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1637 : GPtrDiff_t iDstOffset)
1638 : {
1639 13527030 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1640 13527030 : T *pDst = reinterpret_cast<T *>(pabyDst);
1641 :
1642 13527030 : if (poWK->padfDstNoDataReal != nullptr &&
1643 6419188 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1644 : {
1645 640 : AvoidNoData(pDst, iDstOffset);
1646 :
1647 640 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1648 : {
1649 40 : const_cast<GDALWarpKernel *>(poWK)
1650 : ->bWarnedAboutDstNoDataReplacement = true;
1651 40 : CPLError(CE_Warning, CPLE_AppDefined,
1652 : "Value %g in the source dataset has been changed to %g "
1653 : "in the destination dataset to avoid being treated as "
1654 : "NoData. To avoid this, select a different NoData value "
1655 : "for the destination dataset.",
1656 40 : poWK->padfDstNoDataReal[iBand],
1657 40 : static_cast<double>(pDst[iDstOffset]));
1658 : }
1659 : }
1660 13527030 : }
1661 :
1662 : /************************************************************************/
1663 : /* GWKAvoidNoDataMultiBand() */
1664 : /************************************************************************/
1665 :
1666 : template <class T>
1667 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1668 : GPtrDiff_t iDstOffset)
1669 : {
1670 524573 : T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
1671 524573 : if (poWK->padfDstNoDataReal != nullptr)
1672 : {
1673 208615 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1674 : {
1675 208294 : if (poWK->padfDstNoDataReal[iBand] !=
1676 208294 : static_cast<double>(ppDst[iBand][iDstOffset]))
1677 205830 : return;
1678 : }
1679 964 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1680 : {
1681 643 : AvoidNoData(ppDst[iBand], iDstOffset);
1682 : }
1683 :
1684 321 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1685 : {
1686 21 : const_cast<GDALWarpKernel *>(poWK)
1687 : ->bWarnedAboutDstNoDataReplacement = true;
1688 42 : std::string valueSrc, valueDst;
1689 64 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1690 : {
1691 43 : if (!valueSrc.empty())
1692 : {
1693 22 : valueSrc += ',';
1694 22 : valueDst += ',';
1695 : }
1696 43 : valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
1697 43 : valueDst += CPLSPrintf(
1698 43 : "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
1699 : }
1700 21 : CPLError(CE_Warning, CPLE_AppDefined,
1701 : "Value %s in the source dataset has been changed to %s "
1702 : "in the destination dataset to avoid being treated as "
1703 : "NoData. To avoid this, select a different NoData value "
1704 : "for the destination dataset.",
1705 : valueSrc.c_str(), valueDst.c_str());
1706 : }
1707 : }
1708 : }
1709 :
1710 : /************************************************************************/
1711 : /* GWKAvoidNoDataMultiBand() */
1712 : /************************************************************************/
1713 :
// Non-template entry point: selects the GWKAvoidNoDataMultiBand<T>
// instantiation matching poWK->eWorkingDataType and forwards to it.
// Complex types, GDT_Unknown and GDT_TypeCount are deliberately no-ops.
1714 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1715 : GPtrDiff_t iDstOffset)
1716 : {
1717 524573 : switch (poWK->eWorkingDataType)
1718 : {
1719 523997 : case GDT_UInt8:
1720 523997 : GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
1721 523997 : break;
1722 :
1723 0 : case GDT_Int8:
1724 0 : GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
1725 0 : break;
1726 :
1727 128 : case GDT_Int16:
1728 128 : GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
1729 128 : break;
1730 :
1731 64 : case GDT_UInt16:
1732 64 : GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
1733 64 : break;
1734 :
1735 64 : case GDT_Int32:
1736 64 : GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
1737 64 : break;
1738 :
1739 64 : case GDT_UInt32:
1740 64 : GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
1741 64 : break;
1742 :
1743 64 : case GDT_Int64:
1744 64 : GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
1745 64 : break;
1746 :
1747 64 : case GDT_UInt64:
1748 64 : GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
1749 64 : break;
1750 :
1751 0 : case GDT_Float16:
1752 0 : GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
1753 0 : break;
1754 :
1755 64 : case GDT_Float32:
1756 64 : GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
1757 64 : break;
1758 :
1759 64 : case GDT_Float64:
1760 64 : GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
1761 64 : break;
1762 :
// No nodata avoidance is performed for complex or invalid working types.
1763 0 : case GDT_CInt16:
1764 : case GDT_CInt32:
1765 : case GDT_CFloat16:
1766 : case GDT_CFloat32:
1767 : case GDT_CFloat64:
1768 : case GDT_Unknown:
1769 : case GDT_TypeCount:
1770 0 : break;
1771 : }
1772 524573 : }
1773 :
1774 : /************************************************************************/
1775 : /* GWKSetPixelValueRealT() */
1776 : /************************************************************************/
1777 :
// Store a real (non-complex) value of type T into band iBand of the
// destination buffer at pixel index iDstOffset.
//
// dfDensity is the source density:
//   - below 0.0001 the destination is left untouched;
//   - below 0.9999 the value is blended with the existing destination
//     value, the latter weighted by (1 - dfDensity) * destination density,
//     and the result is passed through GWKClampValueT<T>();
//   - otherwise `value` is stored as is.
// When bAvoidNoDataSingleBand is true, AvoidNoData<T>() is then applied to
// the stored pixel.
//
// Always returns true.
1778 : template <class T>
1779 9992427 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1780 : GPtrDiff_t iDstOffset, double dfDensity,
1781 : T value, bool bAvoidNoDataSingleBand)
1782 : {
1783 9992427 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1784 :
1785 : /* -------------------------------------------------------------------- */
1786 : /* If the source density is less than 100% we need to fetch the */
1787 : /* existing destination value, and mix it with the source to */
1788 : /* get the new "to apply" value. Also compute composite */
1789 : /* density. */
1790 : /* */
1791 : /* We avoid mixing if density is very near one or risk mixing */
1792 : /* in very extreme nodata values and causing odd results (#1610) */
1793 : /* -------------------------------------------------------------------- */
1794 9992427 : if (dfDensity < 0.9999)
1795 : {
// Negligible contribution: keep the destination pixel unchanged.
1796 945508 : if (dfDensity < 0.0001)
1797 0 : return true;
1798 :
// Destination density defaults to fully opaque; it comes from
// pafDstDensity when present, else it is zeroed for pixels flagged
// invalid in panDstValid.
1799 945508 : double dfDstDensity = 1.0;
1800 :
1801 945508 : if (poWK->pafDstDensity != nullptr)
1802 944036 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1803 1472 : else if (poWK->panDstValid != nullptr &&
1804 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1805 0 : dfDstDensity = 0.0;
1806 :
1807 : // panDstValid is only consulted when there is no pafDstDensity; when both exist the validity mask is not tested here.
1808 :
1809 945508 : const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1810 :
1811 : // The destination density is really only relative to the portion
1812 : // not occluded by the overlay.
1813 945508 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1814 :
// Weighted average of source and destination; the divisor is at least
// dfDensity, which is >= 0.0001 on this path.
1815 945508 : const double dfReal =
1816 945508 : (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1817 945508 : (dfDensity + dfDstInfluence);
1818 :
1819 : /* --------------------------------------------------------------------
1820 : */
1821 : /* Actually apply the destination value. */
1822 : /* */
1823 : /* Avoid using the destination nodata value for integer datatypes
1824 : */
1825 : /* if by chance it is equal to the computed pixel value. */
1826 : /* --------------------------------------------------------------------
1827 : */
1828 945508 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1829 : }
1830 : else
1831 : {
// Source is (almost) fully opaque: overwrite without blending.
1832 9046916 : pDst[iDstOffset] = value;
1833 : }
1834 :
1835 9992427 : if (bAvoidNoDataSingleBand)
1836 8719761 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1837 :
1838 9992427 : return true;
1839 : }
1840 :
1841 : /************************************************************************/
1842 : /* ClampRoundAndAvoidNoData() */
1843 : /************************************************************************/
1844 :
// Convert dfReal to T and store it in band iBand of the destination buffer
// at pixel index iDstOffset.
//
// Integer T: values below lowest() or at/above max() saturate to that
// bound; anything in between is rounded to nearest by adding 0.5 (with
// floor() for signed types, truncation for unsigned ones).
// Non-integer T: plain static_cast.
//
// When bAvoidNoDataSingleBand is true, AvoidNoData<T>() is applied to the
// stored pixel afterwards.
1845 : template <class T>
1846 5107725 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1847 : GPtrDiff_t iDstOffset, double dfReal,
1848 : bool bAvoidNoDataSingleBand)
1849 : {
1850 5107725 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1851 5107725 : T *pDst = reinterpret_cast<T *>(pabyDst);
1852 :
1853 : if constexpr (cpl::NumericLimits<T>::is_integer)
1854 : {
1855 : using std::floor;
1856 4610595 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1857 5298 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
// The upper bound must be inclusive: for 64-bit T, max() is not exactly
// representable as a double and static_cast<double>(max()) rounds up to
// 2^63 / 2^64. A dfReal equal to that rounded bound would otherwise reach
// the casts below with an out-of-range value, which is undefined
// behaviour. For narrower types max() is exact, so ">=" and ">" store the
// same result.
1858 4605295 : else if (dfReal >= static_cast<double>(cpl::NumericLimits<T>::max()))
1859 23620 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1860 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1861 10410 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1862 : else
// Unsigned: dfReal is >= 0 here, so truncation after +0.5 rounds to nearest.
1863 4571265 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
// NOTE(review): a NaN dfReal fails both range comparisons and still
// reaches the integer cast - confirm callers never pass NaN for integer T.
1864 : }
1865 : else
1866 : {
1867 497130 : pDst[iDstOffset] = static_cast<T>(dfReal);
1868 : }
1869 :
1870 5107725 : if (bAvoidNoDataSingleBand)
1871 4807319 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1872 5107725 : }
1873 :
1874 : /************************************************************************/
1875 : /* GWKSetPixelValue() */
1876 : /************************************************************************/
1877 :
// Store a (possibly complex) value into band iBand of the destination
// buffer at pixel index iDstOffset, interpreting the buffer according to
// poWK->eWorkingDataType.
//
// dfDensity is the source density:
//   - below 0.0001 the destination is left untouched and true is returned;
//   - below 0.9999 dfReal/dfImag are first blended with the existing
//     destination value, the latter weighted by
//     (1 - dfDensity) * destination density.
// Real types are written through ClampRoundAndAvoidNoData<T>() (dfImag is
// ignored, bAvoidNoDataSingleBand is forwarded). Complex types are written
// inline as interleaved (real, imag) pairs at indices iDstOffset * 2 and
// iDstOffset * 2 + 1, and bAvoidNoDataSingleBand is not used for them.
//
// Returns false for GDT_Unknown / GDT_TypeCount, true otherwise.
1878 4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1879 : GPtrDiff_t iDstOffset, double dfDensity,
1880 : double dfReal, double dfImag,
1881 : bool bAvoidNoDataSingleBand)
1882 :
1883 : {
1884 4012410 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1885 :
1886 : /* -------------------------------------------------------------------- */
1887 : /* If the source density is less than 100% we need to fetch the */
1888 : /* existing destination value, and mix it with the source to */
1889 : /* get the new "to apply" value. Also compute composite */
1890 : /* density. */
1891 : /* */
1892 : /* We avoid mixing if density is very near one or risk mixing */
1893 : /* in very extreme nodata values and causing odd results (#1610) */
1894 : /* -------------------------------------------------------------------- */
1895 4012410 : if (dfDensity < 0.9999)
1896 : {
// Negligible contribution: keep the destination pixel unchanged.
1897 800 : if (dfDensity < 0.0001)
1898 0 : return true;
1899 :
// Destination density defaults to fully opaque; it comes from
// pafDstDensity when present, else it is zeroed for pixels flagged
// invalid in panDstValid.
1900 800 : double dfDstDensity = 1.0;
1901 800 : if (poWK->pafDstDensity != nullptr)
1902 800 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1903 0 : else if (poWK->panDstValid != nullptr &&
1904 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1905 0 : dfDstDensity = 0.0;
1906 :
1907 800 : double dfDstReal = 0.0;
1908 800 : double dfDstImag = 0.0;
1909 : // panDstValid is only consulted when there is no pafDstDensity; when both exist the validity mask is not tested here.
1910 :
1911 : // TODO(schwehr): Factor out this repeated type of set.
// Read the current destination value as doubles, per working type.
1912 800 : switch (poWK->eWorkingDataType)
1913 : {
1914 0 : case GDT_UInt8:
1915 0 : dfDstReal = pabyDst[iDstOffset];
1916 0 : dfDstImag = 0.0;
1917 0 : break;
1918 :
1919 0 : case GDT_Int8:
1920 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1921 0 : dfDstImag = 0.0;
1922 0 : break;
1923 :
1924 400 : case GDT_Int16:
1925 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1926 400 : dfDstImag = 0.0;
1927 400 : break;
1928 :
1929 400 : case GDT_UInt16:
1930 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1931 400 : dfDstImag = 0.0;
1932 400 : break;
1933 :
1934 0 : case GDT_Int32:
1935 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1936 0 : dfDstImag = 0.0;
1937 0 : break;
1938 :
1939 0 : case GDT_UInt32:
1940 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1941 0 : dfDstImag = 0.0;
1942 0 : break;
1943 :
1944 0 : case GDT_Int64:
1945 0 : dfDstReal = static_cast<double>(
1946 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1947 0 : dfDstImag = 0.0;
1948 0 : break;
1949 :
1950 0 : case GDT_UInt64:
1951 0 : dfDstReal = static_cast<double>(
1952 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1953 0 : dfDstImag = 0.0;
1954 0 : break;
1955 :
1956 0 : case GDT_Float16:
1957 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1958 0 : dfDstImag = 0.0;
1959 0 : break;
1960 :
1961 0 : case GDT_Float32:
1962 0 : dfDstReal =
1963 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1964 0 : dfDstImag = 0.0;
1965 0 : break;
1966 :
1967 0 : case GDT_Float64:
1968 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1969 0 : dfDstImag = 0.0;
1970 0 : break;
1971 :
// Complex types: real part at index 2 * offset, imaginary part right after.
1972 0 : case GDT_CInt16:
1973 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1974 0 : dfDstImag =
1975 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1976 0 : break;
1977 :
1978 0 : case GDT_CInt32:
1979 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1980 0 : dfDstImag =
1981 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1982 0 : break;
1983 :
1984 0 : case GDT_CFloat16:
1985 : dfDstReal =
1986 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1987 : dfDstImag =
1988 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1989 0 : break;
1990 :
1991 0 : case GDT_CFloat32:
1992 0 : dfDstReal =
1993 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
1994 0 : dfDstImag = double(
1995 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
1996 0 : break;
1997 :
1998 0 : case GDT_CFloat64:
1999 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
2000 0 : dfDstImag =
2001 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
2002 0 : break;
2003 :
2004 0 : case GDT_Unknown:
2005 : case GDT_TypeCount:
2006 0 : CPLAssert(false);
2007 : return false;
2008 : }
2009 :
2010 : // The destination density is really only relative to the portion
2011 : // not occluded by the overlay.
2012 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2013 :
// Weighted average of source and destination, applied to both components;
// the divisor is at least dfDensity, which is >= 0.0001 on this path.
2014 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2015 800 : (dfDensity + dfDstInfluence);
2016 :
2017 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
2018 800 : (dfDensity + dfDstInfluence);
2019 : }
2020 :
2021 : /* -------------------------------------------------------------------- */
2022 : /* Actually apply the destination value. */
2023 : /* */
2024 : /* Avoid using the destination nodata value for integer datatypes */
2025 : /* if by chance it is equal to the computed pixel value. */
2026 : /* -------------------------------------------------------------------- */
2027 :
2028 4012410 : switch (poWK->eWorkingDataType)
2029 : {
2030 3290010 : case GDT_UInt8:
2031 3290010 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2032 : bAvoidNoDataSingleBand);
2033 3290010 : break;
2034 :
2035 0 : case GDT_Int8:
2036 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2037 : bAvoidNoDataSingleBand);
2038 0 : break;
2039 :
2040 7472 : case GDT_Int16:
2041 7472 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2042 : bAvoidNoDataSingleBand);
2043 7472 : break;
2044 :
2045 464 : case GDT_UInt16:
2046 464 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2047 : bAvoidNoDataSingleBand);
2048 464 : break;
2049 :
2050 63 : case GDT_UInt32:
2051 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2052 : bAvoidNoDataSingleBand);
2053 63 : break;
2054 :
2055 63 : case GDT_Int32:
2056 63 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2057 : bAvoidNoDataSingleBand);
2058 63 : break;
2059 :
2060 0 : case GDT_UInt64:
2061 0 : ClampRoundAndAvoidNoData<std::uint64_t>(
2062 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2063 0 : break;
2064 :
2065 0 : case GDT_Int64:
2066 0 : ClampRoundAndAvoidNoData<std::int64_t>(
2067 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2068 0 : break;
2069 :
2070 0 : case GDT_Float16:
2071 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2072 : bAvoidNoDataSingleBand);
2073 0 : break;
2074 :
2075 478957 : case GDT_Float32:
2076 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2077 : bAvoidNoDataSingleBand);
2078 478957 : break;
2079 :
2080 149 : case GDT_Float64:
2081 149 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2082 : bAvoidNoDataSingleBand);
2083 149 : break;
2084 :
// Complex integer types: each component is clamped to the range of T and
// rounded to nearest with floor(x + 0.5), then stored interleaved.
2085 234079 : case GDT_CInt16:
2086 : {
2087 : typedef GInt16 T;
2088 234079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2089 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2090 0 : cpl::NumericLimits<T>::min();
2091 234079 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2092 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2093 0 : cpl::NumericLimits<T>::max();
2094 : else
2095 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2096 234079 : static_cast<T>(floor(dfReal + 0.5));
2097 234079 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2098 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2099 0 : cpl::NumericLimits<T>::min();
2100 234079 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2101 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2102 0 : cpl::NumericLimits<T>::max();
2103 : else
2104 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2105 234079 : static_cast<T>(floor(dfImag + 0.5));
2106 234079 : break;
2107 : }
2108 :
2109 379 : case GDT_CInt32:
2110 : {
2111 : typedef GInt32 T;
2112 379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2113 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2114 0 : cpl::NumericLimits<T>::min();
2115 379 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2116 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2117 0 : cpl::NumericLimits<T>::max();
2118 : else
2119 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2120 379 : static_cast<T>(floor(dfReal + 0.5));
2121 379 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2122 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2123 0 : cpl::NumericLimits<T>::min();
2124 379 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2125 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2126 0 : cpl::NumericLimits<T>::max();
2127 : else
2128 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2129 379 : static_cast<T>(floor(dfImag + 0.5));
2130 379 : break;
2131 : }
2132 :
// Complex floating-point types: plain conversion, no clamping.
2133 0 : case GDT_CFloat16:
2134 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
2135 0 : static_cast<GFloat16>(dfReal);
2136 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
2137 0 : static_cast<GFloat16>(dfImag);
2138 0 : break;
2139 :
2140 394 : case GDT_CFloat32:
2141 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
2142 394 : static_cast<float>(dfReal);
2143 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
2144 394 : static_cast<float>(dfImag);
2145 394 : break;
2146 :
2147 380 : case GDT_CFloat64:
2148 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
2149 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
2150 380 : break;
2151 :
2152 0 : case GDT_Unknown:
2153 : case GDT_TypeCount:
2154 0 : return false;
2155 : }
2156 :
2157 4012410 : return true;
2158 : }
2159 :
2160 : /************************************************************************/
2161 : /* GWKSetPixelValueReal() */
2162 : /************************************************************************/
2163 :
// Real-only counterpart of the pixel store: write dfReal into band iBand
// of the destination buffer at pixel index iDstOffset, interpreting the
// buffer according to poWK->eWorkingDataType.
//
// dfDensity is the source density:
//   - below 0.0001 the destination is left untouched and true is returned;
//   - below 0.9999 dfReal is first blended with the existing destination
//     value, the latter weighted by (1 - dfDensity) * destination density.
// The value is then written through ClampRoundAndAvoidNoData<T>(), with
// bAvoidNoDataSingleBand forwarded.
//
// Returns false (nothing written) for complex types, GDT_Unknown and
// GDT_TypeCount; true otherwise.
2164 1330540 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2165 : GPtrDiff_t iDstOffset, double dfDensity,
2166 : double dfReal, bool bAvoidNoDataSingleBand)
2167 :
2168 : {
2169 1330540 : GByte *pabyDst = poWK->papabyDstImage[iBand];
2170 :
2171 : /* -------------------------------------------------------------------- */
2172 : /* If the source density is less than 100% we need to fetch the */
2173 : /* existing destination value, and mix it with the source to */
2174 : /* get the new "to apply" value. Also compute composite */
2175 : /* density. */
2176 : /* */
2177 : /* We avoid mixing if density is very near one or risk mixing */
2178 : /* in very extreme nodata values and causing odd results (#1610) */
2179 : /* -------------------------------------------------------------------- */
2180 1330540 : if (dfDensity < 0.9999)
2181 : {
// Negligible contribution: keep the destination pixel unchanged.
2182 600 : if (dfDensity < 0.0001)
2183 0 : return true;
2184 :
2185 600 : double dfDstReal = 0.0;
// Destination density defaults to fully opaque; it comes from
// pafDstDensity when present, else it is zeroed for pixels flagged
// invalid in panDstValid.
2186 600 : double dfDstDensity = 1.0;
2187 :
2188 600 : if (poWK->pafDstDensity != nullptr)
2189 600 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
2190 0 : else if (poWK->panDstValid != nullptr &&
2191 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
2192 0 : dfDstDensity = 0.0;
2193 :
2194 : // panDstValid is only consulted when there is no pafDstDensity; when both exist the validity mask is not tested here.
2195 :
// Read the current destination value as a double, per working type.
2196 600 : switch (poWK->eWorkingDataType)
2197 : {
2198 0 : case GDT_UInt8:
2199 0 : dfDstReal = pabyDst[iDstOffset];
2200 0 : break;
2201 :
2202 0 : case GDT_Int8:
2203 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
2204 0 : break;
2205 :
2206 300 : case GDT_Int16:
2207 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
2208 300 : break;
2209 :
2210 300 : case GDT_UInt16:
2211 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
2212 300 : break;
2213 :
2214 0 : case GDT_Int32:
2215 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
2216 0 : break;
2217 :
2218 0 : case GDT_UInt32:
2219 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
2220 0 : break;
2221 :
2222 0 : case GDT_Int64:
2223 0 : dfDstReal = static_cast<double>(
2224 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
2225 0 : break;
2226 :
2227 0 : case GDT_UInt64:
2228 0 : dfDstReal = static_cast<double>(
2229 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
2230 0 : break;
2231 :
2232 0 : case GDT_Float16:
2233 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
2234 0 : break;
2235 :
2236 0 : case GDT_Float32:
2237 0 : dfDstReal =
2238 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
2239 0 : break;
2240 :
2241 0 : case GDT_Float64:
2242 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
2243 0 : break;
2244 :
// Complex and invalid types are not supported by this real-only variant.
2245 0 : case GDT_CInt16:
2246 : case GDT_CInt32:
2247 : case GDT_CFloat16:
2248 : case GDT_CFloat32:
2249 : case GDT_CFloat64:
2250 : case GDT_Unknown:
2251 : case GDT_TypeCount:
2252 0 : CPLAssert(false);
2253 : return false;
2254 : }
2255 :
2256 : // The destination density is really only relative to the portion
2257 : // not occluded by the overlay.
2258 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2259 :
// Weighted average of source and destination; the divisor is at least
// dfDensity, which is >= 0.0001 on this path.
2260 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2261 600 : (dfDensity + dfDstInfluence);
2262 : }
2263 :
2264 : /* -------------------------------------------------------------------- */
2265 : /* Actually apply the destination value. */
2266 : /* */
2267 : /* Avoid using the destination nodata value for integer datatypes */
2268 : /* if by chance it is equal to the computed pixel value. */
2269 : /* -------------------------------------------------------------------- */
2270 :
2271 1330540 : switch (poWK->eWorkingDataType)
2272 : {
2273 1308410 : case GDT_UInt8:
2274 1308410 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2275 : bAvoidNoDataSingleBand);
2276 1308410 : break;
2277 :
2278 0 : case GDT_Int8:
2279 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2280 : bAvoidNoDataSingleBand);
2281 0 : break;
2282 :
2283 1309 : case GDT_Int16:
2284 1309 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2285 : bAvoidNoDataSingleBand);
2286 1309 : break;
2287 :
2288 475 : case GDT_UInt16:
2289 475 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2290 : bAvoidNoDataSingleBand);
2291 475 : break;
2292 :
2293 539 : case GDT_UInt32:
2294 539 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2295 : bAvoidNoDataSingleBand);
2296 539 : break;
2297 :
2298 1342 : case GDT_Int32:
2299 1342 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2300 : bAvoidNoDataSingleBand);
2301 1342 : break;
2302 :
2303 224 : case GDT_UInt64:
2304 224 : ClampRoundAndAvoidNoData<std::uint64_t>(
2305 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2306 224 : break;
2307 :
2308 224 : case GDT_Int64:
2309 224 : ClampRoundAndAvoidNoData<std::int64_t>(
2310 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2311 224 : break;
2312 :
2313 0 : case GDT_Float16:
2314 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2315 : bAvoidNoDataSingleBand);
2316 0 : break;
2317 :
2318 3538 : case GDT_Float32:
2319 3538 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2320 : bAvoidNoDataSingleBand);
2321 3538 : break;
2322 :
2323 14486 : case GDT_Float64:
2324 14486 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2325 : bAvoidNoDataSingleBand);
2326 14486 : break;
2327 :
// Complex types fail quietly here (no assertion), unlike invalid types.
2328 0 : case GDT_CInt16:
2329 : case GDT_CInt32:
2330 : case GDT_CFloat16:
2331 : case GDT_CFloat32:
2332 : case GDT_CFloat64:
2333 0 : return false;
2334 :
2335 0 : case GDT_Unknown:
2336 : case GDT_TypeCount:
2337 0 : CPLAssert(false);
2338 : return false;
2339 : }
2340 :
2341 1330540 : return true;
2342 : }
2343 :
2344 : /************************************************************************/
2345 : /* GWKGetPixelValue() */
2346 : /************************************************************************/
2347 :
2348 : /* It is assumed that panUnifiedSrcValid has been checked before */
2349 :
// Read one source pixel of band iBand at pixel index iSrcOffset as a
// (real, imag) pair of doubles, interpreting the source buffer according
// to poWK->eWorkingDataType.
//
// Outputs:
//   *pdfReal / *pdfImag - pixel value; *pdfImag is 0 for real types.
//                         Complex values are read from indices
//                         iSrcOffset * 2 and iSrcOffset * 2 + 1.
//   *pdfDensity         - pafUnifiedSrcDensity[iSrcOffset] when that array
//                         exists, 1.0 otherwise; 0.0 on failure.
//
// Returns false when the per-band validity mask flags the pixel invalid
// (in which case *pdfReal / *pdfImag are not written), when the working
// type is GDT_Unknown / GDT_TypeCount, or when the resulting density is
// exactly 0; true otherwise.
2350 30268000 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2351 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2352 : double *pdfReal, double *pdfImag)
2353 :
2354 : {
2355 30268000 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2356 :
// Per-band validity mask, if any: an invalid pixel short-circuits.
2357 60536000 : if (poWK->papanBandSrcValid != nullptr &&
2358 30268000 : poWK->papanBandSrcValid[iBand] != nullptr &&
2359 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2360 : {
2361 0 : *pdfDensity = 0.0;
2362 0 : return false;
2363 : }
2364 :
2365 30268000 : *pdfReal = 0.0;
2366 30268000 : *pdfImag = 0.0;
2367 :
2368 : // TODO(schwehr): Fix casting.
2369 30268000 : switch (poWK->eWorkingDataType)
2370 : {
2371 29191100 : case GDT_UInt8:
2372 29191100 : *pdfReal = pabySrc[iSrcOffset];
2373 29191100 : *pdfImag = 0.0;
2374 29191100 : break;
2375 :
2376 0 : case GDT_Int8:
2377 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2378 0 : *pdfImag = 0.0;
2379 0 : break;
2380 :
2381 28232 : case GDT_Int16:
2382 28232 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2383 28232 : *pdfImag = 0.0;
2384 28232 : break;
2385 :
2386 166 : case GDT_UInt16:
2387 166 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2388 166 : *pdfImag = 0.0;
2389 166 : break;
2390 :
2391 63 : case GDT_Int32:
2392 63 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2393 63 : *pdfImag = 0.0;
2394 63 : break;
2395 :
2396 63 : case GDT_UInt32:
2397 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2398 63 : *pdfImag = 0.0;
2399 63 : break;
2400 :
2401 0 : case GDT_Int64:
2402 0 : *pdfReal = static_cast<double>(
2403 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2404 0 : *pdfImag = 0.0;
2405 0 : break;
2406 :
2407 0 : case GDT_UInt64:
2408 0 : *pdfReal = static_cast<double>(
2409 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2410 0 : *pdfImag = 0.0;
2411 0 : break;
2412 :
2413 0 : case GDT_Float16:
2414 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2415 0 : *pdfImag = 0.0;
2416 0 : break;
2417 :
2418 1047220 : case GDT_Float32:
2419 1047220 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2420 1047220 : *pdfImag = 0.0;
2421 1047220 : break;
2422 :
2423 587 : case GDT_Float64:
2424 587 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2425 587 : *pdfImag = 0.0;
2426 587 : break;
2427 :
// Complex types: real part at index 2 * offset, imaginary part right after.
2428 133 : case GDT_CInt16:
2429 133 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2430 133 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2431 133 : break;
2432 :
2433 133 : case GDT_CInt32:
2434 133 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2435 133 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2436 133 : break;
2437 :
2438 0 : case GDT_CFloat16:
2439 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2440 0 : *pdfImag =
2441 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2442 0 : break;
2443 :
2444 194 : case GDT_CFloat32:
2445 194 : *pdfReal =
2446 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2447 194 : *pdfImag =
2448 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2449 194 : break;
2450 :
2451 138 : case GDT_CFloat64:
2452 138 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2453 138 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2454 138 : break;
2455 :
2456 0 : case GDT_Unknown:
2457 : case GDT_TypeCount:
2458 0 : CPLAssert(false);
2459 : *pdfDensity = 0.0;
2460 : return false;
2461 : }
2462 :
// Density comes from the unified source density array when present;
// otherwise the pixel is treated as fully dense.
2463 30268000 : if (poWK->pafUnifiedSrcDensity != nullptr)
2464 4194800 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2465 : else
2466 26073200 : *pdfDensity = 1.0;
2467 :
// A pixel with exactly zero density is reported as "no value".
2468 30268000 : return *pdfDensity != 0.0;
2469 : }
2470 :
2471 : /************************************************************************/
2472 : /* GWKGetPixelValueReal() */
2473 : /************************************************************************/
2474 :
// Read one source pixel of band iBand at pixel index iSrcOffset as a
// double, interpreting the source buffer according to
// poWK->eWorkingDataType (real types only).
//
// Outputs:
//   *pdfReal    - pixel value converted to double.
//   *pdfDensity - pafUnifiedSrcDensity[iSrcOffset] when that array exists,
//                 1.0 otherwise; 0.0 when the per-band mask rejects the
//                 pixel.
//
// Returns false when the per-band validity mask flags the pixel invalid,
// when the working type is complex / GDT_Unknown / GDT_TypeCount, or when
// the resulting density is exactly 0; true otherwise.
2475 15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2476 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2477 : double *pdfReal)
2478 :
2479 : {
2480 15516 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2481 :
// Per-band validity mask, if any: an invalid pixel short-circuits and
// leaves *pdfReal unwritten.
2482 31034 : if (poWK->papanBandSrcValid != nullptr &&
2483 15518 : poWK->papanBandSrcValid[iBand] != nullptr &&
2484 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2485 : {
2486 0 : *pdfDensity = 0.0;
2487 0 : return false;
2488 : }
2489 :
2490 15516 : switch (poWK->eWorkingDataType)
2491 : {
2492 1 : case GDT_UInt8:
2493 1 : *pdfReal = pabySrc[iSrcOffset];
2494 1 : break;
2495 :
2496 0 : case GDT_Int8:
2497 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2498 0 : break;
2499 :
2500 1 : case GDT_Int16:
2501 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2502 1 : break;
2503 :
2504 1 : case GDT_UInt16:
2505 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2506 1 : break;
2507 :
2508 982 : case GDT_Int32:
2509 982 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2510 982 : break;
2511 :
2512 179 : case GDT_UInt32:
2513 179 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2514 179 : break;
2515 :
2516 112 : case GDT_Int64:
2517 112 : *pdfReal = static_cast<double>(
2518 112 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2519 112 : break;
2520 :
2521 112 : case GDT_UInt64:
2522 112 : *pdfReal = static_cast<double>(
2523 112 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2524 112 : break;
2525 :
2526 0 : case GDT_Float16:
2527 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2528 0 : break;
2529 :
2530 2 : case GDT_Float32:
2531 2 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2532 2 : break;
2533 :
2534 14126 : case GDT_Float64:
2535 14126 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2536 14126 : break;
2537 :
// Complex and invalid types are not supported by this real-only variant.
// NOTE(review): this path returns false without writing *pdfDensity or
// *pdfReal, unlike the mask rejection above which zeroes *pdfDensity -
// confirm callers only rely on the return value.
2538 0 : case GDT_CInt16:
2539 : case GDT_CInt32:
2540 : case GDT_CFloat16:
2541 : case GDT_CFloat32:
2542 : case GDT_CFloat64:
2543 : case GDT_Unknown:
2544 : case GDT_TypeCount:
2545 0 : CPLAssert(false);
2546 : return false;
2547 : }
2548 :
// Density comes from the unified source density array when present;
// otherwise the pixel is treated as fully dense.
2549 15516 : if (poWK->pafUnifiedSrcDensity != nullptr)
2550 0 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2551 : else
2552 15516 : *pdfDensity = 1.0;
2553 :
// A pixel with exactly zero density is reported as "no value".
2554 15516 : return *pdfDensity != 0.0;
2555 : }
2556 :
2557 : /************************************************************************/
2558 : /* GWKGetPixelRow() */
2559 : /************************************************************************/
2560 :
2561 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2562 : /* data-types. */
2563 :
2564 2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2565 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2566 : double *padfDensity, double adfReal[],
2567 : double *padfImag)
2568 : {
2569 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2570 2369710 : const int nSrcLen = nHalfSrcLen * 2;
2571 2369710 : bool bHasValid = false;
2572 :
2573 2369710 : if (padfDensity != nullptr)
2574 : {
2575 : // Init the density.
2576 3384030 : for (int i = 0; i < nSrcLen; i += 2)
2577 : {
2578 2211910 : padfDensity[i] = 1.0;
2579 2211910 : padfDensity[i + 1] = 1.0;
2580 : }
2581 :
2582 1172120 : if (poWK->panUnifiedSrcValid != nullptr)
2583 : {
2584 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2585 : {
2586 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2587 2067740 : bHasValid = true;
2588 : else
2589 74323 : padfDensity[i] = 0.0;
2590 :
2591 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2592 2068400 : bHasValid = true;
2593 : else
2594 73668 : padfDensity[i + 1] = 0.0;
2595 : }
2596 :
2597 : // Reset or fail as needed.
2598 1139400 : if (bHasValid)
2599 1116590 : bHasValid = false;
2600 : else
2601 22806 : return false;
2602 : }
2603 :
2604 1149320 : if (poWK->papanBandSrcValid != nullptr &&
2605 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2606 : {
2607 0 : for (int i = 0; i < nSrcLen; i += 2)
2608 : {
2609 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2610 0 : bHasValid = true;
2611 : else
2612 0 : padfDensity[i] = 0.0;
2613 :
2614 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2615 0 : iSrcOffset + i + 1))
2616 0 : bHasValid = true;
2617 : else
2618 0 : padfDensity[i + 1] = 0.0;
2619 : }
2620 :
2621 : // Reset or fail as needed.
2622 0 : if (bHasValid)
2623 0 : bHasValid = false;
2624 : else
2625 0 : return false;
2626 : }
2627 : }
2628 :
2629 : // TODO(schwehr): Fix casting.
2630 : // Fetch data.
2631 2346910 : switch (poWK->eWorkingDataType)
2632 : {
2633 1136680 : case GDT_UInt8:
2634 : {
2635 1136680 : GByte *pSrc =
2636 1136680 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2637 1136680 : pSrc += iSrcOffset;
2638 3281570 : for (int i = 0; i < nSrcLen; i += 2)
2639 : {
2640 2144890 : adfReal[i] = pSrc[i];
2641 2144890 : adfReal[i + 1] = pSrc[i + 1];
2642 : }
2643 1136680 : break;
2644 : }
2645 :
2646 0 : case GDT_Int8:
2647 : {
2648 0 : GInt8 *pSrc =
2649 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2650 0 : pSrc += iSrcOffset;
2651 0 : for (int i = 0; i < nSrcLen; i += 2)
2652 : {
2653 0 : adfReal[i] = pSrc[i];
2654 0 : adfReal[i + 1] = pSrc[i + 1];
2655 : }
2656 0 : break;
2657 : }
2658 :
2659 5950 : case GDT_Int16:
2660 : {
2661 5950 : GInt16 *pSrc =
2662 5950 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2663 5950 : pSrc += iSrcOffset;
2664 22164 : for (int i = 0; i < nSrcLen; i += 2)
2665 : {
2666 16214 : adfReal[i] = pSrc[i];
2667 16214 : adfReal[i + 1] = pSrc[i + 1];
2668 : }
2669 5950 : break;
2670 : }
2671 :
2672 4310 : case GDT_UInt16:
2673 : {
2674 4310 : GUInt16 *pSrc =
2675 4310 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2676 4310 : pSrc += iSrcOffset;
2677 18884 : for (int i = 0; i < nSrcLen; i += 2)
2678 : {
2679 14574 : adfReal[i] = pSrc[i];
2680 14574 : adfReal[i + 1] = pSrc[i + 1];
2681 : }
2682 4310 : break;
2683 : }
2684 :
2685 946 : case GDT_Int32:
2686 : {
2687 946 : GInt32 *pSrc =
2688 946 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2689 946 : pSrc += iSrcOffset;
2690 2624 : for (int i = 0; i < nSrcLen; i += 2)
2691 : {
2692 1678 : adfReal[i] = pSrc[i];
2693 1678 : adfReal[i + 1] = pSrc[i + 1];
2694 : }
2695 946 : break;
2696 : }
2697 :
2698 946 : case GDT_UInt32:
2699 : {
2700 946 : GUInt32 *pSrc =
2701 946 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2702 946 : pSrc += iSrcOffset;
2703 2624 : for (int i = 0; i < nSrcLen; i += 2)
2704 : {
2705 1678 : adfReal[i] = pSrc[i];
2706 1678 : adfReal[i + 1] = pSrc[i + 1];
2707 : }
2708 946 : break;
2709 : }
2710 :
2711 196 : case GDT_Int64:
2712 : {
2713 196 : auto pSrc =
2714 196 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2715 196 : pSrc += iSrcOffset;
2716 392 : for (int i = 0; i < nSrcLen; i += 2)
2717 : {
2718 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2719 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2720 : }
2721 196 : break;
2722 : }
2723 :
2724 196 : case GDT_UInt64:
2725 : {
2726 196 : auto pSrc =
2727 196 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2728 196 : pSrc += iSrcOffset;
2729 392 : for (int i = 0; i < nSrcLen; i += 2)
2730 : {
2731 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2732 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2733 : }
2734 196 : break;
2735 : }
2736 :
2737 0 : case GDT_Float16:
2738 : {
2739 0 : GFloat16 *pSrc =
2740 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2741 0 : pSrc += iSrcOffset;
2742 0 : for (int i = 0; i < nSrcLen; i += 2)
2743 : {
2744 0 : adfReal[i] = pSrc[i];
2745 0 : adfReal[i + 1] = pSrc[i + 1];
2746 : }
2747 0 : break;
2748 : }
2749 :
2750 25270 : case GDT_Float32:
2751 : {
2752 25270 : float *pSrc =
2753 25270 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2754 25270 : pSrc += iSrcOffset;
2755 121739 : for (int i = 0; i < nSrcLen; i += 2)
2756 : {
2757 96469 : adfReal[i] = double(pSrc[i]);
2758 96469 : adfReal[i + 1] = double(pSrc[i + 1]);
2759 : }
2760 25270 : break;
2761 : }
2762 :
2763 946 : case GDT_Float64:
2764 : {
2765 946 : double *pSrc =
2766 946 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2767 946 : pSrc += iSrcOffset;
2768 2624 : for (int i = 0; i < nSrcLen; i += 2)
2769 : {
2770 1678 : adfReal[i] = pSrc[i];
2771 1678 : adfReal[i + 1] = pSrc[i + 1];
2772 : }
2773 946 : break;
2774 : }
2775 :
2776 1169220 : case GDT_CInt16:
2777 : {
2778 1169220 : GInt16 *pSrc =
2779 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2780 1169220 : pSrc += 2 * iSrcOffset;
2781 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2782 : {
2783 3506800 : adfReal[i] = pSrc[2 * i];
2784 3506800 : padfImag[i] = pSrc[2 * i + 1];
2785 :
2786 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2787 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2788 : }
2789 1169220 : break;
2790 : }
2791 :
2792 750 : case GDT_CInt32:
2793 : {
2794 750 : GInt32 *pSrc =
2795 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2796 750 : pSrc += 2 * iSrcOffset;
2797 2232 : for (int i = 0; i < nSrcLen; i += 2)
2798 : {
2799 1482 : adfReal[i] = pSrc[2 * i];
2800 1482 : padfImag[i] = pSrc[2 * i + 1];
2801 :
2802 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2803 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2804 : }
2805 750 : break;
2806 : }
2807 :
2808 0 : case GDT_CFloat16:
2809 : {
2810 0 : GFloat16 *pSrc =
2811 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2812 0 : pSrc += 2 * iSrcOffset;
2813 0 : for (int i = 0; i < nSrcLen; i += 2)
2814 : {
2815 0 : adfReal[i] = pSrc[2 * i];
2816 0 : padfImag[i] = pSrc[2 * i + 1];
2817 :
2818 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2819 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2820 : }
2821 0 : break;
2822 : }
2823 :
2824 750 : case GDT_CFloat32:
2825 : {
2826 750 : float *pSrc =
2827 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2828 750 : pSrc += 2 * iSrcOffset;
2829 2232 : for (int i = 0; i < nSrcLen; i += 2)
2830 : {
2831 1482 : adfReal[i] = double(pSrc[2 * i]);
2832 1482 : padfImag[i] = double(pSrc[2 * i + 1]);
2833 :
2834 1482 : adfReal[i + 1] = double(pSrc[2 * i + 2]);
2835 1482 : padfImag[i + 1] = double(pSrc[2 * i + 3]);
2836 : }
2837 750 : break;
2838 : }
2839 :
2840 750 : case GDT_CFloat64:
2841 : {
2842 750 : double *pSrc =
2843 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2844 750 : pSrc += 2 * iSrcOffset;
2845 2232 : for (int i = 0; i < nSrcLen; i += 2)
2846 : {
2847 1482 : adfReal[i] = pSrc[2 * i];
2848 1482 : padfImag[i] = pSrc[2 * i + 1];
2849 :
2850 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2851 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2852 : }
2853 750 : break;
2854 : }
2855 :
2856 0 : case GDT_Unknown:
2857 : case GDT_TypeCount:
2858 0 : CPLAssert(false);
2859 : if (padfDensity)
2860 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2861 : return false;
2862 : }
2863 :
2864 2346910 : if (padfDensity == nullptr)
2865 1197590 : return true;
2866 :
2867 1149320 : if (poWK->pafUnifiedSrcDensity == nullptr)
2868 : {
2869 3256740 : for (int i = 0; i < nSrcLen; i += 2)
2870 : {
2871 : // Take into account earlier calcs.
2872 2127390 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2873 : {
2874 2087480 : padfDensity[i] = 1.0;
2875 2087480 : bHasValid = true;
2876 : }
2877 :
2878 2127390 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2879 : {
2880 2088140 : padfDensity[i + 1] = 1.0;
2881 2088140 : bHasValid = true;
2882 : }
2883 : }
2884 : }
2885 : else
2886 : {
2887 70068 : for (int i = 0; i < nSrcLen; i += 2)
2888 : {
2889 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2890 50103 : padfDensity[i] =
2891 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2892 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2893 49252 : bHasValid = true;
2894 :
2895 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2896 50103 : padfDensity[i + 1] =
2897 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2898 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2899 49170 : bHasValid = true;
2900 : }
2901 : }
2902 :
2903 1149320 : return bHasValid;
2904 : }
2905 :
2906 : /************************************************************************/
2907 : /* GWKGetPixelT() */
2908 : /************************************************************************/
2909 :
2910 : template <class T>
2911 10002719 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2912 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2913 :
2914 : {
2915 10002719 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2916 :
2917 22733143 : if ((poWK->panUnifiedSrcValid != nullptr &&
2918 20005418 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2919 10002719 : (poWK->papanBandSrcValid != nullptr &&
2920 589836 : poWK->papanBandSrcValid[iBand] != nullptr &&
2921 589836 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2922 : {
2923 9 : *pdfDensity = 0.0;
2924 9 : return false;
2925 : }
2926 :
2927 10002709 : *pValue = pSrc[iSrcOffset];
2928 :
2929 10002709 : if (poWK->pafUnifiedSrcDensity == nullptr)
2930 8880346 : *pdfDensity = 1.0;
2931 : else
2932 1122362 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2933 :
2934 10002709 : return *pdfDensity != 0.0;
2935 : }
2936 :
2937 : /************************************************************************/
2938 : /* GWKBilinearResample() */
2939 : /* Set of bilinear interpolators */
2940 : /************************************************************************/
2941 :
2942 77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2943 : double dfSrcX, double dfSrcY,
2944 : double *pdfDensity, double *pdfReal,
2945 : double *pdfImag)
2946 :
2947 : {
2948 : // Save as local variables to avoid following pointers.
2949 77448 : const int nSrcXSize = poWK->nSrcXSize;
2950 77448 : const int nSrcYSize = poWK->nSrcYSize;
2951 :
2952 77448 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2953 77448 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2954 77448 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2955 77448 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2956 77448 : bool bShifted = false;
2957 :
2958 77448 : if (iSrcX == -1)
2959 : {
2960 1534 : iSrcX = 0;
2961 1534 : dfRatioX = 1;
2962 : }
2963 77448 : if (iSrcY == -1)
2964 : {
2965 7734 : iSrcY = 0;
2966 7734 : dfRatioY = 1;
2967 : }
2968 77448 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2969 :
2970 : // Shift so we don't overrun the array.
2971 77448 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2972 77330 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2973 77330 : iSrcOffset + nSrcXSize + 1)
2974 : {
2975 230 : bShifted = true;
2976 230 : --iSrcOffset;
2977 : }
2978 :
2979 77448 : double adfDensity[2] = {0.0, 0.0};
2980 77448 : double adfReal[2] = {0.0, 0.0};
2981 77448 : double adfImag[2] = {0.0, 0.0};
2982 77448 : double dfAccumulatorReal = 0.0;
2983 77448 : double dfAccumulatorImag = 0.0;
2984 77448 : double dfAccumulatorDensity = 0.0;
2985 77448 : double dfAccumulatorDivisor = 0.0;
2986 :
2987 77448 : const GPtrDiff_t nSrcPixels =
2988 77448 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2989 : // Get pixel row.
2990 77448 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2991 154896 : iSrcOffset < nSrcPixels &&
2992 77448 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2993 : adfImag))
2994 : {
2995 71504 : double dfMult1 = dfRatioX * dfRatioY;
2996 71504 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2997 :
2998 : // Shifting corrected.
2999 71504 : if (bShifted)
3000 : {
3001 230 : adfReal[0] = adfReal[1];
3002 230 : adfImag[0] = adfImag[1];
3003 230 : adfDensity[0] = adfDensity[1];
3004 : }
3005 :
3006 : // Upper Left Pixel.
3007 71504 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3008 71504 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3009 : {
3010 66050 : dfAccumulatorDivisor += dfMult1;
3011 :
3012 66050 : dfAccumulatorReal += adfReal[0] * dfMult1;
3013 66050 : dfAccumulatorImag += adfImag[0] * dfMult1;
3014 66050 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3015 : }
3016 :
3017 : // Upper Right Pixel.
3018 71504 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3019 70609 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3020 : {
3021 65335 : dfAccumulatorDivisor += dfMult2;
3022 :
3023 65335 : dfAccumulatorReal += adfReal[1] * dfMult2;
3024 65335 : dfAccumulatorImag += adfImag[1] * dfMult2;
3025 65335 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3026 : }
3027 : }
3028 :
3029 : // Get pixel row.
3030 77448 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
3031 228032 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
3032 73136 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
3033 : adfReal, adfImag))
3034 : {
3035 67577 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
3036 67577 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3037 :
3038 : // Shifting corrected
3039 67577 : if (bShifted)
3040 : {
3041 112 : adfReal[0] = adfReal[1];
3042 112 : adfImag[0] = adfImag[1];
3043 112 : adfDensity[0] = adfDensity[1];
3044 : }
3045 :
3046 : // Lower Left Pixel
3047 67577 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3048 67577 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3049 : {
3050 62298 : dfAccumulatorDivisor += dfMult1;
3051 :
3052 62298 : dfAccumulatorReal += adfReal[0] * dfMult1;
3053 62298 : dfAccumulatorImag += adfImag[0] * dfMult1;
3054 62298 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3055 : }
3056 :
3057 : // Lower Right Pixel.
3058 67577 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3059 66800 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3060 : {
3061 61823 : dfAccumulatorDivisor += dfMult2;
3062 :
3063 61823 : dfAccumulatorReal += adfReal[1] * dfMult2;
3064 61823 : dfAccumulatorImag += adfImag[1] * dfMult2;
3065 61823 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3066 : }
3067 : }
3068 :
3069 : /* -------------------------------------------------------------------- */
3070 : /* Return result. */
3071 : /* -------------------------------------------------------------------- */
3072 77448 : if (dfAccumulatorDivisor == 1.0)
3073 : {
3074 45929 : *pdfReal = dfAccumulatorReal;
3075 45929 : *pdfImag = dfAccumulatorImag;
3076 45929 : *pdfDensity = dfAccumulatorDensity;
3077 45929 : return false;
3078 : }
3079 31519 : else if (dfAccumulatorDivisor < 0.00001)
3080 : {
3081 0 : *pdfReal = 0.0;
3082 0 : *pdfImag = 0.0;
3083 0 : *pdfDensity = 0.0;
3084 0 : return false;
3085 : }
3086 : else
3087 : {
3088 31519 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
3089 31519 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
3090 31519 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
3091 31519 : return true;
3092 : }
3093 : }
3094 :
3095 : template <class T>
3096 7278342 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3097 : int iBand, double dfSrcX,
3098 : double dfSrcY, T *pValue)
3099 :
3100 : {
3101 :
3102 7278342 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3103 7278342 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3104 7278342 : GPtrDiff_t iSrcOffset =
3105 7278342 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3106 7278342 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
3107 7278342 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
3108 :
3109 7278342 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
3110 :
3111 7278342 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3112 4978059 : iSrcY + 1 < poWK->nSrcYSize)
3113 : {
3114 4931242 : const double dfAccumulator =
3115 4931242 : (double(pSrc[iSrcOffset]) * dfRatioX +
3116 4931242 : double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
3117 : dfRatioY +
3118 4931242 : (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
3119 4931242 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
3120 4931242 : (1.0 - dfRatioX)) *
3121 4931242 : (1.0 - dfRatioY);
3122 :
3123 4931242 : *pValue = GWKRoundValueT<T>(dfAccumulator);
3124 :
3125 4931242 : return true;
3126 : }
3127 :
3128 2347100 : double dfAccumulatorDivisor = 0.0;
3129 2347100 : double dfAccumulator = 0.0;
3130 :
3131 : // Upper Left Pixel.
3132 2347100 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
3133 278940 : iSrcY < poWK->nSrcYSize)
3134 : {
3135 278940 : const double dfMult = dfRatioX * dfRatioY;
3136 :
3137 278940 : dfAccumulatorDivisor += dfMult;
3138 :
3139 278940 : dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
3140 : }
3141 :
3142 : // Upper Right Pixel.
3143 2347100 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3144 1980536 : iSrcY < poWK->nSrcYSize)
3145 : {
3146 1980536 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
3147 :
3148 1980536 : dfAccumulatorDivisor += dfMult;
3149 :
3150 1980536 : dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
3151 : }
3152 :
3153 : // Lower Right Pixel.
3154 2347100 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3155 2081444 : iSrcY + 1 < poWK->nSrcYSize)
3156 : {
3157 2001333 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3158 :
3159 2001333 : dfAccumulatorDivisor += dfMult;
3160 :
3161 2001333 : dfAccumulator +=
3162 2001333 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
3163 : }
3164 :
3165 : // Lower Left Pixel.
3166 2347100 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3167 379800 : iSrcY + 1 < poWK->nSrcYSize)
3168 : {
3169 299461 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
3170 :
3171 299461 : dfAccumulatorDivisor += dfMult;
3172 :
3173 299461 : dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
3174 : }
3175 :
3176 : /* -------------------------------------------------------------------- */
3177 : /* Return result. */
3178 : /* -------------------------------------------------------------------- */
3179 2347100 : double dfValue = 0.0;
3180 :
3181 2347100 : if (dfAccumulatorDivisor < 0.00001)
3182 : {
3183 0 : *pValue = 0;
3184 0 : return false;
3185 : }
3186 2347100 : else if (dfAccumulatorDivisor == 1.0)
3187 : {
3188 7320 : dfValue = dfAccumulator;
3189 : }
3190 : else
3191 : {
3192 2339778 : dfValue = dfAccumulator / dfAccumulatorDivisor;
3193 : }
3194 :
3195 2347100 : *pValue = GWKRoundValueT<T>(dfValue);
3196 :
3197 2347100 : return true;
3198 : }
3199 :
3200 : /************************************************************************/
3201 : /* GWKCubicResample() */
3202 : /* Set of bicubic interpolators using cubic convolution. */
3203 : /************************************************************************/
3204 :
3205 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
3206 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
3207 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
3208 :
// Cubic convolution of four equally spaced samples f0..f3, evaluated
// between f1 and f2. distance1, distance2 and distance3 are the
// multipliers applied to the three difference terms below.
// NOTE(review): presumably the offset from f1 and its square and cube, per
// the references cited above - confirm against the callers.
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T term1 = distance1 * (f2 - f0);
    const T term2 = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T term3 = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (term1 + term2 + term3));
}
3217 :
3218 : /************************************************************************/
3219 : /* GWKCubicComputeWeights() */
3220 : /************************************************************************/
3221 :
3222 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
3223 :
// Computes the four cubic convolution weights for the fractional offset x
// and stores them in coeffs[0..3]. They are written as polynomials in x
// that share the common factors x/2 and x*x/2.
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    const T xHalf = T(0.5) * x;
    const T xTimes3 = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    coeffs[0] = xHalf * (-1 + x * (2 - x));
    coeffs[1] = 1 + xSquaredHalf * (-5 + xTimes3);
    coeffs[2] = xHalf * (1 + x * (4 - xTimes3));
    coeffs[3] = xSquaredHalf * (-1 + x);
}
3236 :
// Dot product of four double weights with four samples of type T, each
// sample being converted to double before multiplication.
template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
{
    // Seed with the first product so the summation order is left to right.
    double dfSum = v1[0] * double(v2[0]);
    for (int k = 1; k < 4; ++k)
        dfSum = dfSum + v1[k] * double(v2[k]);
    return dfSum;
}
3242 :
3243 : #if 0
3244 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
3245 : // instead of 17.
3246 : // TODO(schwehr): Use an inline function.
3247 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
3248 : { \
3249 : const double dfX = dfX_; \
3250 : dfHalfX = 0.5 * dfX; \
3251 : const double dfThreeX = 3.0 * dfX; \
3252 : const double dfXMinus1 = dfX - 1; \
3253 : \
3254 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
3255 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
3256 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
3257 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
3258 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
3259 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
3260 : }
3261 :
3262 : // TODO(schwehr): Use an inline function.
3263 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
3264 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
3265 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
3266 : #endif
3267 :
3268 302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3269 : double dfSrcX, double dfSrcY,
3270 : double *pdfDensity, double *pdfReal,
3271 : double *pdfImag)
3272 :
3273 : {
3274 302045 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3275 302045 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3276 302045 : GPtrDiff_t iSrcOffset =
3277 302045 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3278 302045 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3279 302045 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3280 302045 : double adfDensity[4] = {};
3281 302045 : double adfReal[4] = {};
3282 302045 : double adfImag[4] = {};
3283 :
3284 : // Get the bilinear interpolation at the image borders.
3285 302045 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3286 286140 : iSrcY + 2 >= poWK->nSrcYSize)
3287 24670 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3288 24670 : pdfDensity, pdfReal, pdfImag);
3289 :
3290 277375 : double adfValueDens[4] = {};
3291 277375 : double adfValueReal[4] = {};
3292 277375 : double adfValueImag[4] = {};
3293 :
3294 277375 : double adfCoeffsX[4] = {};
3295 277375 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3296 :
3297 1240570 : for (GPtrDiff_t i = -1; i < 3; i++)
3298 : {
3299 1009640 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3300 998035 : 2, adfDensity, adfReal, adfImag) ||
3301 998035 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3302 980395 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3303 2979770 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3304 972094 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3305 : {
3306 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3307 46449 : pdfDensity, pdfReal, pdfImag);
3308 : }
3309 :
3310 963196 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3311 963196 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3312 963196 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3313 : }
3314 :
3315 : /* -------------------------------------------------------------------- */
3316 : /* For now, if we have any pixels missing in the kernel area, */
3317 : /* we fallback on using bilinear interpolation. Ideally we */
3318 : /* should do "weight adjustment" of our results similarly to */
3319 : /* what is done for the cubic spline and lanc. interpolators. */
3320 : /* -------------------------------------------------------------------- */
3321 :
3322 230926 : double adfCoeffsY[4] = {};
3323 230926 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3324 :
3325 230926 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3326 230926 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3327 230926 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3328 :
3329 230926 : return true;
3330 : }
3331 :
3332 : #ifdef USE_SSE2
3333 :
3334 : /************************************************************************/
3335 : /* XMMLoad4Values() */
3336 : /* */
3337 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3338 : /* m128 register. */
3339 : /************************************************************************/
3340 :
3341 462280000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3342 : {
3343 : unsigned int i;
3344 462280000 : memcpy(&i, ptr, 4);
3345 924560000 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3346 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3347 : // 32-bit integers.
3348 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3349 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3350 : #else
3351 924560000 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3352 924560000 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3353 : #endif
3354 924560000 : return _mm_cvtepi32_ps(xmm_i);
3355 : }
3356 :
3357 1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3358 : {
3359 : GUInt64 i;
3360 1108340 : memcpy(&i, ptr, 8);
3361 2216690 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3362 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3363 : // 32-bit integers.
3364 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3365 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3366 : #else
3367 2216690 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3368 : #endif
3369 2216690 : return _mm_cvtepi32_ps(xmm_i);
3370 : }
3371 :
3372 : /************************************************************************/
3373 : /* XMMHorizontalAdd() */
3374 : /* */
3375 : /* Return the sum of the 4 floating points of the register. */
3376 : /************************************************************************/
3377 :
3378 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3379 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3380 : {
3381 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3382 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3383 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3384 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3385 : return _mm_cvtss_f32(sums);
3386 : }
3387 : #else
3388 115847000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3389 : {
3390 115847000 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3391 115847000 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3392 115847000 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3393 115847000 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3394 115847000 : return _mm_cvtss_f32(sums);
3395 : }
3396 : #endif
3397 :
3398 : #endif // define USE_SSE2
3399 :
3400 : /************************************************************************/
3401 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3402 : /************************************************************************/
3403 :
3404 : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3405 : // because there are a few assumptions about those types.
3406 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3407 : // perf benefit.
3408 :
3409 : template <class T>
3410 389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3411 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3412 : double *pdfDensity, double *pdfReal)
3413 : {
3414 389755 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3415 389755 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3416 389755 : const GPtrDiff_t iSrcOffset =
3417 389755 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3418 :
3419 : // Get the bilinear interpolation at the image borders.
3420 389755 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3421 387271 : iSrcY + 2 >= poWK->nSrcYSize)
3422 : {
3423 2484 : double adfImagIgnored[4] = {};
3424 2484 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3425 2484 : pdfDensity, pdfReal, adfImagIgnored);
3426 : }
3427 :
3428 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3429 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3430 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3431 :
3432 : // TODO(schwehr): Explain the magic numbers.
3433 : float afTemp[4 + 4 + 4 + 1];
3434 : float *pafAligned =
3435 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3436 : float *pafCoeffs = pafAligned;
3437 : float *pafDensity = pafAligned + 4;
3438 : float *pafValue = pafAligned + 8;
3439 :
3440 : const float fHalfDeltaX = 0.5f * fDeltaX;
3441 : const float fThreeDeltaX = 3.0f * fDeltaX;
3442 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3443 :
3444 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3445 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3446 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3447 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3448 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3449 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3450 :
3451 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3452 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3453 : i++, iOffset += poWK->nSrcXSize)
3454 : {
3455 : const __m128 xmmDensity =
3456 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3457 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3458 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3459 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3460 :
3461 : const __m128 xmmValues =
3462 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3463 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3464 : }
3465 : if (_mm_movemask_ps(xmmMaskLowDensity))
3466 : {
3467 : double adfImagIgnored[4] = {};
3468 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3469 : pdfDensity, pdfReal, adfImagIgnored);
3470 : }
3471 :
3472 : const float fHalfDeltaY = 0.5f * fDeltaY;
3473 : const float fThreeDeltaY = 3.0f * fDeltaY;
3474 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3475 :
3476 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3477 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3478 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3479 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3480 :
3481 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3482 :
3483 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3484 : const __m128 xmmValue = _mm_load_ps(pafValue);
3485 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3486 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3487 :
3488 : // We did all above computations on float32 whereas the general case is
3489 : // float64. Not sure if one is fundamentally more correct than the other
3490 : // one, but we want our optimization to give the same result as the
3491 : // general case as much as possible, so if the resulting value is
3492 : // close to some_int_value + 0.5, redo the computation with the general
3493 : // case.
3494 : // Note: If other types than Byte or UInt16, will need changes.
3495 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3496 : return true;
3497 :
3498 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3499 :
3500 387271 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3501 387271 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3502 :
3503 387271 : double adfValueDens[4] = {};
3504 387271 : double adfValueReal[4] = {};
3505 :
3506 387271 : double adfCoeffsX[4] = {};
3507 387271 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3508 :
3509 387271 : double adfCoeffsY[4] = {};
3510 387271 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3511 :
3512 1930200 : for (GPtrDiff_t i = -1; i < 3; i++)
3513 : {
3514 1544480 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3515 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3516 1544480 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3517 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3518 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 1] <
3519 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3520 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 2] <
3521 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3522 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 3] <
3523 : SRC_DENSITY_THRESHOLD_FLOAT)
3524 : {
3525 1551 : double adfImagIgnored[4] = {};
3526 1551 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3527 : pdfDensity, pdfReal,
3528 1551 : adfImagIgnored);
3529 : }
3530 : #endif
3531 :
3532 3085860 : adfValueDens[i + 1] =
3533 1542930 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3534 :
3535 1542930 : adfValueReal[i + 1] = CONVOL4(
3536 : adfCoeffsX,
3537 1542930 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3538 : }
3539 :
3540 385720 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3541 385720 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3542 :
3543 385720 : return true;
3544 : }
3545 :
3546 : /************************************************************************/
3547 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3548 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3549 : /************************************************************************/
3550 :
3551 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3552 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3553 : double *pdfDensity, double *pdfReal)
3554 :
3555 : {
3556 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3557 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3558 0 : const GPtrDiff_t iSrcOffset =
3559 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3560 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3561 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3562 :
3563 : // Get the bilinear interpolation at the image borders.
3564 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3565 0 : iSrcY + 2 >= poWK->nSrcYSize)
3566 : {
3567 0 : double adfImagIgnored[4] = {};
3568 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3569 0 : pdfDensity, pdfReal, adfImagIgnored);
3570 : }
3571 :
3572 0 : double adfCoeffsX[4] = {};
3573 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3574 :
3575 0 : double adfCoeffsY[4] = {};
3576 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3577 :
3578 0 : double adfValueDens[4] = {};
3579 0 : double adfValueReal[4] = {};
3580 0 : double adfDensity[4] = {};
3581 0 : double adfReal[4] = {};
3582 0 : double adfImagIgnored[4] = {};
3583 :
3584 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3585 : {
3586 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3587 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3588 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3589 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3590 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3591 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3592 : {
3593 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3594 : pdfDensity, pdfReal,
3595 0 : adfImagIgnored);
3596 : }
3597 :
3598 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3599 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3600 : }
3601 :
3602 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3603 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3604 :
3605 0 : return true;
3606 : }
3607 :
3608 : template <class T>
3609 2300964 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3610 : int iBand, double dfSrcX,
3611 : double dfSrcY, T *pValue)
3612 :
3613 : {
3614 2300964 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3615 2300964 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3616 2300964 : const GPtrDiff_t iSrcOffset =
3617 2300964 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3618 2300964 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3619 2300964 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3620 2300964 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3621 2300964 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3622 :
3623 : // Get the bilinear interpolation at the image borders.
3624 2300964 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3625 1883033 : iSrcY + 2 >= poWK->nSrcYSize)
3626 490244 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3627 490244 : pValue);
3628 :
3629 1810720 : double adfCoeffs[4] = {};
3630 1810720 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3631 :
3632 1810720 : double adfValue[4] = {};
3633 :
3634 9053590 : for (GPtrDiff_t i = -1; i < 3; i++)
3635 : {
3636 7242876 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3637 :
3638 7242876 : adfValue[i + 1] = CONVOL4(
3639 : adfCoeffs,
3640 7242876 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3641 : }
3642 :
3643 : const double dfValue =
3644 1810720 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3645 : adfValue[1], adfValue[2], adfValue[3]);
3646 :
3647 1810720 : *pValue = GWKClampValueT<T>(dfValue);
3648 :
3649 1810720 : return true;
3650 : }
3651 :
3652 : /************************************************************************/
3653 : /* GWKLanczosSinc() */
3654 : /************************************************************************/
3655 :
3656 : /*
3657 : * Lanczos windowed sinc interpolation kernel with radius r.
3658 : * /
3659 : * | sinc(x) * sinc(x/r), if |x| < r
3660 : * L(x) = | 1, if x = 0 ,
3661 : * | 0, otherwise
3662 : * \
3663 : *
3664 : * where sinc(x) = sin(PI * x) / (PI * x).
3665 : */
3666 :
/**
 * Lanczos (radius 3) weight sinc(x) * sinc(x / 3) for a single abscissa.
 *
 * Returns exactly 1.0 when dfX == 0.  No |x| < 3 cut-off is applied here.
 * Only one sin() call is made: with s = sin(PI * x / 3), the identity
 * sin(3a) = 3 sin(a) - 4 sin^3(a) gives
 * sin(PI * x) * sin(PI * x / 3) = (3 - 4 s^2) * s^2.
 *
 * @param dfX distance from the kernel centre.
 * @return the kernel weight.
 */
static double GWKLanczosSinc(double dfX)
{
    if (dfX != 0.0)
    {
        const double dfArg = M_PI * dfX;
        const double dfArgOver3 = dfArg / 3;
        const double dfSinThird = sin(dfArgOver3);
        const double dfSinThirdSq = dfSinThird * dfSinThird;

        // Numerator: sin(PI x) * sin(PI x / 3); denominator: (PI x)^2 / 3.
        return (3 - 4 * dfSinThirdSq) * dfSinThirdSq / (dfArg * dfArgOver3);
    }

    return 1.0;
}
3683 :
/**
 * Four-at-a-time variant of the Lanczos (radius 3) weight.
 *
 * Each of the four abscissas in padfValues is replaced in place by its
 * weight sinc(x) * sinc(x / 3) (1.0 when x == 0), using the same
 * single-sin() formulation based on sin(3a) = 3 sin(a) - 4 sin^3(a).
 *
 * @param padfValues in: four distances; out: the four weights.
 * @return the sum of the four weights.
 */
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfVal = padfValues; pdfVal != padfValues + 4; ++pdfVal)
    {
        const double dfX = *pdfVal;
        if (dfX == 0.0)
        {
            *pdfVal = 1.0;
            continue;
        }

        const double dfArg = M_PI * dfX;
        const double dfArgOver3 = dfArg / 3;
        const double dfSinThird = sin(dfArgOver3);
        const double dfSinThirdSq = dfSinThird * dfSinThird;

        // Numerator: sin(PI x) * sin(PI x / 3); denominator: (PI x)^2 / 3.
        *pdfVal = (3 - 4 * dfSinThirdSq) * dfSinThirdSq / (dfArg * dfArgOver3);
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3708 :
3709 : /************************************************************************/
3710 : /* GWKBilinear() */
3711 : /************************************************************************/
3712 :
/**
 * Triangle (bilinear) filter weight.
 *
 * @param dfX distance from the kernel centre.
 * @return 1 - |dfX| when |dfX| <= 1, otherwise 0.
 */
static double GWKBilinear(double dfX)
{
    const double dfDist = fabs(dfX);
    return (dfDist <= 1.0) ? 1 - dfDist : 0.0;
}
3721 :
/**
 * Four-at-a-time variant of the triangle (bilinear) filter.
 *
 * Each of the four distances in padfValues is replaced in place by
 * 1 - |x| when |x| <= 1, otherwise by 0.
 *
 * @param padfValues in: four distances; out: the four weights.
 * @return the sum of the four weights.
 */
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfDist = fabs(padfValues[i]);
        padfValues[i] = (dfDist <= 1.0) ? 1 - dfDist : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3746 :
3747 : /************************************************************************/
3748 : /* GWKCubic() */
3749 : /************************************************************************/
3750 :
// Scalar cubic filter weight: forwards dfX unchanged to CubicKernel() and
// returns its result.
static double GWKCubic(double dfX)
{
    return CubicKernel(dfX);
}
3755 :
/**
 * Four-at-a-time cubic convolution filter.
 *
 * Each of the four distances in padfValues is replaced in place by the
 * piecewise cubic
 *     x^2 * (1.5 |x| - 2.5) + 1                 for |x| <= 1
 *     x^2 * (-0.5 |x| + 2.5) - 4 |x| + 2        for 1 < |x| <= 2
 *     0                                         otherwise
 *
 * @param padfValues in: four distances; out: the four weights.
 * @return the sum of the four weights.
 */
static double GWKCubic4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfX = padfValues[i];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        double dfWeight = 0.0;
        if (dfAbsX <= 1.0)
        {
            dfWeight = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            dfWeight = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }

        padfValues[i] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3797 :
3798 : /************************************************************************/
3799 : /* GWKBSpline() */
3800 : /************************************************************************/
3801 :
3802 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3803 : // Equation 8 with (B,C)=(1,0)
3804 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3805 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3806 :
/**
 * Cubic B-spline filter weight, scaled by 6.
 *
 * Evaluates the truncated-power form
 *     (x+2)^3_+ - 4 (x+1)^3_+ + 6 x^3_+ - 4 (x-1)^3_+
 * which equals 6 * B(x) on (-2, 2).  The 1/6 factor of the textbook
 * formula is not applied (callers presumably normalise by the sum of
 * weights -- confirm at the call sites).
 *
 * The expansion above is only valid up to x = 2: beyond that the missing
 * (x-2)^3_+ term makes it return spurious non-zero values (e.g. -0.125 at
 * x = 2.5) although the kernel support is [-2, 2].  Arguments with
 * x >= 2 therefore return 0 explicitly, mirroring what already happens
 * for x <= -2.  Results for -2 < x < 2 are unchanged.
 *
 * @param x distance from the kernel centre.
 * @return 6 * B(x) for |x| < 2, otherwise 0.
 */
static double GWKBSpline(double x)
{
    const double xp2 = x + 2.0;

    // Outside the kernel support (the negated test also sends NaN here,
    // as the original xp2 > 0.0 test did).
    if (!(xp2 > 0.0) || x >= 2.0)
        return 0.0;

    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;

    // Accumulate the truncated-power terms from the innermost outwards so
    // that the floating-point evaluation order is preserved.
    double dfSum = 0.0;
    if (xp1 > 0.0)
    {
        if (x > 0.0)
        {
            if (xm1 > 0.0)
                dfSum = -4.0 * xm1 * xm1 * xm1;
            dfSum = dfSum + 6.0 * x * x * x;
        }
        dfSum = dfSum + -4.0 * xp1 * xp1 * xp1;
    }

    return dfSum + xp2 * xp2 * xp2;  // * 0.166666666666666666666
}
3830 :
/**
 * Four-at-a-time cubic B-spline filter, scaled by 6.
 *
 * Each of the four distances in padfValues is replaced in place by
 *     (x+2)^3_+ - 4 (x+1)^3_+ + 6 x^3_+ - 4 (x-1)^3_+
 * which equals 6 * B(x) on (-2, 2); the 1/6 factor is not applied.
 *
 * That expansion lacks the (x-2)^3_+ term, so it is only valid up to
 * x = 2 and would yield spurious non-zero weights beyond (e.g. -0.125 at
 * x = 2.5).  Values with x >= 2 are therefore mapped to 0 explicitly,
 * mirroring x <= -2.  Results for -2 < x < 2 are unchanged.
 *
 * @param padfValues in: four distances; out: the four weights.
 * @return the sum of the four weights.
 */
static double GWKBSpline4Values(double *padfValues)
{
    for (int i = 0; i < 4; i++)
    {
        const double x = padfValues[i];
        const double xp2 = x + 2.0;

        // Outside the kernel support (the negated test also catches NaN,
        // as the original xp2 > 0.0 test did).
        if (!(xp2 > 0.0) || x >= 2.0)
        {
            padfValues[i] = 0.0;
            continue;
        }

        const double xp1 = x + 1.0;
        const double xm1 = x - 1.0;

        // Accumulate from the innermost term outwards so that the
        // floating-point evaluation order is preserved.
        double dfSum = 0.0;
        if (xp1 > 0.0)
        {
            if (x > 0.0)
            {
                if (xm1 > 0.0)
                    dfSum = -4.0 * xm1 * xm1 * xm1;
                dfSum = dfSum + 6.0 * x * x * x;
            }
            dfSum = dfSum + -4.0 * xp1 * xp1 * xp1;
        }

        padfValues[i] = dfSum + xp2 * xp2 * xp2;  // * 0.166666666666666666666
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3860 : /************************************************************************/
3861 : /* GWKResampleWrkStruct */
3862 : /************************************************************************/
3863 :
// Per-caller scratch state shared by GWKResample() and
// GWKResampleOptimizedLanczos().  Created by GWKResampleCreateWrkStruct()
// and released by GWKResampleDeleteWrkStruct().
typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;

// Signature common to the resampling entry points that can be stored in
// GWKResampleWrkStruct::pfnGWKResample.
typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
                                   double dfSrcX, double dfSrcY,
                                   double *pdfDensity, double *pdfReal,
                                   double *pdfImag,
                                   GWKResampleWrkStruct *psWrkStruct);

struct _GWKResampleWrkStruct
{
    // Resampler selected at creation time: GWKResampleOptimizedLanczos for
    // GRA_Lanczos, GWKResample otherwise.
    pfnGWKResampleType pfnGWKResample;

    // Space for saved X weights.
    // Both arrays hold (nXRadius + 1) * 2 entries; pabCalcX[k] tells
    // GWKResample() whether padfWeightsX[k] has already been computed for
    // the current output pixel.
    double *padfWeightsX;
    bool *pabCalcX;

    // (nYRadius + 1) * 2 entries.
    double *padfWeightsY;       // Only used by GWKResampleOptimizedLanczos.
    // Source position for which the cached weights were last computed;
    // initialised to -10 so that the first call always recomputes.
    int iLastSrcX;              // Only used by GWKResampleOptimizedLanczos.
    int iLastSrcY;              // Only used by GWKResampleOptimizedLanczos.
    double dfLastDeltaX;        // Only used by GWKResampleOptimizedLanczos.
    double dfLastDeltaY;        // Only used by GWKResampleOptimizedLanczos.
    // cos/sin of PI * scale and of PI / 3 * scale.  They are only filled
    // in when the resampler is Lanczos and the corresponding scale is < 1;
    // otherwise they keep the zero value from the initial CPLCalloc().
    double dfCosPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiXScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfCosPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiXScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    double dfCosPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiYScale;       // Only used by GWKResampleOptimizedLanczos.
    double dfCosPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.
    double dfSinPiYScaleOver3;  // Only used by GWKResampleOptimizedLanczos.

    // Space for saving a row of pixels.
    // Each buffer holds (nXRadius + 1) * 2 entries.  padfRowDensity is
    // nullptr when the kernel has neither a unified density, a unified
    // validity mask nor per-band validity masks.
    double *padfRowDensity;
    double *padfRowReal;
    double *padfRowImag;
};
3899 :
3900 : /************************************************************************/
3901 : /* GWKResampleCreateWrkStruct() */
3902 : /************************************************************************/
3903 :
// Forward declarations: both resamplers are defined further down in this
// file, but GWKResampleCreateWrkStruct() below needs their addresses to
// fill in GWKResampleWrkStruct::pfnGWKResample.
static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
                        double dfSrcY, double *pdfDensity, double *pdfReal,
                        double *pdfImag, GWKResampleWrkStruct *psWrkStruct);

static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
                                        double dfSrcX, double dfSrcY,
                                        double *pdfDensity, double *pdfReal,
                                        double *pdfImag,
                                        GWKResampleWrkStruct *psWrkStruct);
3913 :
3914 397 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3915 : {
3916 397 : const int nXDist = (poWK->nXRadius + 1) * 2;
3917 397 : const int nYDist = (poWK->nYRadius + 1) * 2;
3918 :
3919 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3920 397 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3921 :
3922 : // Alloc space for saved X weights.
3923 397 : psWrkStruct->padfWeightsX =
3924 397 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3925 397 : psWrkStruct->pabCalcX =
3926 397 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3927 :
3928 397 : psWrkStruct->padfWeightsY =
3929 397 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3930 397 : psWrkStruct->iLastSrcX = -10;
3931 397 : psWrkStruct->iLastSrcY = -10;
3932 397 : psWrkStruct->dfLastDeltaX = -10;
3933 397 : psWrkStruct->dfLastDeltaY = -10;
3934 :
3935 : // Alloc space for saving a row of pixels.
3936 397 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3937 363 : poWK->panUnifiedSrcValid == nullptr &&
3938 340 : poWK->papanBandSrcValid == nullptr)
3939 : {
3940 340 : psWrkStruct->padfRowDensity = nullptr;
3941 : }
3942 : else
3943 : {
3944 57 : psWrkStruct->padfRowDensity =
3945 57 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3946 : }
3947 397 : psWrkStruct->padfRowReal =
3948 397 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3949 397 : psWrkStruct->padfRowImag =
3950 397 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3951 :
3952 397 : if (poWK->eResample == GRA_Lanczos)
3953 : {
3954 63 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3955 :
3956 63 : if (poWK->dfXScale < 1)
3957 : {
3958 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3959 4 : psWrkStruct->dfSinPiXScaleOver3 =
3960 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3961 4 : psWrkStruct->dfCosPiXScaleOver3);
3962 : // "Naive":
3963 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3964 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3965 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3966 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3967 4 : psWrkStruct->dfCosPiXScaleOver3 -
3968 4 : 3) *
3969 4 : psWrkStruct->dfCosPiXScaleOver3;
3970 4 : psWrkStruct->dfSinPiXScale = sqrt(
3971 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3972 : }
3973 :
3974 63 : if (poWK->dfYScale < 1)
3975 : {
3976 13 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3977 13 : psWrkStruct->dfSinPiYScaleOver3 =
3978 13 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3979 13 : psWrkStruct->dfCosPiYScaleOver3);
3980 : // "Naive":
3981 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3982 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3983 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3984 13 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3985 13 : psWrkStruct->dfCosPiYScaleOver3 -
3986 13 : 3) *
3987 13 : psWrkStruct->dfCosPiYScaleOver3;
3988 13 : psWrkStruct->dfSinPiYScale = sqrt(
3989 13 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3990 : }
3991 : }
3992 : else
3993 334 : psWrkStruct->pfnGWKResample = GWKResample;
3994 :
3995 397 : return psWrkStruct;
3996 : }
3997 :
3998 : /************************************************************************/
3999 : /* GWKResampleDeleteWrkStruct() */
4000 : /************************************************************************/
4001 :
4002 397 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
4003 : {
4004 397 : CPLFree(psWrkStruct->padfWeightsX);
4005 397 : CPLFree(psWrkStruct->padfWeightsY);
4006 397 : CPLFree(psWrkStruct->pabCalcX);
4007 397 : CPLFree(psWrkStruct->padfRowDensity);
4008 397 : CPLFree(psWrkStruct->padfRowReal);
4009 397 : CPLFree(psWrkStruct->padfRowImag);
4010 397 : CPLFree(psWrkStruct);
4011 397 : }
4012 :
4013 : /************************************************************************/
4014 : /* GWKResample() */
4015 : /************************************************************************/
4016 :
4017 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4018 : double dfSrcY, double *pdfDensity, double *pdfReal,
4019 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
4020 :
4021 : {
4022 : // Save as local variables to avoid following pointers in loops.
4023 239383 : const int nSrcXSize = poWK->nSrcXSize;
4024 239383 : const int nSrcYSize = poWK->nSrcYSize;
4025 :
4026 239383 : double dfAccumulatorReal = 0.0;
4027 239383 : double dfAccumulatorImag = 0.0;
4028 239383 : double dfAccumulatorDensity = 0.0;
4029 239383 : double dfAccumulatorWeight = 0.0;
4030 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4031 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4032 239383 : const GPtrDiff_t iSrcOffset =
4033 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4034 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4035 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4036 :
4037 239383 : const double dfXScale = poWK->dfXScale;
4038 239383 : const double dfYScale = poWK->dfYScale;
4039 :
4040 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
4041 :
4042 : // Space for saved X weights.
4043 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
4044 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
4045 :
4046 : // Space for saving a row of pixels.
4047 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
4048 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
4049 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
4050 :
4051 : // Mark as needing calculation (don't calculate the weights yet,
4052 : // because a mask may render it unnecessary).
4053 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
4054 :
4055 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
4056 239383 : CPLAssert(pfnGetWeight);
4057 :
4058 : // Skip sampling over edge of image.
4059 239383 : int j = poWK->nFiltInitY;
4060 239383 : int jMax = poWK->nYRadius;
4061 239383 : if (iSrcY + j < 0)
4062 566 : j = -iSrcY;
4063 239383 : if (iSrcY + jMax >= nSrcYSize)
4064 662 : jMax = nSrcYSize - iSrcY - 1;
4065 :
4066 239383 : int iMin = poWK->nFiltInitX;
4067 239383 : int iMax = poWK->nXRadius;
4068 239383 : if (iSrcX + iMin < 0)
4069 566 : iMin = -iSrcX;
4070 239383 : if (iSrcX + iMax >= nSrcXSize)
4071 659 : iMax = nSrcXSize - iSrcX - 1;
4072 :
4073 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
4074 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
4075 :
4076 239383 : GPtrDiff_t iRowOffset =
4077 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
4078 :
4079 : // Loop over pixel rows in the kernel.
4080 1445930 : for (; j <= jMax; ++j)
4081 : {
4082 1206540 : iRowOffset += nSrcXSize;
4083 :
4084 : // Get pixel values.
4085 : // We can potentially read extra elements after the "normal" end of the
4086 : // source arrays, but the contract of papabySrcImage[iBand],
4087 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4088 : // is to have WARP_EXTRA_ELTS reserved at their end.
4089 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4090 : padfRowDensity, padfRowReal, padfRowImag))
4091 72 : continue;
4092 :
4093 : // Calculate the Y weight.
4094 : double dfWeight1 = (bYScaleBelow1)
4095 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
4096 1600 : : pfnGetWeight(j - dfDeltaY);
4097 :
4098 : // Iterate over pixels in row.
4099 1206470 : double dfAccumulatorRealLocal = 0.0;
4100 1206470 : double dfAccumulatorImagLocal = 0.0;
4101 1206470 : double dfAccumulatorDensityLocal = 0.0;
4102 1206470 : double dfAccumulatorWeightLocal = 0.0;
4103 :
4104 7317420 : for (int i = iMin; i <= iMax; ++i)
4105 : {
4106 : // Skip sampling if pixel has zero density.
4107 6110940 : if (padfRowDensity != nullptr &&
4108 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4109 546 : continue;
4110 :
4111 6110400 : double dfWeight2 = 0.0;
4112 :
4113 : // Make or use a cached set of weights for this row.
4114 6110400 : if (pabCalcX[i - iMin])
4115 : {
4116 : // Use saved weight value instead of recomputing it.
4117 4903920 : dfWeight2 = padfWeightsX[i - iMin];
4118 : }
4119 : else
4120 : {
4121 : // Calculate & save the X weight.
4122 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
4123 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
4124 1600 : : pfnGetWeight(i - dfDeltaX);
4125 :
4126 1206480 : pabCalcX[i - iMin] = true;
4127 : }
4128 :
4129 : // Accumulate!
4130 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
4131 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
4132 6110400 : if (padfRowDensity != nullptr)
4133 76731 : dfAccumulatorDensityLocal +=
4134 76731 : padfRowDensity[i - iMin] * dfWeight2;
4135 6110400 : dfAccumulatorWeightLocal += dfWeight2;
4136 : }
4137 :
4138 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
4139 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
4140 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
4141 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
4142 : }
4143 :
4144 239383 : if (dfAccumulatorWeight < 0.000001 ||
4145 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
4146 : {
4147 0 : *pdfDensity = 0.0;
4148 0 : return false;
4149 : }
4150 :
4151 : // Calculate the output taking into account weighting.
4152 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4153 : {
4154 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
4155 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
4156 239380 : if (padfRowDensity != nullptr)
4157 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
4158 : else
4159 237496 : *pdfDensity = 1.0;
4160 : }
4161 : else
4162 : {
4163 3 : *pdfReal = dfAccumulatorReal;
4164 3 : *pdfImag = dfAccumulatorImag;
4165 3 : if (padfRowDensity != nullptr)
4166 3 : *pdfDensity = dfAccumulatorDensity;
4167 : else
4168 0 : *pdfDensity = 1.0;
4169 : }
4170 :
4171 239383 : return true;
4172 : }
4173 :
4174 : /************************************************************************/
4175 : /* GWKResampleOptimizedLanczos() */
4176 : /************************************************************************/
4177 :
4178 617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
4179 : double dfSrcX, double dfSrcY,
4180 : double *pdfDensity, double *pdfReal,
4181 : double *pdfImag,
4182 : GWKResampleWrkStruct *psWrkStruct)
4183 :
4184 : {
4185 : // Save as local variables to avoid following pointers in loops.
4186 617144 : const int nSrcXSize = poWK->nSrcXSize;
4187 617144 : const int nSrcYSize = poWK->nSrcYSize;
4188 :
4189 617144 : double dfAccumulatorReal = 0.0;
4190 617144 : double dfAccumulatorImag = 0.0;
4191 617144 : double dfAccumulatorDensity = 0.0;
4192 617144 : double dfAccumulatorWeight = 0.0;
4193 617144 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4194 617144 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4195 617144 : const GPtrDiff_t iSrcOffset =
4196 617144 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4197 617144 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4198 617144 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4199 :
4200 617144 : const double dfXScale = poWK->dfXScale;
4201 617144 : const double dfYScale = poWK->dfYScale;
4202 :
4203 : // Space for saved X weights.
4204 617144 : double *const padfWeightsXShifted =
4205 617144 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
4206 617144 : double *const padfWeightsYShifted =
4207 617144 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
4208 :
4209 : // Space for saving a row of pixels.
4210 617144 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
4211 617144 : double *const padfRowReal = psWrkStruct->padfRowReal;
4212 617144 : double *const padfRowImag = psWrkStruct->padfRowImag;
4213 :
4214 : // Skip sampling over edge of image.
4215 617144 : int jMin = poWK->nFiltInitY;
4216 617144 : int jMax = poWK->nYRadius;
4217 617144 : if (iSrcY + jMin < 0)
4218 16572 : jMin = -iSrcY;
4219 617144 : if (iSrcY + jMax >= nSrcYSize)
4220 5782 : jMax = nSrcYSize - iSrcY - 1;
4221 :
4222 617144 : int iMin = poWK->nFiltInitX;
4223 617144 : int iMax = poWK->nXRadius;
4224 617144 : if (iSrcX + iMin < 0)
4225 15797 : iMin = -iSrcX;
4226 617144 : if (iSrcX + iMax >= nSrcXSize)
4227 4657 : iMax = nSrcXSize - iSrcX - 1;
4228 :
4229 617144 : if (dfXScale < 1.0)
4230 : {
4231 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
4232 200179 : iMin++;
4233 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
4234 0 : iMax--;
4235 :
4236 : // clang-format off
4237 : /*
4238 : Naive version:
4239 : for (int i = iMin; i <= iMax; ++i)
4240 : {
4241 : psWrkStruct->padfWeightsXShifted[i] =
4242 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
4243 : }
4244 :
4245 : but given that:
4246 :
4247 : GWKLanczosSinc(x):
4248 : if (dfX == 0.0)
4249 : return 1.0;
4250 :
4251 : const double dfPIX = M_PI * dfX;
4252 : const double dfPIXoverR = dfPIX / 3;
4253 : const double dfPIX2overR = dfPIX * dfPIXoverR;
4254 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
4255 :
4256 : and
4257 : sin (a + b) = sin a cos b + cos a sin b.
4258 : cos (a + b) = cos a cos b - sin a sin b.
4259 :
4260 : we can skip any sin() computation within the loop
4261 : */
4262 : // clang-format on
4263 :
4264 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
4265 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4266 : {
4267 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4268 :
4269 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4270 71790 : double dfCosOver3 = cos(dfPIXover3);
4271 71790 : double dfSinOver3 = sin(dfPIXover3);
4272 :
4273 : // "Naive":
4274 : // double dfSin = sin( M_PI * dfX );
4275 : // double dfCos = cos( M_PI * dfX );
4276 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4277 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4278 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4279 :
4280 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4281 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4282 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4283 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4284 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4285 71790 : padfWeightsXShifted[iMin] =
4286 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4287 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
4288 : {
4289 1564690 : dfX += dfXScale;
4290 1564690 : const double dfNewSin =
4291 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4292 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4293 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
4294 1564690 : padfWeightsXShifted[i] =
4295 : dfX == 0
4296 1564690 : ? 1.0
4297 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4298 1564690 : const double dfNewCos =
4299 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4300 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4301 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
4302 1564690 : dfSin = dfNewSin;
4303 1564690 : dfCos = dfNewCos;
4304 1564690 : dfSinOver3 = dfNewSinOver3;
4305 1564690 : dfCosOver3 = dfNewCosOver3;
4306 : }
4307 :
4308 71790 : psWrkStruct->iLastSrcX = iSrcX;
4309 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4310 : }
4311 : }
4312 : else
4313 : {
4314 757542 : while (iMin - dfDeltaX < -3.0)
4315 343260 : iMin++;
4316 414282 : while (iMax - dfDeltaX > 3.0)
4317 0 : iMax--;
4318 :
4319 414282 : if (iSrcX != psWrkStruct->iLastSrcX ||
4320 209580 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4321 : {
4322 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4323 : // following trigonometric formulas.
4324 :
4325 : // TODO(schwehr): Move this somewhere where it can be rendered at
4326 : // LaTeX.
4327 : // clang-format off
4328 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4329 : // cos(M_PI * dfBase) * sin(M_PI * k)
4330 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4331 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4332 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4333 :
4334 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4335 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4336 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4337 : // clang-format on
4338 :
4339 414282 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4340 414282 : const double dfSin2PIDeltaXOver3 =
4341 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4342 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4343 414282 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4344 414282 : const double dfSinPIDeltaX =
4345 414282 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4346 414282 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4347 414282 : const double dfInvPI2Over3xSinPIDeltaX =
4348 : dfInvPI2Over3 * dfSinPIDeltaX;
4349 414282 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4350 414282 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4351 414282 : const double dfSinPIOver3 = 0.8660254037844386;
4352 414282 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4353 414282 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4354 : const double padfCst[] = {
4355 414282 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4356 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4357 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4358 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4359 414282 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4360 :
4361 2936860 : for (int i = iMin; i <= iMax; ++i)
4362 : {
4363 2522570 : const double dfX = i - dfDeltaX;
4364 2522570 : if (dfX == 0.0)
4365 58282 : padfWeightsXShifted[i] = 1.0;
4366 : else
4367 2464290 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4368 : #if DEBUG_VERBOSE
4369 : // TODO(schwehr): AlmostEqual.
4370 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4371 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4372 : #endif
4373 : }
4374 :
4375 414282 : psWrkStruct->iLastSrcX = iSrcX;
4376 414282 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4377 : }
4378 : }
4379 :
4380 617144 : if (dfYScale < 1.0)
4381 : {
4382 406666 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4383 203804 : jMin++;
4384 206462 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4385 3600 : jMax--;
4386 :
4387 : // clang-format off
4388 : /*
4389 : Naive version:
4390 : for (int j = jMin; j <= jMax; ++j)
4391 : {
4392 : padfWeightsYShifted[j] =
4393 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4394 : }
4395 : */
4396 : // clang-format on
4397 :
4398 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4399 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4400 : {
4401 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4402 :
4403 383 : double dfPIYover3 = M_PI / 3 * dfY;
4404 383 : double dfCosOver3 = cos(dfPIYover3);
4405 383 : double dfSinOver3 = sin(dfPIYover3);
4406 :
4407 : // "Naive":
4408 : // double dfSin = sin( M_PI * dfY );
4409 : // double dfCos = cos( M_PI * dfY );
4410 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4411 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4412 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4413 :
4414 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4415 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4416 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4417 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4418 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4419 383 : padfWeightsYShifted[jMin] =
4420 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4421 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4422 : {
4423 6935 : dfY += dfYScale;
4424 6935 : const double dfNewSin =
4425 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4426 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4427 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4428 6935 : padfWeightsYShifted[j] =
4429 : dfY == 0
4430 6935 : ? 1.0
4431 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4432 6935 : const double dfNewCos =
4433 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4434 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4435 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4436 6935 : dfSin = dfNewSin;
4437 6935 : dfCos = dfNewCos;
4438 6935 : dfSinOver3 = dfNewSinOver3;
4439 6935 : dfCosOver3 = dfNewCosOver3;
4440 : }
4441 :
4442 383 : psWrkStruct->iLastSrcY = iSrcY;
4443 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4444 : }
4445 : }
4446 : else
4447 : {
4448 681542 : while (jMin - dfDeltaY < -3.0)
4449 267260 : jMin++;
4450 414282 : while (jMax - dfDeltaY > 3.0)
4451 0 : jMax--;
4452 :
4453 414282 : if (iSrcY != psWrkStruct->iLastSrcY ||
4454 413631 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4455 : {
4456 1132 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4457 1132 : const double dfSin2PIDeltaYOver3 =
4458 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4459 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4460 1132 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4461 1132 : const double dfSinPIDeltaY =
4462 1132 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4463 1132 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4464 1132 : const double dfInvPI2Over3xSinPIDeltaY =
4465 : dfInvPI2Over3 * dfSinPIDeltaY;
4466 1132 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4467 1132 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4468 1132 : const double dfSinPIOver3 = 0.8660254037844386;
4469 1132 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4470 1132 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4471 : const double padfCst[] = {
4472 1132 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4473 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4474 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4475 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4476 1132 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4477 :
4478 7925 : for (int j = jMin; j <= jMax; ++j)
4479 : {
4480 6793 : const double dfY = j - dfDeltaY;
4481 6793 : if (dfY == 0.0)
4482 468 : padfWeightsYShifted[j] = 1.0;
4483 : else
4484 6325 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4485 : #if DEBUG_VERBOSE
4486 : // TODO(schwehr): AlmostEqual.
4487 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4488 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4489 : #endif
4490 : }
4491 :
4492 1132 : psWrkStruct->iLastSrcY = iSrcY;
4493 1132 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4494 : }
4495 : }
4496 :
4497 : // If we have no density information, we can simply compute the
4498 : // accumulated weight.
4499 617144 : if (padfRowDensity == nullptr)
4500 : {
4501 617144 : double dfRowAccWeight = 0.0;
4502 7903490 : for (int i = iMin; i <= iMax; ++i)
4503 : {
4504 7286350 : dfRowAccWeight += padfWeightsXShifted[i];
4505 : }
4506 617144 : double dfColAccWeight = 0.0;
4507 7961240 : for (int j = jMin; j <= jMax; ++j)
4508 : {
4509 7344100 : dfColAccWeight += padfWeightsYShifted[j];
4510 : }
4511 617144 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4512 : }
4513 :
4514 : // Loop over pixel rows in the kernel.
4515 :
4516 617144 : if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
4517 616524 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4518 : !padfRowDensity)
4519 : {
4520 : // Optimization for Byte case without any masking/alpha
4521 :
4522 616524 : if (dfAccumulatorWeight < 0.000001)
4523 : {
4524 0 : *pdfDensity = 0.0;
4525 0 : return false;
4526 : }
4527 :
4528 616524 : const GByte *pSrc =
4529 616524 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4530 616524 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4531 :
4532 : #if defined(USE_SSE2)
4533 616524 : if (iMax - iMin + 1 == 6)
4534 : {
4535 : // This is just an optimized version of the general case in
4536 : // the else clause.
4537 :
4538 346854 : pSrc += iMin;
4539 346854 : int j = jMin;
4540 : const auto fourXWeights =
4541 346854 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4542 :
4543 : // Process 2 lines at the same time.
4544 1375860 : for (; j < jMax; j += 2)
4545 : {
4546 : const XMMReg4Double v_acc =
4547 1029000 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4548 : const XMMReg4Double v_acc2 =
4549 1029000 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4550 1029000 : const double dfRowAcc = v_acc.GetHorizSum();
4551 1029000 : const double dfRowAccEnd =
4552 1029000 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4553 1029000 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4554 1029000 : dfAccumulatorReal +=
4555 1029000 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4556 1029000 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4557 1029000 : const double dfRowAcc2End =
4558 1029000 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4559 1029000 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4560 1029000 : dfAccumulatorReal +=
4561 1029000 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4562 1029000 : pSrc += 2 * nSrcXSize;
4563 : }
4564 346854 : if (j == jMax)
4565 : {
4566 : // Process last line if there's an odd number of them.
4567 :
4568 : const XMMReg4Double v_acc =
4569 88077 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4570 88077 : const double dfRowAcc = v_acc.GetHorizSum();
4571 88077 : const double dfRowAccEnd =
4572 88077 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4573 88077 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4574 88077 : dfAccumulatorReal +=
4575 88077 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4576 : }
4577 : }
4578 : else
4579 : #endif
4580 : {
4581 5464740 : for (int j = jMin; j <= jMax; ++j)
4582 : {
4583 5195070 : int i = iMin;
4584 5195070 : double dfRowAcc1 = 0.0;
4585 5195070 : double dfRowAcc2 = 0.0;
4586 : // A bit of loop unrolling
4587 62755200 : for (; i < iMax; i += 2)
4588 : {
4589 57560100 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4590 57560100 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4591 : }
4592 5195070 : if (i == iMax)
4593 : {
4594 : // Process last column if there's an odd number of them.
4595 427335 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4596 : }
4597 :
4598 5195070 : dfAccumulatorReal +=
4599 5195070 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4600 5195070 : pSrc += nSrcXSize;
4601 : }
4602 : }
4603 :
4604 : // Calculate the output taking into account weighting.
4605 616524 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4606 : {
4607 562318 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4608 562318 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4609 562318 : *pdfDensity = 1.0;
4610 : }
4611 : else
4612 : {
4613 54206 : *pdfReal = dfAccumulatorReal;
4614 54206 : *pdfDensity = 1.0;
4615 : }
4616 :
4617 616524 : return true;
4618 : }
4619 :
4620 620 : GPtrDiff_t iRowOffset =
4621 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4622 :
4623 620 : int nCountValid = 0;
4624 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4625 :
4626 3560 : for (int j = jMin; j <= jMax; ++j)
4627 : {
4628 2940 : iRowOffset += nSrcXSize;
4629 :
4630 : // Get pixel values.
4631 : // We can potentially read extra elements after the "normal" end of the
4632 : // source arrays, but the contract of papabySrcImage[iBand],
4633 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4634 : // is to have WARP_EXTRA_ELTS reserved at their end.
4635 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4636 : padfRowDensity, padfRowReal, padfRowImag))
4637 0 : continue;
4638 :
4639 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4640 :
4641 : // Iterate over pixels in row.
4642 2940 : if (padfRowDensity != nullptr)
4643 : {
4644 0 : for (int i = iMin; i <= iMax; ++i)
4645 : {
4646 : // Skip sampling if pixel has zero density.
4647 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4648 0 : continue;
4649 :
4650 0 : nCountValid++;
4651 :
4652 : // Use a cached set of weights for this row.
4653 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4654 :
4655 : // Accumulate!
4656 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4657 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4658 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4659 0 : dfAccumulatorWeight += dfWeight2;
4660 : }
4661 : }
4662 2940 : else if (bIsNonComplex)
4663 : {
4664 1764 : double dfRowAccReal = 0.0;
4665 10560 : for (int i = iMin; i <= iMax; ++i)
4666 : {
4667 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4668 :
4669 : // Accumulate!
4670 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4671 : }
4672 :
4673 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4674 : }
4675 : else
4676 : {
4677 1176 : double dfRowAccReal = 0.0;
4678 1176 : double dfRowAccImag = 0.0;
4679 7040 : for (int i = iMin; i <= iMax; ++i)
4680 : {
4681 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4682 :
4683 : // Accumulate!
4684 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4685 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4686 : }
4687 :
4688 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4689 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4690 : }
4691 : }
4692 :
4693 620 : if (dfAccumulatorWeight < 0.000001 ||
4694 0 : (padfRowDensity != nullptr &&
4695 0 : (dfAccumulatorDensity < 0.000001 ||
4696 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4697 : {
4698 0 : *pdfDensity = 0.0;
4699 0 : return false;
4700 : }
4701 :
4702 : // Calculate the output taking into account weighting.
4703 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4704 : {
4705 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4706 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4707 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4708 0 : if (padfRowDensity != nullptr)
4709 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4710 : else
4711 0 : *pdfDensity = 1.0;
4712 : }
4713 : else
4714 : {
4715 620 : *pdfReal = dfAccumulatorReal;
4716 620 : *pdfImag = dfAccumulatorImag;
4717 620 : if (padfRowDensity != nullptr)
4718 0 : *pdfDensity = dfAccumulatorDensity;
4719 : else
4720 620 : *pdfDensity = 1.0;
4721 : }
4722 :
4723 620 : return true;
4724 : }
4725 :
4726 : /************************************************************************/
4727 : /* GWKComputeWeights() */
4728 : /************************************************************************/
4729 :
// Compute the horizontal and vertical filter weights for one kernel window.
//
// eResample selects the filter functions from apfGWKFilter (one value at a
// time) and apfGWKFilter4Values (four values at a time).
// Columns iMin..iMax (inclusive) are evaluated at (i - dfDeltaX) * dfXScale and
// rows jMin..jMax (inclusive) at (j - dfDeltaY) * dfYScale.
// padfWeightsHorizontal / padfWeightsVertical are written starting at index 0,
// i.e. index 0 corresponds to iMin / jMin.
// dfInvWeights receives 1 / (sum of horizontal weights * sum of vertical
// weights).
4730 1222150 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4731 : double dfDeltaX, double dfXScale, int jMin,
4732 : int jMax, double dfDeltaY, double dfYScale,
4733 : double *padfWeightsHorizontal,
4734 : double *padfWeightsVertical, double &dfInvWeights)
4735 : {
4736 :
4737 1222150 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4738 1222150 : CPLAssert(pfnGetWeight);
4739 1222150 : const FilterFunc4ValuesType pfnGetWeight4Values =
4740 1222150 : apfGWKFilter4Values[eResample];
4741 1222150 : CPLAssert(pfnGetWeight4Values);
4742 :
4743 1222150 : int i = iMin; // Used after for.
4744 1222150 : int iC = 0; // Used after for.
4745 : // Not zero, but as close as possible to it, to avoid potential division by
4746 : // zero at end of function
4747 1222150 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
// Chunks of 4 columns: "i + 2 < iMax" guarantees that i + 3 <= iMax.
// The four filter arguments are stored in the output array and handed to
// pfnGetWeight4Values, whose return value is added to the running sum.
// NOTE(review): presumably pfnGetWeight4Values replaces the four arguments in
// place by their weights (the callers later read these slots as weights) and
// returns their sum -- confirm against the filter implementations.
4748 2990960 : for (; i + 2 < iMax; i += 4, iC += 4)
4749 : {
4750 1768820 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4751 1768820 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4752 1768820 : padfWeightsHorizontal[iC + 2] =
4753 1768820 : padfWeightsHorizontal[iC + 1] + dfXScale;
4754 1768820 : padfWeightsHorizontal[iC + 3] =
4755 1768820 : padfWeightsHorizontal[iC + 2] + dfXScale;
4756 1768820 : dfAccumulatorWeightHorizontal +=
4757 1768820 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4758 : }
// Remaining columns (at most 3), one at a time.
4759 1280860 : for (; i <= iMax; ++i, ++iC)
4760 : {
4761 58719 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4762 58719 : padfWeightsHorizontal[iC] = dfWeight;
4763 58719 : dfAccumulatorWeightHorizontal += dfWeight;
4764 : }
4765 :
// Same scheme for the rows.
4766 1222150 : int j = jMin; // Used after for.
4767 1222150 : int jC = 0; // Used after for.
4768 : // Not zero, but as close as possible to it, to avoid potential division by
4769 : // zero at end of function
4770 1222150 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4771 2984620 : for (; j + 2 < jMax; j += 4, jC += 4)
4772 : {
4773 1762470 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4774 1762470 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4775 1762470 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4776 1762470 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4777 1762470 : dfAccumulatorWeightVertical +=
4778 1762470 : pfnGetWeight4Values(padfWeightsVertical + jC);
4779 : }
4780 1288930 : for (; j <= jMax; ++j, ++jC)
4781 : {
4782 66786 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4783 66786 : padfWeightsVertical[jC] = dfWeight;
4784 66786 : dfAccumulatorWeightVertical += dfWeight;
4785 : }
4786 :
// Normalisation factor applied by the callers to the weighted pixel sum.
4787 1222150 : dfInvWeights =
4788 1222150 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4789 1222150 : }
4790 :
4791 : /************************************************************************/
4792 : /* GWKResampleNoMasksT() */
4793 : /************************************************************************/
4794 :
// Generic (scalar) kernel resampling of one band at source position
// (dfSrcX, dfSrcY), for source pixels of type T.
//
// The kernel window is centred on the pixel at (floor(dfSrcX - 0.5),
// floor(dfSrcY - 0.5)), spans 1 - radius .. radius in each direction, and is
// clipped to the source raster extent.
// The weights and dfInvWeights are (re)computed through GWKComputeWeights()
// only when iBand == 0; for other bands the values already present in
// padfWeightsHorizontal, padfWeightsVertical and dfInvWeights are used as is.
// NOTE(review): this presumably relies on the caller processing band 0 first
// for each pixel -- confirm against the caller.
// The weighted sum multiplied by dfInvWeights is passed through
// GWKClampValueT<T>() and stored in *pValue.
// Returns true, or the result of GWKBilinearResampleNoMasks4SampleT() when the
// coordinates or the image size are rejected.
4795 : template <class T>
4796 : static bool
4797 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4798 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4799 : double *padfWeightsVertical, double &dfInvWeights)
4800 :
4801 : {
4802 : // Commonly used; save locally.
4803 : const int nSrcXSize = poWK->nSrcXSize;
4804 : const int nSrcYSize = poWK->nSrcYSize;
4805 :
4806 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4807 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4808 : const GPtrDiff_t iSrcOffset =
4809 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4810 :
4811 : const int nXRadius = poWK->nXRadius;
4812 : const int nYRadius = poWK->nYRadius;
4813 :
4814 : // Politely refuse to process invalid coordinates or obscenely small image.
4815 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4816 : nYRadius > nSrcYSize)
4817 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4818 : pValue);
4819 :
4820 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
// Sub-pixel offset of the requested position from the centre pixel.
4821 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4822 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4823 :
// The scale passed to the filter is capped at 1.0.
4824 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4825 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4826 :
// Window bounds relative to (iSrcX, iSrcY), clipped to the source raster.
4827 : int iMin = 1 - nXRadius;
4828 : if (iSrcX + iMin < 0)
4829 : iMin = -iSrcX;
4830 : int iMax = nXRadius;
4831 : if (iSrcX + iMax >= nSrcXSize - 1)
4832 : iMax = nSrcXSize - 1 - iSrcX;
4833 :
4834 : int jMin = 1 - nYRadius;
4835 : if (iSrcY + jMin < 0)
4836 : jMin = -iSrcY;
4837 : int jMax = nYRadius;
4838 : if (iSrcY + jMax >= nSrcYSize - 1)
4839 : jMax = nSrcYSize - 1 - iSrcY;
4840 :
4841 : if (iBand == 0)
4842 : {
4843 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4844 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4845 : padfWeightsVertical, dfInvWeights);
4846 : }
4847 :
4848 : // Loop over all rows in the kernel.
4849 : double dfAccumulator = 0.0;
4850 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4851 : {
4852 : const GPtrDiff_t iSampJ =
4853 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4854 :
4855 : // Loop over all pixels in the row.
4856 : double dfAccumulatorLocal = 0.0;
4857 : double dfAccumulatorLocal2 = 0.0;
4858 : int iC = 0;
4859 : int i = iMin;
4860 : // Process by chunk of 4 cols.
4861 : for (; i + 2 < iMax; i += 4, iC += 4)
4862 : {
4863 : // Retrieve the pixel & accumulate.
4864 : dfAccumulatorLocal +=
4865 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4866 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4867 : padfWeightsHorizontal[iC + 1];
4868 : dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4869 : padfWeightsHorizontal[iC + 2];
4870 : dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4871 : padfWeightsHorizontal[iC + 3];
4872 : }
4873 : dfAccumulatorLocal += dfAccumulatorLocal2;
// At most 3 columns remain here: first a pair (if any), then a single one.
4874 : if (i < iMax)
4875 : {
4876 : dfAccumulatorLocal +=
4877 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4878 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4879 : padfWeightsHorizontal[iC + 1];
4880 : i += 2;
4881 : iC += 2;
4882 : }
4883 : if (i == iMax)
4884 : {
4885 : dfAccumulatorLocal +=
4886 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4887 : }
4888 :
4889 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4890 : }
4891 :
4892 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4893 :
4894 : return true;
4895 : }
4896 :
4897 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4898 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4899 : #if defined(USE_SSE2)
4900 :
4901 : /************************************************************************/
4902 : /* GWKResampleNoMasks_SSE2_T() */
4903 : /************************************************************************/
4904 :
// Vectorised counterpart of the generic GWKResampleNoMasksT(), compiled only
// when USE_SSE2 is defined.
//
// Parameters and return value have the same meaning as in the generic
// version: the kernel window around (dfSrcX, dfSrcY) is clipped to the source
// raster, weights are (re)computed through GWKComputeWeights() only when
// iBand == 0, and the weighted sum multiplied by dfInvWeights is stored in
// *pValue after GWKClampValueT<T>().
// Rows are processed four at a time, each with its own XMMReg4Double
// accumulator over chunks of 4 columns; leftover rows are processed one at a
// time.
4905 : template <class T>
4906 1775366 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4907 : double dfSrcX, double dfSrcY, T *pValue,
4908 : double *padfWeightsHorizontal,
4909 : double *padfWeightsVertical,
4910 : double &dfInvWeights)
4911 : {
4912 : // Commonly used; save locally.
4913 1775366 : const int nSrcXSize = poWK->nSrcXSize;
4914 1775366 : const int nSrcYSize = poWK->nSrcYSize;
4915 :
4916 1775366 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4917 1775366 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4918 1775366 : const GPtrDiff_t iSrcOffset =
4919 1775366 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4920 1775366 : const int nXRadius = poWK->nXRadius;
4921 1775366 : const int nYRadius = poWK->nYRadius;
4922 :
4923 : // Politely refuse to process invalid coordinates or obscenely small image.
4924 1775366 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4925 : nYRadius > nSrcYSize)
4926 3 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4927 3 : pValue);
4928 :
4929 1775364 : const T *pSrcBand =
4930 1775364 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4931 :
// Sub-pixel offset from the centre pixel; filter scale capped at 1.0.
4932 1775364 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4933 1775364 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4934 1775364 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4935 1775364 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4936 :
// Window bounds relative to (iSrcX, iSrcY), clipped to the source raster.
4937 1775364 : int iMin = 1 - nXRadius;
4938 1775364 : if (iSrcX + iMin < 0)
4939 22616 : iMin = -iSrcX;
4940 1775364 : int iMax = nXRadius;
4941 1775364 : if (iSrcX + iMax >= nSrcXSize - 1)
4942 9506 : iMax = nSrcXSize - 1 - iSrcX;
4943 :
4944 1775364 : int jMin = 1 - nYRadius;
4945 1775364 : if (iSrcY + jMin < 0)
4946 26049 : jMin = -iSrcY;
4947 1775364 : int jMax = nYRadius;
4948 1775364 : if (iSrcY + jMax >= nSrcYSize - 1)
4949 13135 : jMax = nSrcYSize - 1 - iSrcY;
4950 :
4951 1775364 : if (iBand == 0)
4952 : {
4953 1222146 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4954 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4955 : padfWeightsVertical, dfInvWeights);
4956 : }
4957 :
// Offset of the first kernel row; advanced together with j.
4958 1775364 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4959 : // Process by chunk of 4 rows.
4960 1775364 : int jC = 0;
4961 1775364 : int j = jMin;
4962 1775364 : double dfAccumulator = 0.0;
// "j + 2 < jMax" guarantees that rows j .. j + 3 are all inside the window.
4963 5023910 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4964 : {
4965 : // Loop over all pixels in the row.
4966 3248546 : int iC = 0;
4967 3248546 : int i = iMin;
4968 : // Process by chunk of 4 cols.
4969 3248546 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4970 3248546 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4971 3248546 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4972 3248546 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4973 11835082 : for (; i + 2 < iMax; i += 4, iC += 4)
4974 : {
4975 : // Retrieve the pixel & accumulate.
4976 8586546 : XMMReg4Double v_pixels_1 =
4977 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4978 8586546 : XMMReg4Double v_pixels_2 =
4979 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4980 8586546 : XMMReg4Double v_pixels_3 =
4981 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4982 8586546 : XMMReg4Double v_pixels_4 =
4983 8586546 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4984 :
// The same 4 horizontal weights apply to all four rows.
4985 8586546 : XMMReg4Double v_padfWeight =
4986 8586546 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4987 :
4988 8586546 : v_acc_1 += v_pixels_1 * v_padfWeight;
4989 8586546 : v_acc_2 += v_pixels_2 * v_padfWeight;
4990 8586546 : v_acc_3 += v_pixels_3 * v_padfWeight;
4991 8586546 : v_acc_4 += v_pixels_4 * v_padfWeight;
4992 : }
4993 :
// At most 3 columns remain: handle a pair with 2-lane registers first.
4994 3248546 : if (i < iMax)
4995 : {
4996 49932 : XMMReg2Double v_pixels_1 =
4997 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4998 49932 : XMMReg2Double v_pixels_2 =
4999 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
5000 49932 : XMMReg2Double v_pixels_3 =
5001 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
5002 49932 : XMMReg2Double v_pixels_4 =
5003 49932 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
5004 :
5005 49932 : XMMReg2Double v_padfWeight =
5006 49932 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
5007 :
5008 49932 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
5009 49932 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
5010 49932 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
5011 49932 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
5012 :
5013 49932 : i += 2;
5014 49932 : iC += 2;
5015 : }
5016 :
// Reduce each row accumulator to a scalar.
5017 3248546 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
5018 3248546 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
5019 3248546 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
5020 3248546 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
5021 :
// Last single column, if any, handled in scalar code.
5022 3248546 : if (i == iMax)
5023 : {
5024 27545 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
5025 27545 : padfWeightsHorizontal[iC];
5026 27545 : dfAccumulatorLocal_2 +=
5027 27545 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
5028 27545 : padfWeightsHorizontal[iC];
5029 27545 : dfAccumulatorLocal_3 +=
5030 27545 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
5031 27545 : padfWeightsHorizontal[iC];
5032 27545 : dfAccumulatorLocal_4 +=
5033 27545 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
5034 27545 : padfWeightsHorizontal[iC];
5035 : }
5036 :
5037 3248546 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
5038 3248546 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
5039 3248546 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
5040 3248546 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
5041 : }
// Remaining rows (at most 3), one at a time.
5042 1866210 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
5043 : {
5044 : // Loop over all pixels in the row.
5045 90850 : int iC = 0;
5046 90850 : int i = iMin;
5047 : // Process by chunk of 4 cols.
5048 90850 : XMMReg4Double v_acc = XMMReg4Double::Zero();
5049 243258 : for (; i + 2 < iMax; i += 4, iC += 4)
5050 : {
5051 : // Retrieve the pixel & accumulate.
5052 152408 : XMMReg4Double v_pixels =
5053 152408 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
5054 152408 : XMMReg4Double v_padfWeight =
5055 152408 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
5056 :
5057 152408 : v_acc += v_pixels * v_padfWeight;
5058 : }
5059 :
5060 90850 : double dfAccumulatorLocal = v_acc.GetHorizSum();
5061 :
// Column tail in scalar code: a pair first, then a single column.
5062 90850 : if (i < iMax)
5063 : {
5064 2090 : dfAccumulatorLocal +=
5065 2090 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
5066 2090 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
5067 2090 : padfWeightsHorizontal[iC + 1];
5068 2090 : i += 2;
5069 2090 : iC += 2;
5070 : }
5071 90850 : if (i == iMax)
5072 : {
5073 1839 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
5074 1839 : padfWeightsHorizontal[iC];
5075 : }
5076 :
5077 90850 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
5078 : }
5079 :
5080 1775364 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
5081 :
5082 1775364 : return true;
5083 : }
5084 :
5085 : /************************************************************************/
5086 : /* GWKResampleNoMasksT<GByte>() */
5087 : /************************************************************************/
5088 :
// Specialisation for GByte pixels (only when USE_SSE2 is defined): forwards
// all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its
// result.
5089 : template <>
5090 1270240 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
5091 : double dfSrcX, double dfSrcY, GByte *pValue,
5092 : double *padfWeightsHorizontal,
5093 : double *padfWeightsVertical,
5094 : double &dfInvWeights)
5095 : {
5096 1270240 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5097 : padfWeightsHorizontal, padfWeightsVertical,
5098 1270240 : dfInvWeights);
5099 : }
5100 :
5101 : /************************************************************************/
5102 : /* GWKResampleNoMasksT<GInt16>() */
5103 : /************************************************************************/
5104 :
// Specialisation for GInt16 pixels (only when USE_SSE2 is defined): forwards
// all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its
// result.
5105 : template <>
5106 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
5107 : double dfSrcX, double dfSrcY, GInt16 *pValue,
5108 : double *padfWeightsHorizontal,
5109 : double *padfWeightsVertical,
5110 : double &dfInvWeights)
5111 : {
5112 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5113 : padfWeightsHorizontal, padfWeightsVertical,
5114 252563 : dfInvWeights);
5115 : }
5116 :
5117 : /************************************************************************/
5118 : /* GWKResampleNoMasksT<GUInt16>() */
5119 : /************************************************************************/
5120 :
// Specialisation for GUInt16 pixels (only when USE_SSE2 is defined): forwards
// all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its
// result.
5121 : template <>
5122 250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
5123 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
5124 : double *padfWeightsHorizontal,
5125 : double *padfWeightsVertical,
5126 : double &dfInvWeights)
5127 : {
5128 250063 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5129 : padfWeightsHorizontal, padfWeightsVertical,
5130 250063 : dfInvWeights);
5131 : }
5132 :
5133 : /************************************************************************/
5134 : /* GWKResampleNoMasksT<float>() */
5135 : /************************************************************************/
5136 :
// Specialisation for float pixels (only when USE_SSE2 is defined): forwards
// all arguments unchanged to GWKResampleNoMasks_SSE2_T() and returns its
// result.
5137 : template <>
5138 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
5139 : double dfSrcX, double dfSrcY, float *pValue,
5140 : double *padfWeightsHorizontal,
5141 : double *padfWeightsVertical,
5142 : double &dfInvWeights)
5143 : {
5144 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5145 : padfWeightsHorizontal, padfWeightsVertical,
5146 2500 : dfInvWeights);
5147 : }
5148 :
5149 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5150 :
5151 : /************************************************************************/
5152 : /* GWKResampleNoMasksT<double>() */
5153 : /************************************************************************/
5154 :
// Specialisation for double pixels, compiled only when both USE_SSE2 and
// INSTANTIATE_FLOAT64_SSE2_IMPL are defined: forwards all arguments unchanged
// to GWKResampleNoMasks_SSE2_T() and returns its result.
5155 : template <>
5156 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
5157 : double dfSrcX, double dfSrcY, double *pValue,
5158 : double *padfWeightsHorizontal,
5159 : double *padfWeightsVertical,
5160 : double &dfInvWeights)
5161 : {
5162 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5163 : padfWeightsHorizontal, padfWeightsVertical,
5164 : dfInvWeights);
5165 : }
5166 :
5167 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
5168 :
5169 : #endif /* defined(USE_SSE2) */
5170 :
5171 : /************************************************************************/
5172 : /* GWKRoundSourceCoordinates() */
5173 : /************************************************************************/
5174 :
5175 1000 : static void GWKRoundSourceCoordinates(
5176 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
5177 : double dfSrcCoordPrecision, double dfErrorThreshold,
5178 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
5179 : double dfDstY)
5180 : {
5181 1000 : double dfPct = 0.8;
5182 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
5183 : {
5184 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
5185 : }
5186 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
5187 :
5188 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5189 : {
5190 500000 : const double dfXBefore = padfX[iDstX];
5191 500000 : const double dfYBefore = padfY[iDstX];
5192 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5193 : dfSrcCoordPrecision;
5194 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5195 : dfSrcCoordPrecision;
5196 :
5197 : // If we are in an uncertainty zone, go to non-approximated
5198 : // transformation.
5199 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
5200 : // be at least 10 times greater than the approximation error.
5201 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
5202 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
5203 : {
5204 180090 : padfX[iDstX] = iDstX + dfDstXOff;
5205 180090 : padfY[iDstX] = dfDstY;
5206 180090 : padfZ[iDstX] = 0.0;
5207 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
5208 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
5209 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5210 : dfSrcCoordPrecision;
5211 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5212 : dfSrcCoordPrecision;
5213 : }
5214 : }
5215 1000 : }
5216 :
5217 : /************************************************************************/
5218 : /* GWKCheckAndComputeSrcOffsets() */
5219 : /************************************************************************/
5220 : static CPL_INLINE bool
5221 152627000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5222 : int _iDstY, double *_padfX, double *_padfY,
5223 : int _nSrcXSize, int _nSrcYSize,
5224 : GPtrDiff_t &iSrcOffset)
5225 : {
5226 152627000 : const GDALWarpKernel *_poWK = psJob->poWK;
5227 152832000 : for (int iTry = 0; iTry < 2; ++iTry)
5228 : {
5229 152832000 : if (iTry == 1)
5230 : {
5231 : // If the source coordinate is slightly outside of the source raster
5232 : // retry to transform it alone, so that the exact coordinate
5233 : // transformer is used.
5234 :
5235 205552 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5236 205552 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5237 205552 : double dfZ = 0;
5238 205552 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5239 205552 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5240 205552 : _pabSuccess + _iDstX);
5241 : }
5242 152832000 : if (!_pabSuccess[_iDstX])
5243 3614790 : return false;
5244 :
5245 : // If this happens this is likely the symptom of a bug somewhere.
5246 149218000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5247 : {
5248 : static bool bNanCoordFound = false;
5249 0 : if (!bNanCoordFound)
5250 : {
5251 0 : CPLDebug("WARP",
5252 : "GWKCheckAndComputeSrcOffsets(): "
5253 : "NaN coordinate found on point %d.",
5254 : _iDstX);
5255 0 : bNanCoordFound = true;
5256 : }
5257 0 : return false;
5258 : }
5259 :
5260 : /* --------------------------------------------------------------------
5261 : */
5262 : /* Figure out what pixel we want in our source raster, and skip */
5263 : /* further processing if it is well off the source image. */
5264 : /* --------------------------------------------------------------------
5265 : */
5266 : /* We test against the value before casting to avoid the */
5267 : /* problem of asymmetric truncation effects around zero. That is */
5268 : /* -0.5 will be 0 when cast to an int. */
5269 149218000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5270 : {
5271 : // If the source coordinate is slightly outside of the source raster
5272 : // retry to transform it alone, so that the exact coordinate
5273 : // transformer is used.
5274 6006520 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5275 42277 : continue;
5276 5964240 : return false;
5277 : }
5278 :
5279 143211000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5280 : {
5281 : // If the source coordinate is slightly outside of the source raster
5282 : // retry to transform it alone, so that the exact coordinate
5283 : // transformer is used.
5284 6203470 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5285 64466 : continue;
5286 6139010 : return false;
5287 : }
5288 :
5289 : // Check for potential overflow when casting from float to int, (if
5290 : // operating outside natural projection area, padfX/Y can be a very huge
5291 : // positive number before doing the actual conversion), as such cast is
5292 : // undefined behavior that can trigger exception with some compilers
5293 : // (see #6753)
5294 137008000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5295 : {
5296 : // If the source coordinate is slightly outside of the source raster
5297 : // retry to transform it alone, so that the exact coordinate
5298 : // transformer is used.
5299 3932310 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5300 47544 : continue;
5301 3884760 : return false;
5302 : }
5303 133075000 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5304 : {
5305 : // If the source coordinate is slightly outside of the source raster
5306 : // retry to transform it alone, so that the exact coordinate
5307 : // transformer is used.
5308 4488370 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5309 51265 : continue;
5310 4437110 : return false;
5311 : }
5312 :
5313 128587000 : break;
5314 : }
5315 :
5316 128587000 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5317 128587000 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5318 128587000 : if (iSrcX == _nSrcXSize)
5319 0 : iSrcX--;
5320 128587000 : if (iSrcY == _nSrcYSize)
5321 0 : iSrcY--;
5322 :
5323 : // Those checks should normally be OK given the previous ones.
5324 128587000 : CPLAssert(iSrcX >= 0);
5325 128587000 : CPLAssert(iSrcY >= 0);
5326 128587000 : CPLAssert(iSrcX < _nSrcXSize);
5327 128587000 : CPLAssert(iSrcY < _nSrcYSize);
5328 :
5329 128587000 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5330 :
5331 128587000 : return true;
5332 : }
5333 :
5334 : /************************************************************************/
5335 : /* GWKOneSourceCornerFailsToReproject() */
5336 : /************************************************************************/
5337 :
5338 917 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5339 : {
5340 917 : GDALWarpKernel *poWK = psJob->poWK;
5341 2741 : for (int iY = 0; iY <= 1; ++iY)
5342 : {
5343 5478 : for (int iX = 0; iX <= 1; ++iX)
5344 : {
5345 3654 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5346 3654 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5347 3654 : double dfZTmp = 0;
5348 3654 : int nSuccess = FALSE;
5349 3654 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5350 : &dfYTmp, &dfZTmp, &nSuccess);
5351 3654 : if (!nSuccess)
5352 6 : return true;
5353 : }
5354 : }
5355 911 : return false;
5356 : }
5357 :
5358 : /************************************************************************/
5359 : /* GWKAdjustSrcOffsetOnEdge() */
5360 : /************************************************************************/
5361 :
5362 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5363 : GPtrDiff_t &iSrcOffset)
5364 : {
5365 9714 : GDALWarpKernel *poWK = psJob->poWK;
5366 9714 : const int nSrcXSize = poWK->nSrcXSize;
5367 9714 : const int nSrcYSize = poWK->nSrcYSize;
5368 :
5369 : // Check if the computed source position slightly altered
5370 : // fails to reproject. If so, then we are at the edge of
5371 : // the validity area, and it is worth checking neighbour
5372 : // source pixels for validity.
5373 9714 : int nSuccess = FALSE;
5374 : {
5375 9714 : double dfXTmp =
5376 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5377 9714 : double dfYTmp =
5378 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5379 9714 : double dfZTmp = 0;
5380 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5381 : &dfZTmp, &nSuccess);
5382 : }
5383 9714 : if (nSuccess)
5384 : {
5385 6996 : double dfXTmp =
5386 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5387 6996 : double dfYTmp =
5388 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5389 6996 : double dfZTmp = 0;
5390 6996 : nSuccess = FALSE;
5391 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5392 : &dfZTmp, &nSuccess);
5393 : }
5394 9714 : if (nSuccess)
5395 : {
5396 5624 : double dfXTmp =
5397 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5398 5624 : double dfYTmp =
5399 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5400 5624 : double dfZTmp = 0;
5401 5624 : nSuccess = FALSE;
5402 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5403 : &dfZTmp, &nSuccess);
5404 : }
5405 :
5406 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5407 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5408 : {
5409 1860 : iSrcOffset++;
5410 1860 : return true;
5411 : }
5412 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5413 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5414 : {
5415 1334 : iSrcOffset += nSrcXSize;
5416 1334 : return true;
5417 : }
5418 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5419 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5420 : {
5421 956 : iSrcOffset--;
5422 956 : return true;
5423 : }
5424 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5425 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5426 : {
5427 340 : iSrcOffset -= nSrcXSize;
5428 340 : return true;
5429 : }
5430 :
5431 5224 : return false;
5432 : }
5433 :
5434 : /************************************************************************/
5435 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5436 : /************************************************************************/
5437 :
5438 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5439 : GPtrDiff_t &iSrcOffset)
5440 : {
5441 0 : GDALWarpKernel *poWK = psJob->poWK;
5442 0 : const int nSrcXSize = poWK->nSrcXSize;
5443 0 : const int nSrcYSize = poWK->nSrcYSize;
5444 :
5445 : // Check if the computed source position slightly altered
5446 : // fails to reproject. If so, then we are at the edge of
5447 : // the validity area, and it is worth checking neighbour
5448 : // source pixels for validity.
5449 0 : int nSuccess = FALSE;
5450 : {
5451 0 : double dfXTmp =
5452 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5453 0 : double dfYTmp =
5454 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5455 0 : double dfZTmp = 0;
5456 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5457 : &dfZTmp, &nSuccess);
5458 : }
5459 0 : if (nSuccess)
5460 : {
5461 0 : double dfXTmp =
5462 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5463 0 : double dfYTmp =
5464 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5465 0 : double dfZTmp = 0;
5466 0 : nSuccess = FALSE;
5467 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5468 : &dfZTmp, &nSuccess);
5469 : }
5470 0 : if (nSuccess)
5471 : {
5472 0 : double dfXTmp =
5473 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5474 0 : double dfYTmp =
5475 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5476 0 : double dfZTmp = 0;
5477 0 : nSuccess = FALSE;
5478 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5479 : &dfZTmp, &nSuccess);
5480 : }
5481 :
5482 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5483 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5484 : SRC_DENSITY_THRESHOLD_FLOAT)
5485 : {
5486 0 : iSrcOffset++;
5487 0 : return true;
5488 : }
5489 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5490 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5491 : SRC_DENSITY_THRESHOLD_FLOAT)
5492 : {
5493 0 : iSrcOffset += nSrcXSize;
5494 0 : return true;
5495 : }
5496 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5497 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5498 : SRC_DENSITY_THRESHOLD_FLOAT)
5499 : {
5500 0 : iSrcOffset--;
5501 0 : return true;
5502 : }
5503 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5504 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5505 : SRC_DENSITY_THRESHOLD_FLOAT)
5506 : {
5507 0 : iSrcOffset -= nSrcXSize;
5508 0 : return true;
5509 : }
5510 :
5511 0 : return false;
5512 : }
5513 :
5514 : /************************************************************************/
5515 : /* GWKGeneralCase() */
5516 : /* */
5517 : /* This is the most general case. It attempts to handle all */
5518 : /* possible features with relatively little concern for */
5519 : /* efficiency. */
5520 : /************************************************************************/
5521 :
5522 239 : static void GWKGeneralCaseThread(void *pData)
5523 : {
5524 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5525 239 : GDALWarpKernel *poWK = psJob->poWK;
5526 239 : const int iYMin = psJob->iYMin;
5527 239 : const int iYMax = psJob->iYMax;
5528 : const double dfMultFactorVerticalShiftPipeline =
5529 239 : poWK->bApplyVerticalShift
5530 239 : ? CPLAtof(CSLFetchNameValueDef(
5531 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5532 : "1.0"))
5533 239 : : 0.0;
5534 : const bool bAvoidNoDataSingleBand =
5535 239 : poWK->nBands == 1 ||
5536 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5537 239 : "UNIFIED_SRC_NODATA", "FALSE"));
5538 :
5539 239 : int nDstXSize = poWK->nDstXSize;
5540 239 : int nSrcXSize = poWK->nSrcXSize;
5541 239 : int nSrcYSize = poWK->nSrcYSize;
5542 :
5543 : /* -------------------------------------------------------------------- */
5544 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5545 : /* scanlines worth of positions. */
5546 : /* -------------------------------------------------------------------- */
5547 : // For x, 2 *, because we cache the precomputed values at the end.
5548 : double *padfX =
5549 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5550 : double *padfY =
5551 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5552 : double *padfZ =
5553 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5554 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5555 :
5556 239 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5557 :
5558 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5559 239 : if (poWK->eResample != GRA_NearestNeighbour)
5560 : {
5561 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5562 : }
5563 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5564 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5565 239 : const double dfErrorThreshold = CPLAtof(
5566 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5567 :
5568 : const bool bOneSourceCornerFailsToReproject =
5569 239 : GWKOneSourceCornerFailsToReproject(psJob);
5570 :
5571 : // Precompute values.
5572 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5573 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5574 :
5575 : /* ==================================================================== */
5576 : /* Loop over output lines. */
5577 : /* ==================================================================== */
5578 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5579 : {
5580 : /* --------------------------------------------------------------------
5581 : */
5582 : /* Setup points to transform to source image space. */
5583 : /* --------------------------------------------------------------------
5584 : */
5585 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5586 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5587 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5588 236160 : padfY[iDstX] = dfY;
5589 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5590 :
5591 : /* --------------------------------------------------------------------
5592 : */
5593 : /* Transform the points from destination pixel/line coordinates */
5594 : /* to source pixel/line coordinates. */
5595 : /* --------------------------------------------------------------------
5596 : */
5597 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5598 : padfY, padfZ, pabSuccess);
5599 6230 : if (dfSrcCoordPrecision > 0.0)
5600 : {
5601 0 : GWKRoundSourceCoordinates(
5602 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5603 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5604 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5605 : }
5606 :
5607 : /* ====================================================================
5608 : */
5609 : /* Loop over pixels in output scanline. */
5610 : /* ====================================================================
5611 : */
5612 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5613 : {
5614 236160 : GPtrDiff_t iSrcOffset = 0;
5615 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5616 : padfX, padfY, nSrcXSize,
5617 : nSrcYSize, iSrcOffset))
5618 0 : continue;
5619 :
5620 : /* --------------------------------------------------------------------
5621 : */
5622 : /* Do not try to apply transparent/invalid source pixels to the
5623 : */
5624 : /* destination. This currently ignores the multi-pixel input
5625 : */
5626 : /* of bilinear and cubic resamples. */
5627 : /* --------------------------------------------------------------------
5628 : */
5629 236160 : double dfDensity = 1.0;
5630 :
5631 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5632 : {
5633 1200 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5634 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5635 : {
5636 0 : if (!bOneSourceCornerFailsToReproject)
5637 : {
5638 0 : continue;
5639 : }
5640 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5641 : psJob, iSrcOffset))
5642 : {
5643 0 : dfDensity =
5644 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5645 : }
5646 : else
5647 : {
5648 0 : continue;
5649 : }
5650 : }
5651 : }
5652 :
5653 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5654 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5655 : {
5656 0 : if (!bOneSourceCornerFailsToReproject)
5657 : {
5658 0 : continue;
5659 : }
5660 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5661 : {
5662 0 : continue;
5663 : }
5664 : }
5665 :
5666 : /* ====================================================================
5667 : */
5668 : /* Loop processing each band. */
5669 : /* ====================================================================
5670 : */
5671 236160 : bool bHasFoundDensity = false;
5672 :
5673 236160 : const GPtrDiff_t iDstOffset =
5674 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5675 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5676 : {
5677 236160 : double dfBandDensity = 0.0;
5678 236160 : double dfValueReal = 0.0;
5679 236160 : double dfValueImag = 0.0;
5680 :
5681 : /* --------------------------------------------------------------------
5682 : */
5683 : /* Collect the source value. */
5684 : /* --------------------------------------------------------------------
5685 : */
5686 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5687 : nSrcYSize == 1)
5688 : {
5689 : // FALSE is returned if dfBandDensity == 0, which is
5690 : // checked below.
5691 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5692 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5693 : &dfValueImag));
5694 : }
5695 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5696 : {
5697 248 : GWKBilinearResample4Sample(
5698 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5699 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5700 : &dfValueReal, &dfValueImag);
5701 : }
5702 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5703 : {
5704 248 : GWKCubicResample4Sample(
5705 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5706 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5707 : &dfValueReal, &dfValueImag);
5708 : }
5709 : else
5710 : #ifdef DEBUG
5711 : // Only useful for clang static analyzer.
5712 235096 : if (psWrkStruct != nullptr)
5713 : #endif
5714 : {
5715 235096 : psWrkStruct->pfnGWKResample(
5716 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5717 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5718 : &dfValueReal, &dfValueImag, psWrkStruct);
5719 : }
5720 :
5721 : // If we didn't find any valid inputs skip to next band.
5722 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5723 0 : continue;
5724 :
5725 236160 : if (poWK->bApplyVerticalShift)
5726 : {
5727 0 : if (!std::isfinite(padfZ[iDstX]))
5728 0 : continue;
5729 : // Subtract padfZ[] since the coordinate transformation is
5730 : // from target to source
5731 0 : dfValueReal =
5732 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5733 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5734 : }
5735 :
5736 236160 : bHasFoundDensity = true;
5737 :
5738 : /* --------------------------------------------------------------------
5739 : */
5740 : /* We have a computed value from the source. Now apply it
5741 : * to */
5742 : /* the destination pixel. */
5743 : /* --------------------------------------------------------------------
5744 : */
5745 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5746 : dfValueReal, dfValueImag,
5747 : bAvoidNoDataSingleBand);
5748 : }
5749 :
5750 236160 : if (!bHasFoundDensity)
5751 0 : continue;
5752 :
5753 236160 : if (!bAvoidNoDataSingleBand)
5754 : {
5755 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
5756 : }
5757 :
5758 : /* --------------------------------------------------------------------
5759 : */
5760 : /* Update destination density/validity masks. */
5761 : /* --------------------------------------------------------------------
5762 : */
5763 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5764 :
5765 236160 : if (poWK->panDstValid != nullptr)
5766 : {
5767 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5768 : }
5769 : } /* Next iDstX */
5770 :
5771 : /* --------------------------------------------------------------------
5772 : */
5773 : /* Report progress to the user, and optionally cancel out. */
5774 : /* --------------------------------------------------------------------
5775 : */
5776 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5777 0 : break;
5778 : }
5779 :
5780 : /* -------------------------------------------------------------------- */
5781 : /* Cleanup and return. */
5782 : /* -------------------------------------------------------------------- */
5783 239 : CPLFree(padfX);
5784 239 : CPLFree(padfY);
5785 239 : CPLFree(padfZ);
5786 239 : CPLFree(pabSuccess);
5787 239 : if (psWrkStruct)
5788 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5789 239 : }
5790 :
5791 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5792 : {
5793 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5794 : }
5795 :
5796 : /************************************************************************/
5797 : /* GWKRealCase() */
5798 : /* */
5799 : /* General case for non-complex data types. */
5800 : /************************************************************************/
5801 :
5802 219 : static void GWKRealCaseThread(void *pData)
5803 :
5804 : {
5805 219 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5806 219 : GDALWarpKernel *poWK = psJob->poWK;
5807 219 : const int iYMin = psJob->iYMin;
5808 219 : const int iYMax = psJob->iYMax;
5809 :
5810 219 : const int nDstXSize = poWK->nDstXSize;
5811 219 : const int nSrcXSize = poWK->nSrcXSize;
5812 219 : const int nSrcYSize = poWK->nSrcYSize;
5813 : const double dfMultFactorVerticalShiftPipeline =
5814 219 : poWK->bApplyVerticalShift
5815 219 : ? CPLAtof(CSLFetchNameValueDef(
5816 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5817 : "1.0"))
5818 219 : : 0.0;
5819 : const bool bAvoidNoDataSingleBand =
5820 297 : poWK->nBands == 1 ||
5821 78 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5822 219 : "UNIFIED_SRC_NODATA", "FALSE"));
5823 :
5824 : /* -------------------------------------------------------------------- */
5825 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5826 : /* scanlines worth of positions. */
5827 : /* -------------------------------------------------------------------- */
5828 :
5829 : // For x, 2 *, because we cache the precomputed values at the end.
5830 : double *padfX =
5831 219 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5832 : double *padfY =
5833 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5834 : double *padfZ =
5835 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5836 219 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5837 :
5838 219 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5839 :
5840 219 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5841 219 : if (poWK->eResample != GRA_NearestNeighbour)
5842 : {
5843 177 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5844 : }
5845 219 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5846 219 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5847 219 : const double dfErrorThreshold = CPLAtof(
5848 219 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5849 :
5850 626 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5851 407 : poWK->papanBandSrcValid == nullptr &&
5852 188 : poWK->pafUnifiedSrcDensity != nullptr;
5853 :
5854 : const bool bOneSourceCornerFailsToReproject =
5855 219 : GWKOneSourceCornerFailsToReproject(psJob);
5856 :
5857 : // Precompute values.
5858 22605 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5859 22386 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5860 :
5861 : /* ==================================================================== */
5862 : /* Loop over output lines. */
5863 : /* ==================================================================== */
5864 25393 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5865 : {
5866 : /* --------------------------------------------------------------------
5867 : */
5868 : /* Setup points to transform to source image space. */
5869 : /* --------------------------------------------------------------------
5870 : */
5871 25174 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5872 25174 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5873 44331500 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5874 44306300 : padfY[iDstX] = dfY;
5875 25174 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5876 :
5877 : /* --------------------------------------------------------------------
5878 : */
5879 : /* Transform the points from destination pixel/line coordinates */
5880 : /* to source pixel/line coordinates. */
5881 : /* --------------------------------------------------------------------
5882 : */
5883 25174 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5884 : padfY, padfZ, pabSuccess);
5885 25174 : if (dfSrcCoordPrecision > 0.0)
5886 : {
5887 0 : GWKRoundSourceCoordinates(
5888 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5889 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5890 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5891 : }
5892 :
5893 : /* ====================================================================
5894 : */
5895 : /* Loop over pixels in output scanline. */
5896 : /* ====================================================================
5897 : */
5898 44331500 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5899 : {
5900 44306300 : GPtrDiff_t iSrcOffset = 0;
5901 44306300 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5902 : padfX, padfY, nSrcXSize,
5903 : nSrcYSize, iSrcOffset))
5904 43567600 : continue;
5905 :
5906 : /* --------------------------------------------------------------------
5907 : */
5908 : /* Do not try to apply transparent/invalid source pixels to the
5909 : */
5910 : /* destination. This currently ignores the multi-pixel input
5911 : */
5912 : /* of bilinear and cubic resamples. */
5913 : /* --------------------------------------------------------------------
5914 : */
5915 31793100 : double dfDensity = 1.0;
5916 :
5917 31793100 : if (poWK->pafUnifiedSrcDensity != nullptr)
5918 : {
5919 1656100 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5920 1656100 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5921 : {
5922 1525010 : if (!bOneSourceCornerFailsToReproject)
5923 : {
5924 1525010 : continue;
5925 : }
5926 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5927 : psJob, iSrcOffset))
5928 : {
5929 0 : dfDensity =
5930 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5931 : }
5932 : else
5933 : {
5934 0 : continue;
5935 : }
5936 : }
5937 : }
5938 :
5939 59897300 : if (poWK->panUnifiedSrcValid != nullptr &&
5940 29629200 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5941 : {
5942 29531600 : if (!bOneSourceCornerFailsToReproject)
5943 : {
5944 29529300 : continue;
5945 : }
5946 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5947 : {
5948 0 : continue;
5949 : }
5950 : }
5951 :
5952 : /* ====================================================================
5953 : */
5954 : /* Loop processing each band. */
5955 : /* ====================================================================
5956 : */
5957 738768 : bool bHasFoundDensity = false;
5958 :
5959 738768 : const GPtrDiff_t iDstOffset =
5960 738768 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5961 2069310 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5962 : {
5963 1330540 : double dfBandDensity = 0.0;
5964 1330540 : double dfValueReal = 0.0;
5965 :
5966 : /* --------------------------------------------------------------------
5967 : */
5968 : /* Collect the source value. */
5969 : /* --------------------------------------------------------------------
5970 : */
5971 1330540 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5972 : nSrcYSize == 1)
5973 : {
5974 : // FALSE is returned if dfBandDensity == 0, which is
5975 : // checked below.
5976 15516 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5977 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5978 : }
5979 1315030 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5980 : {
5981 2046 : double dfValueImagIgnored = 0.0;
5982 2046 : GWKBilinearResample4Sample(
5983 2046 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5984 2046 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5985 2046 : &dfValueReal, &dfValueImagIgnored);
5986 : }
5987 1312980 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5988 : {
5989 691552 : if (bSrcMaskIsDensity)
5990 : {
5991 389755 : if (poWK->eWorkingDataType == GDT_UInt8)
5992 : {
5993 389755 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5994 389755 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5995 389755 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5996 : &dfValueReal);
5997 : }
5998 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
5999 : {
6000 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
6001 0 : GUInt16>(poWK, iBand,
6002 0 : padfX[iDstX] - poWK->nSrcXOff,
6003 0 : padfY[iDstX] - poWK->nSrcYOff,
6004 : &dfBandDensity, &dfValueReal);
6005 : }
6006 : else
6007 : {
6008 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
6009 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6010 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6011 : &dfValueReal);
6012 : }
6013 : }
6014 : else
6015 : {
6016 301797 : double dfValueImagIgnored = 0.0;
6017 301797 : GWKCubicResample4Sample(
6018 301797 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6019 301797 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6020 : &dfValueReal, &dfValueImagIgnored);
6021 691552 : }
6022 : }
6023 : else
6024 : #ifdef DEBUG
6025 : // Only useful for clang static analyzer.
6026 621431 : if (psWrkStruct != nullptr)
6027 : #endif
6028 : {
6029 621431 : double dfValueImagIgnored = 0.0;
6030 621431 : psWrkStruct->pfnGWKResample(
6031 621431 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6032 621431 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6033 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6034 : }
6035 :
6036 : // If we didn't find any valid inputs skip to next band.
6037 1330540 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6038 0 : continue;
6039 :
6040 1330540 : if (poWK->bApplyVerticalShift)
6041 : {
6042 0 : if (!std::isfinite(padfZ[iDstX]))
6043 0 : continue;
6044 : // Subtract padfZ[] since the coordinate transformation is
6045 : // from target to source
6046 0 : dfValueReal =
6047 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6048 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6049 : }
6050 :
6051 1330540 : bHasFoundDensity = true;
6052 :
6053 : /* --------------------------------------------------------------------
6054 : */
6055 : /* We have a computed value from the source. Now apply it
6056 : * to */
6057 : /* the destination pixel. */
6058 : /* --------------------------------------------------------------------
6059 : */
6060 1330540 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6061 : dfValueReal, bAvoidNoDataSingleBand);
6062 : }
6063 :
6064 738768 : if (!bHasFoundDensity)
6065 0 : continue;
6066 :
6067 738768 : if (!bAvoidNoDataSingleBand)
6068 : {
6069 100295 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6070 : }
6071 :
6072 : /* --------------------------------------------------------------------
6073 : */
6074 : /* Update destination density/validity masks. */
6075 : /* --------------------------------------------------------------------
6076 : */
6077 738768 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6078 :
6079 738768 : if (poWK->panDstValid != nullptr)
6080 : {
6081 104586 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6082 : }
6083 : } // Next iDstX.
6084 :
6085 : /* --------------------------------------------------------------------
6086 : */
6087 : /* Report progress to the user, and optionally cancel out. */
6088 : /* --------------------------------------------------------------------
6089 : */
6090 25174 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6091 0 : break;
6092 : }
6093 :
6094 : /* -------------------------------------------------------------------- */
6095 : /* Cleanup and return. */
6096 : /* -------------------------------------------------------------------- */
6097 219 : CPLFree(padfX);
6098 219 : CPLFree(padfY);
6099 219 : CPLFree(padfZ);
6100 219 : CPLFree(pabSuccess);
6101 219 : if (psWrkStruct)
6102 177 : GWKResampleDeleteWrkStruct(psWrkStruct);
6103 219 : }
6104 :
6105 219 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6106 : {
6107 219 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6108 : }
6109 :
6110 : /************************************************************************/
6111 : /* GWKCubicResampleNoMasks4MultiBandT() */
6112 : /************************************************************************/
6113 :
6114 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
6115 : /* and enough SSE registers */
6116 : #if defined(USE_SSE2)
6117 :
6118 115847000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6119 : const __m128 row2, const __m128 row3,
6120 : const __m128 weightsXY0,
6121 : const __m128 weightsXY1,
6122 : const __m128 weightsXY2,
6123 : const __m128 weightsXY3)
6124 : {
6125 810929000 : return XMMHorizontalAdd(_mm_add_ps(
6126 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6127 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6128 115847000 : _mm_mul_ps(row3, weightsXY3))));
6129 : }
6130 :
// Cubic-resample every band of the source at (dfSrcX, dfSrcY) and store the
// results, as T, at index iDstOffset of each poWK->papabyDstImage[] buffer.
//
//   poWK        warp kernel providing the source/destination buffers, the
//               source dimensions and the band count.
//   dfSrcX/Y    source position; 0.5 is subtracted before truncating to the
//               integer sample (iSrcX, iSrcY).
//   iDstOffset  destination index written in every band.
//
// When the 4x4 window around (iSrcX, iSrcY) does not fit inside the source,
// each band falls back to GWKBilinearResampleNoMasks4SampleT(). Otherwise the
// 16 combined X*Y weights are computed once and reused for all bands.
// If poWK->pafDstDensity is set, its iDstOffset entry is set to 1.0f.
6131 : template <class T>
6132 39602542 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6133 : double dfSrcX, double dfSrcY,
6134 : const GPtrDiff_t iDstOffset)
6135 : {
6136 39602542 : const double dfSrcXShifted = dfSrcX - 0.5;
6137 39602542 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6138 39602542 : const double dfSrcYShifted = dfSrcY - 0.5;
6139 39602542 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6140 39602542 : const GPtrDiff_t iSrcOffset =
6141 39602542 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6142 :
6143 : // Get the bilinear interpolation at the image borders.
// The cubic window spans columns iSrcX-1..iSrcX+2 and rows iSrcY-1..iSrcY+2;
// this test rejects any window touching the first/last source row or column.
6144 39602542 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6145 38630062 : iSrcY + 2 >= poWK->nSrcYSize)
6146 : {
6147 3947270 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6148 : {
6149 : T value;
6150 2960450 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6151 : &value);
6152 2960450 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6153 : value;
6154 986817 : }
6155 : }
6156 : else
6157 : {
// Fractional position inside the sample, used to derive the cubic weights.
6158 38615662 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6159 38615662 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6160 :
6161 : float afCoeffsX[4];
6162 : float afCoeffsY[4];
6163 38615662 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6164 38615662 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
// weightsXYk = afCoeffsY[k] * afCoeffsX[0..3], i.e. row k of the 4x4 weight
// grid. These do not depend on the band, so they are computed once.
6165 38615662 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6166 : const auto weightsXY0 =
6167 77231324 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6168 : const auto weightsXY1 =
6169 77231324 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6170 : const auto weightsXY2 =
6171 77231324 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6172 : const auto weightsXY3 =
6173 38615662 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6174 :
// Index of sample (iSrcX - 1, iSrcY - 1): the first sample of the 4x4 window.
6175 38615662 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6176 :
6177 38615662 : int iBand = 0;
6178 : // Process 2 bands at a time
// XMMLoad4Values() is defined elsewhere; presumably it loads 4 consecutive
// T samples converted to float - TODO confirm.
6179 77231324 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6180 : {
6181 38615662 : const T *CPL_RESTRICT pBand0 =
6182 38615662 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6183 38615662 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6184 : const auto row1_0 =
6185 38615662 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6186 : const auto row2_0 =
6187 38615662 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6188 : const auto row3_0 =
6189 38615662 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6190 :
6191 38615662 : const T *CPL_RESTRICT pBand1 =
6192 38615662 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6193 38615662 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6194 : const auto row1_1 =
6195 38615662 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6196 : const auto row2_1 =
6197 38615662 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6198 : const auto row3_1 =
6199 38615662 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6200 :
6201 : const float fValue_0 =
6202 38615662 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6203 : weightsXY1, weightsXY2, weightsXY3);
6204 :
6205 : const float fValue_1 =
6206 38615662 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6207 : weightsXY1, weightsXY2, weightsXY3);
6208 :
6209 38615662 : T *CPL_RESTRICT pDstBand0 =
6210 38615662 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6211 38615662 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6212 :
6213 38615662 : T *CPL_RESTRICT pDstBand1 =
6214 38615662 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6215 38615662 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6216 : }
// Remaining band when the band count is odd.
6217 38615662 : if (iBand < poWK->nBands)
6218 : {
6219 38615662 : const T *CPL_RESTRICT pBand0 =
6220 38615662 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6221 38615662 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6222 : const auto row1 =
6223 38615662 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6224 : const auto row2 =
6225 38615662 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6226 : const auto row3 =
6227 38615662 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6228 :
6229 : const float fValue =
6230 38615662 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6231 : weightsXY2, weightsXY3);
6232 :
6233 38615662 : T *CPL_RESTRICT pDstBand =
6234 38615662 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6235 38615662 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6236 : }
6237 : }
6238 :
// Every band was written above, so the destination pixel is fully dense.
6239 39602542 : if (poWK->pafDstDensity)
6240 37448501 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6241 39602542 : }
6242 :
6243 : #endif // defined(USE_SSE2)
6244 :
6245 : /************************************************************************/
6246 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6247 : /************************************************************************/
6248 :
// Worker for destination rows [psJob->iYMin, psJob->iYMax). It reads no
// source validity/density mask; the only mask it writes is
// poWK->pafDstDensity (when non-null).
//
// Template parameters:
//   T                    sample type of the source and destination buffers.
//   eResample            resampling algorithm, selected at compile time.
//   bUse4SamplesFormula  when non-zero (and eResample is not nearest
//                        neighbour), the 4-sample bilinear/cubic helpers are
//                        used instead of GWKResampleNoMasksT().
//
// pData must point to a GWKJobStruct. For each row, the destination pixel
// centres are transformed to source coordinates with poWK->pfnTransformer;
// each pixel accepted by GWKCheckAndComputeSrcOffsets() is then resampled
// band by band and written to poWK->papabyDstImage[]. The row loop stops
// early when psJob->pfnProgress returns non-zero.
6249 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6250 1844 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6251 :
6252 : {
6253 1844 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6254 1844 : GDALWarpKernel *poWK = psJob->poWK;
6255 1844 : const int iYMin = psJob->iYMin;
6256 1844 : const int iYMax = psJob->iYMax;
// Scale applied to padfZ[] in the vertical-shift step; the warp option is
// only read when vertical shift is enabled (default "1.0"), else 0.0.
6257 1826 : const double dfMultFactorVerticalShiftPipeline =
6258 1844 : poWK->bApplyVerticalShift
6259 18 : ? CPLAtof(CSLFetchNameValueDef(
6260 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6261 : "1.0"))
6262 : : 0.0;
6263 :
6264 1844 : const int nDstXSize = poWK->nDstXSize;
6265 1844 : const int nSrcXSize = poWK->nSrcXSize;
6266 1844 : const int nSrcYSize = poWK->nSrcYSize;
6267 :
6268 : /* -------------------------------------------------------------------- */
6269 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6270 : /* scanlines worth of positions. */
6271 : /* -------------------------------------------------------------------- */
6272 :
6273 : // For x, 2 *, because we cache the precomputed values at the end.
6274 : double *padfX =
6275 1844 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6276 : double *padfY =
6277 1844 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6278 : double *padfZ =
6279 1844 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6280 1844 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6281 :
// Scratch weight buffers (1 + 2 * radius entries each), handed to
// GWKResampleNoMasksT() in the general resampling branch below.
6282 1844 : const int nXRadius = poWK->nXRadius;
6283 : double *padfWeightsX =
6284 1844 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6285 : double *padfWeightsY = static_cast<double *>(
6286 1844 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6287 1844 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6288 1844 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6289 1844 : const double dfErrorThreshold = CPLAtof(
6290 1844 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6291 :
6292 : // Precompute values.
// The second half of padfX keeps the untransformed pixel-centre X values so
// each row can restore them with a memcpy instead of recomputing them.
6293 418917 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6294 417073 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6295 :
6296 : /* ==================================================================== */
6297 : /* Loop over output lines. */
6298 : /* ==================================================================== */
6299 293464 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6300 : {
6301 : /* --------------------------------------------------------------------
6302 : */
6303 : /* Setup points to transform to source image space. */
6304 : /* --------------------------------------------------------------------
6305 : */
6306 291621 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6307 291621 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6308 98590159 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6309 98298535 : padfY[iDstX] = dfY;
6310 291621 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6311 :
6312 : /* --------------------------------------------------------------------
6313 : */
6314 : /* Transform the points from destination pixel/line coordinates */
6315 : /* to source pixel/line coordinates. */
6316 : /* --------------------------------------------------------------------
6317 : */
6318 291621 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6319 : padfY, padfZ, pabSuccess);
// Optional post-processing of the transformed coordinates, enabled by a
// positive SRC_COORD_PRECISION warp option.
6320 291621 : if (dfSrcCoordPrecision > 0.0)
6321 : {
6322 1000 : GWKRoundSourceCoordinates(
6323 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6324 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6325 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6326 : }
6327 :
6328 : /* ====================================================================
6329 : */
6330 : /* Loop over pixels in output scanline. */
6331 : /* ====================================================================
6332 : */
6333 98590159 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6334 : {
// Pixels rejected by GWKCheckAndComputeSrcOffsets() are left untouched.
6335 98298535 : GPtrDiff_t iSrcOffset = 0;
6336 98298535 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6337 : padfX, padfY, nSrcXSize,
6338 : nSrcYSize, iSrcOffset))
6339 49822126 : continue;
6340 :
6341 : /* ====================================================================
6342 : */
6343 : /* Loop processing each band. */
6344 : /* ====================================================================
6345 : */
6346 88079019 : const GPtrDiff_t iDstOffset =
6347 88079019 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6348 :
// SIMD shortcut: 4-sample cubic on GByte/GUInt16 with more than one band and
// no vertical shift handles all bands (and the density) in a single call.
6349 : #if defined(USE_SSE2)
6350 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6351 : (std::is_same<T, GByte>::value ||
6352 : std::is_same<T, GUInt16>::value))
6353 : {
6354 40668141 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6355 : {
6356 39602542 : GWKCubicResampleNoMasks4MultiBandT<T>(
6357 39602542 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6358 39602542 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6359 :
6360 39602542 : continue;
6361 : }
6362 : }
6363 : #endif // defined(USE_SSE2)
6364 :
// dfInvWeights is declared outside the band loop and passed to
// GWKResampleNoMasksT() for every band of this pixel; it is only referenced
// in that branch, hence [[maybe_unused]].
6365 48476490 : [[maybe_unused]] double dfInvWeights = 0;
6366 134855396 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6367 : {
6368 86378910 : T value = 0;
6369 : if constexpr (eResample == GRA_NearestNeighbour)
6370 : {
6371 78474930 : value = reinterpret_cast<T *>(
6372 78474930 : poWK->papabySrcImage[iBand])[iSrcOffset];
6373 : }
6374 : else if constexpr (bUse4SamplesFormula)
6375 : {
6376 : if constexpr (eResample == GRA_Bilinear)
6377 3827651 : GWKBilinearResampleNoMasks4SampleT(
6378 3827651 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6379 3827651 : padfY[iDstX] - poWK->nSrcYOff, &value);
6380 : else
6381 2300964 : GWKCubicResampleNoMasks4SampleT(
6382 2300964 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6383 2300964 : padfY[iDstX] - poWK->nSrcYOff, &value);
6384 : }
6385 : else
6386 : {
6387 1775365 : GWKResampleNoMasksT(
6388 1775365 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6389 1775365 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6390 : padfWeightsY, dfInvWeights);
6391 : }
6392 :
6393 86378910 : if (poWK->bApplyVerticalShift)
6394 : {
// A non-finite Z skips this band: neither the value nor the density is
// written for it.
6395 818 : if (!std::isfinite(padfZ[iDstX]))
6396 0 : continue;
6397 : // Subtract padfZ[] since the coordinate transformation is
6398 : // from target to source
6399 818 : value = GWKClampValueT<T>(
6400 818 : double(value) * poWK->dfMultFactorVerticalShift -
6401 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6402 : }
6403 :
6404 86378910 : if (poWK->pafDstDensity)
6405 12985339 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6406 :
6407 86378910 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6408 : value;
6409 : }
6410 : }
6411 :
6412 : /* --------------------------------------------------------------------
6413 : */
6414 : /* Report progress to the user, and optionally cancel out. */
6415 : /* --------------------------------------------------------------------
6416 : */
6417 291621 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6418 1 : break;
6419 : }
6420 :
6421 : /* -------------------------------------------------------------------- */
6422 : /* Cleanup and return. */
6423 : /* -------------------------------------------------------------------- */
6424 1844 : CPLFree(padfX);
6425 1844 : CPLFree(padfY);
6426 1844 : CPLFree(padfZ);
6427 1844 : CPLFree(pabSuccess);
6428 1844 : CPLFree(padfWeightsX);
6429 1844 : CPLFree(padfWeightsY);
6430 1844 : }
6431 :
6432 : template <class T, GDALResampleAlg eResample>
6433 995 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6434 : {
6435 995 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6436 : pData);
6437 995 : }
6438 :
6439 : template <class T, GDALResampleAlg eResample>
6440 849 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6441 :
6442 : {
6443 849 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6444 849 : GDALWarpKernel *poWK = psJob->poWK;
6445 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6446 849 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
6447 849 : if (bUse4SamplesFormula)
6448 792 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6449 : pData);
6450 : else
6451 57 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6452 : pData);
6453 849 : }
6454 :
6455 944 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6456 : {
6457 944 : return GWKRun(
6458 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6459 944 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6460 : }
6461 :
6462 126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6463 : {
6464 126 : return GWKRun(
6465 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6466 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6467 126 : GRA_Bilinear>);
6468 : }
6469 :
6470 677 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6471 : {
6472 677 : return GWKRun(
6473 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6474 677 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6475 : }
6476 :
6477 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6478 : {
6479 9 : return GWKRun(
6480 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6481 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6482 : }
6483 :
6484 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6485 :
6486 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6487 : {
6488 : return GWKRun(
6489 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6490 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6491 : }
6492 : #endif
6493 :
6494 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6495 : {
6496 12 : return GWKRun(
6497 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6498 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6499 : }
6500 :
6501 : /************************************************************************/
6502 : /* GWKNearestThread() */
6503 : /* */
6504 : /* Nearest neighbour resampling for samples of type T, */
6505 : /* using valid flags. Should be as fast as possible for this */
6506 : /* particular transformation type. */
6507 : /************************************************************************/
6508 :
// Nearest-neighbour worker for destination rows [psJob->iYMin, psJob->iYMax),
// for samples of type T. pData must point to a GWKJobStruct.
//
// For each row, the destination pixel centres are transformed to source
// coordinates with poWK->pfnTransformer. A destination pixel is skipped when
// GWKCheckAndComputeSrcOffsets() rejects it, when the unified source validity
// mask flags the source pixel invalid (unless GWKAdjustSrcOffsetOnEdge()
// accepts it), or when the unified source density is below
// SRC_DENSITY_THRESHOLD_DOUBLE. Otherwise each band is fetched with
// GWKGetPixelT() and written with GWKSetPixelValueRealT(), after which the
// destination density/validity masks are updated. The row loop stops early
// when psJob->pfnProgress returns non-zero.
6509 459 : template <class T> static void GWKNearestThread(void *pData)
6510 :
6511 : {
6512 459 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6513 459 : GDALWarpKernel *poWK = psJob->poWK;
6514 459 : const int iYMin = psJob->iYMin;
6515 459 : const int iYMax = psJob->iYMax;
// Scale applied to padfZ[] in the vertical-shift step; the warp option is
// only read when vertical shift is enabled (default "1.0"), else 0.0.
6516 459 : const double dfMultFactorVerticalShiftPipeline =
6517 459 : poWK->bApplyVerticalShift
6518 0 : ? CPLAtof(CSLFetchNameValueDef(
6519 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6520 : "1.0"))
6521 : : 0.0;
// True for single-band kernels, or when the UNIFIED_SRC_NODATA warp option
// is not set to a true value. When false, GWKAvoidNoDataMultiBand() is
// called once per pixel after the band loop instead.
6522 459 : const bool bAvoidNoDataSingleBand =
6523 525 : poWK->nBands == 1 ||
6524 66 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
6525 : "UNIFIED_SRC_NODATA", "FALSE"));
6526 :
6527 459 : const int nDstXSize = poWK->nDstXSize;
6528 459 : const int nSrcXSize = poWK->nSrcXSize;
6529 459 : const int nSrcYSize = poWK->nSrcYSize;
6530 :
6531 : /* -------------------------------------------------------------------- */
6532 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6533 : /* scanlines worth of positions. */
6534 : /* -------------------------------------------------------------------- */
6535 :
6536 : // For x, 2 *, because we cache the precomputed values at the end.
6537 : double *padfX =
6538 459 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6539 : double *padfY =
6540 459 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6541 : double *padfZ =
6542 459 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6543 459 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6544 :
6545 459 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6546 459 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6547 459 : const double dfErrorThreshold = CPLAtof(
6548 459 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6549 :
// When true, source pixels flagged invalid get a second chance through
// GWKAdjustSrcOffsetOnEdge() instead of being skipped outright.
6550 : const bool bOneSourceCornerFailsToReproject =
6551 459 : GWKOneSourceCornerFailsToReproject(psJob);
6552 :
6553 : // Precompute values.
// The second half of padfX keeps the untransformed pixel-centre X values so
// each row can restore them with a memcpy instead of recomputing them.
6554 62854 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6555 62395 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6556 :
6557 : /* ==================================================================== */
6558 : /* Loop over output lines. */
6559 : /* ==================================================================== */
6560 48162 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6561 : {
6562 :
6563 : /* --------------------------------------------------------------------
6564 : */
6565 : /* Setup points to transform to source image space. */
6566 : /* --------------------------------------------------------------------
6567 : */
6568 47703 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6569 47703 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6570 9833535 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6571 9785833 : padfY[iDstX] = dfY;
6572 47703 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6573 :
6574 : /* --------------------------------------------------------------------
6575 : */
6576 : /* Transform the points from destination pixel/line coordinates */
6577 : /* to source pixel/line coordinates. */
6578 : /* --------------------------------------------------------------------
6579 : */
6580 47703 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6581 : padfY, padfZ, pabSuccess);
// Optional post-processing of the transformed coordinates, enabled by a
// positive SRC_COORD_PRECISION warp option.
6582 47703 : if (dfSrcCoordPrecision > 0.0)
6583 : {
6584 0 : GWKRoundSourceCoordinates(
6585 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6586 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6587 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6588 : }
6589 : /* ====================================================================
6590 : */
6591 : /* Loop over pixels in output scanline. */
6592 : /* ====================================================================
6593 : */
6594 9833535 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6595 : {
6596 9785833 : GPtrDiff_t iSrcOffset = 0;
6597 9785833 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6598 : padfX, padfY, nSrcXSize,
6599 : nSrcYSize, iSrcOffset))
6600 2358945 : continue;
6601 :
6602 : /* --------------------------------------------------------------------
6603 : */
6604 : /* Do not try to apply invalid source pixels to the dest. */
6605 : /* --------------------------------------------------------------------
6606 : */
6607 9606143 : if (poWK->panUnifiedSrcValid != nullptr &&
6608 1127399 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6609 : {
6610 49043 : if (!bOneSourceCornerFailsToReproject)
6611 : {
6612 41558 : continue;
6613 : }
// iSrcOffset is passed by name to GWKAdjustSrcOffsetOnEdge(); presumably it
// is updated in place to a neighbouring valid pixel - TODO confirm.
6614 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6615 : {
6616 5224 : continue;
6617 : }
6618 : }
6619 :
6620 : /* --------------------------------------------------------------------
6621 : */
6622 : /* Do not try to apply transparent source pixels to the
6623 : * destination.*/
6624 : /* --------------------------------------------------------------------
6625 : */
6626 8431960 : double dfDensity = 1.0;
6627 :
6628 8431960 : if (poWK->pafUnifiedSrcDensity != nullptr)
6629 : {
6630 1557335 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6631 1557335 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6632 1005075 : continue;
6633 : }
6634 :
6635 : /* ====================================================================
6636 : */
6637 : /* Loop processing each band. */
6638 : /* ====================================================================
6639 : */
6640 :
6641 7426888 : const GPtrDiff_t iDstOffset =
6642 7426888 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6643 :
6644 17415958 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6645 : {
6646 9989020 : T value = 0;
6647 9989020 : double dfBandDensity = 0.0;
6648 :
6649 : /* --------------------------------------------------------------------
6650 : */
6651 : /* Collect the source value. */
6652 : /* --------------------------------------------------------------------
6653 : */
// Bands for which GWKGetPixelT() returns false are not written.
6654 9989020 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6655 : &value))
6656 : {
6657 :
6658 9989010 : if (poWK->bApplyVerticalShift)
6659 : {
6660 0 : if (!std::isfinite(padfZ[iDstX]))
6661 0 : continue;
6662 : // Subtract padfZ[] since the coordinate transformation
6663 : // is from target to source
6664 0 : value = GWKClampValueT<T>(
6665 0 : double(value) * poWK->dfMultFactorVerticalShift -
6666 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6667 : }
6668 :
6669 9989010 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6670 : dfBandDensity, value,
6671 : bAvoidNoDataSingleBand);
6672 : }
6673 : }
6674 :
6675 : /* --------------------------------------------------------------------
6676 : */
6677 : /* Mark this pixel valid/opaque in the output. */
6678 : /* --------------------------------------------------------------------
6679 : */
6680 :
// NOTE(review): the masks below are updated even if no band was written in
// the loop above; GWKRealCaseThread() guards the equivalent step with
// bHasFoundDensity. Confirm whether this difference is intentional.
6681 7426888 : if (!bAvoidNoDataSingleBand)
6682 : {
6683 424278 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6684 : }
6685 :
6686 7426888 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6687 :
6688 7426888 : if (poWK->panDstValid != nullptr)
6689 : {
6690 6156885 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6691 : }
6692 : } /* Next iDstX */
6693 :
6694 : /* --------------------------------------------------------------------
6695 : */
6696 : /* Report progress to the user, and optionally cancel out. */
6697 : /* --------------------------------------------------------------------
6698 : */
6699 47703 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6700 0 : break;
6701 : }
6702 :
6703 : /* -------------------------------------------------------------------- */
6704 : /* Cleanup and return. */
6705 : /* -------------------------------------------------------------------- */
6706 459 : CPLFree(padfX);
6707 459 : CPLFree(padfY);
6708 459 : CPLFree(padfZ);
6709 459 : CPLFree(pabSuccess);
6710 459 : }
6711 :
6712 350 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6713 : {
6714 350 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6715 : }
6716 :
6717 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6718 : {
6719 14 : return GWKRun(
6720 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6721 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6722 : }
6723 :
6724 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6725 : {
6726 5 : return GWKRun(
6727 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6728 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6729 5 : GRA_Bilinear>);
6730 : }
6731 :
6732 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6733 : {
6734 6 : return GWKRun(
6735 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6736 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6737 6 : GRA_Bilinear>);
6738 : }
6739 :
6740 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6741 : {
6742 4 : return GWKRun(
6743 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6744 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6745 4 : GRA_Bilinear>);
6746 : }
6747 :
6748 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6749 :
6750 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6751 : {
6752 : return GWKRun(
6753 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6754 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6755 : GRA_Bilinear>);
6756 : }
6757 : #endif
6758 :
6759 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6760 : {
6761 5 : return GWKRun(
6762 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6763 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6764 : }
6765 :
6766 14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6767 : {
6768 14 : return GWKRun(
6769 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6770 14 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6771 : }
6772 :
6773 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6774 : {
6775 6 : return GWKRun(
6776 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6777 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6778 : }
6779 :
6780 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6781 : {
6782 5 : return GWKRun(
6783 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6784 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6785 : }
6786 :
6787 45 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6788 : {
6789 45 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6790 : }
6791 :
6792 10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6793 : {
6794 10 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6795 : }
6796 :
6797 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6798 : {
6799 11 : return GWKRun(
6800 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6801 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6802 : }
6803 :
6804 50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6805 : {
6806 50 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6807 : }
6808 :
6809 : /************************************************************************/
6810 : /* GWKAverageOrMode() */
6811 : /* */
6812 : /************************************************************************/
6813 :
// Weight of source row iSrcY inside the window [dfYMin, dfYMax]:
//  - first row of the window: 1.0 when the window is a single row, otherwise
//    1 - (dfYMin - iSrcYMin);
//  - last row of the window: 1 - (iSrcYMax - dfYMax);
//  - any other row: 1.0.
// NOTE: relies on iSrcYMin, iSrcYMax, dfYMin and dfYMax being in scope at the
// point of use. The argument is parenthesized so that a compound expression
// can be passed safely.
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    (((iSrcY) == iSrcYMin)                                                     \
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
         : (((iSrcY) + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax) : 1.0))

// Weight of source column iSrcX combined with an already computed row weight
// dfWeightY; the column rule mirrors COMPUTE_WEIGHT_Y() and the result is the
// product of both weights.
// NOTE: relies on iSrcXMin, iSrcXMax, dfXMin and dfXMax being in scope at the
// point of use. Both arguments are parenthesized so that a compound
// expression (e.g. a sum) is multiplied as a whole.
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    (((iSrcX) == iSrcXMin)                                                     \
         ? ((iSrcXMin + 1 == iSrcXMax)                                         \
                ? (dfWeightY)                                                  \
                : (dfWeightY) * (1 - (dfXMin - iSrcXMin)))                     \
         : (((iSrcX) + 1 == iSrcXMax)                                          \
                ? (dfWeightY) * (1 - (iSrcXMax - dfXMax))                      \
                : (dfWeightY)))
6826 :
6827 : static void GWKAverageOrModeThread(void *pData);
6828 :
6829 163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6830 : {
6831 163 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6832 : }
6833 :
6834 : /************************************************************************/
6835 : /* GWKAverageOrModeComputeLineCoords() */
6836 : /************************************************************************/
6837 :
6838 8183 : static void GWKAverageOrModeComputeLineCoords(
6839 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6840 : double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6841 : int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6842 : double dfErrorThreshold)
6843 : {
6844 8183 : const GDALWarpKernel *poWK = psJob->poWK;
6845 8183 : const int nDstXSize = poWK->nDstXSize;
6846 :
6847 : // Setup points to transform to source image space.
6848 2097530 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6849 : {
6850 2089340 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6851 2089340 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6852 2089340 : padfZ[iDstX] = 0.0;
6853 2089340 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6854 2089340 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6855 2089340 : padfZ2[iDstX] = 0.0;
6856 : }
6857 :
6858 : /* ----------------------------------------------------------------- */
6859 : /* Transform the points from destination pixel/line coordinates */
6860 : /* to source pixel/line coordinates. */
6861 : /* ----------------------------------------------------------------- */
6862 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6863 : padfZ, pabSuccess);
6864 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6865 : padfY2, padfZ2, pabSuccess2);
6866 :
6867 8183 : if (dfSrcCoordPrecision > 0.0)
6868 : {
6869 0 : GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6870 : dfSrcCoordPrecision, dfErrorThreshold,
6871 0 : poWK->pfnTransformer, psJob->pTransformerArg,
6872 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6873 0 : GWKRoundSourceCoordinates(
6874 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6875 0 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6876 0 : 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6877 : }
6878 8183 : }
6879 :
6880 : /************************************************************************/
6881 : /* GWKAverageOrModeComputeSourceCoords() */
6882 : /************************************************************************/
6883 :
6884 2089340 : static bool GWKAverageOrModeComputeSourceCoords(
6885 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6886 : double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
6887 : // Output:
6888 : bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
6889 : double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
6890 : {
6891 2089340 : const GDALWarpKernel *poWK = psJob->poWK;
6892 2089340 : const int nSrcXSize = poWK->nSrcXSize;
6893 2089340 : const int nSrcYSize = poWK->nSrcYSize;
6894 :
6895 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6896 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6897 2089340 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6898 1992640 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6899 1992640 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6900 1965720 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6901 1965720 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6902 1912820 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6903 1912310 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6904 1910810 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6905 : {
6906 178602 : return false;
6907 : }
6908 :
6909 : // Compute corners in source crs.
6910 :
6911 : // The transformation might not have preserved ordering of
6912 : // coordinates so do the necessary swapping (#5433).
6913 : // NOTE: this is really an approximative fix. To do something
6914 : // more precise we would for example need to compute the
6915 : // transformation of coordinates in the
6916 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6917 : // coordinates, and take the bounding box of the got source
6918 : // coordinates.
6919 :
6920 1910740 : if (padfX[iDstX] > padfX2[iDstX])
6921 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
6922 :
6923 : // Detect situations where the target pixel is close to the
6924 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6925 : // close to the left-most and right-most columns of the source
6926 : // raster. The 2 value below was experimentally determined to
6927 : // avoid false-positives and false-negatives.
6928 : // Addresses https://github.com/OSGeo/gdal/issues/6478
6929 1910740 : bWrapOverX = false;
6930 1910740 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6931 1910740 : if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
6932 1903470 : 2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
6933 17795 : nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
6934 : {
6935 : // Check there is a discontinuity by checking at mid-pixel.
6936 : // NOTE: all this remains fragile. To confidently
6937 : // detect antimeridian warping we should probably try to access
6938 : // georeferenced coordinates, and not rely only on tests on
6939 : // image space coordinates. But accessing georeferenced
6940 : // coordinates from here is not trivial, and we would for example
6941 : // have to handle both geographic, Mercator, etc.
6942 : // Let's hope this heuristics is good enough for now.
6943 1200 : double x = iDstX + 0.5 + poWK->nDstXOff;
6944 1200 : double y = iDstY + poWK->nDstYOff;
6945 1200 : double z = 0;
6946 1200 : int bSuccess = FALSE;
6947 1200 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
6948 : &bSuccess);
6949 1200 : if (bSuccess && x < padfX[iDstX])
6950 : {
6951 1192 : bWrapOverX = true;
6952 1192 : std::swap(padfX[iDstX], padfX2[iDstX]);
6953 1192 : padfX2[iDstX] += nSrcXSize;
6954 : }
6955 : }
6956 :
6957 1910740 : dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6958 1910740 : dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6959 1910740 : constexpr double EPSILON = 1e-10;
6960 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6961 1910740 : if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
6962 372 : return false;
6963 1910370 : iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
6964 1910370 : iSrcXMax = static_cast<int>(
6965 1910370 : std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
6966 1910370 : if (!bWrapOverX)
6967 1909180 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6968 1910370 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6969 472 : iSrcXMax++;
6970 :
6971 1910370 : if (padfY[iDstX] > padfY2[iDstX])
6972 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
6973 1910370 : dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6974 1910370 : dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6975 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6976 1910370 : if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
6977 238 : return false;
6978 1910130 : iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
6979 1910130 : iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
6980 1910130 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6981 0 : iSrcYMax++;
6982 :
6983 1910130 : return true;
6984 : }
6985 :
6986 : /************************************************************************/
6987 : /* GWKModeRealType() */
6988 : /************************************************************************/
6989 :
6990 17780 : template <class T> static inline bool IsSame(T a, T b)
6991 : {
6992 17780 : return a == b;
6993 : }
6994 :
6995 0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
6996 : {
6997 0 : return a == b || (CPLIsNan(a) && CPLIsNan(b));
6998 : }
6999 :
7000 18 : template <> bool IsSame<float>(float a, float b)
7001 : {
7002 18 : return a == b || (std::isnan(a) && std::isnan(b));
7003 : }
7004 :
7005 56 : template <> bool IsSame<double>(double a, double b)
7006 : {
7007 56 : return a == b || (std::isnan(a) && std::isnan(b));
7008 : }
7009 :
7010 19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
7011 : {
7012 19 : const GDALWarpKernel *poWK = psJob->poWK;
7013 19 : const int iYMin = psJob->iYMin;
7014 19 : const int iYMax = psJob->iYMax;
7015 19 : const int nDstXSize = poWK->nDstXSize;
7016 19 : const int nSrcXSize = poWK->nSrcXSize;
7017 19 : const int nSrcYSize = poWK->nSrcYSize;
7018 19 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7019 :
7020 19 : T *pVals = nullptr;
7021 19 : float *pafCounts = nullptr;
7022 :
7023 19 : if (nSrcXSize > 0 && nSrcYSize > 0)
7024 : {
7025 : pVals = static_cast<T *>(
7026 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
7027 : pafCounts = static_cast<float *>(
7028 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7029 19 : if (pVals == nullptr || pafCounts == nullptr)
7030 : {
7031 0 : VSIFree(pVals);
7032 0 : VSIFree(pafCounts);
7033 0 : return;
7034 : }
7035 : }
7036 :
7037 : /* -------------------------------------------------------------------- */
7038 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7039 : /* scanlines worth of positions. */
7040 : /* -------------------------------------------------------------------- */
7041 :
7042 : double *padfX =
7043 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7044 : double *padfY =
7045 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7046 : double *padfZ =
7047 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7048 : double *padfX2 =
7049 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7050 : double *padfY2 =
7051 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7052 : double *padfZ2 =
7053 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7054 19 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7055 19 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7056 :
7057 19 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7058 19 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7059 19 : const double dfErrorThreshold = CPLAtof(
7060 19 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7061 19 : const bool bAvoidNoDataSingleBand =
7062 19 : poWK->nBands == 1 ||
7063 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7064 : "UNIFIED_SRC_NODATA", "FALSE"));
7065 :
7066 19 : const int nXMargin =
7067 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7068 19 : const int nYMargin =
7069 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7070 :
7071 : /* ==================================================================== */
7072 : /* Loop over output lines. */
7073 : /* ==================================================================== */
7074 116 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7075 : {
7076 97 : GWKAverageOrModeComputeLineCoords(
7077 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7078 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7079 :
7080 : // Loop over pixels in output scanline.
7081 3514 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7082 : {
7083 3417 : GPtrDiff_t iSrcOffset = 0;
7084 3417 : double dfDensity = 1.0;
7085 3417 : bool bHasFoundDensity = false;
7086 :
7087 3417 : bool bWrapOverX = false;
7088 3417 : double dfXMin = 0;
7089 3417 : double dfYMin = 0;
7090 3417 : double dfXMax = 0;
7091 3417 : double dfYMax = 0;
7092 3417 : int iSrcXMin = 0;
7093 3417 : int iSrcYMin = 0;
7094 3417 : int iSrcXMax = 0;
7095 3417 : int iSrcYMax = 0;
7096 3417 : if (!GWKAverageOrModeComputeSourceCoords(
7097 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7098 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7099 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7100 : {
7101 0 : continue;
7102 : }
7103 :
7104 3417 : const GPtrDiff_t iDstOffset =
7105 3417 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7106 :
7107 : // Loop processing each band.
7108 6834 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7109 : {
7110 3417 : double dfBandDensity = 0.0;
7111 :
7112 3417 : int nBins = 0;
7113 3417 : int iModeIndex = -1;
7114 3417 : T nVal{};
7115 :
7116 10248 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7117 : {
7118 6831 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7119 6831 : iSrcOffset =
7120 6831 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7121 20530 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7122 : iSrcX++, iSrcOffset++)
7123 : {
7124 13699 : if (bWrapOverX)
7125 0 : iSrcOffset =
7126 0 : (iSrcX % nSrcXSize) +
7127 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7128 :
7129 13699 : if (poWK->panUnifiedSrcValid != nullptr &&
7130 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7131 0 : continue;
7132 :
7133 13699 : if (GWKGetPixelT(poWK, iBand, iSrcOffset,
7134 27398 : &dfBandDensity, &nVal) &&
7135 13699 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7136 : {
7137 13699 : const double dfWeight =
7138 13699 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7139 :
7140 : // Check array for existing entry.
7141 13699 : int i = 0;
7142 29194 : for (i = 0; i < nBins; ++i)
7143 : {
7144 17807 : if (IsSame(pVals[i], nVal))
7145 : {
7146 :
7147 2312 : pafCounts[i] +=
7148 2312 : static_cast<float>(dfWeight);
7149 2312 : bool bValIsMaxCount =
7150 2312 : (pafCounts[i] > pafCounts[iModeIndex]);
7151 :
7152 2312 : if (!bValIsMaxCount &&
7153 1498 : pafCounts[i] == pafCounts[iModeIndex])
7154 : {
7155 1490 : switch (eTieStrategy)
7156 : {
7157 1477 : case GWKTS_First:
7158 1477 : break;
7159 6 : case GWKTS_Min:
7160 6 : bValIsMaxCount =
7161 6 : nVal < pVals[iModeIndex];
7162 6 : break;
7163 7 : case GWKTS_Max:
7164 7 : bValIsMaxCount =
7165 7 : nVal > pVals[iModeIndex];
7166 7 : break;
7167 : }
7168 : }
7169 :
7170 2312 : if (bValIsMaxCount)
7171 : {
7172 817 : iModeIndex = i;
7173 : }
7174 :
7175 2312 : break;
7176 : }
7177 : }
7178 :
7179 : // Add to arr if entry not already there.
7180 13699 : if (i == nBins)
7181 : {
7182 11387 : pVals[i] = nVal;
7183 11387 : pafCounts[i] = static_cast<float>(dfWeight);
7184 :
7185 11387 : if (iModeIndex < 0)
7186 3417 : iModeIndex = i;
7187 :
7188 11387 : ++nBins;
7189 : }
7190 : }
7191 : }
7192 : }
7193 :
7194 3417 : if (iModeIndex != -1)
7195 : {
7196 3417 : nVal = pVals[iModeIndex];
7197 3417 : dfBandDensity = 1;
7198 3417 : bHasFoundDensity = true;
7199 : }
7200 :
7201 : // We have a computed value from the source. Now apply it
7202 : // to the destination pixel
7203 3417 : if (bHasFoundDensity)
7204 : {
7205 3417 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
7206 : dfBandDensity, nVal,
7207 : bAvoidNoDataSingleBand);
7208 : }
7209 : }
7210 :
7211 3417 : if (!bHasFoundDensity)
7212 0 : continue;
7213 :
7214 3417 : if (!bAvoidNoDataSingleBand)
7215 : {
7216 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7217 : }
7218 :
7219 : /* --------------------------------------------------------------------
7220 : */
7221 : /* Update destination density/validity masks. */
7222 : /* --------------------------------------------------------------------
7223 : */
7224 3417 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7225 :
7226 3417 : if (poWK->panDstValid != nullptr)
7227 : {
7228 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7229 : }
7230 : } /* Next iDstX */
7231 :
7232 : /* --------------------------------------------------------------------
7233 : */
7234 : /* Report progress to the user, and optionally cancel out. */
7235 : /* --------------------------------------------------------------------
7236 : */
7237 97 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7238 0 : break;
7239 : }
7240 :
7241 : /* -------------------------------------------------------------------- */
7242 : /* Cleanup and return. */
7243 : /* -------------------------------------------------------------------- */
7244 19 : CPLFree(padfX);
7245 19 : CPLFree(padfY);
7246 19 : CPLFree(padfZ);
7247 19 : CPLFree(padfX2);
7248 19 : CPLFree(padfY2);
7249 19 : CPLFree(padfZ2);
7250 19 : CPLFree(pabSuccess);
7251 19 : CPLFree(pabSuccess2);
7252 19 : VSIFree(pVals);
7253 19 : VSIFree(pafCounts);
7254 : }
7255 :
7256 : /************************************************************************/
7257 : /* GWKModeComplexType() */
7258 : /************************************************************************/
7259 :
7260 8 : static void GWKModeComplexType(GWKJobStruct *psJob)
7261 : {
7262 8 : const GDALWarpKernel *poWK = psJob->poWK;
7263 8 : const int iYMin = psJob->iYMin;
7264 8 : const int iYMax = psJob->iYMax;
7265 8 : const int nDstXSize = poWK->nDstXSize;
7266 8 : const int nSrcXSize = poWK->nSrcXSize;
7267 8 : const int nSrcYSize = poWK->nSrcYSize;
7268 8 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7269 : const double dfMultFactorVerticalShiftPipeline =
7270 8 : poWK->bApplyVerticalShift
7271 8 : ? CPLAtof(CSLFetchNameValueDef(
7272 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7273 : "1.0"))
7274 8 : : 0.0;
7275 : const bool bAvoidNoDataSingleBand =
7276 8 : poWK->nBands == 1 ||
7277 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7278 8 : "UNIFIED_SRC_NODATA", "FALSE"));
7279 :
7280 8 : double *padfRealVals = nullptr;
7281 8 : double *padfImagVals = nullptr;
7282 8 : float *pafCounts = nullptr;
7283 :
7284 8 : if (nSrcXSize > 0 && nSrcYSize > 0)
7285 : {
7286 : padfRealVals = static_cast<double *>(
7287 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7288 : padfImagVals = static_cast<double *>(
7289 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7290 : pafCounts = static_cast<float *>(
7291 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7292 8 : if (padfRealVals == nullptr || padfImagVals == nullptr ||
7293 : pafCounts == nullptr)
7294 : {
7295 0 : VSIFree(padfRealVals);
7296 0 : VSIFree(padfImagVals);
7297 0 : VSIFree(pafCounts);
7298 0 : return;
7299 : }
7300 : }
7301 :
7302 : /* -------------------------------------------------------------------- */
7303 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7304 : /* scanlines worth of positions. */
7305 : /* -------------------------------------------------------------------- */
7306 :
7307 : double *padfX =
7308 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7309 : double *padfY =
7310 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7311 : double *padfZ =
7312 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7313 : double *padfX2 =
7314 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7315 : double *padfY2 =
7316 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7317 : double *padfZ2 =
7318 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7319 8 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7320 8 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7321 :
7322 8 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7323 8 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7324 8 : const double dfErrorThreshold = CPLAtof(
7325 8 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7326 :
7327 : const int nXMargin =
7328 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7329 : const int nYMargin =
7330 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7331 :
7332 : /* ==================================================================== */
7333 : /* Loop over output lines. */
7334 : /* ==================================================================== */
7335 16 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7336 : {
7337 8 : GWKAverageOrModeComputeLineCoords(
7338 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7339 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7340 :
7341 : // Loop over pixels in output scanline.
7342 16 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7343 : {
7344 8 : GPtrDiff_t iSrcOffset = 0;
7345 8 : double dfDensity = 1.0;
7346 8 : bool bHasFoundDensity = false;
7347 :
7348 8 : bool bWrapOverX = false;
7349 8 : double dfXMin = 0;
7350 8 : double dfYMin = 0;
7351 8 : double dfXMax = 0;
7352 8 : double dfYMax = 0;
7353 8 : int iSrcXMin = 0;
7354 8 : int iSrcYMin = 0;
7355 8 : int iSrcXMax = 0;
7356 8 : int iSrcYMax = 0;
7357 8 : if (!GWKAverageOrModeComputeSourceCoords(
7358 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7359 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7360 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7361 : {
7362 0 : continue;
7363 : }
7364 :
7365 8 : const GPtrDiff_t iDstOffset =
7366 8 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7367 :
7368 : // Loop processing each band.
7369 16 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7370 : {
7371 8 : double dfBandDensity = 0.0;
7372 :
7373 8 : int nBins = 0;
7374 8 : int iModeIndex = -1;
7375 8 : double dfValueReal = 0;
7376 8 : double dfValueImag = 0;
7377 :
7378 16 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7379 : {
7380 8 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7381 8 : iSrcOffset =
7382 8 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7383 38 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7384 : iSrcX++, iSrcOffset++)
7385 : {
7386 30 : if (bWrapOverX)
7387 0 : iSrcOffset =
7388 0 : (iSrcX % nSrcXSize) +
7389 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7390 :
7391 30 : if (poWK->panUnifiedSrcValid != nullptr &&
7392 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7393 0 : continue;
7394 :
7395 30 : if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
7396 : &dfBandDensity, &dfValueReal,
7397 60 : &dfValueImag) &&
7398 30 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7399 : {
7400 30 : const double dfWeight =
7401 30 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7402 :
7403 : // Check array for existing entry.
7404 30 : int i = 0;
7405 49 : for (i = 0; i < nBins; ++i)
7406 : {
7407 47 : if (IsSame(padfRealVals[i], dfValueReal) &&
7408 14 : IsSame(padfImagVals[i], dfValueImag))
7409 : {
7410 :
7411 14 : pafCounts[i] +=
7412 14 : static_cast<float>(dfWeight);
7413 14 : bool bValIsMaxCount =
7414 14 : (pafCounts[i] > pafCounts[iModeIndex]);
7415 :
7416 14 : if (!bValIsMaxCount &&
7417 6 : pafCounts[i] == pafCounts[iModeIndex])
7418 : {
7419 3 : switch (eTieStrategy)
7420 : {
7421 3 : case GWKTS_First:
7422 3 : break;
7423 0 : case GWKTS_Min:
7424 0 : bValIsMaxCount =
7425 0 : dfValueReal <
7426 0 : padfRealVals[iModeIndex];
7427 0 : break;
7428 0 : case GWKTS_Max:
7429 0 : bValIsMaxCount =
7430 0 : dfValueReal >
7431 0 : padfRealVals[iModeIndex];
7432 0 : break;
7433 : }
7434 : }
7435 :
7436 14 : if (bValIsMaxCount)
7437 : {
7438 8 : iModeIndex = i;
7439 : }
7440 :
7441 14 : break;
7442 : }
7443 : }
7444 :
7445 : // Add to arr if entry not already there.
7446 30 : if (i == nBins)
7447 : {
7448 16 : padfRealVals[i] = dfValueReal;
7449 16 : padfImagVals[i] = dfValueImag;
7450 16 : pafCounts[i] = static_cast<float>(dfWeight);
7451 :
7452 16 : if (iModeIndex < 0)
7453 8 : iModeIndex = i;
7454 :
7455 16 : ++nBins;
7456 : }
7457 : }
7458 : }
7459 : }
7460 :
7461 8 : if (iModeIndex != -1)
7462 : {
7463 8 : dfValueReal = padfRealVals[iModeIndex];
7464 8 : dfValueImag = padfImagVals[iModeIndex];
7465 8 : dfBandDensity = 1;
7466 :
7467 8 : if (poWK->bApplyVerticalShift)
7468 : {
7469 0 : if (!std::isfinite(padfZ[iDstX]))
7470 0 : continue;
7471 : // Subtract padfZ[] since the coordinate
7472 : // transformation is from target to source
7473 0 : dfValueReal =
7474 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7475 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
7476 : }
7477 :
7478 8 : bHasFoundDensity = true;
7479 : }
7480 :
7481 : // We have a computed value from the source. Now apply it
7482 : // to the destination pixel
7483 8 : if (bHasFoundDensity)
7484 : {
7485 8 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7486 : dfValueReal, dfValueImag,
7487 : bAvoidNoDataSingleBand);
7488 : }
7489 : }
7490 :
7491 8 : if (!bHasFoundDensity)
7492 0 : continue;
7493 :
7494 8 : if (!bAvoidNoDataSingleBand)
7495 : {
7496 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7497 : }
7498 :
7499 : /* --------------------------------------------------------------------
7500 : */
7501 : /* Update destination density/validity masks. */
7502 : /* --------------------------------------------------------------------
7503 : */
7504 8 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7505 :
7506 8 : if (poWK->panDstValid != nullptr)
7507 : {
7508 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7509 : }
7510 : } /* Next iDstX */
7511 :
7512 : /* --------------------------------------------------------------------
7513 : */
7514 : /* Report progress to the user, and optionally cancel out. */
7515 : /* --------------------------------------------------------------------
7516 : */
7517 8 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7518 0 : break;
7519 : }
7520 :
7521 : /* -------------------------------------------------------------------- */
7522 : /* Cleanup and return. */
7523 : /* -------------------------------------------------------------------- */
7524 8 : CPLFree(padfX);
7525 8 : CPLFree(padfY);
7526 8 : CPLFree(padfZ);
7527 8 : CPLFree(padfX2);
7528 8 : CPLFree(padfY2);
7529 8 : CPLFree(padfZ2);
7530 8 : CPLFree(pabSuccess);
7531 8 : CPLFree(pabSuccess2);
7532 8 : VSIFree(padfRealVals);
7533 8 : VSIFree(padfImagVals);
7534 8 : VSIFree(pafCounts);
7535 : }
7536 :
7537 : /************************************************************************/
7538 : /* GWKAverageOrModeThread() */
7539 : /************************************************************************/
7540 :
7541 : // Overall logic based on GWKGeneralCaseThread().
7542 163 : static void GWKAverageOrModeThread(void *pData)
7543 : {
7544 163 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7545 163 : const GDALWarpKernel *poWK = psJob->poWK;
7546 163 : const int iYMin = psJob->iYMin;
7547 163 : const int iYMax = psJob->iYMax;
7548 : const double dfMultFactorVerticalShiftPipeline =
7549 163 : poWK->bApplyVerticalShift
7550 163 : ? CPLAtof(CSLFetchNameValueDef(
7551 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7552 : "1.0"))
7553 163 : : 0.0;
7554 : const bool bAvoidNoDataSingleBand =
7555 194 : poWK->nBands == 1 ||
7556 31 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7557 163 : "UNIFIED_SRC_NODATA", "FALSE"));
7558 :
7559 163 : const int nDstXSize = poWK->nDstXSize;
7560 163 : const int nSrcXSize = poWK->nSrcXSize;
7561 :
7562 : /* -------------------------------------------------------------------- */
7563 : /* Find out which algorithm to use (small optim.) */
7564 : /* -------------------------------------------------------------------- */
7565 :
7566 : // Only used for GRA_Mode
7567 163 : float *pafCounts = nullptr;
7568 163 : int nBins = 0;
7569 163 : int nBinsOffset = 0;
7570 163 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7571 :
7572 : // Only used with Q1, Med and Q3
7573 163 : float quant = 0.0f;
7574 :
7575 : // To control array allocation only when data type is complex
7576 163 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7577 :
7578 163 : if (poWK->eResample == GRA_Mode)
7579 : {
7580 45 : if (poWK->bApplyVerticalShift)
7581 : {
7582 0 : return GWKModeComplexType(psJob);
7583 : }
7584 :
7585 45 : switch (poWK->eWorkingDataType)
7586 : {
7587 7 : case GDT_UInt8:
7588 7 : nBins = 256;
7589 7 : break;
7590 :
7591 0 : case GDT_Int8:
7592 0 : nBins = 256;
7593 0 : nBinsOffset = nBins / 2;
7594 0 : break;
7595 :
7596 1 : case GDT_UInt16:
7597 1 : nBins = 65536;
7598 1 : break;
7599 :
7600 10 : case GDT_Int16:
7601 10 : nBins = 65536;
7602 10 : nBinsOffset = nBins / 2;
7603 10 : break;
7604 :
7605 10 : case GDT_Int32:
7606 10 : return GWKModeRealType<int32_t>(psJob);
7607 :
7608 1 : case GDT_UInt32:
7609 1 : return GWKModeRealType<uint32_t>(psJob);
7610 :
7611 1 : case GDT_Int64:
7612 1 : return GWKModeRealType<int64_t>(psJob);
7613 :
7614 1 : case GDT_UInt64:
7615 1 : return GWKModeRealType<uint64_t>(psJob);
7616 :
7617 0 : case GDT_Float16:
7618 0 : return GWKModeRealType<GFloat16>(psJob);
7619 :
7620 4 : case GDT_Float32:
7621 4 : return GWKModeRealType<float>(psJob);
7622 :
7623 2 : case GDT_Float64:
7624 2 : return GWKModeRealType<double>(psJob);
7625 :
7626 8 : case GDT_CInt16:
7627 : case GDT_CInt32:
7628 : case GDT_CFloat16:
7629 : case GDT_CFloat32:
7630 : case GDT_CFloat64:
7631 8 : return GWKModeComplexType(psJob);
7632 :
7633 0 : case GDT_Unknown:
7634 : case GDT_TypeCount:
7635 0 : CPLAssert(false);
7636 : return;
7637 : }
7638 :
7639 18 : if (nBins)
7640 : {
7641 : pafCounts =
7642 18 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7643 18 : if (pafCounts == nullptr)
7644 0 : return;
7645 : }
7646 : }
7647 118 : else if (poWK->eResample == GRA_Med)
7648 : {
7649 6 : quant = 0.5f;
7650 : }
7651 112 : else if (poWK->eResample == GRA_Q1)
7652 : {
7653 10 : quant = 0.25f;
7654 : }
7655 102 : else if (poWK->eResample == GRA_Q3)
7656 : {
7657 5 : quant = 0.75f;
7658 : }
7659 97 : else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7660 11 : poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7661 : {
7662 : // Other resample algorithms not permitted here.
7663 0 : CPLError(CE_Fatal, CPLE_AppDefined,
7664 : "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7665 : "illegal resample");
7666 : }
7667 :
7668 136 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7669 :
7670 : /* -------------------------------------------------------------------- */
7671 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7672 : /* scanlines worth of positions. */
7673 : /* -------------------------------------------------------------------- */
7674 :
7675 : double *padfX =
7676 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7677 : double *padfY =
7678 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7679 : double *padfZ =
7680 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7681 : double *padfX2 =
7682 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7683 : double *padfY2 =
7684 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7685 : double *padfZ2 =
7686 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7687 136 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7688 136 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7689 :
7690 136 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7691 136 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7692 136 : const double dfErrorThreshold = CPLAtof(
7693 136 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7694 :
7695 : const double dfExcludedValuesThreshold =
7696 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7697 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7698 136 : 100.0;
7699 : const double dfNodataValuesThreshold =
7700 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7701 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7702 136 : 100.0;
7703 :
7704 : const int nXMargin =
7705 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7706 : const int nYMargin =
7707 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7708 :
7709 : /* ==================================================================== */
7710 : /* Loop over output lines. */
7711 : /* ==================================================================== */
7712 8214 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7713 : {
7714 8078 : GWKAverageOrModeComputeLineCoords(
7715 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7716 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7717 :
7718 : /* ====================================================================
7719 : */
7720 : /* Loop over pixels in output scanline. */
7721 : /* ====================================================================
7722 : */
7723 2094000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7724 : {
7725 2085920 : GPtrDiff_t iSrcOffset = 0;
7726 2085920 : double dfDensity = 1.0;
7727 2085920 : bool bHasFoundDensity = false;
7728 :
7729 2085920 : bool bWrapOverX = false;
7730 2085920 : double dfXMin = 0;
7731 2085920 : double dfYMin = 0;
7732 2085920 : double dfXMax = 0;
7733 2085920 : double dfYMax = 0;
7734 2085920 : int iSrcXMin = 0;
7735 2085920 : int iSrcYMin = 0;
7736 2085920 : int iSrcXMax = 0;
7737 2085920 : int iSrcYMax = 0;
7738 2085920 : if (!GWKAverageOrModeComputeSourceCoords(
7739 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7740 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7741 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7742 : {
7743 687183 : continue;
7744 : }
7745 :
7746 1906710 : const GPtrDiff_t iDstOffset =
7747 1906710 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7748 :
7749 1906710 : bool bDone = false;
7750 :
7751 : // Special Average mode where we process all bands together,
7752 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7753 1906710 : constexpr double EPSILON = 1e-10;
7754 4614100 : if (poWK->eResample == GRA_Average &&
7755 800681 : (!poWK->m_aadfExcludedValues.empty() ||
7756 589832 : dfNodataValuesThreshold < 1 - EPSILON) &&
7757 2707390 : !poWK->bApplyVerticalShift && !bIsComplex)
7758 : {
7759 589832 : double dfTotalWeightInvalid = 0.0;
7760 589832 : double dfTotalWeightExcluded = 0.0;
7761 589832 : double dfTotalWeightRegular = 0.0;
7762 1179660 : std::vector<double> adfValueReal(poWK->nBands, 0);
7763 1179660 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7764 : std::vector<int> anCountExcludedValues(
7765 589832 : poWK->m_aadfExcludedValues.size(), 0);
7766 :
7767 2162710 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7768 : {
7769 1572880 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7770 1572880 : iSrcOffset =
7771 1572880 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7772 6291500 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7773 : iSrcX++, iSrcOffset++)
7774 : {
7775 4718620 : if (bWrapOverX)
7776 0 : iSrcOffset =
7777 0 : (iSrcX % nSrcXSize) +
7778 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7779 :
7780 4718620 : const double dfWeight =
7781 4718620 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7782 4718620 : if (dfWeight <= 0)
7783 0 : continue;
7784 :
7785 4718640 : if (poWK->panUnifiedSrcValid != nullptr &&
7786 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7787 : {
7788 3 : dfTotalWeightInvalid += dfWeight;
7789 3 : continue;
7790 : }
7791 :
7792 4718620 : bool bAllValid = true;
7793 8651150 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7794 : {
7795 7340300 : double dfBandDensity = 0;
7796 7340300 : double dfValueImagTmp = 0;
7797 11272800 : if (!(GWKGetPixelValue(
7798 : poWK, iBand, iSrcOffset, &dfBandDensity,
7799 7340300 : &adfValueReal[iBand], &dfValueImagTmp) &&
7800 3932530 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7801 : {
7802 3407770 : bAllValid = false;
7803 3407770 : break;
7804 : }
7805 : }
7806 :
7807 4718620 : if (!bAllValid)
7808 : {
7809 3407770 : dfTotalWeightInvalid += dfWeight;
7810 3407770 : continue;
7811 : }
7812 :
7813 1310850 : bool bExcludedValueFound = false;
7814 2490500 : for (size_t i = 0;
7815 2490500 : i < poWK->m_aadfExcludedValues.size(); ++i)
7816 : {
7817 1179670 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7818 : {
7819 22 : bExcludedValueFound = true;
7820 22 : ++anCountExcludedValues[i];
7821 22 : dfTotalWeightExcluded += dfWeight;
7822 22 : break;
7823 : }
7824 : }
7825 1310850 : if (!bExcludedValueFound)
7826 : {
7827 : // Weighted incremental algorithm mean
7828 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7829 1310830 : dfTotalWeightRegular += dfWeight;
7830 5243290 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7831 : {
7832 3932460 : adfValueAveraged[iBand] +=
7833 7864930 : (dfWeight / dfTotalWeightRegular) *
7834 7864930 : (adfValueReal[iBand] -
7835 3932460 : adfValueAveraged[iBand]);
7836 : }
7837 : }
7838 : }
7839 : }
7840 :
7841 589832 : const double dfTotalWeight = dfTotalWeightInvalid +
7842 : dfTotalWeightExcluded +
7843 : dfTotalWeightRegular;
7844 589832 : if (dfTotalWeightInvalid > 0 &&
7845 : dfTotalWeightInvalid >=
7846 458751 : dfNodataValuesThreshold * dfTotalWeight)
7847 : {
7848 : // Do nothing. Let bHasFoundDensity to false.
7849 : }
7850 131085 : else if (dfTotalWeightExcluded > 0 &&
7851 : dfTotalWeightExcluded >=
7852 7 : dfExcludedValuesThreshold * dfTotalWeight)
7853 : {
7854 : // Find the most represented excluded value tuple
7855 3 : size_t iExcludedValue = 0;
7856 3 : int nExcludedValueCount = 0;
7857 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7858 : ++i)
7859 : {
7860 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
7861 : {
7862 3 : iExcludedValue = i;
7863 3 : nExcludedValueCount = anCountExcludedValues[i];
7864 : }
7865 : }
7866 :
7867 3 : bHasFoundDensity = true;
7868 :
7869 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7870 : {
7871 9 : GWKSetPixelValue(
7872 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7873 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7874 : 0, bAvoidNoDataSingleBand);
7875 : }
7876 :
7877 3 : if (!bAvoidNoDataSingleBand)
7878 : {
7879 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7880 3 : }
7881 : }
7882 131082 : else if (dfTotalWeightRegular > 0)
7883 : {
7884 131082 : bHasFoundDensity = true;
7885 :
7886 524324 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7887 : {
7888 393242 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7889 : /* dfBandDensity = */ 1.0,
7890 393242 : adfValueAveraged[iBand], 0,
7891 : bAvoidNoDataSingleBand);
7892 : }
7893 :
7894 131082 : if (!bAvoidNoDataSingleBand)
7895 : {
7896 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7897 : }
7898 : }
7899 :
7900 : // Skip below loop on bands
7901 589832 : bDone = true;
7902 : }
7903 :
7904 : /* ====================================================================
7905 : */
7906 : /* Loop processing each band. */
7907 : /* ====================================================================
7908 : */
7909 :
7910 4730010 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7911 : {
7912 2823310 : double dfBandDensity = 0.0;
7913 2823310 : double dfValueReal = 0.0;
7914 2823310 : double dfValueImag = 0.0;
7915 2823310 : double dfValueRealTmp = 0.0;
7916 2823310 : double dfValueImagTmp = 0.0;
7917 :
7918 : /* --------------------------------------------------------------------
7919 : */
7920 : /* Collect the source value. */
7921 : /* --------------------------------------------------------------------
7922 : */
7923 :
7924 : // Loop over source lines and pixels - 3 possible algorithms.
7925 :
7926 2823310 : if (poWK->eResample == GRA_Average)
7927 : {
7928 300849 : double dfTotalWeight = 0.0;
7929 :
7930 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7931 : // in gcore/overview.cpp.
7932 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7933 : {
7934 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7935 330459 : iSrcOffset = iSrcXMin +
7936 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7937 773407 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7938 : iSrcX++, iSrcOffset++)
7939 : {
7940 442948 : if (bWrapOverX)
7941 1371 : iSrcOffset =
7942 1371 : (iSrcX % nSrcXSize) +
7943 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7944 :
7945 442952 : if (poWK->panUnifiedSrcValid != nullptr &&
7946 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7947 : iSrcOffset))
7948 : {
7949 1 : continue;
7950 : }
7951 :
7952 442947 : if (GWKGetPixelValue(
7953 : poWK, iBand, iSrcOffset, &dfBandDensity,
7954 885894 : &dfValueRealTmp, &dfValueImagTmp) &&
7955 442947 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7956 : {
7957 442947 : const double dfWeight =
7958 442947 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7959 442947 : if (dfWeight > 0)
7960 : {
7961 : // Weighted incremental algorithm mean
7962 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7963 442947 : dfTotalWeight += dfWeight;
7964 442947 : dfValueReal +=
7965 442947 : (dfWeight / dfTotalWeight) *
7966 442947 : (dfValueRealTmp - dfValueReal);
7967 442947 : if (bIsComplex)
7968 : {
7969 252 : dfValueImag +=
7970 252 : (dfWeight / dfTotalWeight) *
7971 252 : (dfValueImagTmp - dfValueImag);
7972 : }
7973 : }
7974 : }
7975 : }
7976 : }
7977 :
7978 300849 : if (dfTotalWeight > 0)
7979 : {
7980 300849 : if (poWK->bApplyVerticalShift)
7981 : {
7982 0 : if (!std::isfinite(padfZ[iDstX]))
7983 0 : continue;
7984 : // Subtract padfZ[] since the coordinate
7985 : // transformation is from target to source
7986 0 : dfValueReal =
7987 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7988 0 : padfZ[iDstX] *
7989 : dfMultFactorVerticalShiftPipeline;
7990 : }
7991 :
7992 300849 : dfBandDensity = 1;
7993 300849 : bHasFoundDensity = true;
7994 : }
7995 : } // GRA_Average.
7996 :
7997 2522460 : else if (poWK->eResample == GRA_RMS)
7998 : {
7999 300416 : double dfTotalReal = 0.0;
8000 300416 : double dfTotalImag = 0.0;
8001 300416 : double dfTotalWeight = 0.0;
8002 : // This code adapted from GDALDownsampleChunk32R_AverageT()
8003 : // in gcore/overview.cpp.
8004 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8005 : {
8006 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8007 330162 : iSrcOffset = iSrcXMin +
8008 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8009 772930 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8010 : iSrcX++, iSrcOffset++)
8011 : {
8012 442768 : if (bWrapOverX)
8013 1371 : iSrcOffset =
8014 1371 : (iSrcX % nSrcXSize) +
8015 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8016 :
8017 442768 : if (poWK->panUnifiedSrcValid != nullptr &&
8018 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8019 : iSrcOffset))
8020 : {
8021 0 : continue;
8022 : }
8023 :
8024 442768 : if (GWKGetPixelValue(
8025 : poWK, iBand, iSrcOffset, &dfBandDensity,
8026 885536 : &dfValueRealTmp, &dfValueImagTmp) &&
8027 442768 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8028 : {
8029 442768 : const double dfWeight =
8030 442768 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8031 442768 : dfTotalWeight += dfWeight;
8032 442768 : dfTotalReal +=
8033 442768 : dfValueRealTmp * dfValueRealTmp * dfWeight;
8034 442768 : if (bIsComplex)
8035 48 : dfTotalImag += dfValueImagTmp *
8036 48 : dfValueImagTmp * dfWeight;
8037 : }
8038 : }
8039 : }
8040 :
8041 300416 : if (dfTotalWeight > 0)
8042 : {
8043 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
8044 :
8045 300416 : if (poWK->bApplyVerticalShift)
8046 : {
8047 0 : if (!std::isfinite(padfZ[iDstX]))
8048 0 : continue;
8049 : // Subtract padfZ[] since the coordinate
8050 : // transformation is from target to source
8051 0 : dfValueReal =
8052 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8053 0 : padfZ[iDstX] *
8054 : dfMultFactorVerticalShiftPipeline;
8055 : }
8056 :
8057 300416 : if (bIsComplex)
8058 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
8059 :
8060 300416 : dfBandDensity = 1;
8061 300416 : bHasFoundDensity = true;
8062 : }
8063 : } // GRA_RMS.
8064 :
8065 2222040 : else if (poWK->eResample == GRA_Mode)
8066 : {
8067 496623 : float fMaxCount = 0.0f;
8068 496623 : int nMode = -1;
8069 496623 : bool bHasSourceValues = false;
8070 :
8071 496623 : memset(pafCounts, 0, nBins * sizeof(float));
8072 :
8073 1612560 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8074 : {
8075 1115940 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8076 1115940 : iSrcOffset = iSrcXMin +
8077 1115940 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8078 4703370 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8079 : iSrcX++, iSrcOffset++)
8080 : {
8081 3587430 : if (bWrapOverX)
8082 1371 : iSrcOffset =
8083 1371 : (iSrcX % nSrcXSize) +
8084 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8085 :
8086 3587430 : if (poWK->panUnifiedSrcValid != nullptr &&
8087 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8088 : iSrcOffset))
8089 0 : continue;
8090 :
8091 3587430 : if (GWKGetPixelValue(
8092 : poWK, iBand, iSrcOffset, &dfBandDensity,
8093 7174870 : &dfValueRealTmp, &dfValueImagTmp) &&
8094 3587430 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8095 : {
8096 3587430 : bHasSourceValues = true;
8097 3587430 : const int nVal =
8098 3587430 : static_cast<int>(dfValueRealTmp);
8099 3587430 : const int iBin = nVal + nBinsOffset;
8100 3587430 : const double dfWeight =
8101 3587430 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8102 :
8103 : // Sum the density.
8104 3587430 : pafCounts[iBin] += static_cast<float>(dfWeight);
8105 : // Is it the most common value so far?
8106 3587430 : bool bUpdateMode = pafCounts[iBin] > fMaxCount;
8107 3587430 : if (!bUpdateMode &&
8108 750293 : pafCounts[iBin] == fMaxCount)
8109 : {
8110 217592 : switch (eTieStrategy)
8111 : {
8112 217584 : case GWKTS_First:
8113 217584 : break;
8114 4 : case GWKTS_Min:
8115 4 : bUpdateMode = nVal < nMode;
8116 4 : break;
8117 4 : case GWKTS_Max:
8118 4 : bUpdateMode = nVal > nMode;
8119 4 : break;
8120 : }
8121 : }
8122 3587430 : if (bUpdateMode)
8123 : {
8124 2837140 : nMode = nVal;
8125 2837140 : fMaxCount = pafCounts[iBin];
8126 : }
8127 : }
8128 : }
8129 : }
8130 :
8131 496623 : if (bHasSourceValues)
8132 : {
8133 496623 : dfValueReal = nMode;
8134 496623 : dfBandDensity = 1;
8135 496623 : bHasFoundDensity = true;
8136 : }
8137 : } // GRA_Mode.
8138 :
8139 1725420 : else if (poWK->eResample == GRA_Max)
8140 : {
8141 335037 : bool bFoundValid = false;
8142 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
8143 : // This code adapted from nAlgo 1 method, GRA_Average.
8144 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8145 : {
8146 952975 : iSrcOffset = iSrcXMin +
8147 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8148 4376740 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8149 : iSrcX++, iSrcOffset++)
8150 : {
8151 3423770 : if (bWrapOverX)
8152 1371 : iSrcOffset =
8153 1371 : (iSrcX % nSrcXSize) +
8154 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8155 :
8156 3426580 : if (poWK->panUnifiedSrcValid != nullptr &&
8157 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8158 : iSrcOffset))
8159 : {
8160 2446 : continue;
8161 : }
8162 :
8163 : // Returns pixel value if it is not no data.
8164 3421320 : if (GWKGetPixelValue(
8165 : poWK, iBand, iSrcOffset, &dfBandDensity,
8166 6842640 : &dfValueRealTmp, &dfValueImagTmp) &&
8167 3421320 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8168 : {
8169 3421320 : bFoundValid = true;
8170 3421320 : if (dfTotalReal < dfValueRealTmp)
8171 : {
8172 442234 : dfTotalReal = dfValueRealTmp;
8173 : }
8174 : }
8175 : }
8176 : }
8177 :
8178 335037 : if (bFoundValid)
8179 : {
8180 335037 : dfValueReal = dfTotalReal;
8181 :
8182 335037 : if (poWK->bApplyVerticalShift)
8183 : {
8184 0 : if (!std::isfinite(padfZ[iDstX]))
8185 0 : continue;
8186 : // Subtract padfZ[] since the coordinate
8187 : // transformation is from target to source
8188 0 : dfValueReal =
8189 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8190 0 : padfZ[iDstX] *
8191 : dfMultFactorVerticalShiftPipeline;
8192 : }
8193 :
8194 335037 : dfBandDensity = 1;
8195 335037 : bHasFoundDensity = true;
8196 : }
8197 : }
8198 :
8199 1390380 : else if (poWK->eResample == GRA_Min)
8200 : {
8201 335012 : bool bFoundValid = false;
8202 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
8203 : // This code adapted from nAlgo 1 method, GRA_Average.
8204 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8205 : {
8206 952710 : iSrcOffset = iSrcXMin +
8207 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8208 4373670 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8209 : iSrcX++, iSrcOffset++)
8210 : {
8211 3420960 : if (bWrapOverX)
8212 1371 : iSrcOffset =
8213 1371 : (iSrcX % nSrcXSize) +
8214 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8215 :
8216 3420960 : if (poWK->panUnifiedSrcValid != nullptr &&
8217 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8218 : iSrcOffset))
8219 : {
8220 0 : continue;
8221 : }
8222 :
8223 : // Returns pixel value if it is not no data.
8224 3420960 : if (GWKGetPixelValue(
8225 : poWK, iBand, iSrcOffset, &dfBandDensity,
8226 6841920 : &dfValueRealTmp, &dfValueImagTmp) &&
8227 3420960 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8228 : {
8229 3420960 : bFoundValid = true;
8230 3420960 : if (dfTotalReal > dfValueRealTmp)
8231 : {
8232 442628 : dfTotalReal = dfValueRealTmp;
8233 : }
8234 : }
8235 : }
8236 : }
8237 :
8238 335012 : if (bFoundValid)
8239 : {
8240 335012 : dfValueReal = dfTotalReal;
8241 :
8242 335012 : if (poWK->bApplyVerticalShift)
8243 : {
8244 0 : if (!std::isfinite(padfZ[iDstX]))
8245 0 : continue;
8246 : // Subtract padfZ[] since the coordinate
8247 : // transformation is from target to source
8248 0 : dfValueReal =
8249 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8250 0 : padfZ[iDstX] *
8251 : dfMultFactorVerticalShiftPipeline;
8252 : }
8253 :
8254 335012 : dfBandDensity = 1;
8255 335012 : bHasFoundDensity = true;
8256 : }
8257 : } // GRA_Min.
8258 :
8259 : else
8260 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
8261 : {
8262 1055370 : CPLAssert(quant > 0.0f);
8263 :
8264 1055370 : bool bFoundValid = false;
8265 1055370 : std::vector<double> dfRealValuesTmp;
8266 :
8267 : // This code adapted from nAlgo 1 method, GRA_Average.
8268 4014130 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8269 : {
8270 2958760 : iSrcOffset = iSrcXMin +
8271 2958760 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8272 13421300 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8273 : iSrcX++, iSrcOffset++)
8274 : {
8275 10462500 : if (bWrapOverX)
8276 4113 : iSrcOffset =
8277 4113 : (iSrcX % nSrcXSize) +
8278 4113 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8279 :
8280 10659100 : if (poWK->panUnifiedSrcValid != nullptr &&
8281 196608 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8282 : iSrcOffset))
8283 : {
8284 195449 : continue;
8285 : }
8286 :
8287 : // Returns pixel value if it is not no data.
8288 10267100 : if (GWKGetPixelValue(
8289 : poWK, iBand, iSrcOffset, &dfBandDensity,
8290 20534100 : &dfValueRealTmp, &dfValueImagTmp) &&
8291 10267100 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8292 : {
8293 10267100 : bFoundValid = true;
8294 10267100 : dfRealValuesTmp.push_back(dfValueRealTmp);
8295 : }
8296 : }
8297 : }
8298 :
8299 1055370 : if (bFoundValid)
8300 : {
8301 1006150 : std::sort(dfRealValuesTmp.begin(),
8302 : dfRealValuesTmp.end());
8303 : int quantIdx = static_cast<int>(
8304 1006150 : std::ceil(quant * dfRealValuesTmp.size() - 1));
8305 1006150 : dfValueReal = dfRealValuesTmp[quantIdx];
8306 :
8307 1006150 : if (poWK->bApplyVerticalShift)
8308 : {
8309 0 : if (!std::isfinite(padfZ[iDstX]))
8310 0 : continue;
8311 : // Subtract padfZ[] since the coordinate
8312 : // transformation is from target to source
8313 0 : dfValueReal =
8314 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8315 0 : padfZ[iDstX] *
8316 : dfMultFactorVerticalShiftPipeline;
8317 : }
8318 :
8319 1006150 : dfBandDensity = 1;
8320 1006150 : bHasFoundDensity = true;
8321 1006150 : dfRealValuesTmp.clear();
8322 : }
8323 : } // Quantile.
8324 :
8325 : /* --------------------------------------------------------------------
8326 : */
8327 : /* We have a computed value from the source. Now apply it
8328 : * to */
8329 : /* the destination pixel. */
8330 : /* --------------------------------------------------------------------
8331 : */
8332 2823310 : if (bHasFoundDensity)
8333 : {
8334 : // TODO: Should we compute dfBandDensity in fct of
8335 : // nCount/nCount2, or use as a threshold to set the dest
8336 : // value?
8337 : // dfBandDensity = (float) nCount / nCount2;
8338 : // if( (float) nCount / nCount2 > 0.1 )
8339 : // or fix gdalwarp crop_to_cutline to crop partially
8340 : // overlapping pixels.
8341 2774080 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8342 : dfValueReal, dfValueImag,
8343 : bAvoidNoDataSingleBand);
8344 : }
8345 : }
8346 :
8347 1906710 : if (!bHasFoundDensity)
8348 507971 : continue;
8349 :
8350 1398740 : if (!bAvoidNoDataSingleBand)
8351 : {
8352 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
8353 : }
8354 :
8355 : /* --------------------------------------------------------------------
8356 : */
8357 : /* Update destination density/validity masks. */
8358 : /* --------------------------------------------------------------------
8359 : */
8360 1398740 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8361 :
8362 1398740 : if (poWK->panDstValid != nullptr)
8363 : {
8364 1184 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8365 : }
8366 : } /* Next iDstX */
8367 :
8368 : /* --------------------------------------------------------------------
8369 : */
8370 : /* Report progress to the user, and optionally cancel out. */
8371 : /* --------------------------------------------------------------------
8372 : */
8373 8078 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8374 0 : break;
8375 : }
8376 :
8377 : /* -------------------------------------------------------------------- */
8378 : /* Cleanup and return. */
8379 : /* -------------------------------------------------------------------- */
8380 136 : CPLFree(padfX);
8381 136 : CPLFree(padfY);
8382 136 : CPLFree(padfZ);
8383 136 : CPLFree(padfX2);
8384 136 : CPLFree(padfY2);
8385 136 : CPLFree(padfZ2);
8386 136 : CPLFree(pabSuccess);
8387 136 : CPLFree(pabSuccess2);
8388 136 : VSIFree(pafCounts);
8389 : }
8390 :
8391 : /************************************************************************/
8392 : /* getOrientation() */
8393 : /************************************************************************/
8394 :
8395 : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
8396 : // -1 if it is counter-clockwise oriented,
8397 : // or 0 if it is colinear.
8398 2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
8399 : {
8400 2355910 : const double p1x = p1.first;
8401 2355910 : const double p1y = p1.second;
8402 2355910 : const double p2x = p2.first;
8403 2355910 : const double p2y = p2.second;
8404 2355910 : const double p3x = p3.first;
8405 2355910 : const double p3y = p3.second;
8406 2355910 : const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
8407 2355910 : if (std::abs(val) < 1e-20)
8408 2690 : return 0;
8409 2353220 : else if (val > 0)
8410 0 : return 1;
8411 : else
8412 2353220 : return -1;
8413 : }
8414 :
8415 : /************************************************************************/
8416 : /* isConvex() */
8417 : /************************************************************************/
8418 :
8419 : // poly must be closed
8420 785302 : static bool isConvex(const XYPoly &poly)
8421 : {
8422 785302 : const size_t n = poly.size();
8423 785302 : size_t i = 0;
8424 785302 : int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8425 785302 : ++i;
8426 2355910 : for (; i < n - 2; ++i)
8427 : {
8428 : const int orientation =
8429 1570600 : getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8430 1570600 : if (orientation != 0)
8431 : {
8432 1567910 : if (last_orientation == 0)
8433 0 : last_orientation = orientation;
8434 1567910 : else if (orientation != last_orientation)
8435 0 : return false;
8436 : }
8437 : }
8438 785302 : return true;
8439 : }
8440 :
8441 : /************************************************************************/
8442 : /* pointIntersectsConvexPoly() */
8443 : /************************************************************************/
8444 :
8445 : // Returns whether xy intersects poly, that must be closed and convex.
8446 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8447 : {
8448 6049100 : const size_t n = poly.size();
8449 6049100 : double dx1 = xy.first - poly[0].first;
8450 6049100 : double dy1 = xy.second - poly[0].second;
8451 6049100 : double dx2 = poly[1].first - poly[0].first;
8452 6049100 : double dy2 = poly[1].second - poly[0].second;
8453 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8454 :
8455 : // Check if the point remains on the same side (left/right) of all edges
8456 14556400 : for (size_t i = 2; i < n; i++)
8457 : {
8458 12793100 : dx1 = xy.first - poly[i - 1].first;
8459 12793100 : dy1 = xy.second - poly[i - 1].second;
8460 :
8461 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8462 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8463 :
8464 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8465 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8466 725558 : prevCrossProduct = crossProduct;
8467 12067500 : else if (prevCrossProduct * crossProduct < 0)
8468 4285760 : return false;
8469 : }
8470 :
8471 1763340 : return true;
8472 : }
8473 :
8474 : /************************************************************************/
8475 : /* getIntersection() */
8476 : /************************************************************************/
8477 :
8478 : /* Returns intersection of [p1,p2] with [p3,p4], if
8479 : * it is a single point, and the 2 segments are not colinear.
8480 : */
8481 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8482 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8483 : {
8484 11811000 : const double x1 = p1.first;
8485 11811000 : const double y1 = p1.second;
8486 11811000 : const double x2 = p2.first;
8487 11811000 : const double y2 = p2.second;
8488 11811000 : const double x3 = p3.first;
8489 11811000 : const double y3 = p3.second;
8490 11811000 : const double x4 = p4.first;
8491 11811000 : const double y4 = p4.second;
8492 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8493 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8494 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8495 9260780 : return false;
8496 :
8497 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8498 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8499 973924 : return false;
8500 :
8501 1576340 : const double t = t_num / denom;
8502 1576340 : xy.first = x1 + t * (x2 - x1);
8503 1576340 : xy.second = y1 + t * (y2 - y1);
8504 1576340 : return true;
8505 : }
8506 :
8507 : /************************************************************************/
8508 : /* getConvexPolyIntersection() */
8509 : /************************************************************************/
8510 :
8511 : // poly1 and poly2 must be closed and convex.
8512 : // The returned intersection will not necessary be closed.
8513 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8514 : XYPoly &intersection)
8515 : {
8516 785302 : intersection.clear();
8517 :
8518 : // Add all points of poly1 inside poly2
8519 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8520 : {
8521 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8522 1187430 : intersection.push_back(poly1[i]);
8523 : }
8524 785302 : if (intersection.size() == poly1.size() - 1)
8525 : {
8526 : // poly1 is inside poly2
8527 119100 : return;
8528 : }
8529 :
8530 : // Add all points of poly2 inside poly1
8531 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8532 : {
8533 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8534 575904 : intersection.push_back(poly2[i]);
8535 : }
8536 :
8537 : // Compute the intersection of all edges of both polygons
8538 726972 : XYPair xy;
8539 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8540 : {
8541 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8542 : {
8543 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8544 11631600 : poly2[i2 + 1], xy))
8545 : {
8546 1576230 : intersection.push_back(xy);
8547 : }
8548 : }
8549 : }
8550 :
8551 726972 : if (intersection.empty())
8552 60770 : return;
8553 :
8554 : // Find lowest-left point in intersection set
8555 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8556 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8557 3772450 : for (const auto &pair : intersection)
8558 : {
8559 3106240 : const double x = pair.first;
8560 3106240 : const double y = pair.second;
8561 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8562 : {
8563 1096040 : lowest_x = x;
8564 1096040 : lowest_y = y;
8565 : }
8566 : }
8567 :
8568 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8569 : {
8570 5737980 : const double p1x_diff = p1.first - lowest_x;
8571 5737980 : const double p1y_diff = p1.second - lowest_y;
8572 5737980 : const double p2x_diff = p2.first - lowest_x;
8573 5737980 : const double p2y_diff = p2.second - lowest_y;
8574 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8575 : {
8576 2655420 : if (p1x_diff >= 0)
8577 : {
8578 2655420 : if (p2x_diff >= 0)
8579 2655420 : return p1.first < p2.first;
8580 0 : return true;
8581 : }
8582 : else
8583 : {
8584 0 : if (p2x_diff >= 0)
8585 0 : return false;
8586 0 : return p1.first < p2.first;
8587 : }
8588 : }
8589 :
8590 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8591 1046960 : return p1.second < p2.second;
8592 :
8593 : double tan_p1;
8594 2035600 : if (p1x_diff == 0.0)
8595 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8596 : else
8597 1570980 : tan_p1 = p1y_diff / p1x_diff;
8598 :
8599 : double tan_p2;
8600 2035600 : if (p2x_diff == 0.0)
8601 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8602 : else
8603 1196080 : tan_p2 = p2y_diff / p2x_diff;
8604 :
8605 2035600 : if (tan_p1 >= 0)
8606 : {
8607 1904790 : if (tan_p2 >= 0)
8608 1881590 : return tan_p1 < tan_p2;
8609 : else
8610 23199 : return true;
8611 : }
8612 : else
8613 : {
8614 130806 : if (tan_p2 >= 0)
8615 103900 : return false;
8616 : else
8617 26906 : return tan_p1 < tan_p2;
8618 : }
8619 666202 : };
8620 :
8621 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8622 : // hull
8623 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8624 :
8625 : // Remove duplicated points
8626 666202 : size_t j = 1;
8627 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8628 : {
8629 2440040 : if (intersection[i] != intersection[i - 1])
8630 : {
8631 1452560 : if (j < i)
8632 545275 : intersection[j] = intersection[i];
8633 1452560 : ++j;
8634 : }
8635 : }
8636 666202 : intersection.resize(j);
8637 : }
8638 :
8639 : /************************************************************************/
8640 : /* GWKSumPreserving() */
8641 : /************************************************************************/
8642 :
8643 : static void GWKSumPreservingThread(void *pData);
8644 :
8645 19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8646 : {
8647 19 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8648 : }
8649 :
8650 19 : static void GWKSumPreservingThread(void *pData)
8651 : {
8652 19 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8653 19 : GDALWarpKernel *poWK = psJob->poWK;
8654 19 : const int iYMin = psJob->iYMin;
8655 19 : const int iYMax = psJob->iYMax;
8656 : const bool bIsAffineNoRotation =
8657 19 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8658 28 : poWK->pTransformerArg) &&
8659 : // for debug/testing purposes
8660 9 : CPLTestBool(
8661 19 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8662 : const bool bAvoidNoDataSingleBand =
8663 21 : poWK->nBands == 1 ||
8664 2 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
8665 19 : "UNIFIED_SRC_NODATA", "FALSE"));
8666 :
8667 19 : const int nDstXSize = poWK->nDstXSize;
8668 19 : const int nSrcXSize = poWK->nSrcXSize;
8669 19 : const int nSrcYSize = poWK->nSrcYSize;
8670 :
8671 38 : std::vector<double> adfX0(nSrcXSize + 1);
8672 38 : std::vector<double> adfY0(nSrcXSize + 1);
8673 38 : std::vector<double> adfZ0(nSrcXSize + 1);
8674 38 : std::vector<double> adfX1(nSrcXSize + 1);
8675 38 : std::vector<double> adfY1(nSrcXSize + 1);
8676 38 : std::vector<double> adfZ1(nSrcXSize + 1);
8677 38 : std::vector<int> abSuccess0(nSrcXSize + 1);
8678 38 : std::vector<int> abSuccess1(nSrcXSize + 1);
8679 :
8680 : CPLRectObj sGlobalBounds;
8681 19 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8682 19 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8683 19 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8684 19 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8685 19 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8686 :
8687 : struct SourcePixel
8688 : {
8689 : int iSrcX;
8690 : int iSrcY;
8691 :
8692 : // Coordinates of source pixel in target pixel coordinates
8693 : double dfDstX0;
8694 : double dfDstY0;
8695 : double dfDstX1;
8696 : double dfDstY1;
8697 : double dfDstX2;
8698 : double dfDstY2;
8699 : double dfDstX3;
8700 : double dfDstY3;
8701 :
8702 : // Source pixel total area (might be larger than the one described
8703 : // by above coordinates, if the pixel was crossing the antimeridian
8704 : // and split)
8705 : double dfArea;
8706 : };
8707 :
8708 38 : std::vector<SourcePixel> sourcePixels;
8709 :
8710 38 : XYPoly discontinuityLeft(5);
8711 38 : XYPoly discontinuityRight(5);
8712 :
8713 : /* ==================================================================== */
8714 : /* First pass: transform the 4 corners of each potential */
8715 : /* contributing source pixel to target pixel coordinates. */
8716 : /* ==================================================================== */
8717 :
8718 : // Special case for top line
8719 : {
8720 19 : int iY = 0;
8721 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8722 : {
8723 3345 : adfX1[iX] = iX + poWK->nSrcXOff;
8724 3345 : adfY1[iX] = iY + poWK->nSrcYOff;
8725 3345 : adfZ1[iX] = 0;
8726 : }
8727 :
8728 19 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8729 : adfX1.data(), adfY1.data(), adfZ1.data(),
8730 : abSuccess1.data());
8731 :
8732 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8733 : {
8734 3345 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8735 0 : abSuccess1[iX] = FALSE;
8736 : else
8737 : {
8738 3345 : adfX1[iX] -= poWK->nDstXOff;
8739 3345 : adfY1[iX] -= poWK->nDstYOff;
8740 : }
8741 : }
8742 : }
8743 :
8744 2032 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8745 : {
8746 2032 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8747 872 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8748 2032 : ? 1
8749 1160 : : -1;
8750 19 : };
8751 :
8752 : const auto FindDiscontinuity =
8753 80 : [poWK, psJob, getInsideXSign](
8754 : double dfXLeft, double dfXRight, double dfY,
8755 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8756 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8757 : {
8758 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8759 : {
8760 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8761 800 : double dfXMidReprojected = dfXMid;
8762 800 : dfYMidReprojected = dfY;
8763 800 : double dfZ = 0;
8764 800 : int nSuccess = 0;
8765 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8766 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8767 : &nSuccess);
8768 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8769 : {
8770 456 : dfXRight = dfXMid;
8771 456 : dfXMidReprojectedRight = dfXMidReprojected;
8772 : }
8773 : else
8774 : {
8775 344 : dfXLeft = dfXMid;
8776 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8777 : }
8778 : }
8779 80 : };
8780 :
8781 2685 : for (int iY = 0; iY < nSrcYSize; ++iY)
8782 : {
8783 2666 : std::swap(adfX0, adfX1);
8784 2666 : std::swap(adfY0, adfY1);
8785 2666 : std::swap(adfZ0, adfZ1);
8786 2666 : std::swap(abSuccess0, abSuccess1);
8787 :
8788 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8789 : {
8790 4833460 : adfX1[iX] = iX + poWK->nSrcXOff;
8791 4833460 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8792 4833460 : adfZ1[iX] = 0;
8793 : }
8794 :
8795 2666 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8796 : adfX1.data(), adfY1.data(), adfZ1.data(),
8797 : abSuccess1.data());
8798 :
8799 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8800 : {
8801 4833460 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8802 0 : abSuccess1[iX] = FALSE;
8803 : else
8804 : {
8805 4833460 : adfX1[iX] -= poWK->nDstXOff;
8806 4833460 : adfY1[iX] -= poWK->nDstYOff;
8807 : }
8808 : }
8809 :
8810 4833460 : for (int iX = 0; iX < nSrcXSize; ++iX)
8811 : {
8812 9661580 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8813 4830790 : abSuccess1[iX + 1])
8814 : {
8815 : /* --------------------------------------------------------------------
8816 : */
8817 : /* Do not try to apply transparent source pixels to the
8818 : * destination.*/
8819 : /* --------------------------------------------------------------------
8820 : */
8821 4830790 : const auto iSrcOffset =
8822 4830790 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8823 9560570 : if (poWK->panUnifiedSrcValid != nullptr &&
8824 4729780 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8825 : {
8826 4738340 : continue;
8827 : }
8828 :
8829 103415 : if (poWK->pafUnifiedSrcDensity != nullptr)
8830 : {
8831 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8832 : SRC_DENSITY_THRESHOLD_FLOAT)
8833 0 : continue;
8834 : }
8835 :
8836 : SourcePixel sp;
8837 103415 : sp.dfArea = 0;
8838 103415 : sp.dfDstX0 = adfX0[iX];
8839 103415 : sp.dfDstY0 = adfY0[iX];
8840 103415 : sp.dfDstX1 = adfX0[iX + 1];
8841 103415 : sp.dfDstY1 = adfY0[iX + 1];
8842 103415 : sp.dfDstX2 = adfX1[iX + 1];
8843 103415 : sp.dfDstY2 = adfY1[iX + 1];
8844 103415 : sp.dfDstX3 = adfX1[iX];
8845 103415 : sp.dfDstY3 = adfY1[iX];
8846 :
8847 : // Detect pixel that likely cross the anti-meridian and
8848 : // introduce a discontinuity when reprojected.
8849 :
8850 103415 : if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
8851 80 : std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
8852 40 : getInsideXSign(adfX0[iX]) !=
8853 80 : getInsideXSign(adfX0[iX + 1]) &&
8854 80 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8855 40 : getInsideXSign(adfX0[iX + 1]) ==
8856 103495 : getInsideXSign(adfX1[iX + 1]) &&
8857 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8858 : 0)
8859 : {
8860 : #ifdef DEBUG_VERBOSE
8861 : CPLDebug(
8862 : "WARP",
8863 : "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
8864 : "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
8865 : "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
8866 : iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
8867 : adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
8868 : adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
8869 : #endif
8870 40 : double dfXMidReprojectedLeftTop = 0;
8871 40 : double dfXMidReprojectedRightTop = 0;
8872 40 : double dfYMidReprojectedTop = 0;
8873 40 : FindDiscontinuity(
8874 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8875 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8876 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8877 : dfYMidReprojectedTop);
8878 40 : double dfXMidReprojectedLeftBottom = 0;
8879 40 : double dfXMidReprojectedRightBottom = 0;
8880 40 : double dfYMidReprojectedBottom = 0;
8881 40 : FindDiscontinuity(
8882 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8883 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8884 : dfXMidReprojectedLeftBottom,
8885 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8886 :
8887 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8888 40 : discontinuityLeft[1] =
8889 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8890 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8891 40 : dfYMidReprojectedBottom);
8892 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8893 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8894 :
8895 40 : discontinuityRight[0] =
8896 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8897 40 : discontinuityRight[1] =
8898 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8899 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8900 40 : dfYMidReprojectedBottom);
8901 40 : discontinuityRight[3] =
8902 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8903 40 : discontinuityRight[4] =
8904 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8905 :
8906 40 : sp.dfArea = getArea(discontinuityLeft) +
8907 40 : getArea(discontinuityRight);
8908 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8909 : {
8910 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8911 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8912 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8913 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8914 : }
8915 : else
8916 : {
8917 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8918 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8919 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8920 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8921 : }
8922 : }
8923 :
8924 : // Bounding box of source pixel (expressed in target pixel
8925 : // coordinates)
8926 : CPLRectObj sRect;
8927 103415 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8928 103415 : std::min(sp.dfDstX2, sp.dfDstX3));
8929 103415 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8930 103415 : std::min(sp.dfDstY2, sp.dfDstY3));
8931 103415 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8932 103415 : std::max(sp.dfDstX2, sp.dfDstX3));
8933 103415 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8934 103415 : std::max(sp.dfDstY2, sp.dfDstY3));
8935 103415 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8936 101355 : sRect.miny < iYMax && sRect.maxy > iYMin))
8937 : {
8938 10852 : continue;
8939 : }
8940 :
8941 92563 : sp.iSrcX = iX;
8942 92563 : sp.iSrcY = iY;
8943 :
8944 92563 : if (!bIsAffineNoRotation)
8945 : {
8946 : // Check polygon validity (no self-crossing)
8947 89745 : XYPair xy;
8948 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8949 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8950 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8951 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8952 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8953 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8954 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8955 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8956 : {
8957 113 : continue;
8958 : }
8959 : }
8960 :
8961 92450 : CPLQuadTreeInsertWithBounds(
8962 : hQuadTree,
8963 : reinterpret_cast<void *>(
8964 92450 : static_cast<uintptr_t>(sourcePixels.size())),
8965 : &sRect);
8966 :
8967 92450 : sourcePixels.push_back(sp);
8968 : }
8969 : }
8970 : }
8971 :
8972 38 : std::vector<double> adfRealValue(poWK->nBands);
8973 38 : std::vector<double> adfImagValue(poWK->nBands);
8974 38 : std::vector<double> adfBandDensity(poWK->nBands);
8975 38 : std::vector<double> adfWeight(poWK->nBands);
8976 :
8977 : #ifdef CHECK_SUM_WITH_GEOS
8978 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8979 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8980 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8981 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8982 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8983 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8984 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8985 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8986 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8987 :
8988 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8989 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8990 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8991 : #endif
8992 :
8993 : const XYPoly xy1{
8994 38 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8995 38 : XYPoly xy2(5);
8996 38 : XYPoly xy2_triangle(4);
8997 38 : XYPoly intersection;
8998 :
8999 : /* ==================================================================== */
9000 : /* Loop over output lines. */
9001 : /* ==================================================================== */
9002 1951 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
9003 : {
9004 : CPLRectObj sRect;
9005 1932 : sRect.miny = iDstY;
9006 1932 : sRect.maxy = iDstY + 1;
9007 :
9008 : /* ====================================================================
9009 : */
9010 : /* Loop over pixels in output scanline. */
9011 : /* ====================================================================
9012 : */
9013 1403940 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
9014 : {
9015 1402010 : sRect.minx = iDstX;
9016 1402010 : sRect.maxx = iDstX + 1;
9017 1402010 : int nSourcePixels = 0;
9018 : void **pahSourcePixel =
9019 1402010 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
9020 1402010 : if (nSourcePixels == 0)
9021 : {
9022 1183090 : CPLFree(pahSourcePixel);
9023 1183100 : continue;
9024 : }
9025 :
9026 218919 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
9027 218919 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
9028 218919 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
9029 218919 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
9030 218919 : double dfDensity = 0;
9031 : // Just above zero to please Coveriy Scan
9032 218919 : double dfTotalWeight = std::numeric_limits<double>::min();
9033 :
9034 : /* ====================================================================
9035 : */
9036 : /* Iterate over each contributing source pixel to add its
9037 : */
9038 : /* value weighed by the ratio of the area of its
9039 : * intersection */
9040 : /* with the target pixel divided by the area of the source
9041 : */
9042 : /* pixel. */
9043 : /* ====================================================================
9044 : */
9045 1020550 : for (int i = 0; i < nSourcePixels; ++i)
9046 : {
9047 801628 : const int iSourcePixel = static_cast<int>(
9048 801628 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
9049 801628 : auto &sp = sourcePixels[iSourcePixel];
9050 :
9051 801628 : double dfWeight = 0.0;
9052 801628 : if (bIsAffineNoRotation)
9053 : {
9054 : // Optimization since the source pixel is a rectangle in
9055 : // target pixel coordinates
9056 16326 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
9057 16326 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
9058 16326 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
9059 16326 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
9060 16326 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
9061 16326 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
9062 16326 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
9063 16326 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
9064 16326 : dfWeight =
9065 16326 : ((dfIntersMaxX - dfIntersMinX) *
9066 16326 : (dfIntersMaxY - dfIntersMinY)) /
9067 16326 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
9068 : }
9069 : else
9070 : {
9071 : // Compute the polygon of the source pixel in target pixel
9072 : // coordinates, and shifted to the target pixel (unit square
9073 : // coordinates)
9074 :
9075 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9076 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
9077 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
9078 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
9079 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9080 :
9081 785302 : if (isConvex(xy2))
9082 : {
9083 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
9084 785302 : if (intersection.size() >= 3)
9085 : {
9086 468849 : dfWeight = getArea(intersection);
9087 : }
9088 : }
9089 : else
9090 : {
9091 : // Split xy2 into 2 triangles.
9092 0 : xy2_triangle[0] = xy2[0];
9093 0 : xy2_triangle[1] = xy2[1];
9094 0 : xy2_triangle[2] = xy2[2];
9095 0 : xy2_triangle[3] = xy2[0];
9096 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9097 : intersection);
9098 0 : if (intersection.size() >= 3)
9099 : {
9100 0 : dfWeight = getArea(intersection);
9101 : }
9102 :
9103 0 : xy2_triangle[1] = xy2[2];
9104 0 : xy2_triangle[2] = xy2[3];
9105 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9106 : intersection);
9107 0 : if (intersection.size() >= 3)
9108 : {
9109 0 : dfWeight += getArea(intersection);
9110 : }
9111 : }
9112 785302 : if (dfWeight > 0.0)
9113 : {
9114 468828 : if (sp.dfArea == 0)
9115 89592 : sp.dfArea = getArea(xy2);
9116 468828 : dfWeight /= sp.dfArea;
9117 : }
9118 :
9119 : #ifdef CHECK_SUM_WITH_GEOS
9120 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
9121 : sp.dfDstX0 - iDstX,
9122 : sp.dfDstY0 - iDstY);
9123 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
9124 : sp.dfDstX1 - iDstX,
9125 : sp.dfDstY1 - iDstY);
9126 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
9127 : sp.dfDstX2 - iDstX,
9128 : sp.dfDstY2 - iDstY);
9129 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
9130 : sp.dfDstX3 - iDstX,
9131 : sp.dfDstY3 - iDstY);
9132 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
9133 : sp.dfDstX0 - iDstX,
9134 : sp.dfDstY0 - iDstY);
9135 :
9136 : double dfWeightGEOS = 0.0;
9137 : auto hIntersection =
9138 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
9139 : if (hIntersection)
9140 : {
9141 : double dfIntersArea = 0.0;
9142 : if (GEOSArea_r(hGEOSContext, hIntersection,
9143 : &dfIntersArea) &&
9144 : dfIntersArea > 0)
9145 : {
9146 : double dfSourceArea = 0.0;
9147 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
9148 : {
9149 : dfWeightGEOS = dfIntersArea / dfSourceArea;
9150 : }
9151 : }
9152 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
9153 : }
9154 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
9155 : {
9156 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
9157 : dfWeight, dfWeightGEOS);
9158 : printf("xy2: "); // ok
9159 : for (const auto &xy : xy2)
9160 : printf("[%f, %f], ", xy.first, xy.second); // ok
9161 : printf("\n"); // ok
9162 : printf("intersection: "); // ok
9163 : for (const auto &xy : intersection)
9164 : printf("[%f, %f], ", xy.first, xy.second); // ok
9165 : printf("\n"); // ok
9166 : }
9167 : #endif
9168 : }
9169 801628 : if (dfWeight > 0.0)
9170 : {
9171 : #ifdef DEBUG_VERBOSE
9172 : #if defined(DST_X) && defined(DST_Y)
9173 : if (iDstX + poWK->nDstXOff == DST_X &&
9174 : iDstY + poWK->nDstYOff == DST_Y)
9175 : {
9176 : CPLDebug("WARP",
9177 : "iSrcX = %d, iSrcY = %d, weight =%.17g",
9178 : sp.iSrcX + poWK->nSrcXOff,
9179 : sp.iSrcY + poWK->nSrcYOff, dfWeight);
9180 : }
9181 : #endif
9182 : #endif
9183 :
9184 474104 : const GPtrDiff_t iSrcOffset =
9185 474104 : sp.iSrcX +
9186 474104 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
9187 474104 : dfTotalWeight += dfWeight;
9188 :
9189 474104 : if (poWK->pafUnifiedSrcDensity != nullptr)
9190 : {
9191 0 : dfDensity +=
9192 0 : dfWeight *
9193 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
9194 : }
9195 : else
9196 : {
9197 474104 : dfDensity += dfWeight;
9198 : }
9199 :
9200 1818730 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9201 : {
9202 : // Returns pixel value if it is not no data.
9203 : double dfBandDensity;
9204 : double dfRealValue;
9205 : double dfImagValue;
9206 2689250 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
9207 : &dfBandDensity, &dfRealValue,
9208 : &dfImagValue) &&
9209 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
9210 : {
9211 0 : continue;
9212 : }
9213 : #ifdef DEBUG_VERBOSE
9214 : #if defined(DST_X) && defined(DST_Y)
9215 : if (iDstX + poWK->nDstXOff == DST_X &&
9216 : iDstY + poWK->nDstYOff == DST_Y)
9217 : {
9218 : CPLDebug("WARP", "value * weight = %.17g",
9219 : dfRealValue * dfWeight);
9220 : }
9221 : #endif
9222 : #endif
9223 :
9224 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
9225 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
9226 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
9227 1344620 : adfWeight[iBand] += dfWeight;
9228 : }
9229 : }
9230 : }
9231 :
9232 218919 : CPLFree(pahSourcePixel);
9233 :
9234 : /* --------------------------------------------------------------------
9235 : */
9236 : /* Update destination pixel value. */
9237 : /* --------------------------------------------------------------------
9238 : */
9239 218919 : bool bHasFoundDensity = false;
9240 218919 : const GPtrDiff_t iDstOffset =
9241 218919 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
9242 827838 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9243 : {
9244 608919 : if (adfWeight[iBand] > 0)
9245 : {
9246 : const double dfBandDensity =
9247 608909 : adfBandDensity[iBand] / adfWeight[iBand];
9248 608909 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
9249 : {
9250 608909 : bHasFoundDensity = true;
9251 608909 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
9252 608909 : adfRealValue[iBand],
9253 608909 : adfImagValue[iBand],
9254 : bAvoidNoDataSingleBand);
9255 : }
9256 : }
9257 : }
9258 :
9259 218919 : if (!bHasFoundDensity)
9260 10 : continue;
9261 :
9262 218909 : if (!bAvoidNoDataSingleBand)
9263 : {
9264 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
9265 : }
9266 :
9267 : /* --------------------------------------------------------------------
9268 : */
9269 : /* Update destination density/validity masks. */
9270 : /* --------------------------------------------------------------------
9271 : */
9272 218909 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
9273 :
9274 218909 : if (poWK->panDstValid != nullptr)
9275 : {
9276 11752 : CPLMaskSet(poWK->panDstValid, iDstOffset);
9277 : }
9278 : }
9279 :
9280 : /* --------------------------------------------------------------------
9281 : */
9282 : /* Report progress to the user, and optionally cancel out. */
9283 : /* --------------------------------------------------------------------
9284 : */
9285 1932 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
9286 0 : break;
9287 : }
9288 :
9289 : #ifdef CHECK_SUM_WITH_GEOS
9290 : GEOSGeom_destroy_r(hGEOSContext, hP1);
9291 : GEOSGeom_destroy_r(hGEOSContext, hP2);
9292 : OGRGeometry::freeGEOSContext(hGEOSContext);
9293 : #endif
9294 19 : CPLQuadTreeDestroy(hQuadTree);
9295 19 : }
|