Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79 : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
80 : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
84 : static const int anGWKFilterRadius[] = { // Filter radius for each resampling algorithm; indexed by the GDALResampleAlg value in GWKGetFilterRadius().
85 : 0, // Nearest neighbour
86 : 1, // Bilinear
87 : 2, // Cubic Convolution (Catmull-Rom)
88 : 2, // Cubic B-Spline
89 : 3, // Lanczos windowed sinc
90 : 0, // Average
91 : 0, // Mode
92 : 0, // Reserved GRA_Gauss=7
93 : 0, // Max
94 : 0, // Min
95 : 0, // Med
96 : 0, // Q1
97 : 0, // Q3
98 : 0, // Sum
99 : 0, // RMS
100 : };
101 :
102 : static double GWKBilinear(double dfX); // Forward declarations of the filter functions stored in apfGWKFilter.
103 : static double GWKCubic(double dfX);
104 : static double GWKBSpline(double dfX);
105 : static double GWKLanczosSinc(double dfX);
106 :
107 : static const FilterFuncType apfGWKFilter[] = { // Filter function for each resampling algorithm (nullptr where none is registered); indexed by the GDALResampleAlg value in GWKGetFilterFunc().
108 : nullptr, // Nearest neighbour
109 : GWKBilinear, // Bilinear
110 : GWKCubic, // Cubic Convolution (Catmull-Rom)
111 : GWKBSpline, // Cubic B-Spline
112 : GWKLanczosSinc, // Lanczos windowed sinc
113 : nullptr, // Average
114 : nullptr, // Mode
115 : nullptr, // Reserved GRA_Gauss=7
116 : nullptr, // Max
117 : nullptr, // Min
118 : nullptr, // Med
119 : nullptr, // Q1
120 : nullptr, // Q3
121 : nullptr, // Sum
122 : nullptr, // RMS
123 : };
124 :
125 : // TODO(schwehr): Can we make these functions have a const * const arg?
126 : static double GWKBilinear4Values(double *padfVals); // Forward declarations of the array-taking filter variants stored in apfGWKFilter4Values.
127 : static double GWKCubic4Values(double *padfVals);
128 : static double GWKBSpline4Values(double *padfVals);
129 : static double GWKLanczosSinc4Values(double *padfVals);
130 :
131 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = { // Array-taking filter function for each resampling algorithm (nullptr where none is registered); indexed by the GDALResampleAlg value in GWKGetFilterFunc4Values().
132 : nullptr, // Nearest neighbour
133 : GWKBilinear4Values, // Bilinear
134 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
135 : GWKBSpline4Values, // Cubic B-Spline
136 : GWKLanczosSinc4Values, // Lanczos windowed sinc
137 : nullptr, // Average
138 : nullptr, // Mode
139 : nullptr, // Reserved GRA_Gauss=7
140 : nullptr, // Max
141 : nullptr, // Min
142 : nullptr, // Med
143 : nullptr, // Q1
144 : nullptr, // Q3
145 : nullptr, // Sum
146 : nullptr, // RMS
147 : };
148 :
149 13419 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg) // Returns the anGWKFilterRadius entry for eResampleAlg.
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1, // compile-time guard: the table must have one entry per enum value
152 : "Bad size of anGWKFilterRadius");
153 13419 : return anGWKFilterRadius[eResampleAlg]; // direct index; eResampleAlg is not range-checked at run time
154 : }
155 :
156 5093 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter entry for eResampleAlg; may be nullptr.
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1, // compile-time guard: the table must have one entry per enum value
159 : "Bad size of apfGWKFilter");
160 5093 : return apfGWKFilter[eResampleAlg]; // direct index; eResampleAlg is not range-checked at run time
161 : }
162 :
163 5093 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter4Values entry for eResampleAlg; may be nullptr.
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1, // compile-time guard: the table must have one entry per enum value
166 : "Bad size of apfGWKFilter4Values");
167 5093 : return apfGWKFilter4Values[eResampleAlg]; // direct index; eResampleAlg is not range-checked at run time
168 : }
169 :
170 : static CPLErr GWKGeneralCase(GDALWarpKernel *); // Forward declarations of file-static routines that take a GDALWarpKernel and return a CPLErr; their definitions are not in this part of the listing.
171 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK); // only declared when INSTANTIATE_FLOAT64_SSE2_IMPL is defined
178 : #endif
179 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK); // only declared when INSTANTIATE_FLOAT64_SSE2_IMPL is defined
186 : #endif
187 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
193 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
194 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
195 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
196 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 :
199 : /************************************************************************/
200 : /* GWKJobStruct */
201 : /************************************************************************/
202 :
203 : struct GWKJobStruct // Per-job state handed to a worker function; the reference members alias state held by a GWKThreadData.
204 : {
205 : std::mutex &mutex; // shared mutex; taken by GWKProgressThread() around counter/stopFlag access
206 : std::condition_variable &cv; // shared condition variable; notified by GWKProgressThread()
207 : int counterSingleThreaded = 0; // call counter used only by GWKProgressMonoThread()
208 : int &counter; // shared progress counter; incremented by GWKProgressThread()
209 : bool &stopFlag; // shared flag; set to true when the computation must stop
210 : GDALWarpKernel *poWK = nullptr; // kernel being processed by this job
211 : int iYMin = 0; // start of the row range assigned to this job (see the partitioning in GWKRun())
212 : int iYMax = 0; // end of the row range assigned to this job (see the partitioning in GWKRun())
213 : int (*pfnProgress)(GWKJobStruct *psJob) = nullptr; // progress callback; set to GWKProgressThread or GWKProgressMonoThread in this file
214 : void *pTransformerArg = nullptr; // transformer argument this job must use
215 : // used by GWKRun() to assign the proper pTransformerArg
216 : void (*pfnFunc)(void *) = nullptr; // worker function; invoked by ThreadFuncAdapter() with the job as argument
217 :
218 2932 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_, // binds the four reference members; all other members keep their defaults
219 : int &counter_, bool &stopFlag_)
220 2932 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
221 : {
222 2932 : }
223 : };
224 :
225 : struct GWKThreadData // State shared between GWKRun() and its jobs; allocated by GWKThreadsCreate() and deleted by GWKThreadsEnd().
226 : {
227 : std::unique_ptr<CPLJobQueue> poJobQueue{}; // job queue; left null when no thread pool is used (GWKRun() then runs single-threaded)
228 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{}; // job structures, sized to the thread count in GWKThreadsCreate()
229 : int nMaxThreads{0}; // upper bound on the number of jobs GWKRun() may submit
230 : int counter{0}; // progress counter aliased by GWKJobStruct::counter
231 : bool stopFlag{false}; // stop request aliased by GWKJobStruct::stopFlag
232 : std::mutex mutex{}; // protects counter, stopFlag and the transformer bookkeeping below
233 : std::condition_variable cv{}; // signalled by GWKProgressThread(); waited on in GWKRun()
234 : bool bTransformerArgInputAssignedToThread{false}; // true while a job is borrowing pTransformerArgInput
235 : void *pTransformerArgInput{
236 : nullptr}; // owned by calling layer. Not to be destroyed
237 : std::map<GIntBig, void *> mapThreadToTransformerArg{}; // CPLGetPID() value -> transformer argument used by that thread
238 : int nTotalThreadCountForThisRun = 0; // number of jobs submitted by the current GWKRun() call
239 : int nCurThreadCountForThisRun = 0; // number of jobs that have entered ThreadFuncAdapter() in the current run
240 : };
241 :
242 : /************************************************************************/
243 : /* GWKProgressThread() */
244 : /************************************************************************/
245 :
246 : // Progress callback used for queued jobs: increments the shared counter under the mutex and wakes one waiter on cv. Return TRUE if the computation must be interrupted.
247 36 : static int GWKProgressThread(GWKJobStruct *psJob)
248 : {
249 36 : bool stop = false;
250 : {
251 36 : std::lock_guard<std::mutex> lock(psJob->mutex);
252 36 : psJob->counter++;
253 36 : stop = psJob->stopFlag; // read while the lock is held
254 : }
255 36 : psJob->cv.notify_one(); // issued after the lock has been released
256 :
257 36 : return stop;
258 : }
259 :
260 : /************************************************************************/
261 : /* GWKProgressMonoThread() */
262 : /************************************************************************/
263 :
264 : // Progress callback used by GWKGenericMonoThread(): calls the kernel's pfnProgress directly with the updated fraction. Return TRUE if the computation must be interrupted.
265 378617 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
266 : {
267 378617 : GDALWarpKernel *poWK = psJob->poWK;
268 378617 : if (!poWK->pfnProgress(poWK->dfProgressBase +
269 378617 : poWK->dfProgressScale *
270 378617 : (++psJob->counterSingleThreaded / // number of calls so far, as a fraction of iYMax
271 378617 : static_cast<double>(psJob->iYMax)),
272 : "", poWK->pProgress))
273 : {
274 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
275 1 : psJob->stopFlag = true; // read back by GWKGenericMonoThread() to report CE_Failure
276 1 : return TRUE;
277 : }
278 378616 : return FALSE;
279 : }
280 :
281 : /************************************************************************/
282 : /* GWKGenericMonoThread() */
283 : /************************************************************************/
284 :
285 2910 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK, // Runs pfnFunc once on the calling thread with a job covering iYMin = 0 .. iYMax = nDstYSize.
286 : void (*pfnFunc)(void *pUserData)) // Returns CE_Failure if the stop flag was raised during the call, CE_None otherwise.
287 : {
288 2910 : GWKThreadData td; // local, throw-away shared state; only provides the objects the job's references bind to
289 :
290 : // NOTE: the mutex is not used.
291 2910 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
292 2910 : job.poWK = poWK;
293 2910 : job.iYMin = 0;
294 2910 : job.iYMax = poWK->nDstYSize;
295 2910 : job.pfnProgress = GWKProgressMonoThread;
296 2910 : job.pTransformerArg = poWK->pTransformerArg; // use the kernel's own transformer argument; no cloning in this path
297 2910 : job.counterSingleThreaded = td.counter;
298 2910 : pfnFunc(&job);
299 2910 : td.counter = job.counterSingleThreaded;
300 :
301 5820 : return td.stopFlag ? CE_Failure : CE_None;
302 : }
303 :
304 : /************************************************************************/
305 : /* GWKThreadsCreate() */
306 : /************************************************************************/
307 :
308 1744 : void *GWKThreadsCreate(char **papszWarpOptions, // Allocates a GWKThreadData and returns it as an opaque pointer; GWKThreadsEnd() deletes it.
309 : GDALTransformerFunc /* pfnTransformer */,
310 : void *pTransformerArg)
311 : {
312 : const char *pszWarpThreads =
313 1744 : CSLFetchNameValue(papszWarpOptions, "NUM_THREADS"); // the warp option takes precedence over the config option
314 1744 : if (pszWarpThreads == nullptr)
315 1727 : pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
316 :
317 1744 : int nThreads = 0;
318 1744 : if (EQUAL(pszWarpThreads, "ALL_CPUS"))
319 3 : nThreads = CPLGetNumCPUs();
320 : else
321 1741 : nThreads = atoi(pszWarpThreads);
322 1744 : if (nThreads <= 1) // one thread or fewer: no pool is requested
323 1722 : nThreads = 0;
324 1744 : if (nThreads > 128) // hard cap on the requested thread count
325 0 : nThreads = 128;
326 :
327 1744 : GWKThreadData *psThreadData = new GWKThreadData();
328 : auto poThreadPool =
329 1744 : nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
330 1744 : if (nThreads && poThreadPool) // if no pool is available, poJobQueue stays null and GWKRun() falls back to the single-threaded path
331 : {
332 22 : psThreadData->nMaxThreads = nThreads;
333 22 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>( // nThreads copies of a job bound to the shared mutex/cv/counter/stopFlag
334 : nThreads,
335 22 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
336 44 : psThreadData->counter, psThreadData->stopFlag)));
337 :
338 22 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
339 22 : psThreadData->pTransformerArgInput = pTransformerArg; // kept as the source for per-thread clones in ThreadFuncAdapter()
340 : }
341 :
342 1744 : return psThreadData;
343 : }
344 :
345 : /************************************************************************/
346 : /* GWKThreadsEnd() */
347 : /************************************************************************/
348 :
349 1744 : void GWKThreadsEnd(void *psThreadDataIn) // Releases what GWKThreadsCreate() allocated: per-thread transformers, the job queue, then the structure itself. A null argument is ignored.
350 : {
351 1744 : if (psThreadDataIn == nullptr)
352 0 : return;
353 :
354 1744 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
355 1744 : if (psThreadData->poJobQueue)
356 : {
357 : // cppcheck-suppress constVariableReference
358 32 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
359 : {
360 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput); // the caller-owned transformer must not be left in the map, since it must not be destroyed here
361 10 : GDALDestroyTransformer(pair.second);
362 : }
363 22 : psThreadData->poJobQueue.reset();
364 : }
365 1744 : delete psThreadData;
366 : }
367 :
368 : /************************************************************************/
369 : /* ThreadFuncAdapter() */
370 : /************************************************************************/
371 :
372 31 : static void ThreadFuncAdapter(void *pData) // Job entry point submitted by GWKRun(): picks or clones a transformer argument for the current thread, runs psJob->pfnFunc, then hands back the original transformer if it was borrowed.
373 : {
374 31 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
375 31 : GWKThreadData *psThreadData =
376 31 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
377 :
378 : // Look if we have already a per-thread transformer
379 31 : void *pTransformerArg = nullptr;
380 31 : const GIntBig nThreadId = CPLGetPID(); // used as the key into mapThreadToTransformerArg
381 :
382 : {
383 62 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
384 31 : ++psThreadData->nCurThreadCountForThisRun;
385 :
386 31 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
387 31 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
388 : {
389 1 : pTransformerArg = oIter->second;
390 : }
391 30 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
392 30 : psThreadData->nCurThreadCountForThisRun ==
393 30 : psThreadData->nTotalThreadCountForThisRun)
394 : {
395 : // If we are the last thread to be started, temporarily borrow the
396 : // original transformer
397 20 : psThreadData->bTransformerArgInputAssignedToThread = true;
398 20 : pTransformerArg = psThreadData->pTransformerArgInput;
399 20 : psThreadData->mapThreadToTransformerArg[nThreadId] =
400 : pTransformerArg;
401 : }
402 :
403 31 : if (pTransformerArg == nullptr)
404 : {
405 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
406 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
407 : }
408 : }
409 :
410 : // If no transformer assigned to current thread, instantiate one
411 31 : if (pTransformerArg == nullptr)
412 : {
413 : // This somehow assumes that GDALCloneTransformer() is thread-safe
414 : // which should normally be the case.
415 : pTransformerArg =
416 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput); // called without holding the mutex
417 :
418 : // Lock for the stop flag and the transformer map.
419 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
420 10 : if (!pTransformerArg)
421 : {
422 0 : psJob->stopFlag = true; // clone failed: mark the run as failed; pfnFunc is not invoked for this job
423 0 : return; // NOTE(review): this path neither increments the counter nor notifies cv; confirm GWKRun()'s progress wait cannot block on it
424 : }
425 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; // entries stored here are destroyed in GWKThreadsEnd()
426 : }
427 :
428 31 : psJob->pTransformerArg = pTransformerArg;
429 31 : psJob->pfnFunc(pData);
430 :
431 : // Give back original transformer, if borrowed.
432 : {
433 62 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
434 31 : if (psThreadData->bTransformerArgInputAssignedToThread &&
435 21 : pTransformerArg == psThreadData->pTransformerArgInput)
436 : {
437 : psThreadData->mapThreadToTransformerArg.erase( // remove the borrowed entry so GWKThreadsEnd() never destroys the caller-owned transformer
438 20 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
439 20 : psThreadData->bTransformerArgInputAssignedToThread = false;
440 : }
441 : }
442 : }
443 :
444 : /************************************************************************/
445 : /* GWKRun() */
446 : /************************************************************************/
447 :
448 2931 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName, // Runs pfnFunc over the destination rows, either on the calling thread or split across jobs on the job queue.
449 : void (*pfnFunc)(void *pUserData)) // pszFuncName is used only in the debug message. Returns CE_Failure on interruption or when the stop flag is set, CE_None otherwise.
450 :
451 : {
452 2931 : const int nDstYSize = poWK->nDstYSize;
453 :
454 2931 : CPLDebug("GDAL",
455 : "GDALWarpKernel()::%s() "
456 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
457 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
458 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
459 : poWK->nDstYSize);
460 :
461 2931 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress)) // initial progress report; lets the caller cancel before any work starts
462 : {
463 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
464 0 : return CE_Failure;
465 : }
466 :
467 2931 : GWKThreadData *psThreadData =
468 : static_cast<GWKThreadData *>(poWK->psThreadData);
469 2931 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr) // no job queue available: run on the calling thread
470 : {
471 2910 : return GWKGenericMonoThread(poWK, pfnFunc);
472 : }
473 :
474 21 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2); // at most one job per two destination rows
475 : // Config option mostly useful for tests to be able to test multithreading
476 : // with small rasters
477 : const int nWarpChunkSize =
478 21 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
479 21 : if (nWarpChunkSize > 0)
480 : {
481 19 : GIntBig nChunks = // number of whole nWarpChunkSize-pixel chunks in the destination
482 19 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
483 19 : if (nThreads > nChunks)
484 14 : nThreads = static_cast<int>(nChunks);
485 : }
486 21 : if (nThreads <= 0) // always submit at least one job
487 17 : nThreads = 1;
488 :
489 21 : CPLDebug("WARP", "Using %d threads", nThreads);
490 :
491 21 : auto &jobs = *psThreadData->threadJobs;
492 21 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
493 : // Fill-in job structures.
494 52 : for (int i = 0; i < nThreads; ++i)
495 : {
496 31 : auto &job = jobs[i];
497 31 : job.poWK = poWK;
498 31 : job.iYMin = // rows are split evenly: job i gets i*nDstYSize/nThreads up to (i+1)*nDstYSize/nThreads, computed in 64 bits
499 31 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
500 31 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
501 31 : nThreads);
502 31 : if (poWK->pfnProgress != GDALDummyProgress) // NOTE(review): job.pfnProgress is not cleared otherwise, so a value set by an earlier call would persist; confirm this is intended
503 2 : job.pfnProgress = GWKProgressThread;
504 31 : job.pfnFunc = pfnFunc;
505 : }
506 :
507 : bool bStopFlag;
508 : {
509 21 : std::unique_lock<std::mutex> lock(psThreadData->mutex); // held while submitting; ThreadFuncAdapter() and GWKProgressThread() take the same mutex
510 :
511 21 : psThreadData->nTotalThreadCountForThisRun = nThreads;
512 : // coverity[missing_lock]
513 21 : psThreadData->nCurThreadCountForThisRun = 0;
514 :
515 : // Start jobs.
516 52 : for (int i = 0; i < nThreads; ++i)
517 : {
518 31 : auto &job = jobs[i];
519 31 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
520 : static_cast<void *>(&job));
521 : }
522 :
523 : /* --------------------------------------------------------------------
524 : */
525 : /* Report progress. */
526 : /* --------------------------------------------------------------------
527 : */
528 21 : if (poWK->pfnProgress != GDALDummyProgress)
529 : {
530 3 : while (psThreadData->counter < nDstYSize) // NOTE(review): counter and stopFlag are not reset in this function; confirm the behaviour when one GWKThreadData serves several calls
531 : {
532 2 : psThreadData->cv.wait(lock); // releases the mutex while waiting for a GWKProgressThread() notification
533 2 : if (!poWK->pfnProgress(poWK->dfProgressBase +
534 2 : poWK->dfProgressScale *
535 2 : (psThreadData->counter /
536 2 : static_cast<double>(nDstYSize)),
537 : "", poWK->pProgress))
538 : {
539 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
540 1 : psThreadData->stopFlag = true; // jobs see this through the value returned by GWKProgressThread()
541 1 : break;
542 : }
543 : }
544 : }
545 :
546 21 : bStopFlag = psThreadData->stopFlag; // sampled under the lock, before waiting for job completion
547 : }
548 :
549 : /* -------------------------------------------------------------------- */
550 : /* Wait for all jobs to complete. */
551 : /* -------------------------------------------------------------------- */
552 21 : psThreadData->poJobQueue->WaitCompletion();
553 :
554 21 : return bStopFlag ? CE_Failure : CE_None;
555 : }
556 :
557 : /************************************************************************/
558 : /* ==================================================================== */
559 : /* GDALWarpKernel */
560 : /* ==================================================================== */
561 : /************************************************************************/
562 :
563 : /**
564 : * \class GDALWarpKernel "gdalwarper.h"
565 : *
566 : * Low level image warping class.
567 : *
568 : * This class is responsible for low level image warping for one
569 : * "chunk" of imagery. The class is essentially a structure with all
570 : * data members public - primarily so that new special-case functions
571 : * can be added without changing the class declaration.
572 : *
573 : * Applications are normally intended to interact with warping facilities
574 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
575 : * theory be used directly if great care is taken in setting up the
576 : * control data.
577 : *
578 : * <h3>Design Issues</h3>
579 : *
580 : * The intention is that PerformWarp() would analyze the setup in terms
581 : * of the datatype, resampling type, and validity/density mask usage and
582 : * pick one of many specific implementations of the warping algorithm over
583 : * a continuum of optimization vs. generality. At one end there will be a
584 : * reference general purpose implementation of the algorithm that supports
585 : * any data type (working internally in double precision complex), all three
586 : * resampling types, and any or all of the validity/density masks. At the
587 : * other end would be highly optimized algorithms for common cases like
588 : * nearest neighbour resampling on GDT_UInt8 data with no masks.
589 : *
590 : * The full set of optimized versions have not been decided but we should
591 : * expect to have at least:
592 : * - One for each resampling algorithm for 8bit data with no masks.
593 : * - One for each resampling algorithm for float data with no masks.
594 : * - One for each resampling algorithm for float data with any/all masks
595 : * (essentially the generic case for just float data).
596 : * - One for each resampling algorithm for 8bit data with support for
597 : * input validity masks (per band or per pixel). This handles the common
598 : * case of nodata masking.
599 : * - One for each resampling algorithm for float data with support for
600 : * input validity masks (per band or per pixel). This handles the common
601 : * case of nodata masking.
602 : *
603 : * Some of the specializations would operate on all bands in one pass
604 : * (especially the ones without masking would do this), while others might
605 : * process each band individually to reduce code complexity.
606 : *
607 : * <h3>Masking Semantics</h3>
608 : *
609 : * A detailed explanation of the semantics of the validity and density masks,
610 : * and their effects on resampling kernels is needed here.
611 : */
612 :
613 : /************************************************************************/
614 : /* GDALWarpKernel Data Members */
615 : /************************************************************************/
616 :
617 : /**
618 : * \var GDALResampleAlg GDALWarpKernel::eResample;
619 : *
620 : * Resampling algorithm.
621 : *
622 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
623 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
624 : * GRA_Mode or GRA_Sum.
625 : *
626 : * This field is required. GRA_NearestNeighbour may be used as a default
627 : * value.
628 : */
629 :
630 : /**
631 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
632 : *
633 : * Working pixel data type.
634 : *
635 : * The datatype of pixels in the source image (papabySrcImage) and
636 : * destination image (papabyDstImage) buffers. Note that operations on
637 : * some data types (such as GDT_UInt8) may be much better optimized than other
638 : * less common cases.
639 : *
640 : * This field is required. It may not be GDT_Unknown.
641 : */
642 :
643 : /**
644 : * \var int GDALWarpKernel::nBands;
645 : *
646 : * Number of bands.
647 : *
648 : * The number of bands (layers) of imagery being warped. Determines the
649 : * number of entries in the papabySrcImage, papanBandSrcValid,
650 : * and papabyDstImage arrays.
651 : *
652 : * This field is required.
653 : */
654 :
655 : /**
656 : * \var int GDALWarpKernel::nSrcXSize;
657 : *
658 : * Source image width in pixels.
659 : *
660 : * This field is required.
661 : */
662 :
663 : /**
664 : * \var int GDALWarpKernel::nSrcYSize;
665 : *
666 : * Source image height in pixels.
667 : *
668 : * This field is required.
669 : */
670 :
671 : /**
672 : * \var double GDALWarpKernel::dfSrcXExtraSize;
673 : *
674 : * Number of pixels included in nSrcXSize that are present on the edges of
675 : * the area of interest to take into account the width of the kernel.
676 : *
677 : * This field is required.
678 : */
679 :
680 : /**
681 : * \var double GDALWarpKernel::dfSrcYExtraSize;
682 : *
683 : * Number of pixels included in nSrcYSize that are present on the edges of
684 : * the area of interest to take into account the height of the kernel.
685 : *
686 : * This field is required.
687 : */
688 :
689 : /**
690 : * \var GByte **GDALWarpKernel::papabySrcImage;
691 : *
692 : * Array of source image band data.
693 : *
694 : * This is an array of pointers (of size GDALWarpKernel::nBands)
695 : * to image data. Each individual band of image data is organized as a single
696 : * block of image data in left to right, then bottom to top order. The actual
697 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
698 : *
699 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
700 : * the second band with eWorkingDataType set to GDT_Float32 use code like
701 : * this:
702 : *
703 : * \code
704 : * float dfPixelValue;
705 : * int nBand = 2-1; // Band indexes are zero based.
706 : * int nPixel = 3; // Zero based.
707 : * int nLine = 4; // Zero based.
708 : *
709 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
710 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
711 : * assert( nBand >= 0 && nBand < poKern->nBands );
712 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
713 : * [nPixel + nLine * poKern->nSrcXSize];
714 : * \endcode
715 : *
716 : * This field is required.
717 : */
718 :
719 : /**
720 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
721 : *
722 : * Per band validity mask for source pixels.
723 : *
724 : * Array of pixel validity mask layers for each source band. Each of
725 : * the mask layers is the same size (in pixels) as the source image with
726 : * one bit per pixel. Note that it is legal (and common) for this to be
727 : * NULL indicating that none of the pixels are invalidated, or for some
728 : * band validity masks to be NULL in which case all pixels of the band are
729 : * valid. The following code can be used to test the validity of a particular
730 : * pixel.
731 : *
732 : * \code
733 : * int bIsValid = TRUE;
734 : * int nBand = 2-1; // Band indexes are zero based.
735 : * int nPixel = 3; // Zero based.
736 : * int nLine = 4; // Zero based.
737 : *
738 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
739 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
740 : * assert( nBand >= 0 && nBand < poKern->nBands );
741 : *
742 : * if( poKern->papanBandSrcValid != NULL
743 : * && poKern->papanBandSrcValid[nBand] != NULL )
744 : * {
745 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
746 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
747 : *
748 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
749 : * }
750 : * \endcode
751 : */
752 :
753 : /**
754 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
755 : *
756 : * Per pixel validity mask for source pixels.
757 : *
758 : * A single validity mask layer that applies to the pixels of all source
759 : * bands. It is accessed similarly to papanBandSrcValid, but without the
760 : * extra level of band indirection.
761 : *
762 : * This pointer may be NULL indicating that all pixels are valid.
763 : *
764 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
765 : * the pixel isn't considered to be valid unless both arrays indicate it is
766 : * valid.
767 : */
768 :
769 : /**
770 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
771 : *
772 : * Per pixel density mask for source pixels.
773 : *
774 : * A single density mask layer that applies to the pixels of all source
775 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
776 : * which this pixel should be allowed to contribute to the output result.
777 : *
778 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
779 : *
780 : * The density for a pixel may be accessed like this:
781 : *
782 : * \code
783 : * float fDensity = 1.0;
784 : * int nPixel = 3; // Zero based.
785 : * int nLine = 4; // Zero based.
786 : *
787 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
788 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
789 : * if( poKern->pafUnifiedSrcDensity != NULL )
790 : * fDensity = poKern->pafUnifiedSrcDensity
791 : * [nPixel + nLine * poKern->nSrcXSize];
792 : * \endcode
793 : */
794 :
795 : /**
796 : * \var int GDALWarpKernel::nDstXSize;
797 : *
798 : * Width of destination image in pixels.
799 : *
800 : * This field is required.
801 : */
802 :
803 : /**
804 : * \var int GDALWarpKernel::nDstYSize;
805 : *
806 : * Height of destination image in pixels.
807 : *
808 : * This field is required.
809 : */
810 :
811 : /**
812 : * \var GByte **GDALWarpKernel::papabyDstImage;
813 : *
814 : * Array of destination image band data.
815 : *
816 : * This is an array of pointers (of size GDALWarpKernel::nBands)
817 : * to image data. Each individual band of image data is organized as a single
818 : * block of image data in left to right, then bottom to top order. The actual
819 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
820 : *
821 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
822 : * the second band with eWorkingDataType set to GDT_Float32 use code like
823 : * this:
824 : *
825 : * \code
826 : * float dfPixelValue;
827 : * int nBand = 2-1; // Band indexes are zero based.
828 : * int nPixel = 3; // Zero based.
829 : * int nLine = 4; // Zero based.
830 : *
831 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
832 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
833 : * assert( nBand >= 0 && nBand < poKern->nBands );
834 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
835 : * [nPixel + nLine * poKern->nDstXSize];
836 : * \endcode
837 : *
838 : * This field is required.
839 : */
840 :
841 : /**
842 : * \var GUInt32 *GDALWarpKernel::panDstValid;
843 : *
844 : * Per pixel validity mask for destination pixels.
845 : *
846 : * A single validity mask layer that applies to the pixels of all destination
847 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
848 : * on the size of the destination image.
849 : *
850 : * This pointer may be NULL indicating that all pixels are valid.
851 : */
852 :
853 : /**
854 : * \var float *GDALWarpKernel::pafDstDensity;
855 : *
856 : * Per pixel density mask for destination pixels.
857 : *
858 : * A single density mask layer that applies to the pixels of all destination
859 : * bands. It contains values between 0.0 and 1.0.
860 : *
861 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
862 : *
863 : * The density for a pixel may be accessed like this:
864 : *
865 : * \code
866 : * float fDensity = 1.0;
867 : * int nPixel = 3; // Zero based.
868 : * int nLine = 4; // Zero based.
869 : *
870 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
871 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
872 : * if( poKern->pafDstDensity != NULL )
873 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
874 : * \endcode
875 : */
876 :
877 : /**
878 : * \var int GDALWarpKernel::nSrcXOff;
879 : *
880 : * X offset to source pixel coordinates for transformation.
881 : *
882 : * See pfnTransformer.
883 : *
884 : * This field is required.
885 : */
886 :
887 : /**
888 : * \var int GDALWarpKernel::nSrcYOff;
889 : *
890 : * Y offset to source pixel coordinates for transformation.
891 : *
892 : * See pfnTransformer.
893 : *
894 : * This field is required.
895 : */
896 :
897 : /**
898 : * \var int GDALWarpKernel::nDstXOff;
899 : *
900 : * X offset to destination pixel coordinates for transformation.
901 : *
902 : * See pfnTransformer.
903 : *
904 : * This field is required.
905 : */
906 :
907 : /**
908 : * \var int GDALWarpKernel::nDstYOff;
909 : *
910 : * Y offset to destination pixel coordinates for transformation.
911 : *
912 : * See pfnTransformer.
913 : *
914 : * This field is required.
915 : */
916 :
917 : /**
918 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
919 : *
920 : * Source/destination location transformer.
921 : *
922 : * The function to call to transform coordinates between source image
923 : * pixel/line coordinates and destination image pixel/line coordinates.
924 : * See GDALTransformerFunc() for details of the semantics of this function.
925 : *
926 : * The GDALWarpKernel algorithm will only ever use this transformer in
927 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
928 : * partial or complete scanlines of points in the destination image as
929 : * input. This means, among other things, that it is safe to use the
930 : * approximating transform GDALApproxTransform() as the transformation
931 : * function.
932 : *
933 : * Source and destination images may be subsets of a larger overall image.
934 : * The transformation algorithms will expect and return pixel/line coordinates
935 : * in terms of this larger image, so coordinates need to be offset by
936 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
937 : * passing to pfnTransformer, and after return from it.
938 : *
939 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
940 : * data to this function when it is called.
941 : *
942 : * This field is required.
943 : */
944 :
945 : /**
946 : * \var void *GDALWarpKernel::pTransformerArg;
947 : *
948 : * Callback data for pfnTransformer.
949 : *
950 : * This field may be NULL if not required for the pfnTransformer being used.
951 : */
952 :
953 : /**
954 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
955 : *
956 : * The function to call to report progress of the algorithm, and to check
957 : * for a requested termination of the operation. It operates according to
958 : * GDALProgressFunc() semantics.
959 : *
960 : * Generally speaking the progress function will be invoked for each
961 : * scanline of the destination buffer that has been processed.
962 : *
963 : * This field may be NULL (internally set to GDALDummyProgress()).
964 : */
965 :
966 : /**
967 : * \var void *GDALWarpKernel::pProgress;
968 : *
969 : * Callback data for pfnProgress.
970 : *
971 : * This field may be NULL if not required for the pfnProgress being used.
972 : */
973 :
974 : /************************************************************************/
975 : /* GDALWarpKernel() */
976 : /************************************************************************/
977 :
978 2961 : GDALWarpKernel::GDALWarpKernel()
979 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
980 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
981 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
982 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
983 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
984 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
985 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
986 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
987 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
988 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
989 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
990 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
991 2961 : eTieStrategy(GWKTS_First)
992 : {
993 2961 : }
994 :
995 : /************************************************************************/
996 : /* ~GDALWarpKernel() */
997 : /************************************************************************/
998 :
999 2961 : GDALWarpKernel::~GDALWarpKernel()
1000 : {
1001 2961 : }
1002 :
1003 : /************************************************************************/
1004 : /* PerformWarp() */
1005 : /************************************************************************/
1006 :
1007 : /**
1008 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1009 : *
1010 : * This method performs the warp described in the GDALWarpKernel.
1011 : *
1012 : * @return CE_None on success or CE_Failure if an error occurs.
1013 : */
1014 :
1015 2957 : CPLErr GDALWarpKernel::PerformWarp()
1016 :
1017 : {
1018 2957 : const CPLErr eErr = Validate();
1019 :
1020 2957 : if (eErr != CE_None)
1021 1 : return eErr;
1022 :
1023 : // See #2445 and #3079.
1024 2956 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1025 : {
1026 25 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1027 : {
1028 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1029 0 : return CE_Failure;
1030 : }
1031 25 : return CE_None;
1032 : }
1033 :
1034 : /* -------------------------------------------------------------------- */
1035 : /* Pre-calculate resampling scales and window sizes for filtering. */
1036 : /* -------------------------------------------------------------------- */
1037 :
1038 2931 : dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1039 2931 : dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1040 2931 : if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1041 1462 : dfXScale = 1.0;
1042 2931 : if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1043 1188 : dfYScale = 1.0;
1044 2931 : if (dfXScale < 1.0)
1045 : {
1046 595 : double dfXReciprocalScale = 1.0 / dfXScale;
1047 595 : const int nXReciprocalScale =
1048 595 : static_cast<int>(dfXReciprocalScale + 0.5);
1049 595 : if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1050 463 : dfXScale = 1.0 / nXReciprocalScale;
1051 : }
1052 2931 : if (dfYScale < 1.0)
1053 : {
1054 536 : double dfYReciprocalScale = 1.0 / dfYScale;
1055 536 : const int nYReciprocalScale =
1056 536 : static_cast<int>(dfYReciprocalScale + 0.5);
1057 536 : if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1058 379 : dfYScale = 1.0 / nYReciprocalScale;
1059 : }
1060 :
1061 : // XSCALE and YSCALE undocumented for now. Can help in some cases.
1062 : // Best would probably be a per-pixel scale computation.
1063 2931 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1064 2931 : if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1065 1 : dfXScale = CPLAtof(pszXScale);
1066 2931 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1067 2931 : if (pszYScale != nullptr)
1068 1 : dfYScale = CPLAtof(pszYScale);
1069 :
1070 : // If the xscale is significantly lower than the yscale, this is highly
1071 : // suspicious of a situation of wrapping a very large virtual file in
1072 : // geographic coordinates with left and right parts being close to the
1073 : // antimeridian. In that situation, the xscale computed by the above method
1074 : // is completely wrong. Prefer doing an average of a few sample points
1075 : // instead
1076 2931 : if ((dfYScale / dfXScale > 100 ||
1077 1 : (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1078 : {
1079 : // Sample points along a grid
1080 4 : const int nPointsX = std::min(10, nDstXSize);
1081 4 : const int nPointsY = std::min(10, nDstYSize);
1082 4 : const int nPoints = 3 * nPointsX * nPointsY;
1083 8 : std::vector<double> padfX;
1084 8 : std::vector<double> padfY;
1085 8 : std::vector<double> padfZ(nPoints);
1086 8 : std::vector<int> pabSuccess(nPoints);
1087 44 : for (int iY = 0; iY < nPointsY; iY++)
1088 : {
1089 440 : for (int iX = 0; iX < nPointsX; iX++)
1090 : {
1091 400 : const double dfX =
1092 : nPointsX == 1
1093 400 : ? 0.0
1094 400 : : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1095 400 : const double dfY =
1096 : nPointsY == 1
1097 400 : ? 0.0
1098 400 : : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1099 :
1100 : // Reproject each destination sample point and its neighbours
1101 : // at (x+1,y) and (x,y+1), so as to get the local scale.
1102 400 : padfX.push_back(dfX);
1103 400 : padfY.push_back(dfY);
1104 :
1105 400 : padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1106 400 : padfY.push_back(dfY);
1107 :
1108 400 : padfX.push_back(dfX);
1109 400 : padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1110 : }
1111 : }
1112 4 : pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1113 4 : &padfZ[0], &pabSuccess[0]);
1114 :
1115 : // Compute the xscale at each sampling point
1116 8 : std::vector<double> adfXScales;
1117 404 : for (int i = 0; i < nPoints; i += 3)
1118 : {
1119 400 : if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1120 : {
1121 : const double dfPointXScale =
1122 400 : 1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1123 800 : std::abs(padfX[i + 2] - padfX[i]));
1124 400 : adfXScales.push_back(dfPointXScale);
1125 : }
1126 : }
1127 :
1128 : // Sort by increasing xcale
1129 4 : std::sort(adfXScales.begin(), adfXScales.end());
1130 :
1131 4 : if (!adfXScales.empty())
1132 : {
1133 : // Compute the average of scales, but eliminate outliers small
1134 : // scales, if some samples are just along the discontinuity.
1135 4 : const double dfMaxPointXScale = adfXScales.back();
1136 4 : double dfSumPointXScale = 0;
1137 4 : int nCountPointScale = 0;
1138 404 : for (double dfPointXScale : adfXScales)
1139 : {
1140 400 : if (dfPointXScale > dfMaxPointXScale / 10)
1141 : {
1142 398 : dfSumPointXScale += dfPointXScale;
1143 398 : nCountPointScale++;
1144 : }
1145 : }
1146 4 : if (nCountPointScale > 0) // should always be true
1147 : {
1148 4 : const double dfXScaleFromSampling =
1149 4 : dfSumPointXScale / nCountPointScale;
1150 : #if DEBUG_VERBOSE
1151 : CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1152 : dfXScaleFromSampling);
1153 : #endif
1154 4 : dfXScale = dfXScaleFromSampling;
1155 : }
1156 : }
1157 : }
1158 :
1159 : #if DEBUG_VERBOSE
1160 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1161 : #endif
1162 :
1163 2931 : const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1164 :
1165 : // Safety check for callers that would use GDALWarpKernel without using
1166 : // GDALWarpOperation.
1167 2868 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1168 2805 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1169 5862 : !bUse4SamplesFormula)) &&
1170 390 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1171 : WARP_EXTRA_ELTS)
1172 : {
1173 0 : CPLError(CE_Failure, CPLE_AppDefined,
1174 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1175 : "their end. "
1176 : "See GDALWarpKernel class definition. If this condition is "
1177 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1178 : WARP_EXTRA_ELTS);
1179 0 : return CE_Failure;
1180 : }
1181 :
1182 2931 : dfXFilter = anGWKFilterRadius[eResample];
1183 2931 : dfYFilter = anGWKFilterRadius[eResample];
1184 :
1185 2931 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1186 2420 : : static_cast<int>(dfXFilter);
1187 2931 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1188 2423 : : static_cast<int>(dfYFilter);
1189 :
1190 : // Filter window offset depends on the parity of the kernel radius.
1191 2931 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1192 2931 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1193 :
1194 2931 : bApplyVerticalShift =
1195 2931 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1196 2931 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1197 2931 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1198 :
1199 : /* -------------------------------------------------------------------- */
1200 : /* Set up resampling functions. */
1201 : /* -------------------------------------------------------------------- */
1202 2931 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1203 12 : return GWKGeneralCase(this);
1204 :
1205 2919 : const bool bNoMasksOrDstDensityOnly =
1206 2912 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1207 5831 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1208 :
1209 2919 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
1210 : bNoMasksOrDstDensityOnly)
1211 943 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1212 :
1213 1976 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
1214 : bNoMasksOrDstDensityOnly)
1215 126 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1216 :
1217 1850 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
1218 : bNoMasksOrDstDensityOnly)
1219 676 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1220 :
1221 1174 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
1222 : bNoMasksOrDstDensityOnly)
1223 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1224 :
1225 1162 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
1226 350 : return GWKNearestByte(this);
1227 :
1228 812 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1229 165 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1230 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1231 :
1232 798 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1233 : bNoMasksOrDstDensityOnly)
1234 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1235 :
1236 793 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1237 : bNoMasksOrDstDensityOnly)
1238 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1239 :
1240 787 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1241 : bNoMasksOrDstDensityOnly)
1242 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1243 :
1244 782 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1245 : bNoMasksOrDstDensityOnly)
1246 14 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1247 :
1248 768 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1249 : bNoMasksOrDstDensityOnly)
1250 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1251 :
1252 763 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1253 : bNoMasksOrDstDensityOnly)
1254 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1255 :
1256 757 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1257 45 : return GWKNearestShort(this);
1258 :
1259 712 : if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1260 10 : return GWKNearestUnsignedShort(this);
1261 :
1262 702 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1263 : bNoMasksOrDstDensityOnly)
1264 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1265 :
1266 691 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1267 50 : return GWKNearestFloat(this);
1268 :
1269 641 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1270 : bNoMasksOrDstDensityOnly)
1271 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1272 :
1273 637 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1274 : bNoMasksOrDstDensityOnly)
1275 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1276 :
1277 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1278 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1279 : bNoMasksOrDstDensityOnly)
1280 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1281 :
1282 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1283 : bNoMasksOrDstDensityOnly)
1284 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1285 : #endif
1286 :
1287 628 : if (eResample == GRA_Average)
1288 77 : return GWKAverageOrMode(this);
1289 :
1290 551 : if (eResample == GRA_RMS)
1291 9 : return GWKAverageOrMode(this);
1292 :
1293 542 : if (eResample == GRA_Mode)
1294 45 : return GWKAverageOrMode(this);
1295 :
1296 497 : if (eResample == GRA_Max)
1297 6 : return GWKAverageOrMode(this);
1298 :
1299 491 : if (eResample == GRA_Min)
1300 5 : return GWKAverageOrMode(this);
1301 :
1302 486 : if (eResample == GRA_Med)
1303 6 : return GWKAverageOrMode(this);
1304 :
1305 480 : if (eResample == GRA_Q1)
1306 10 : return GWKAverageOrMode(this);
1307 :
1308 470 : if (eResample == GRA_Q3)
1309 5 : return GWKAverageOrMode(this);
1310 :
1311 465 : if (eResample == GRA_Sum)
1312 19 : return GWKSumPreserving(this);
1313 :
1314 446 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1315 : {
1316 219 : return GWKRealCase(this);
1317 : }
1318 :
1319 227 : return GWKGeneralCase(this);
1320 : }
1321 :
1322 : /************************************************************************/
1323 : /* Validate() */
1324 : /************************************************************************/
1325 :
1326 : /**
1327 : * \fn CPLErr GDALWarpKernel::Validate()
1328 : *
1329 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1330 : * (and return CE_Failure) if the configuration is considered to be
1331 : * invalid for some reason.
1332 : *
1333 : * This method will also do some standard defaulting such as setting
1334 : * pfnProgress to GDALDummyProgress() if it is NULL.
1335 : *
1336 : * @return CE_None on success or CE_Failure if an error is detected.
1337 : */
1338 :
1339 2957 : CPLErr GDALWarpKernel::Validate()
1340 :
1341 : {
1342 2957 : if (static_cast<size_t>(eResample) >=
1343 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1344 : {
1345 0 : CPLError(CE_Failure, CPLE_AppDefined,
1346 : "Unsupported resampling method %d.",
1347 0 : static_cast<int>(eResample));
1348 0 : return CE_Failure;
1349 : }
1350 :
1351 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1352 : // be ignored as contributing source pixels during resampling. Only taken into account by
1353 : // Average currently
1354 : const char *pszExcludedValues =
1355 2957 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1356 2957 : if (pszExcludedValues)
1357 : {
1358 : const CPLStringList aosTokens(
1359 14 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1360 14 : if ((aosTokens.size() % nBands) != 0)
1361 : {
1362 1 : CPLError(CE_Failure, CPLE_AppDefined,
1363 : "EXCLUDED_VALUES should contain one or several tuples of "
1364 : "%d values formatted like <R>,<G>,<B> or "
1365 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1366 : "tuples",
1367 : nBands);
1368 1 : return CE_Failure;
1369 : }
1370 26 : std::vector<double> adfTuple;
1371 52 : for (int i = 0; i < aosTokens.size(); ++i)
1372 : {
1373 39 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1374 39 : if (((i + 1) % nBands) == 0)
1375 : {
1376 13 : m_aadfExcludedValues.push_back(adfTuple);
1377 13 : adfTuple.clear();
1378 : }
1379 : }
1380 : }
1381 :
1382 2956 : return CE_None;
1383 : }
1384 :
1385 : /************************************************************************/
1386 : /* GWKOverlayDensity() */
1387 : /* */
1388 : /* Compute the final density for the destination pixel. This */
1389 : /* is a function of the overlay density (passed in) and the */
1390 : /* original density. */
1391 : /************************************************************************/
1392 :
1393 10022900 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1394 : double dfDensity)
1395 : {
1396 10022900 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1397 8062630 : return;
1398 :
1399 1960260 : poWK->pafDstDensity[iDstOffset] =
1400 1960260 : 1.0f -
1401 1960260 : (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1402 : }
1403 :
1404 : /************************************************************************/
1405 : /* GWKRoundValueT() */
1406 : /************************************************************************/
1407 :
/**
 * Rounding policy converting a value of type U to type T, selected on the
 * third (boolean) template argument. The primary template only declares
 * eval(); the two partial specializations below define it.
 */
template <class T, class U, bool is_signed> struct sGWKRoundValueT
{
    static T eval(U);
};

/** Signed policy: add one half, take the floor, then convert to T. */
template <class T, class U> struct sGWKRoundValueT<T, U, true>
{
    static T eval(U value)
    {
        const U shifted = value + U(0.5);
        return static_cast<T>(floor(shifted));
    }
};

/** Unsigned policy: add one half and let the conversion to T truncate. */
template <class T, class U> struct sGWKRoundValueT<T, U, false>
{
    static T eval(U value)
    {
        const U shifted = value + U(0.5);
        return static_cast<T>(shifted);
    }
};
1428 :
1429 125348722 : template <class T, class U> static T GWKRoundValueT(U value)
1430 : {
1431 125348722 : return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1432 : }
1433 :
1434 268974 : template <> float GWKRoundValueT<float, double>(double value)
1435 : {
1436 268974 : return static_cast<float>(value);
1437 : }
1438 :
1439 : #ifdef notused
1440 : template <> double GWKRoundValueT<double, double>(double value)
1441 : {
1442 : return value;
1443 : }
1444 : #endif
1445 :
1446 : /************************************************************************/
1447 : /* GWKClampValueT() */
1448 : /************************************************************************/
1449 :
1450 120170134 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
1451 : {
1452 120170134 : if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
1453 545370 : return cpl::NumericLimits<T>::min();
1454 119624726 : else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
1455 772965 : return cpl::NumericLimits<T>::max();
1456 : else
1457 118851926 : return GWKRoundValueT<T, U>(value);
1458 : }
1459 :
1460 718914 : template <> float GWKClampValueT<float, double>(double dfValue)
1461 : {
1462 718914 : return static_cast<float>(dfValue);
1463 : }
1464 :
1465 : #ifdef notused
1466 : template <> double GWKClampValueT<double, double>(double dfValue)
1467 : {
1468 : return dfValue;
1469 : }
1470 : #endif
1471 :
1472 : /************************************************************************/
1473 : /* AvoidNoData() */
1474 : /************************************************************************/
1475 :
1476 1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
1477 : {
1478 : if constexpr (cpl::NumericLimits<T>::is_integer)
1479 : {
1480 1027 : if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
1481 : {
1482 515 : pDst[iDstOffset] =
1483 515 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1484 : }
1485 : else
1486 512 : pDst[iDstOffset]--;
1487 : }
1488 : else
1489 : {
1490 256 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1491 : {
1492 : using std::nextafter;
1493 0 : pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
1494 : }
1495 : else
1496 : {
1497 : using std::nextafter;
1498 256 : pDst[iDstOffset] =
1499 256 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1500 : }
1501 : }
1502 1283 : }
1503 :
1504 : /************************************************************************/
1505 : /* AvoidNoData() */
1506 : /************************************************************************/
1507 :
1508 : template <class T>
1509 13527030 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1510 : GPtrDiff_t iDstOffset)
1511 : {
1512 13527030 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1513 13527030 : T *pDst = reinterpret_cast<T *>(pabyDst);
1514 :
1515 13527030 : if (poWK->padfDstNoDataReal != nullptr &&
1516 6419188 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1517 : {
1518 640 : AvoidNoData(pDst, iDstOffset);
1519 :
1520 640 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1521 : {
1522 40 : const_cast<GDALWarpKernel *>(poWK)
1523 : ->bWarnedAboutDstNoDataReplacement = true;
1524 40 : CPLError(CE_Warning, CPLE_AppDefined,
1525 : "Value %g in the source dataset has been changed to %g "
1526 : "in the destination dataset to avoid being treated as "
1527 : "NoData. To avoid this, select a different NoData value "
1528 : "for the destination dataset.",
1529 40 : poWK->padfDstNoDataReal[iBand],
1530 40 : static_cast<double>(pDst[iDstOffset]));
1531 : }
1532 : }
1533 13527030 : }
1534 :
1535 : /************************************************************************/
1536 : /* GWKAvoidNoDataMultiBand() */
1537 : /************************************************************************/
1538 :
1539 : template <class T>
1540 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1541 : GPtrDiff_t iDstOffset)
1542 : {
1543 524573 : T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
1544 524573 : if (poWK->padfDstNoDataReal != nullptr)
1545 : {
1546 208615 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1547 : {
1548 208294 : if (poWK->padfDstNoDataReal[iBand] !=
1549 208294 : static_cast<double>(ppDst[iBand][iDstOffset]))
1550 205830 : return;
1551 : }
1552 964 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1553 : {
1554 643 : AvoidNoData(ppDst[iBand], iDstOffset);
1555 : }
1556 :
1557 321 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1558 : {
1559 21 : const_cast<GDALWarpKernel *>(poWK)
1560 : ->bWarnedAboutDstNoDataReplacement = true;
1561 42 : std::string valueSrc, valueDst;
1562 64 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1563 : {
1564 43 : if (!valueSrc.empty())
1565 : {
1566 22 : valueSrc += ',';
1567 22 : valueDst += ',';
1568 : }
1569 43 : valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
1570 43 : valueDst += CPLSPrintf(
1571 43 : "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
1572 : }
1573 21 : CPLError(CE_Warning, CPLE_AppDefined,
1574 : "Value %s in the source dataset has been changed to %s "
1575 : "in the destination dataset to avoid being treated as "
1576 : "NoData. To avoid this, select a different NoData value "
1577 : "for the destination dataset.",
1578 : valueSrc.c_str(), valueDst.c_str());
1579 : }
1580 : }
1581 : }
1582 :
1583 : /************************************************************************/
1584 : /* GWKAvoidNoDataMultiBand() */
1585 : /************************************************************************/
1586 :
1587 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1588 : GPtrDiff_t iDstOffset)
1589 : {
1590 524573 : switch (poWK->eWorkingDataType)
1591 : {
1592 523997 : case GDT_UInt8:
1593 523997 : GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
1594 523997 : break;
1595 :
1596 0 : case GDT_Int8:
1597 0 : GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
1598 0 : break;
1599 :
1600 128 : case GDT_Int16:
1601 128 : GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
1602 128 : break;
1603 :
1604 64 : case GDT_UInt16:
1605 64 : GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
1606 64 : break;
1607 :
1608 64 : case GDT_Int32:
1609 64 : GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
1610 64 : break;
1611 :
1612 64 : case GDT_UInt32:
1613 64 : GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
1614 64 : break;
1615 :
1616 64 : case GDT_Int64:
1617 64 : GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
1618 64 : break;
1619 :
1620 64 : case GDT_UInt64:
1621 64 : GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
1622 64 : break;
1623 :
1624 0 : case GDT_Float16:
1625 0 : GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
1626 0 : break;
1627 :
1628 64 : case GDT_Float32:
1629 64 : GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
1630 64 : break;
1631 :
1632 64 : case GDT_Float64:
1633 64 : GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
1634 64 : break;
1635 :
1636 0 : case GDT_CInt16:
1637 : case GDT_CInt32:
1638 : case GDT_CFloat16:
1639 : case GDT_CFloat32:
1640 : case GDT_CFloat64:
1641 : case GDT_Unknown:
1642 : case GDT_TypeCount:
1643 0 : break;
1644 : }
1645 524573 : }
1646 :
1647 : /************************************************************************/
1648 : /* GWKSetPixelValueRealT() */
1649 : /************************************************************************/
1650 :
1651 : template <class T>
1652 9992427 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1653 : GPtrDiff_t iDstOffset, double dfDensity,
1654 : T value, bool bAvoidNoDataSingleBand)
1655 : {
1656 9992427 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1657 :
1658 : /* -------------------------------------------------------------------- */
1659 : /* If the source density is less than 100% we need to fetch the */
1660 : /* existing destination value, and mix it with the source to */
1661 : /* get the new "to apply" value. Also compute composite */
1662 : /* density. */
1663 : /* */
1664 : /* We avoid mixing if density is very near one or risk mixing */
1665 : /* in very extreme nodata values and causing odd results (#1610) */
1666 : /* -------------------------------------------------------------------- */
1667 9992427 : if (dfDensity < 0.9999)
1668 : {
1669 945508 : if (dfDensity < 0.0001)
1670 0 : return true;
1671 :
1672 945508 : double dfDstDensity = 1.0;
1673 :
1674 945508 : if (poWK->pafDstDensity != nullptr)
1675 944036 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1676 1472 : else if (poWK->panDstValid != nullptr &&
1677 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1678 0 : dfDstDensity = 0.0;
1679 :
1680 : // It seems like we also ought to be testing panDstValid[] here!
1681 :
1682 945508 : const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1683 :
1684 : // The destination density is really only relative to the portion
1685 : // not occluded by the overlay.
1686 945508 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1687 :
1688 945508 : const double dfReal =
1689 945508 : (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1690 945508 : (dfDensity + dfDstInfluence);
1691 :
1692 : /* --------------------------------------------------------------------
1693 : */
1694 : /* Actually apply the destination value. */
1695 : /* */
1696 : /* Avoid using the destination nodata value for integer datatypes
1697 : */
1698 : /* if by chance it is equal to the computed pixel value. */
1699 : /* --------------------------------------------------------------------
1700 : */
1701 945508 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1702 : }
1703 : else
1704 : {
1705 9046916 : pDst[iDstOffset] = value;
1706 : }
1707 :
1708 9992427 : if (bAvoidNoDataSingleBand)
1709 8719761 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1710 :
1711 9992427 : return true;
1712 : }
1713 :
1714 : /************************************************************************/
1715 : /* ClampRoundAndAvoidNoData() */
1716 : /************************************************************************/
1717 :
// Stores dfReal into element iDstOffset of destination band iBand, treating
// poWK->papabyDstImage[iBand] as an array of T.
//
// Integer T: values below lowest() or above max() saturate to that bound;
// anything else is rounded by adding 0.5 (via floor() for signed T, via the
// truncating conversion for unsigned T).
// Non-integer T: the value is converted with a plain static_cast.
//
// When bAvoidNoDataSingleBand is true, AvoidNoData<T>() is then called for
// the same band and offset.
//
// NOTE(review): a NaN dfReal fails both range comparisons below and so
// reaches the rounding cast for integer T -- confirm callers never pass NaN.
1718 : template <class T>
1719 5107725 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1720 : GPtrDiff_t iDstOffset, double dfReal,
1721 : bool bAvoidNoDataSingleBand)
1722 : {
1723 5107725 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1724 5107725 : T *pDst = reinterpret_cast<T *>(pabyDst);
1725 :
// The integer / signed distinctions are resolved at compile time.
1726 : if constexpr (cpl::NumericLimits<T>::is_integer)
1727 : {
1728 : using std::floor;
1729 4610595 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1730 5308 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1731 4605285 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1732 23628 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1733 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1734 10410 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1735 : else
1736 4571245 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1737 : }
1738 : else
1739 : {
1740 497130 : pDst[iDstOffset] = static_cast<T>(dfReal);
1741 : }
1742 :
1743 5107725 : if (bAvoidNoDataSingleBand)
1744 4807319 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1745 5107725 : }
1746 :
1747 : /************************************************************************/
1748 : /* GWKSetPixelValue() */
1749 : /************************************************************************/
1750 :
// Writes the sample (dfReal, dfImag) to element iDstOffset of destination
// band iBand, dispatching on poWK->eWorkingDataType.
//
// If dfDensity < 0.9999 the incoming value is first blended with the value
// already stored in the destination, weighted by dfDensity and by the
// destination density. If dfDensity < 0.0001 nothing is written and true is
// returned.
//
// Real types are stored through ClampRoundAndAvoidNoData<T>(); dfImag is not
// stored for them. Complex types store both components at 2 * iDstOffset and
// 2 * iDstOffset + 1, and bAvoidNoDataSingleBand is not applied to them.
//
// Returns false only for GDT_Unknown / GDT_TypeCount, true otherwise.
1751 4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1752 : GPtrDiff_t iDstOffset, double dfDensity,
1753 : double dfReal, double dfImag,
1754 : bool bAvoidNoDataSingleBand)
1755 :
1756 : {
1757 4012410 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1758 :
1759 : /* -------------------------------------------------------------------- */
1760 : /* If the source density is less than 100% we need to fetch the */
1761 : /* existing destination value, and mix it with the source to */
1762 : /* get the new "to apply" value. Also compute composite */
1763 : /* density. */
1764 : /* */
1765 : /* We avoid mixing if density is very near one or risk mixing */
1766 : /* in very extreme nodata values and causing odd results (#1610) */
1767 : /* -------------------------------------------------------------------- */
1768 4012410 : if (dfDensity < 0.9999)
1769 : {
1770 800 : if (dfDensity < 0.0001)
1771 0 : return true;
1772 :
// Destination density: the per-pixel density array when present, else
// 0.0 for pixels the validity mask flags as invalid, else 1.0.
1773 800 : double dfDstDensity = 1.0;
1774 800 : if (poWK->pafDstDensity != nullptr)
1775 800 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1776 0 : else if (poWK->panDstValid != nullptr &&
1777 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1778 0 : dfDstDensity = 0.0;
1779 :
1780 800 : double dfDstReal = 0.0;
1781 800 : double dfDstImag = 0.0;
1782 : // NOTE: panDstValid is only consulted above when pafDstDensity is null.
1783 :
1784 : // TODO(schwehr): Factor out this repeated type of set.
// Fetch the current destination value as doubles.
1785 800 : switch (poWK->eWorkingDataType)
1786 : {
1787 0 : case GDT_UInt8:
1788 0 : dfDstReal = pabyDst[iDstOffset];
1789 0 : dfDstImag = 0.0;
1790 0 : break;
1791 :
1792 0 : case GDT_Int8:
1793 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1794 0 : dfDstImag = 0.0;
1795 0 : break;
1796 :
1797 400 : case GDT_Int16:
1798 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1799 400 : dfDstImag = 0.0;
1800 400 : break;
1801 :
1802 400 : case GDT_UInt16:
1803 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1804 400 : dfDstImag = 0.0;
1805 400 : break;
1806 :
1807 0 : case GDT_Int32:
1808 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1809 0 : dfDstImag = 0.0;
1810 0 : break;
1811 :
1812 0 : case GDT_UInt32:
1813 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1814 0 : dfDstImag = 0.0;
1815 0 : break;
1816 :
1817 0 : case GDT_Int64:
1818 0 : dfDstReal = static_cast<double>(
1819 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1820 0 : dfDstImag = 0.0;
1821 0 : break;
1822 :
1823 0 : case GDT_UInt64:
1824 0 : dfDstReal = static_cast<double>(
1825 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1826 0 : dfDstImag = 0.0;
1827 0 : break;
1828 :
1829 0 : case GDT_Float16:
1830 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1831 0 : dfDstImag = 0.0;
1832 0 : break;
1833 :
1834 0 : case GDT_Float32:
1835 0 : dfDstReal =
1836 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1837 0 : dfDstImag = 0.0;
1838 0 : break;
1839 :
1840 0 : case GDT_Float64:
1841 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1842 0 : dfDstImag = 0.0;
1843 0 : break;
1844 :
// Complex types: real part at index 2 * iDstOffset, imaginary part at
// the following index.
1845 0 : case GDT_CInt16:
1846 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1847 0 : dfDstImag =
1848 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1849 0 : break;
1850 :
1851 0 : case GDT_CInt32:
1852 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1853 0 : dfDstImag =
1854 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1855 0 : break;
1856 :
1857 0 : case GDT_CFloat16:
1858 : dfDstReal =
1859 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1860 : dfDstImag =
1861 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1862 0 : break;
1863 :
1864 0 : case GDT_CFloat32:
1865 0 : dfDstReal =
1866 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
1867 0 : dfDstImag = double(
1868 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
1869 0 : break;
1870 :
1871 0 : case GDT_CFloat64:
1872 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1873 0 : dfDstImag =
1874 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1875 0 : break;
1876 :
1877 0 : case GDT_Unknown:
1878 : case GDT_TypeCount:
1879 0 : CPLAssert(false);
1880 : return false;
1881 : }
1882 :
1883 : // The destination density is really only relative to the portion
1884 : // not occluded by the overlay.
1885 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1886 :
// Weighted average of source and destination. The divisor is at least
// dfDensity, which is >= 0.0001 on this path.
1887 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1888 800 : (dfDensity + dfDstInfluence);
1889 :
1890 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1891 800 : (dfDensity + dfDstInfluence);
1892 : }
1893 :
1894 : /* -------------------------------------------------------------------- */
1895 : /* Actually apply the destination value. */
1896 : /* */
1897 : /* Avoid using the destination nodata value for integer datatypes */
1898 : /* if by chance it is equal to the computed pixel value. */
1899 : /* -------------------------------------------------------------------- */
1900 :
1901 4012410 : switch (poWK->eWorkingDataType)
1902 : {
1903 3290010 : case GDT_UInt8:
1904 3290010 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
1905 : bAvoidNoDataSingleBand);
1906 3290010 : break;
1907 :
1908 0 : case GDT_Int8:
1909 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
1910 : bAvoidNoDataSingleBand);
1911 0 : break;
1912 :
1913 7472 : case GDT_Int16:
1914 7472 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
1915 : bAvoidNoDataSingleBand);
1916 7472 : break;
1917 :
1918 464 : case GDT_UInt16:
1919 464 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
1920 : bAvoidNoDataSingleBand);
1921 464 : break;
1922 :
1923 63 : case GDT_UInt32:
1924 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
1925 : bAvoidNoDataSingleBand);
1926 63 : break;
1927 :
1928 63 : case GDT_Int32:
1929 63 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
1930 : bAvoidNoDataSingleBand);
1931 63 : break;
1932 :
1933 0 : case GDT_UInt64:
1934 0 : ClampRoundAndAvoidNoData<std::uint64_t>(
1935 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
1936 0 : break;
1937 :
1938 0 : case GDT_Int64:
1939 0 : ClampRoundAndAvoidNoData<std::int64_t>(
1940 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
1941 0 : break;
1942 :
1943 0 : case GDT_Float16:
1944 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
1945 : bAvoidNoDataSingleBand);
1946 0 : break;
1947 :
1948 478957 : case GDT_Float32:
1949 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
1950 : bAvoidNoDataSingleBand);
1951 478957 : break;
1952 :
1953 149 : case GDT_Float64:
1954 149 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
1955 : bAvoidNoDataSingleBand);
1956 149 : break;
1957 :
// Complex integer types: each component is saturated to the range of T
// and otherwise rounded with floor(x + 0.5), independently of the other.
1958 234079 : case GDT_CInt16:
1959 : {
1960 : typedef GInt16 T;
1961 234079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1962 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1963 0 : cpl::NumericLimits<T>::min();
1964 234079 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1965 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1966 0 : cpl::NumericLimits<T>::max();
1967 : else
1968 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1969 234079 : static_cast<T>(floor(dfReal + 0.5));
1970 234079 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1971 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1972 0 : cpl::NumericLimits<T>::min();
1973 234079 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1974 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1975 0 : cpl::NumericLimits<T>::max();
1976 : else
1977 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1978 234079 : static_cast<T>(floor(dfImag + 0.5));
1979 234079 : break;
1980 : }
1981 :
1982 379 : case GDT_CInt32:
1983 : {
1984 : typedef GInt32 T;
1985 379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1986 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1987 0 : cpl::NumericLimits<T>::min();
1988 379 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1989 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1990 0 : cpl::NumericLimits<T>::max();
1991 : else
1992 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1993 379 : static_cast<T>(floor(dfReal + 0.5));
1994 379 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1995 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1996 0 : cpl::NumericLimits<T>::min();
1997 379 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1998 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1999 0 : cpl::NumericLimits<T>::max();
2000 : else
2001 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2002 379 : static_cast<T>(floor(dfImag + 0.5));
2003 379 : break;
2004 : }
2005 :
// Complex floating-point types: plain conversion, no clamping.
2006 0 : case GDT_CFloat16:
2007 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
2008 0 : static_cast<GFloat16>(dfReal);
2009 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
2010 0 : static_cast<GFloat16>(dfImag);
2011 0 : break;
2012 :
2013 394 : case GDT_CFloat32:
2014 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
2015 394 : static_cast<float>(dfReal);
2016 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
2017 394 : static_cast<float>(dfImag);
2018 394 : break;
2019 :
2020 380 : case GDT_CFloat64:
2021 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
2022 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
2023 380 : break;
2024 :
2025 0 : case GDT_Unknown:
2026 : case GDT_TypeCount:
2027 0 : return false;
2028 : }
2029 :
2030 4012410 : return true;
2031 : }
2032 :
2033 : /************************************************************************/
2034 : /* GWKSetPixelValueReal() */
2035 : /************************************************************************/
2036 :
// Real-only counterpart of GWKSetPixelValue(): writes dfReal to element
// iDstOffset of destination band iBand, dispatching on
// poWK->eWorkingDataType.
//
// If dfDensity < 0.9999 the incoming value is first blended with the value
// already stored in the destination, weighted by dfDensity and by the
// destination density. If dfDensity < 0.0001 nothing is written and true is
// returned.
//
// The value is stored through ClampRoundAndAvoidNoData<T>(), which also
// receives bAvoidNoDataSingleBand.
//
// Returns false for complex types and for GDT_Unknown / GDT_TypeCount, true
// otherwise.
2037 1330540 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2038 : GPtrDiff_t iDstOffset, double dfDensity,
2039 : double dfReal, bool bAvoidNoDataSingleBand)
2040 :
2041 : {
2042 1330540 : GByte *pabyDst = poWK->papabyDstImage[iBand];
2043 :
2044 : /* -------------------------------------------------------------------- */
2045 : /* If the source density is less than 100% we need to fetch the */
2046 : /* existing destination value, and mix it with the source to */
2047 : /* get the new "to apply" value. Also compute composite */
2048 : /* density. */
2049 : /* */
2050 : /* We avoid mixing if density is very near one or risk mixing */
2051 : /* in very extreme nodata values and causing odd results (#1610) */
2052 : /* -------------------------------------------------------------------- */
2053 1330540 : if (dfDensity < 0.9999)
2054 : {
2055 600 : if (dfDensity < 0.0001)
2056 0 : return true;
2057 :
2058 600 : double dfDstReal = 0.0;
2059 600 : double dfDstDensity = 1.0;
2060 :
// Destination density: the per-pixel density array when present, else
// 0.0 for pixels the validity mask flags as invalid, else 1.0.
2061 600 : if (poWK->pafDstDensity != nullptr)
2062 600 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
2063 0 : else if (poWK->panDstValid != nullptr &&
2064 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
2065 0 : dfDstDensity = 0.0;
2066 :
2067 : // NOTE: panDstValid is only consulted above when pafDstDensity is null.
2068 :
// Fetch the current destination value as a double.
2069 600 : switch (poWK->eWorkingDataType)
2070 : {
2071 0 : case GDT_UInt8:
2072 0 : dfDstReal = pabyDst[iDstOffset];
2073 0 : break;
2074 :
2075 0 : case GDT_Int8:
2076 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
2077 0 : break;
2078 :
2079 300 : case GDT_Int16:
2080 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
2081 300 : break;
2082 :
2083 300 : case GDT_UInt16:
2084 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
2085 300 : break;
2086 :
2087 0 : case GDT_Int32:
2088 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
2089 0 : break;
2090 :
2091 0 : case GDT_UInt32:
2092 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
2093 0 : break;
2094 :
2095 0 : case GDT_Int64:
2096 0 : dfDstReal = static_cast<double>(
2097 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
2098 0 : break;
2099 :
2100 0 : case GDT_UInt64:
2101 0 : dfDstReal = static_cast<double>(
2102 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
2103 0 : break;
2104 :
2105 0 : case GDT_Float16:
2106 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
2107 0 : break;
2108 :
2109 0 : case GDT_Float32:
2110 0 : dfDstReal =
2111 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
2112 0 : break;
2113 :
2114 0 : case GDT_Float64:
2115 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
2116 0 : break;
2117 :
// Complex and unknown types are not handled by this real-only variant.
2118 0 : case GDT_CInt16:
2119 : case GDT_CInt32:
2120 : case GDT_CFloat16:
2121 : case GDT_CFloat32:
2122 : case GDT_CFloat64:
2123 : case GDT_Unknown:
2124 : case GDT_TypeCount:
2125 0 : CPLAssert(false);
2126 : return false;
2127 : }
2128 :
2129 : // The destination density is really only relative to the portion
2130 : // not occluded by the overlay.
2131 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2132 :
// Weighted average of source and destination. The divisor is at least
// dfDensity, which is >= 0.0001 on this path.
2133 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2134 600 : (dfDensity + dfDstInfluence);
2135 : }
2136 :
2137 : /* -------------------------------------------------------------------- */
2138 : /* Actually apply the destination value. */
2139 : /* */
2140 : /* Avoid using the destination nodata value for integer datatypes */
2141 : /* if by chance it is equal to the computed pixel value. */
2142 : /* -------------------------------------------------------------------- */
2143 :
2144 1330540 : switch (poWK->eWorkingDataType)
2145 : {
2146 1308410 : case GDT_UInt8:
2147 1308410 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2148 : bAvoidNoDataSingleBand);
2149 1308410 : break;
2150 :
2151 0 : case GDT_Int8:
2152 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2153 : bAvoidNoDataSingleBand);
2154 0 : break;
2155 :
2156 1309 : case GDT_Int16:
2157 1309 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2158 : bAvoidNoDataSingleBand);
2159 1309 : break;
2160 :
2161 475 : case GDT_UInt16:
2162 475 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2163 : bAvoidNoDataSingleBand);
2164 475 : break;
2165 :
2166 539 : case GDT_UInt32:
2167 539 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2168 : bAvoidNoDataSingleBand);
2169 539 : break;
2170 :
2171 1342 : case GDT_Int32:
2172 1342 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2173 : bAvoidNoDataSingleBand);
2174 1342 : break;
2175 :
2176 224 : case GDT_UInt64:
2177 224 : ClampRoundAndAvoidNoData<std::uint64_t>(
2178 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2179 224 : break;
2180 :
2181 224 : case GDT_Int64:
2182 224 : ClampRoundAndAvoidNoData<std::int64_t>(
2183 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2184 224 : break;
2185 :
2186 0 : case GDT_Float16:
2187 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2188 : bAvoidNoDataSingleBand);
2189 0 : break;
2190 :
2191 3538 : case GDT_Float32:
2192 3538 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2193 : bAvoidNoDataSingleBand);
2194 3538 : break;
2195 :
2196 14486 : case GDT_Float64:
2197 14486 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2198 : bAvoidNoDataSingleBand);
2199 14486 : break;
2200 :
// Complex types: rejected without writing and without asserting.
2201 0 : case GDT_CInt16:
2202 : case GDT_CInt32:
2203 : case GDT_CFloat16:
2204 : case GDT_CFloat32:
2205 : case GDT_CFloat64:
2206 0 : return false;
2207 :
2208 0 : case GDT_Unknown:
2209 : case GDT_TypeCount:
2210 0 : CPLAssert(false);
2211 : return false;
2212 : }
2213 :
2214 1330540 : return true;
2215 : }
2216 :
2217 : /************************************************************************/
2218 : /* GWKGetPixelValue() */
2219 : /************************************************************************/
2220 :
2221 : /* It is assumed that panUnifiedSrcValid has been checked before */
2222 :
// Reads the source sample at iSrcOffset of band iBand as doubles.
//
// If the per-band validity mask poWK->papanBandSrcValid[iBand] exists and
// flags the pixel as invalid, *pdfDensity is set to 0.0 and false is returned
// without writing *pdfReal / *pdfImag.
//
// Otherwise *pdfReal and *pdfImag receive the value converted from
// poWK->eWorkingDataType (imaginary part 0.0 for real types). *pdfDensity is
// taken from poWK->pafUnifiedSrcDensity when that array exists and is 1.0
// otherwise. The return value is whether that density is non-zero.
//
// For GDT_Unknown / GDT_TypeCount, *pdfDensity is set to 0.0 and false is
// returned.
2223 30506400 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2224 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2225 : double *pdfReal, double *pdfImag)
2226 :
2227 : {
2228 30506400 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2229 :
2230 61012700 : if (poWK->papanBandSrcValid != nullptr &&
2231 30506400 : poWK->papanBandSrcValid[iBand] != nullptr &&
2232 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2233 : {
2234 0 : *pdfDensity = 0.0;
2235 0 : return false;
2236 : }
2237 :
2238 30506400 : *pdfReal = 0.0;
2239 30506400 : *pdfImag = 0.0;
2240 :
2241 : // TODO(schwehr): Fix casting.
2242 30506400 : switch (poWK->eWorkingDataType)
2243 : {
2244 29429400 : case GDT_UInt8:
2245 29429400 : *pdfReal = pabySrc[iSrcOffset];
2246 29429400 : *pdfImag = 0.0;
2247 29429400 : break;
2248 :
2249 0 : case GDT_Int8:
2250 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2251 0 : *pdfImag = 0.0;
2252 0 : break;
2253 :
2254 28232 : case GDT_Int16:
2255 28232 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2256 28232 : *pdfImag = 0.0;
2257 28232 : break;
2258 :
2259 166 : case GDT_UInt16:
2260 166 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2261 166 : *pdfImag = 0.0;
2262 166 : break;
2263 :
2264 63 : case GDT_Int32:
2265 63 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2266 63 : *pdfImag = 0.0;
2267 63 : break;
2268 :
2269 63 : case GDT_UInt32:
2270 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2271 63 : *pdfImag = 0.0;
2272 63 : break;
2273 :
2274 0 : case GDT_Int64:
2275 0 : *pdfReal = static_cast<double>(
2276 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2277 0 : *pdfImag = 0.0;
2278 0 : break;
2279 :
2280 0 : case GDT_UInt64:
2281 0 : *pdfReal = static_cast<double>(
2282 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2283 0 : *pdfImag = 0.0;
2284 0 : break;
2285 :
2286 0 : case GDT_Float16:
2287 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2288 0 : *pdfImag = 0.0;
2289 0 : break;
2290 :
2291 1047220 : case GDT_Float32:
2292 1047220 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2293 1047220 : *pdfImag = 0.0;
2294 1047220 : break;
2295 :
2296 587 : case GDT_Float64:
2297 587 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2298 587 : *pdfImag = 0.0;
2299 587 : break;
2300 :
// Complex types: real part at index 2 * iSrcOffset, imaginary part at the
// following index.
2301 133 : case GDT_CInt16:
2302 133 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2303 133 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2304 133 : break;
2305 :
2306 133 : case GDT_CInt32:
2307 133 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2308 133 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2309 133 : break;
2310 :
2311 0 : case GDT_CFloat16:
2312 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2313 0 : *pdfImag =
2314 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2315 0 : break;
2316 :
2317 194 : case GDT_CFloat32:
2318 194 : *pdfReal =
2319 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2320 194 : *pdfImag =
2321 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2322 194 : break;
2323 :
2324 138 : case GDT_CFloat64:
2325 138 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2326 138 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2327 138 : break;
2328 :
2329 0 : case GDT_Unknown:
2330 : case GDT_TypeCount:
2331 0 : CPLAssert(false);
2332 : *pdfDensity = 0.0;
2333 : return false;
2334 : }
2335 :
2336 30506400 : if (poWK->pafUnifiedSrcDensity != nullptr)
2337 4194800 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2338 : else
2339 26311600 : *pdfDensity = 1.0;
2340 :
2341 30506400 : return *pdfDensity != 0.0;
2342 : }
2343 :
2344 : /************************************************************************/
2345 : /* GWKGetPixelValueReal() */
2346 : /************************************************************************/
2347 :
// Real-only counterpart of GWKGetPixelValue(): reads the source sample at
// iSrcOffset of band iBand into *pdfReal as a double.
//
// If the per-band validity mask poWK->papanBandSrcValid[iBand] exists and
// flags the pixel as invalid, *pdfDensity is set to 0.0 and false is returned
// without writing *pdfReal.
//
// Otherwise *pdfReal receives the value converted from
// poWK->eWorkingDataType. *pdfDensity is taken from
// poWK->pafUnifiedSrcDensity when that array exists and is 1.0 otherwise.
// The return value is whether that density is non-zero.
//
// NOTE(review): for complex or unknown types this returns false without
// writing *pdfDensity or *pdfReal, whereas GWKGetPixelValue() zeroes
// *pdfDensity on its unsupported-type path -- confirm callers do not read
// the outputs after a false return.
2348 15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2349 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2350 : double *pdfReal)
2351 :
2352 : {
2353 15516 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2354 :
2355 31034 : if (poWK->papanBandSrcValid != nullptr &&
2356 15518 : poWK->papanBandSrcValid[iBand] != nullptr &&
2357 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2358 : {
2359 0 : *pdfDensity = 0.0;
2360 0 : return false;
2361 : }
2362 :
2363 15516 : switch (poWK->eWorkingDataType)
2364 : {
2365 1 : case GDT_UInt8:
2366 1 : *pdfReal = pabySrc[iSrcOffset];
2367 1 : break;
2368 :
2369 0 : case GDT_Int8:
2370 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2371 0 : break;
2372 :
2373 1 : case GDT_Int16:
2374 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2375 1 : break;
2376 :
2377 1 : case GDT_UInt16:
2378 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2379 1 : break;
2380 :
2381 982 : case GDT_Int32:
2382 982 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2383 982 : break;
2384 :
2385 179 : case GDT_UInt32:
2386 179 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2387 179 : break;
2388 :
2389 112 : case GDT_Int64:
2390 112 : *pdfReal = static_cast<double>(
2391 112 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2392 112 : break;
2393 :
2394 112 : case GDT_UInt64:
2395 112 : *pdfReal = static_cast<double>(
2396 112 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2397 112 : break;
2398 :
2399 0 : case GDT_Float16:
2400 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2401 0 : break;
2402 :
2403 2 : case GDT_Float32:
2404 2 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2405 2 : break;
2406 :
2407 14126 : case GDT_Float64:
2408 14126 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2409 14126 : break;
2410 :
// Complex and unknown types are not handled by this real-only variant.
2411 0 : case GDT_CInt16:
2412 : case GDT_CInt32:
2413 : case GDT_CFloat16:
2414 : case GDT_CFloat32:
2415 : case GDT_CFloat64:
2416 : case GDT_Unknown:
2417 : case GDT_TypeCount:
2418 0 : CPLAssert(false);
2419 : return false;
2420 : }
2421 :
2422 15516 : if (poWK->pafUnifiedSrcDensity != nullptr)
2423 0 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2424 : else
2425 15516 : *pdfDensity = 1.0;
2426 :
2427 15516 : return *pdfDensity != 0.0;
2428 : }
2429 :
2430 : /************************************************************************/
2431 : /* GWKGetPixelRow() */
2432 : /************************************************************************/
2433 :
2434 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2435 : /* data-types. */
2436 :
// Fetches nHalfSrcLen * 2 consecutive source samples of band iBand, starting
// at iSrcOffset, into adfReal[] (and into padfImag[] for complex types only).
//
// padfDensity may be null. In that case no masking is done and the function
// returns true once the values are fetched.
//
// When padfDensity is non-null:
//   1. every entry is initialised to 1.0;
//   2. entries flagged invalid by poWK->panUnifiedSrcValid, then by
//      poWK->papanBandSrcValid[iBand], are set to 0.0; if either mask exists
//      and leaves no valid sample, false is returned before any pixel value
//      is fetched;
//   3. after the fetch, entries still above SRC_DENSITY_THRESHOLD_DOUBLE are
//      set to 1.0 (no pafUnifiedSrcDensity) or replaced by the matching
//      pafUnifiedSrcDensity value.
// The return value is then true when at least one entry ends above the
// threshold.
//
// For GDT_Unknown / GDT_TypeCount the densities (if any) are zeroed and false
// is returned.
2437 2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2438 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2439 : double *padfDensity, double adfReal[],
2440 : double *padfImag)
2441 : {
2442 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2443 2369710 : const int nSrcLen = nHalfSrcLen * 2;
2444 2369710 : bool bHasValid = false;
2445 :
2446 2369710 : if (padfDensity != nullptr)
2447 : {
2448 : // Init the density.
2449 3384030 : for (int i = 0; i < nSrcLen; i += 2)
2450 : {
2451 2211910 : padfDensity[i] = 1.0;
2452 2211910 : padfDensity[i + 1] = 1.0;
2453 : }
2454 :
// Apply the unified (all-band) validity mask, if any.
2455 1172120 : if (poWK->panUnifiedSrcValid != nullptr)
2456 : {
2457 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2458 : {
2459 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2460 2067740 : bHasValid = true;
2461 : else
2462 74323 : padfDensity[i] = 0.0;
2463 :
2464 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2465 2068400 : bHasValid = true;
2466 : else
2467 73668 : padfDensity[i + 1] = 0.0;
2468 : }
2469 :
2470 : // Reset or fail as needed.
2471 1139400 : if (bHasValid)
2472 1116590 : bHasValid = false;
2473 : else
2474 22806 : return false;
2475 : }
2476 :
// Apply the per-band validity mask, if any.
2477 1149320 : if (poWK->papanBandSrcValid != nullptr &&
2478 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2479 : {
2480 0 : for (int i = 0; i < nSrcLen; i += 2)
2481 : {
2482 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2483 0 : bHasValid = true;
2484 : else
2485 0 : padfDensity[i] = 0.0;
2486 :
2487 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2488 0 : iSrcOffset + i + 1))
2489 0 : bHasValid = true;
2490 : else
2491 0 : padfDensity[i + 1] = 0.0;
2492 : }
2493 :
2494 : // Reset or fail as needed.
2495 0 : if (bHasValid)
2496 0 : bHasValid = false;
2497 : else
2498 0 : return false;
2499 : }
2500 : }
2501 :
2502 : // TODO(schwehr): Fix casting.
2503 : // Fetch data.
2504 2346910 : switch (poWK->eWorkingDataType)
2505 : {
2506 1136680 : case GDT_UInt8:
2507 : {
2508 1136680 : GByte *pSrc =
2509 1136680 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2510 1136680 : pSrc += iSrcOffset;
2511 3281570 : for (int i = 0; i < nSrcLen; i += 2)
2512 : {
2513 2144890 : adfReal[i] = pSrc[i];
2514 2144890 : adfReal[i + 1] = pSrc[i + 1];
2515 : }
2516 1136680 : break;
2517 : }
2518 :
2519 0 : case GDT_Int8:
2520 : {
2521 0 : GInt8 *pSrc =
2522 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2523 0 : pSrc += iSrcOffset;
2524 0 : for (int i = 0; i < nSrcLen; i += 2)
2525 : {
2526 0 : adfReal[i] = pSrc[i];
2527 0 : adfReal[i + 1] = pSrc[i + 1];
2528 : }
2529 0 : break;
2530 : }
2531 :
2532 5950 : case GDT_Int16:
2533 : {
2534 5950 : GInt16 *pSrc =
2535 5950 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2536 5950 : pSrc += iSrcOffset;
2537 22164 : for (int i = 0; i < nSrcLen; i += 2)
2538 : {
2539 16214 : adfReal[i] = pSrc[i];
2540 16214 : adfReal[i + 1] = pSrc[i + 1];
2541 : }
2542 5950 : break;
2543 : }
2544 :
2545 4310 : case GDT_UInt16:
2546 : {
2547 4310 : GUInt16 *pSrc =
2548 4310 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2549 4310 : pSrc += iSrcOffset;
2550 18884 : for (int i = 0; i < nSrcLen; i += 2)
2551 : {
2552 14574 : adfReal[i] = pSrc[i];
2553 14574 : adfReal[i + 1] = pSrc[i + 1];
2554 : }
2555 4310 : break;
2556 : }
2557 :
2558 946 : case GDT_Int32:
2559 : {
2560 946 : GInt32 *pSrc =
2561 946 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2562 946 : pSrc += iSrcOffset;
2563 2624 : for (int i = 0; i < nSrcLen; i += 2)
2564 : {
2565 1678 : adfReal[i] = pSrc[i];
2566 1678 : adfReal[i + 1] = pSrc[i + 1];
2567 : }
2568 946 : break;
2569 : }
2570 :
2571 946 : case GDT_UInt32:
2572 : {
2573 946 : GUInt32 *pSrc =
2574 946 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2575 946 : pSrc += iSrcOffset;
2576 2624 : for (int i = 0; i < nSrcLen; i += 2)
2577 : {
2578 1678 : adfReal[i] = pSrc[i];
2579 1678 : adfReal[i + 1] = pSrc[i + 1];
2580 : }
2581 946 : break;
2582 : }
2583 :
2584 196 : case GDT_Int64:
2585 : {
2586 196 : auto pSrc =
2587 196 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2588 196 : pSrc += iSrcOffset;
2589 392 : for (int i = 0; i < nSrcLen; i += 2)
2590 : {
2591 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2592 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2593 : }
2594 196 : break;
2595 : }
2596 :
2597 196 : case GDT_UInt64:
2598 : {
2599 196 : auto pSrc =
2600 196 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2601 196 : pSrc += iSrcOffset;
2602 392 : for (int i = 0; i < nSrcLen; i += 2)
2603 : {
2604 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2605 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2606 : }
2607 196 : break;
2608 : }
2609 :
2610 0 : case GDT_Float16:
2611 : {
2612 0 : GFloat16 *pSrc =
2613 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2614 0 : pSrc += iSrcOffset;
2615 0 : for (int i = 0; i < nSrcLen; i += 2)
2616 : {
2617 0 : adfReal[i] = pSrc[i];
2618 0 : adfReal[i + 1] = pSrc[i + 1];
2619 : }
2620 0 : break;
2621 : }
2622 :
2623 25270 : case GDT_Float32:
2624 : {
2625 25270 : float *pSrc =
2626 25270 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2627 25270 : pSrc += iSrcOffset;
2628 121739 : for (int i = 0; i < nSrcLen; i += 2)
2629 : {
2630 96469 : adfReal[i] = double(pSrc[i]);
2631 96469 : adfReal[i + 1] = double(pSrc[i + 1]);
2632 : }
2633 25270 : break;
2634 : }
2635 :
2636 946 : case GDT_Float64:
2637 : {
2638 946 : double *pSrc =
2639 946 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2640 946 : pSrc += iSrcOffset;
2641 2624 : for (int i = 0; i < nSrcLen; i += 2)
2642 : {
2643 1678 : adfReal[i] = pSrc[i];
2644 1678 : adfReal[i + 1] = pSrc[i + 1];
2645 : }
2646 946 : break;
2647 : }
2648 :
// Complex types: the source holds (real, imag) pairs, so the base pointer
// advances by 2 * iSrcOffset and sample i reads elements 2 * i and
// 2 * i + 1. These are the only cases that write padfImag[].
2649 1169220 : case GDT_CInt16:
2650 : {
2651 1169220 : GInt16 *pSrc =
2652 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2653 1169220 : pSrc += 2 * iSrcOffset;
2654 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2655 : {
2656 3506800 : adfReal[i] = pSrc[2 * i];
2657 3506800 : padfImag[i] = pSrc[2 * i + 1];
2658 :
2659 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2660 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2661 : }
2662 1169220 : break;
2663 : }
2664 :
2665 750 : case GDT_CInt32:
2666 : {
2667 750 : GInt32 *pSrc =
2668 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2669 750 : pSrc += 2 * iSrcOffset;
2670 2232 : for (int i = 0; i < nSrcLen; i += 2)
2671 : {
2672 1482 : adfReal[i] = pSrc[2 * i];
2673 1482 : padfImag[i] = pSrc[2 * i + 1];
2674 :
2675 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2676 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2677 : }
2678 750 : break;
2679 : }
2680 :
2681 0 : case GDT_CFloat16:
2682 : {
2683 0 : GFloat16 *pSrc =
2684 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2685 0 : pSrc += 2 * iSrcOffset;
2686 0 : for (int i = 0; i < nSrcLen; i += 2)
2687 : {
2688 0 : adfReal[i] = pSrc[2 * i];
2689 0 : padfImag[i] = pSrc[2 * i + 1];
2690 :
2691 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2692 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2693 : }
2694 0 : break;
2695 : }
2696 :
2697 750 : case GDT_CFloat32:
2698 : {
2699 750 : float *pSrc =
2700 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2701 750 : pSrc += 2 * iSrcOffset;
2702 2232 : for (int i = 0; i < nSrcLen; i += 2)
2703 : {
2704 1482 : adfReal[i] = double(pSrc[2 * i]);
2705 1482 : padfImag[i] = double(pSrc[2 * i + 1]);
2706 :
2707 1482 : adfReal[i + 1] = double(pSrc[2 * i + 2]);
2708 1482 : padfImag[i + 1] = double(pSrc[2 * i + 3]);
2709 : }
2710 750 : break;
2711 : }
2712 :
2713 750 : case GDT_CFloat64:
2714 : {
2715 750 : double *pSrc =
2716 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2717 750 : pSrc += 2 * iSrcOffset;
2718 2232 : for (int i = 0; i < nSrcLen; i += 2)
2719 : {
2720 1482 : adfReal[i] = pSrc[2 * i];
2721 1482 : padfImag[i] = pSrc[2 * i + 1];
2722 :
2723 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2724 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2725 : }
2726 750 : break;
2727 : }
2728 :
2729 0 : case GDT_Unknown:
2730 : case GDT_TypeCount:
2731 0 : CPLAssert(false);
2732 : if (padfDensity)
2733 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2734 : return false;
2735 : }
2736 :
// Without a density array there is nothing more to compute.
2737 2346910 : if (padfDensity == nullptr)
2738 1197590 : return true;
2739 :
2740 1149320 : if (poWK->pafUnifiedSrcDensity == nullptr)
2741 : {
2742 3256740 : for (int i = 0; i < nSrcLen; i += 2)
2743 : {
2744 : // Take into account earlier calcs.
2745 2127390 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2746 : {
2747 2087480 : padfDensity[i] = 1.0;
2748 2087480 : bHasValid = true;
2749 : }
2750 :
2751 2127390 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2752 : {
2753 2088140 : padfDensity[i + 1] = 1.0;
2754 2088140 : bHasValid = true;
2755 : }
2756 : }
2757 : }
2758 : else
2759 : {
// Samples that survived the masks take their density from the unified
// source density array; the threshold is then tested again on the
// replaced value.
2760 70068 : for (int i = 0; i < nSrcLen; i += 2)
2761 : {
2762 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2763 50103 : padfDensity[i] =
2764 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2765 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2766 49252 : bHasValid = true;
2767 :
2768 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2769 50103 : padfDensity[i + 1] =
2770 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2771 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2772 49186 : bHasValid = true;
2773 : }
2774 : }
2775 :
2776 1149320 : return bHasValid;
2777 : }
2778 :
2779 : /************************************************************************/
2780 : /* GWKGetPixelT() */
2781 : /************************************************************************/
2782 :
2783 : template <class T>
2784 10002719 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2785 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2786 :
2787 : {
2788 10002719 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2789 :
2790 22733143 : if ((poWK->panUnifiedSrcValid != nullptr &&
2791 20005418 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2792 10002719 : (poWK->papanBandSrcValid != nullptr &&
2793 589836 : poWK->papanBandSrcValid[iBand] != nullptr &&
2794 589836 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2795 : {
2796 9 : *pdfDensity = 0.0;
2797 9 : return false;
2798 : }
2799 :
2800 10002709 : *pValue = pSrc[iSrcOffset];
2801 :
2802 10002709 : if (poWK->pafUnifiedSrcDensity == nullptr)
2803 8880346 : *pdfDensity = 1.0;
2804 : else
2805 1122362 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2806 :
2807 10002709 : return *pdfDensity != 0.0;
2808 : }
2809 :
2810 : /************************************************************************/
2811 : /* GWKBilinearResample() */
2812 : /* Set of bilinear interpolators */
2813 : /************************************************************************/
2814 :
2815 77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2816 : double dfSrcX, double dfSrcY,
2817 : double *pdfDensity, double *pdfReal,
2818 : double *pdfImag)
2819 :
2820 : {
2821 : // Save as local variables to avoid following pointers.
2822 77448 : const int nSrcXSize = poWK->nSrcXSize;
2823 77448 : const int nSrcYSize = poWK->nSrcYSize;
2824 :
2825 77448 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2826 77448 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2827 77448 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2828 77448 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2829 77448 : bool bShifted = false;
2830 :
2831 77448 : if (iSrcX == -1)
2832 : {
2833 1534 : iSrcX = 0;
2834 1534 : dfRatioX = 1;
2835 : }
2836 77448 : if (iSrcY == -1)
2837 : {
2838 7734 : iSrcY = 0;
2839 7734 : dfRatioY = 1;
2840 : }
2841 77448 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2842 :
2843 : // Shift so we don't overrun the array.
2844 77448 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2845 77330 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2846 77330 : iSrcOffset + nSrcXSize + 1)
2847 : {
2848 230 : bShifted = true;
2849 230 : --iSrcOffset;
2850 : }
2851 :
2852 77448 : double adfDensity[2] = {0.0, 0.0};
2853 77448 : double adfReal[2] = {0.0, 0.0};
2854 77448 : double adfImag[2] = {0.0, 0.0};
2855 77448 : double dfAccumulatorReal = 0.0;
2856 77448 : double dfAccumulatorImag = 0.0;
2857 77448 : double dfAccumulatorDensity = 0.0;
2858 77448 : double dfAccumulatorDivisor = 0.0;
2859 :
2860 77448 : const GPtrDiff_t nSrcPixels =
2861 77448 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2862 : // Get pixel row.
2863 77448 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2864 154896 : iSrcOffset < nSrcPixels &&
2865 77448 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2866 : adfImag))
2867 : {
2868 71504 : double dfMult1 = dfRatioX * dfRatioY;
2869 71504 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2870 :
2871 : // Shifting corrected.
2872 71504 : if (bShifted)
2873 : {
2874 230 : adfReal[0] = adfReal[1];
2875 230 : adfImag[0] = adfImag[1];
2876 230 : adfDensity[0] = adfDensity[1];
2877 : }
2878 :
2879 : // Upper Left Pixel.
2880 71504 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2881 71504 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
2882 : {
2883 66050 : dfAccumulatorDivisor += dfMult1;
2884 :
2885 66050 : dfAccumulatorReal += adfReal[0] * dfMult1;
2886 66050 : dfAccumulatorImag += adfImag[0] * dfMult1;
2887 66050 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2888 : }
2889 :
2890 : // Upper Right Pixel.
2891 71504 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2892 70609 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2893 : {
2894 65335 : dfAccumulatorDivisor += dfMult2;
2895 :
2896 65335 : dfAccumulatorReal += adfReal[1] * dfMult2;
2897 65335 : dfAccumulatorImag += adfImag[1] * dfMult2;
2898 65335 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2899 : }
2900 : }
2901 :
2902 : // Get pixel row.
2903 77448 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2904 228032 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2905 73136 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2906 : adfReal, adfImag))
2907 : {
2908 67577 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2909 67577 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2910 :
2911 : // Shifting corrected
2912 67577 : if (bShifted)
2913 : {
2914 112 : adfReal[0] = adfReal[1];
2915 112 : adfImag[0] = adfImag[1];
2916 112 : adfDensity[0] = adfDensity[1];
2917 : }
2918 :
2919 : // Lower Left Pixel
2920 67577 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2921 67577 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
2922 : {
2923 62298 : dfAccumulatorDivisor += dfMult1;
2924 :
2925 62298 : dfAccumulatorReal += adfReal[0] * dfMult1;
2926 62298 : dfAccumulatorImag += adfImag[0] * dfMult1;
2927 62298 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2928 : }
2929 :
2930 : // Lower Right Pixel.
2931 67577 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2932 66800 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2933 : {
2934 61823 : dfAccumulatorDivisor += dfMult2;
2935 :
2936 61823 : dfAccumulatorReal += adfReal[1] * dfMult2;
2937 61823 : dfAccumulatorImag += adfImag[1] * dfMult2;
2938 61823 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2939 : }
2940 : }
2941 :
2942 : /* -------------------------------------------------------------------- */
2943 : /* Return result. */
2944 : /* -------------------------------------------------------------------- */
2945 77448 : if (dfAccumulatorDivisor == 1.0)
2946 : {
2947 45929 : *pdfReal = dfAccumulatorReal;
2948 45929 : *pdfImag = dfAccumulatorImag;
2949 45929 : *pdfDensity = dfAccumulatorDensity;
2950 45929 : return false;
2951 : }
2952 31519 : else if (dfAccumulatorDivisor < 0.00001)
2953 : {
2954 0 : *pdfReal = 0.0;
2955 0 : *pdfImag = 0.0;
2956 0 : *pdfDensity = 0.0;
2957 0 : return false;
2958 : }
2959 : else
2960 : {
2961 31519 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2962 31519 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2963 31519 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2964 31519 : return true;
2965 : }
2966 : }
2967 :
2968 : template <class T>
2969 6765770 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2970 : int iBand, double dfSrcX,
2971 : double dfSrcY, T *pValue)
2972 :
2973 : {
2974 :
2975 6765770 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2976 6765770 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2977 6765770 : GPtrDiff_t iSrcOffset =
2978 6765770 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2979 6765770 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2980 6765770 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2981 :
2982 6765770 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2983 :
2984 6765770 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2985 4482638 : iSrcY + 1 < poWK->nSrcYSize)
2986 : {
2987 4439120 : const double dfAccumulator =
2988 4439120 : (double(pSrc[iSrcOffset]) * dfRatioX +
2989 4439120 : double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
2990 : dfRatioY +
2991 4439120 : (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
2992 4439120 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
2993 4439120 : (1.0 - dfRatioX)) *
2994 4439120 : (1.0 - dfRatioY);
2995 :
2996 4439120 : *pValue = GWKRoundValueT<T>(dfAccumulator);
2997 :
2998 4439120 : return true;
2999 : }
3000 :
3001 2326650 : double dfAccumulatorDivisor = 0.0;
3002 2326650 : double dfAccumulator = 0.0;
3003 :
3004 : // Upper Left Pixel.
3005 2326650 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
3006 272257 : iSrcY < poWK->nSrcYSize)
3007 : {
3008 272257 : const double dfMult = dfRatioX * dfRatioY;
3009 :
3010 272257 : dfAccumulatorDivisor += dfMult;
3011 :
3012 272257 : dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
3013 : }
3014 :
3015 : // Upper Right Pixel.
3016 2326650 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3017 1973090 : iSrcY < poWK->nSrcYSize)
3018 : {
3019 1973090 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
3020 :
3021 1973090 : dfAccumulatorDivisor += dfMult;
3022 :
3023 1973090 : dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
3024 : }
3025 :
3026 : // Lower Right Pixel.
3027 2326650 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3028 2064364 : iSrcY + 1 < poWK->nSrcYSize)
3029 : {
3030 1987572 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3031 :
3032 1987572 : dfAccumulatorDivisor += dfMult;
3033 :
3034 1987572 : dfAccumulator +=
3035 1987572 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
3036 : }
3037 :
3038 : // Lower Left Pixel.
3039 2326650 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3040 363504 : iSrcY + 1 < poWK->nSrcYSize)
3041 : {
3042 286487 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
3043 :
3044 286487 : dfAccumulatorDivisor += dfMult;
3045 :
3046 286487 : dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
3047 : }
3048 :
3049 : /* -------------------------------------------------------------------- */
3050 : /* Return result. */
3051 : /* -------------------------------------------------------------------- */
3052 2326650 : double dfValue = 0.0;
3053 :
3054 2326650 : if (dfAccumulatorDivisor < 0.00001)
3055 : {
3056 0 : *pValue = 0;
3057 0 : return false;
3058 : }
3059 2326650 : else if (dfAccumulatorDivisor == 1.0)
3060 : {
3061 7320 : dfValue = dfAccumulator;
3062 : }
3063 : else
3064 : {
3065 2319328 : dfValue = dfAccumulator / dfAccumulatorDivisor;
3066 : }
3067 :
3068 2326650 : *pValue = GWKRoundValueT<T>(dfValue);
3069 :
3070 2326650 : return true;
3071 : }
3072 :
3073 : /************************************************************************/
3074 : /* GWKCubicResample() */
3075 : /* Set of bicubic interpolators using cubic convolution. */
3076 : /************************************************************************/
3077 :
3078 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
3079 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
3080 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
3081 :
// One-dimensional cubic convolution of the four samples f0..f3.
// distance1, distance2 and distance3 are supplied by the caller and multiply
// the first-, second- and third-order terms respectively (the caller in this
// file passes d, d^2 and d^3).
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm));
}
3090 :
3091 : /************************************************************************/
3092 : /* GWKCubicComputeWeights() */
3093 : /************************************************************************/
3094 :
3095             : // The four weights sum to 1, hence:
                 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
3096 :
// Compute the four cubic-convolution weights for the fractional position x
// and store them in coeffs[0..3].
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    const T xHalf = T(0.5) * x;
    const T xTimes3 = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    // Weights written in Horner form.
    coeffs[0] = xHalf * (-1 + x * (2 - x));
    coeffs[1] = 1 + xSquaredHalf * (-5 + xTimes3);
    coeffs[2] = xHalf * (1 + x * (4 - xTimes3));
    coeffs[3] = xSquaredHalf * (-1 + x);
}
3109 :
// Dot product of four double weights (v1) with four samples of type T (v2),
// each sample being converted to double before the multiplication.
template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
{
    double dfSum = v1[0] * double(v2[0]);
    for (int k = 1; k < 4; ++k)
    {
        dfSum += v1[k] * double(v2[k]);
    }
    return dfSum;
}
3115 :
3116 : #if 0
3117 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
3118 : // instead of 17.
3119 : // TODO(schwehr): Use an inline function.
3120 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
3121 : { \
3122 : const double dfX = dfX_; \
3123 : dfHalfX = 0.5 * dfX; \
3124 : const double dfThreeX = 3.0 * dfX; \
3125 : const double dfXMinus1 = dfX - 1; \
3126 : \
3127 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
3128 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
3129 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
3130 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
3131 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
3132 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
3133 : }
3134 :
3135 : // TODO(schwehr): Use an inline function.
3136 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
3137 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
3138 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
3139 : #endif
3140 :
3141 302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3142 : double dfSrcX, double dfSrcY,
3143 : double *pdfDensity, double *pdfReal,
3144 : double *pdfImag)
3145 :
3146 : {
3147 302045 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3148 302045 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3149 302045 : GPtrDiff_t iSrcOffset =
3150 302045 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3151 302045 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3152 302045 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3153 302045 : double adfDensity[4] = {};
3154 302045 : double adfReal[4] = {};
3155 302045 : double adfImag[4] = {};
3156 :
3157 : // Get the bilinear interpolation at the image borders.
3158 302045 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3159 286140 : iSrcY + 2 >= poWK->nSrcYSize)
3160 24670 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3161 24670 : pdfDensity, pdfReal, pdfImag);
3162 :
3163 277375 : double adfValueDens[4] = {};
3164 277375 : double adfValueReal[4] = {};
3165 277375 : double adfValueImag[4] = {};
3166 :
3167 277375 : double adfCoeffsX[4] = {};
3168 277375 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3169 :
3170 1240570 : for (GPtrDiff_t i = -1; i < 3; i++)
3171 : {
3172 1009640 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3173 998035 : 2, adfDensity, adfReal, adfImag) ||
3174 998035 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3175 980395 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3176 2979770 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3177 972094 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3178 : {
3179 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3180 46449 : pdfDensity, pdfReal, pdfImag);
3181 : }
3182 :
3183 963196 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3184 963196 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3185 963196 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3186 : }
3187 :
3188 : /* -------------------------------------------------------------------- */
3189 : /* For now, if we have any pixels missing in the kernel area, */
3190 : /* we fallback on using bilinear interpolation. Ideally we */
3191 : /* should do "weight adjustment" of our results similarly to */
3192 : /* what is done for the cubic spline and lanc. interpolators. */
3193 : /* -------------------------------------------------------------------- */
3194 :
3195 230926 : double adfCoeffsY[4] = {};
3196 230926 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3197 :
3198 230926 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3199 230926 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3200 230926 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3201 :
3202 230926 : return true;
3203 : }
3204 :
3205 : #ifdef USE_SSE2
3206 :
3207 : /************************************************************************/
3208 : /* XMMLoad4Values() */
3209 : /* */
3210 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3211 : /* m128 register. */
3212 : /************************************************************************/
3213 :
3214 433649000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3215 : {
3216 : unsigned int i;
3217 433649000 : memcpy(&i, ptr, 4);
3218 867297000 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3219 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3220 : // 32-bit integers.
3221 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3222 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3223 : #else
3224 867297000 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3225 867297000 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3226 : #endif
3227 867297000 : return _mm_cvtepi32_ps(xmm_i);
3228 : }
3229 :
3230 791724 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3231 : {
3232 : GUInt64 i;
3233 791724 : memcpy(&i, ptr, 8);
3234 1583450 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3235 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3236 : // 32-bit integers.
3237 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3238 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3239 : #else
3240 1583450 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3241 : #endif
3242 1583450 : return _mm_cvtepi32_ps(xmm_i);
3243 : }
3244 :
3245 : /************************************************************************/
3246 : /* XMMHorizontalAdd() */
3247 : /* */
3248 : /* Return the sum of the 4 floating points of the register. */
3249 : /************************************************************************/
3250 :
3251 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3252 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3253 : {
3254 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3255 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3256 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3257 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3258 : return _mm_cvtss_f32(sums);
3259 : }
3260 : #else
3261 108610000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3262 : {
3263 108610000 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3264 108610000 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3265 108610000 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3266 108610000 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3267 108610000 : return _mm_cvtss_f32(sums);
3268 : }
3269 : #endif
3270 :
3271 : #endif // define USE_SSE2
3272 :
3273 : /************************************************************************/
3274 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3275 : /************************************************************************/
3276 :
3277 : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3278             : // because there are a few assumptions about those types.
3279 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3280 : // perf benefit.
3281 :
3282 : template <class T>
3283 389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3284 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3285 : double *pdfDensity, double *pdfReal)
3286 : {
3287 389755 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3288 389755 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3289 389755 : const GPtrDiff_t iSrcOffset =
3290 389755 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3291 :
3292 : // Get the bilinear interpolation at the image borders.
3293 389755 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3294 387271 : iSrcY + 2 >= poWK->nSrcYSize)
3295 : {
3296 2484 : double adfImagIgnored[4] = {};
3297 2484 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3298 2484 : pdfDensity, pdfReal, adfImagIgnored);
3299 : }
3300 :
3301 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3302 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3303 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3304 :
3305 : // TODO(schwehr): Explain the magic numbers.
3306 : float afTemp[4 + 4 + 4 + 1];
3307 : float *pafAligned =
3308 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3309 : float *pafCoeffs = pafAligned;
3310 : float *pafDensity = pafAligned + 4;
3311 : float *pafValue = pafAligned + 8;
3312 :
3313 : const float fHalfDeltaX = 0.5f * fDeltaX;
3314 : const float fThreeDeltaX = 3.0f * fDeltaX;
3315 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3316 :
3317 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3318 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3319 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3320 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3321 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3322 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3323 :
3324 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3325 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3326 : i++, iOffset += poWK->nSrcXSize)
3327 : {
3328 : const __m128 xmmDensity =
3329 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3330 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3331 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3332 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3333 :
3334 : const __m128 xmmValues =
3335 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3336 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3337 : }
3338 : if (_mm_movemask_ps(xmmMaskLowDensity))
3339 : {
3340 : double adfImagIgnored[4] = {};
3341 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3342 : pdfDensity, pdfReal, adfImagIgnored);
3343 : }
3344 :
3345 : const float fHalfDeltaY = 0.5f * fDeltaY;
3346 : const float fThreeDeltaY = 3.0f * fDeltaY;
3347 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3348 :
3349 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3350 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3351 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3352 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3353 :
3354 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3355 :
3356 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3357 : const __m128 xmmValue = _mm_load_ps(pafValue);
3358 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3359 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3360 :
3361 : // We did all above computations on float32 whereas the general case is
3362 : // float64. Not sure if one is fundamentally more correct than the other
3363 : // one, but we want our optimization to give the same result as the
3364 : // general case as much as possible, so if the resulting value is
3365 : // close to some_int_value + 0.5, redo the computation with the general
3366 : // case.
3367 : // Note: If other types than Byte or UInt16, will need changes.
3368 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3369 : return true;
3370 :
3371 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3372 :
3373 387271 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3374 387271 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3375 :
3376 387271 : double adfValueDens[4] = {};
3377 387271 : double adfValueReal[4] = {};
3378 :
3379 387271 : double adfCoeffsX[4] = {};
3380 387271 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3381 :
3382 387271 : double adfCoeffsY[4] = {};
3383 387271 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3384 :
3385 1930200 : for (GPtrDiff_t i = -1; i < 3; i++)
3386 : {
3387 1544480 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3388 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3389 1544480 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3390 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3391 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 1] <
3392 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3393 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 2] <
3394 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3395 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 3] <
3396 : SRC_DENSITY_THRESHOLD_FLOAT)
3397 : {
3398 1551 : double adfImagIgnored[4] = {};
3399 1551 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3400 : pdfDensity, pdfReal,
3401 1551 : adfImagIgnored);
3402 : }
3403 : #endif
3404 :
3405 3085860 : adfValueDens[i + 1] =
3406 1542930 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3407 :
3408 1542930 : adfValueReal[i + 1] = CONVOL4(
3409 : adfCoeffsX,
3410 1542930 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3411 : }
3412 :
3413 385720 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3414 385720 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3415 :
3416 385720 : return true;
3417 : }
3418 :
3419 : /************************************************************************/
3420 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3421 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3422 : /************************************************************************/
3423 :
3424 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3425 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3426 : double *pdfDensity, double *pdfReal)
3427 :
3428 : {
3429 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3430 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3431 0 : const GPtrDiff_t iSrcOffset =
3432 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3433 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3434 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3435 :
3436 : // Get the bilinear interpolation at the image borders.
3437 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3438 0 : iSrcY + 2 >= poWK->nSrcYSize)
3439 : {
3440 0 : double adfImagIgnored[4] = {};
3441 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3442 0 : pdfDensity, pdfReal, adfImagIgnored);
3443 : }
3444 :
3445 0 : double adfCoeffsX[4] = {};
3446 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3447 :
3448 0 : double adfCoeffsY[4] = {};
3449 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3450 :
3451 0 : double adfValueDens[4] = {};
3452 0 : double adfValueReal[4] = {};
3453 0 : double adfDensity[4] = {};
3454 0 : double adfReal[4] = {};
3455 0 : double adfImagIgnored[4] = {};
3456 :
3457 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3458 : {
3459 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3460 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3461 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3462 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3463 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3464 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3465 : {
3466 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3467 : pdfDensity, pdfReal,
3468 0 : adfImagIgnored);
3469 : }
3470 :
3471 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3472 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3473 : }
3474 :
3475 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3476 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3477 :
3478 0 : return true;
3479 : }
3480 :
3481 : template <class T>
3482 2231485 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3483 : int iBand, double dfSrcX,
3484 : double dfSrcY, T *pValue)
3485 :
3486 : {
3487 2231485 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3488 2231485 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3489 2231485 : const GPtrDiff_t iSrcOffset =
3490 2231485 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3491 2231485 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3492 2231485 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3493 2231485 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3494 2231485 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3495 :
3496 : // Get the bilinear interpolation at the image borders.
3497 2231485 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3498 1814944 : iSrcY + 2 >= poWK->nSrcYSize)
3499 488548 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3500 488548 : pValue);
3501 :
3502 1742937 : double adfCoeffs[4] = {};
3503 1742937 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3504 :
3505 1742937 : double adfValue[4] = {};
3506 :
3507 8714670 : for (GPtrDiff_t i = -1; i < 3; i++)
3508 : {
3509 6971746 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3510 :
3511 6971746 : adfValue[i + 1] = CONVOL4(
3512 : adfCoeffs,
3513 6971746 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3514 : }
3515 :
3516 : const double dfValue =
3517 1742937 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3518 : adfValue[1], adfValue[2], adfValue[3]);
3519 :
3520 1742937 : *pValue = GWKClampValueT<T>(dfValue);
3521 :
3522 1742937 : return true;
3523 : }
3524 :
3525 : /************************************************************************/
3526 : /* GWKLanczosSinc() */
3527 : /************************************************************************/
3528 :
3529 : /*
3530 : * Lanczos windowed sinc interpolation kernel with radius r.
3531 : * /
3532 : * | sinc(x) * sinc(x/r), if |x| < r
3533 : * L(x) = | 1, if x = 0 ,
3534 : * | 0, otherwise
3535 : * \
3536 : *
3537 : * where sinc(x) = sin(PI * x) / (PI * x).
3538 : */
3539 :
// Evaluate sinc(x) * sinc(x / 3) at dfX, i.e. the Lanczos kernel of radius 3
// without the |x| < 3 window test, which is not applied here.
// Returns 1.0 for dfX == 0.
static double GWKLanczosSinc(double dfX)
{
    if (dfX == 0.0)
    {
        return 1.0;
    }

    const double dfPiX = M_PI * dfX;
    const double dfPiXOver3 = dfPiX / 3;
    const double dfDenominator = dfPiX * dfPiXOver3;

    // Only one sin() call is needed: since sin(3y) = 3 sin(y) - 4 sin^3(y),
    // sin(pi x) * sin(pi x / 3) = (3 - 4 s^2) * s^2 with s = sin(pi x / 3).
    const double dfSin = sin(dfPiXOver3);
    const double dfSinSquared = dfSin * dfSin;
    const double dfNumerator = (3 - 4 * dfSinSquared) * dfSinSquared;

    return dfNumerator / dfDenominator;
}
3556 :
/**
 * Evaluate the radius-3 Lanczos kernel sinc(x) * sinc(x / 3) on four
 * abscissas at once, in place.
 *
 * As in the scalar version, no |x| < 3 window test is applied.
 *
 * @param padfValues array of 4 distances on input; overwritten with the
 *                   4 corresponding kernel values on output.
 * @return the sum of the 4 kernel values.
 */
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfCur = padfValues; pdfCur != padfValues + 4; ++pdfCur)
    {
        if (*pdfCur == 0.0)
        {
            // Limit of the kernel at the origin.
            *pdfCur = 1.0;
            continue;
        }

        const double dfAngle = M_PI * (*pdfCur);
        const double dfAngleOver3 = dfAngle / 3;
        const double dfDenominator = dfAngle * dfAngleOver3;

        // sin(3a) = 3 sin(a) - 4 sin^3(a), hence
        // sin(3a) * sin(a) = (3 - 4 sin^2(a)) * sin^2(a).
        const double dfSinThird = sin(dfAngleOver3);
        const double dfSinThirdSq = dfSinThird * dfSinThird;
        const double dfNumerator = (3 - 4 * dfSinThirdSq) * dfSinThirdSq;
        *pdfCur = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3581 :
3582 : /************************************************************************/
3583 : /* GWKBilinear() */
3584 : /************************************************************************/
3585 :
/**
 * Bilinear (triangle) filter weight.
 *
 * @param dfX distance from the kernel centre.
 * @return 1 - |dfX| when |dfX| <= 1, and 0.0 otherwise.
 */
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3594 :
/**
 * Bilinear (triangle) filter weights for four abscissas at once, in place.
 *
 * @param padfValues array of 4 distances on input; each entry is replaced by
 *                   1 - |x| when |x| <= 1, and by 0.0 otherwise.
 * @return the sum of the 4 resulting weights.
 */
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfDistance = fabs(padfValues[i]);
        padfValues[i] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3619 :
3620 : /************************************************************************/
3621 : /* GWKCubic() */
3622 : /************************************************************************/
3623 :
/**
 * Cubic filter weight for a single abscissa.
 *
 * Thin wrapper with a plain double(double) signature that forwards to
 * CubicKernel(), which is defined outside this section of the file.
 *
 * @param dfX distance from the kernel centre.
 * @return the value of CubicKernel(dfX).
 */
static double GWKCubic(double dfX)
{
    return CubicKernel(dfX);
}
3628 :
/**
 * Piecewise cubic convolution weights for four abscissas at once, in place.
 *
 * For each input x the weight is:
 *   x^2 * (1.5 |x| - 2.5) + 1                 when |x| <= 1
 *   x^2 * (-0.5 |x| + 2.5) - 4 |x| + 2        when 1 < |x| <= 2
 *   0                                         otherwise
 *
 * @param padfValues array of 4 distances on input; overwritten with the
 *                   4 corresponding weights on output.
 * @return the sum of the 4 weights.
 */
static double GWKCubic4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfX = padfValues[i];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        double dfWeight = 0.0;
        if (dfAbsX <= 1.0)
            dfWeight = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        else if (dfAbsX <= 2.0)
            dfWeight = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;

        padfValues[i] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3670 :
3671 : /************************************************************************/
3672 : /* GWKBSpline() */
3673 : /************************************************************************/
3674 :
3675 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3676 : // Equation 8 with (B,C)=(1,0)
3677 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3678 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3679 :
/**
 * Cubic B-spline filter weight (Mitchell-Netravali with (B,C)=(1,0)).
 *
 * The value is built from the truncated-power expansion
 *   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
 * where each term only contributes when its base is positive. The 1/6
 * normalisation factor is not applied here, so the result is 6 times the
 * kernel (e.g. 4.0 at x = 0).
 *
 * The expansion above lacks the final (x-2)^3 term, so it is only valid up
 * to x = 2. The kernel support is |x| < 2: anything at or beyond x = 2 is
 * explicitly cut off to 0, symmetrically with what the (x+2) > 0 test does
 * on the negative side. Values inside the support are computed with the same
 * operations, in the same order, as before.
 *
 * @param x distance from the kernel centre.
 * @return 6 * B-spline(x) for -2 < x < 2, and 0.0 otherwise.
 */
static double GWKBSpline(double x)
{
    const double xp2 = x + 2.0;

    // Outside the kernel support (also catches NaN on the first test).
    if (!(xp2 > 0.0) || x >= 2.0)
        return 0.0;

    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;

    // Accumulate from the innermost term outwards.
    double dfInner = 0.0;
    if (xp1 > 0.0)
    {
        if (x > 0.0)
        {
            if (xm1 > 0.0)
                dfInner = -4.0 * xm1 * xm1 * xm1;
            dfInner += 6.0 * x * x * x;
        }
        dfInner += -4.0 * xp1 * xp1 * xp1;
    }

    return dfInner + xp2 * xp2 * xp2;  // * 0.166666666666666666666
}
3703 :
/**
 * Cubic B-spline filter weights (Mitchell-Netravali with (B,C)=(1,0)) for
 * four abscissas at once, in place.
 *
 * Each weight is built from the truncated-power expansion
 *   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
 * where each term only contributes when its base is positive. The 1/6
 * normalisation factor is not applied here, so each weight is 6 times the
 * kernel value.
 *
 * The expansion lacks the final (x-2)^3 term, so it is only valid up to
 * x = 2. Inputs at or beyond x = 2 are explicitly mapped to 0, symmetrically
 * with the (x+2) > 0 test on the negative side. Values inside the support
 * are computed with the same operations, in the same order, as before.
 *
 * @param padfValues array of 4 distances on input; overwritten with the
 *                   4 corresponding weights on output.
 * @return the sum of the 4 weights.
 */
static double GWKBSpline4Values(double *padfValues)
{
    for (int i = 0; i < 4; i++)
    {
        const double x = padfValues[i];
        const double xp2 = x + 2.0;

        double dfWeight = 0.0;
        // Only evaluate inside the kernel support -2 < x < 2.
        if (xp2 > 0.0 && x < 2.0)
        {
            const double xp1 = x + 1.0;
            const double xm1 = x - 1.0;

            // Accumulate from the innermost term outwards.
            double dfInner = 0.0;
            if (xp1 > 0.0)
            {
                if (x > 0.0)
                {
                    if (xm1 > 0.0)
                        dfInner = -4.0 * xm1 * xm1 * xm1;
                    dfInner += 6.0 * x * x * x;
                }
                dfInner += -4.0 * xp1 * xp1 * xp1;
            }

            dfWeight = dfInner + xp2 * xp2 * xp2;  // * 0.166666666666666666666
        }
        padfValues[i] = dfWeight;
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3733 : /************************************************************************/
3734 : /* GWKResampleWrkStruct */
3735 : /************************************************************************/
3736 :
3737 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3738 :
3739 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3740 : double dfSrcX, double dfSrcY,
3741 : double *pdfDensity, double *pdfReal,
3742 : double *pdfImag,
3743 : GWKResampleWrkStruct *psWrkStruct);
3744 :
3745 : struct _GWKResampleWrkStruct
3746 : {
3747 : pfnGWKResampleType pfnGWKResample;
3748 :
3749 : // Space for saved X weights.
3750 : double *padfWeightsX;
3751 : bool *pabCalcX;
3752 :
3753 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3754 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3755 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3756 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3757 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3758 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3759 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3760 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3761 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3762 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3763 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3764 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3765 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3766 :
3767 : // Space for saving a row of pixels.
3768 : double *padfRowDensity;
3769 : double *padfRowReal;
3770 : double *padfRowImag;
3771 : };
3772 :
3773 : /************************************************************************/
3774 : /* GWKResampleCreateWrkStruct() */
3775 : /************************************************************************/
3776 :
3777 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3778 : double dfSrcY, double *pdfDensity, double *pdfReal,
3779 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3780 :
3781 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3782 : double dfSrcX, double dfSrcY,
3783 : double *pdfDensity, double *pdfReal,
3784 : double *pdfImag,
3785 : GWKResampleWrkStruct *psWrkStruct);
3786 :
3787 397 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3788 : {
3789 397 : const int nXDist = (poWK->nXRadius + 1) * 2;
3790 397 : const int nYDist = (poWK->nYRadius + 1) * 2;
3791 :
3792 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3793 397 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3794 :
3795 : // Alloc space for saved X weights.
3796 397 : psWrkStruct->padfWeightsX =
3797 397 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3798 397 : psWrkStruct->pabCalcX =
3799 397 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3800 :
3801 397 : psWrkStruct->padfWeightsY =
3802 397 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3803 397 : psWrkStruct->iLastSrcX = -10;
3804 397 : psWrkStruct->iLastSrcY = -10;
3805 397 : psWrkStruct->dfLastDeltaX = -10;
3806 397 : psWrkStruct->dfLastDeltaY = -10;
3807 :
3808 : // Alloc space for saving a row of pixels.
3809 397 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3810 363 : poWK->panUnifiedSrcValid == nullptr &&
3811 340 : poWK->papanBandSrcValid == nullptr)
3812 : {
3813 340 : psWrkStruct->padfRowDensity = nullptr;
3814 : }
3815 : else
3816 : {
3817 57 : psWrkStruct->padfRowDensity =
3818 57 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3819 : }
3820 397 : psWrkStruct->padfRowReal =
3821 397 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3822 397 : psWrkStruct->padfRowImag =
3823 397 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3824 :
3825 397 : if (poWK->eResample == GRA_Lanczos)
3826 : {
3827 63 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3828 :
3829 63 : if (poWK->dfXScale < 1)
3830 : {
3831 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3832 4 : psWrkStruct->dfSinPiXScaleOver3 =
3833 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3834 4 : psWrkStruct->dfCosPiXScaleOver3);
3835 : // "Naive":
3836 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3837 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3838 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3839 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3840 4 : psWrkStruct->dfCosPiXScaleOver3 -
3841 4 : 3) *
3842 4 : psWrkStruct->dfCosPiXScaleOver3;
3843 4 : psWrkStruct->dfSinPiXScale = sqrt(
3844 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3845 : }
3846 :
3847 63 : if (poWK->dfYScale < 1)
3848 : {
3849 11 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3850 11 : psWrkStruct->dfSinPiYScaleOver3 =
3851 11 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3852 11 : psWrkStruct->dfCosPiYScaleOver3);
3853 : // "Naive":
3854 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3855 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3856 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3857 11 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3858 11 : psWrkStruct->dfCosPiYScaleOver3 -
3859 11 : 3) *
3860 11 : psWrkStruct->dfCosPiYScaleOver3;
3861 11 : psWrkStruct->dfSinPiYScale = sqrt(
3862 11 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3863 : }
3864 : }
3865 : else
3866 334 : psWrkStruct->pfnGWKResample = GWKResample;
3867 :
3868 397 : return psWrkStruct;
3869 : }
3870 :
3871 : /************************************************************************/
3872 : /* GWKResampleDeleteWrkStruct() */
3873 : /************************************************************************/
3874 :
3875 397 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3876 : {
3877 397 : CPLFree(psWrkStruct->padfWeightsX);
3878 397 : CPLFree(psWrkStruct->padfWeightsY);
3879 397 : CPLFree(psWrkStruct->pabCalcX);
3880 397 : CPLFree(psWrkStruct->padfRowDensity);
3881 397 : CPLFree(psWrkStruct->padfRowReal);
3882 397 : CPLFree(psWrkStruct->padfRowImag);
3883 397 : CPLFree(psWrkStruct);
3884 397 : }
3885 :
3886 : /************************************************************************/
3887 : /* GWKResample() */
3888 : /************************************************************************/
3889 :
3890 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3891 : double dfSrcY, double *pdfDensity, double *pdfReal,
3892 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3893 :
3894 : {
3895 : // Save as local variables to avoid following pointers in loops.
3896 239383 : const int nSrcXSize = poWK->nSrcXSize;
3897 239383 : const int nSrcYSize = poWK->nSrcYSize;
3898 :
3899 239383 : double dfAccumulatorReal = 0.0;
3900 239383 : double dfAccumulatorImag = 0.0;
3901 239383 : double dfAccumulatorDensity = 0.0;
3902 239383 : double dfAccumulatorWeight = 0.0;
3903 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3904 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3905 239383 : const GPtrDiff_t iSrcOffset =
3906 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3907 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3908 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3909 :
3910 239383 : const double dfXScale = poWK->dfXScale;
3911 239383 : const double dfYScale = poWK->dfYScale;
3912 :
3913 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
3914 :
3915 : // Space for saved X weights.
3916 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
3917 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
3918 :
3919 : // Space for saving a row of pixels.
3920 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
3921 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
3922 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
3923 :
3924 : // Mark as needing calculation (don't calculate the weights yet,
3925 : // because a mask may render it unnecessary).
3926 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
3927 :
3928 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3929 239383 : CPLAssert(pfnGetWeight);
3930 :
3931 : // Skip sampling over edge of image.
3932 239383 : int j = poWK->nFiltInitY;
3933 239383 : int jMax = poWK->nYRadius;
3934 239383 : if (iSrcY + j < 0)
3935 566 : j = -iSrcY;
3936 239383 : if (iSrcY + jMax >= nSrcYSize)
3937 662 : jMax = nSrcYSize - iSrcY - 1;
3938 :
3939 239383 : int iMin = poWK->nFiltInitX;
3940 239383 : int iMax = poWK->nXRadius;
3941 239383 : if (iSrcX + iMin < 0)
3942 566 : iMin = -iSrcX;
3943 239383 : if (iSrcX + iMax >= nSrcXSize)
3944 659 : iMax = nSrcXSize - iSrcX - 1;
3945 :
3946 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
3947 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
3948 :
3949 239383 : GPtrDiff_t iRowOffset =
3950 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3951 :
3952 : // Loop over pixel rows in the kernel.
3953 1445930 : for (; j <= jMax; ++j)
3954 : {
3955 1206540 : iRowOffset += nSrcXSize;
3956 :
3957 : // Get pixel values.
3958 : // We can potentially read extra elements after the "normal" end of the
3959 : // source arrays, but the contract of papabySrcImage[iBand],
3960 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3961 : // is to have WARP_EXTRA_ELTS reserved at their end.
3962 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3963 : padfRowDensity, padfRowReal, padfRowImag))
3964 72 : continue;
3965 :
3966 : // Calculate the Y weight.
3967 : double dfWeight1 = (bYScaleBelow1)
3968 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3969 1600 : : pfnGetWeight(j - dfDeltaY);
3970 :
3971 : // Iterate over pixels in row.
3972 1206470 : double dfAccumulatorRealLocal = 0.0;
3973 1206470 : double dfAccumulatorImagLocal = 0.0;
3974 1206470 : double dfAccumulatorDensityLocal = 0.0;
3975 1206470 : double dfAccumulatorWeightLocal = 0.0;
3976 :
3977 7317420 : for (int i = iMin; i <= iMax; ++i)
3978 : {
3979 : // Skip sampling if pixel has zero density.
3980 6110940 : if (padfRowDensity != nullptr &&
3981 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
3982 546 : continue;
3983 :
3984 6110400 : double dfWeight2 = 0.0;
3985 :
3986 : // Make or use a cached set of weights for this row.
3987 6110400 : if (pabCalcX[i - iMin])
3988 : {
3989 : // Use saved weight value instead of recomputing it.
3990 4903920 : dfWeight2 = padfWeightsX[i - iMin];
3991 : }
3992 : else
3993 : {
3994 : // Calculate & save the X weight.
3995 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
3996 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3997 1600 : : pfnGetWeight(i - dfDeltaX);
3998 :
3999 1206480 : pabCalcX[i - iMin] = true;
4000 : }
4001 :
4002 : // Accumulate!
4003 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
4004 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
4005 6110400 : if (padfRowDensity != nullptr)
4006 76731 : dfAccumulatorDensityLocal +=
4007 76731 : padfRowDensity[i - iMin] * dfWeight2;
4008 6110400 : dfAccumulatorWeightLocal += dfWeight2;
4009 : }
4010 :
4011 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
4012 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
4013 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
4014 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
4015 : }
4016 :
4017 239383 : if (dfAccumulatorWeight < 0.000001 ||
4018 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
4019 : {
4020 0 : *pdfDensity = 0.0;
4021 0 : return false;
4022 : }
4023 :
4024 : // Calculate the output taking into account weighting.
4025 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4026 : {
4027 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
4028 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
4029 239380 : if (padfRowDensity != nullptr)
4030 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
4031 : else
4032 237496 : *pdfDensity = 1.0;
4033 : }
4034 : else
4035 : {
4036 3 : *pdfReal = dfAccumulatorReal;
4037 3 : *pdfImag = dfAccumulatorImag;
4038 3 : if (padfRowDensity != nullptr)
4039 3 : *pdfDensity = dfAccumulatorDensity;
4040 : else
4041 0 : *pdfDensity = 1.0;
4042 : }
4043 :
4044 239383 : return true;
4045 : }
4046 :
4047 : /************************************************************************/
4048 : /* GWKResampleOptimizedLanczos() */
4049 : /************************************************************************/
4050 :
4051 617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
4052 : double dfSrcX, double dfSrcY,
4053 : double *pdfDensity, double *pdfReal,
4054 : double *pdfImag,
4055 : GWKResampleWrkStruct *psWrkStruct)
4056 :
4057 : {
4058 : // Save as local variables to avoid following pointers in loops.
4059 617144 : const int nSrcXSize = poWK->nSrcXSize;
4060 617144 : const int nSrcYSize = poWK->nSrcYSize;
4061 :
4062 617144 : double dfAccumulatorReal = 0.0;
4063 617144 : double dfAccumulatorImag = 0.0;
4064 617144 : double dfAccumulatorDensity = 0.0;
4065 617144 : double dfAccumulatorWeight = 0.0;
4066 617144 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4067 617144 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4068 617144 : const GPtrDiff_t iSrcOffset =
4069 617144 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4070 617144 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4071 617144 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4072 :
4073 617144 : const double dfXScale = poWK->dfXScale;
4074 617144 : const double dfYScale = poWK->dfYScale;
4075 :
4076 : // Space for saved X weights.
4077 617144 : double *const padfWeightsXShifted =
4078 617144 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
4079 617144 : double *const padfWeightsYShifted =
4080 617144 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
4081 :
4082 : // Space for saving a row of pixels.
4083 617144 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
4084 617144 : double *const padfRowReal = psWrkStruct->padfRowReal;
4085 617144 : double *const padfRowImag = psWrkStruct->padfRowImag;
4086 :
4087 : // Skip sampling over edge of image.
4088 617144 : int jMin = poWK->nFiltInitY;
4089 617144 : int jMax = poWK->nYRadius;
4090 617144 : if (iSrcY + jMin < 0)
4091 16522 : jMin = -iSrcY;
4092 617144 : if (iSrcY + jMax >= nSrcYSize)
4093 5782 : jMax = nSrcYSize - iSrcY - 1;
4094 :
4095 617144 : int iMin = poWK->nFiltInitX;
4096 617144 : int iMax = poWK->nXRadius;
4097 617144 : if (iSrcX + iMin < 0)
4098 15797 : iMin = -iSrcX;
4099 617144 : if (iSrcX + iMax >= nSrcXSize)
4100 4657 : iMax = nSrcXSize - iSrcX - 1;
4101 :
4102 617144 : if (dfXScale < 1.0)
4103 : {
4104 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
4105 200179 : iMin++;
4106 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
4107 0 : iMax--;
4108 :
4109 : // clang-format off
4110 : /*
4111 : Naive version:
4112 : for (int i = iMin; i <= iMax; ++i)
4113 : {
4114 : psWrkStruct->padfWeightsXShifted[i] =
4115 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
4116 : }
4117 :
4118 : but given that:
4119 :
4120 : GWKLanczosSinc(x):
4121 : if (dfX == 0.0)
4122 : return 1.0;
4123 :
4124 : const double dfPIX = M_PI * dfX;
4125 : const double dfPIXoverR = dfPIX / 3;
4126 : const double dfPIX2overR = dfPIX * dfPIXoverR;
4127 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
4128 :
4129 : and
4130 : sin (a + b) = sin a cos b + cos a sin b.
4131 : cos (a + b) = cos a cos b - sin a sin b.
4132 :
4133 : we can skip any sin() computation within the loop
4134 : */
4135 : // clang-format on
4136 :
4137 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
4138 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4139 : {
4140 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4141 :
4142 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4143 71790 : double dfCosOver3 = cos(dfPIXover3);
4144 71790 : double dfSinOver3 = sin(dfPIXover3);
4145 :
4146 : // "Naive":
4147 : // double dfSin = sin( M_PI * dfX );
4148 : // double dfCos = cos( M_PI * dfX );
4149 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4150 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4151 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4152 :
4153 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4154 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4155 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4156 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4157 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4158 71790 : padfWeightsXShifted[iMin] =
4159 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4160 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
4161 : {
4162 1564690 : dfX += dfXScale;
4163 1564690 : const double dfNewSin =
4164 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4165 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4166 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
4167 1564690 : padfWeightsXShifted[i] =
4168 : dfX == 0
4169 1564690 : ? 1.0
4170 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4171 1564690 : const double dfNewCos =
4172 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4173 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4174 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
4175 1564690 : dfSin = dfNewSin;
4176 1564690 : dfCos = dfNewCos;
4177 1564690 : dfSinOver3 = dfNewSinOver3;
4178 1564690 : dfCosOver3 = dfNewCosOver3;
4179 : }
4180 :
4181 71790 : psWrkStruct->iLastSrcX = iSrcX;
4182 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4183 : }
4184 : }
4185 : else
4186 : {
4187 757542 : while (iMin - dfDeltaX < -3.0)
4188 343260 : iMin++;
4189 414282 : while (iMax - dfDeltaX > 3.0)
4190 0 : iMax--;
4191 :
4192 414282 : if (iSrcX != psWrkStruct->iLastSrcX ||
4193 209580 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4194 : {
4195 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4196 : // following trigonometric formulas.
4197 :
4198 : // TODO(schwehr): Move this somewhere where it can be rendered at
4199 : // LaTeX.
4200 : // clang-format off
4201 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4202 : // cos(M_PI * dfBase) * sin(M_PI * k)
4203 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4204 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4205 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4206 :
4207 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4208 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4209 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4210 : // clang-format on
4211 :
4212 414282 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4213 414282 : const double dfSin2PIDeltaXOver3 =
4214 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4215 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4216 414282 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4217 414282 : const double dfSinPIDeltaX =
4218 414282 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4219 414282 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4220 414282 : const double dfInvPI2Over3xSinPIDeltaX =
4221 : dfInvPI2Over3 * dfSinPIDeltaX;
4222 414282 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4223 414282 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4224 414282 : const double dfSinPIOver3 = 0.8660254037844386;
4225 414282 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4226 414282 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4227 : const double padfCst[] = {
4228 414282 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4229 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4230 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4231 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4232 414282 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4233 :
4234 2936860 : for (int i = iMin; i <= iMax; ++i)
4235 : {
4236 2522570 : const double dfX = i - dfDeltaX;
4237 2522570 : if (dfX == 0.0)
4238 58282 : padfWeightsXShifted[i] = 1.0;
4239 : else
4240 2464290 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4241 : #if DEBUG_VERBOSE
4242 : // TODO(schwehr): AlmostEqual.
4243 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4244 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4245 : #endif
4246 : }
4247 :
4248 414282 : psWrkStruct->iLastSrcX = iSrcX;
4249 414282 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4250 : }
4251 : }
4252 :
4253 617144 : if (dfYScale < 1.0)
4254 : {
4255 403116 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4256 200254 : jMin++;
4257 202862 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4258 0 : jMax--;
4259 :
4260 : // clang-format off
4261 : /*
4262 : Naive version:
4263 : for (int j = jMin; j <= jMax; ++j)
4264 : {
4265 : padfWeightsYShifted[j] =
4266 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4267 : }
4268 : */
4269 : // clang-format on
4270 :
4271 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4272 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4273 : {
4274 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4275 :
4276 383 : double dfPIYover3 = M_PI / 3 * dfY;
4277 383 : double dfCosOver3 = cos(dfPIYover3);
4278 383 : double dfSinOver3 = sin(dfPIYover3);
4279 :
4280 : // "Naive":
4281 : // double dfSin = sin( M_PI * dfY );
4282 : // double dfCos = cos( M_PI * dfY );
4283 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4284 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4285 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4286 :
4287 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4288 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4289 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4290 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4291 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4292 383 : padfWeightsYShifted[jMin] =
4293 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4294 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4295 : {
4296 6935 : dfY += dfYScale;
4297 6935 : const double dfNewSin =
4298 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4299 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4300 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4301 6935 : padfWeightsYShifted[j] =
4302 : dfY == 0
4303 6935 : ? 1.0
4304 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4305 6935 : const double dfNewCos =
4306 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4307 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4308 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4309 6935 : dfSin = dfNewSin;
4310 6935 : dfCos = dfNewCos;
4311 6935 : dfSinOver3 = dfNewSinOver3;
4312 6935 : dfCosOver3 = dfNewCosOver3;
4313 : }
4314 :
4315 383 : psWrkStruct->iLastSrcY = iSrcY;
4316 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4317 : }
4318 : }
4319 : else
4320 : {
4321 684742 : while (jMin - dfDeltaY < -3.0)
4322 270460 : jMin++;
4323 414282 : while (jMax - dfDeltaY > 3.0)
4324 0 : jMax--;
4325 :
4326 414282 : if (iSrcY != psWrkStruct->iLastSrcY ||
4327 413663 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4328 : {
4329 1132 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4330 1132 : const double dfSin2PIDeltaYOver3 =
4331 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4332 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4333 1132 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4334 1132 : const double dfSinPIDeltaY =
4335 1132 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4336 1132 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4337 1132 : const double dfInvPI2Over3xSinPIDeltaY =
4338 : dfInvPI2Over3 * dfSinPIDeltaY;
4339 1132 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4340 1132 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4341 1132 : const double dfSinPIOver3 = 0.8660254037844386;
4342 1132 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4343 1132 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4344 : const double padfCst[] = {
4345 1132 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4346 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4347 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4348 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4349 1132 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4350 :
4351 7917 : for (int j = jMin; j <= jMax; ++j)
4352 : {
4353 6785 : const double dfY = j - dfDeltaY;
4354 6785 : if (dfY == 0.0)
4355 460 : padfWeightsYShifted[j] = 1.0;
4356 : else
4357 6325 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4358 : #if DEBUG_VERBOSE
4359 : // TODO(schwehr): AlmostEqual.
4360 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4361 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4362 : #endif
4363 : }
4364 :
4365 1132 : psWrkStruct->iLastSrcY = iSrcY;
4366 1132 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4367 : }
4368 : }
4369 :
4370 : // If we have no density information, we can simply compute the
4371 : // accumulated weight.
4372 617144 : if (padfRowDensity == nullptr)
4373 : {
4374 617144 : double dfRowAccWeight = 0.0;
4375 7903490 : for (int i = iMin; i <= iMax; ++i)
4376 : {
4377 7286350 : dfRowAccWeight += padfWeightsXShifted[i];
4378 : }
4379 617144 : double dfColAccWeight = 0.0;
4380 7958040 : for (int j = jMin; j <= jMax; ++j)
4381 : {
4382 7340900 : dfColAccWeight += padfWeightsYShifted[j];
4383 : }
4384 617144 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4385 : }
4386 :
4387 : // Loop over pixel rows in the kernel.
4388 :
4389 617144 : if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
4390 616524 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4391 : !padfRowDensity)
4392 : {
4393 : // Optimization for Byte case without any masking/alpha
4394 :
4395 616524 : if (dfAccumulatorWeight < 0.000001)
4396 : {
4397 0 : *pdfDensity = 0.0;
4398 0 : return false;
4399 : }
4400 :
4401 616524 : const GByte *pSrc =
4402 616524 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4403 616524 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4404 :
4405 : #if defined(USE_SSE2)
4406 616524 : if (iMax - iMin + 1 == 6)
4407 : {
4408 : // This is just an optimized version of the general case in
4409 : // the else clause.
4410 :
4411 346854 : pSrc += iMin;
4412 346854 : int j = jMin;
4413 : const auto fourXWeights =
4414 346854 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4415 :
4416 : // Process 2 lines at the same time.
4417 1375860 : for (; j < jMax; j += 2)
4418 : {
4419 : const XMMReg4Double v_acc =
4420 1029000 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4421 : const XMMReg4Double v_acc2 =
4422 1029000 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4423 1029000 : const double dfRowAcc = v_acc.GetHorizSum();
4424 1029000 : const double dfRowAccEnd =
4425 1029000 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4426 1029000 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4427 1029000 : dfAccumulatorReal +=
4428 1029000 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4429 1029000 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4430 1029000 : const double dfRowAcc2End =
4431 1029000 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4432 1029000 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4433 1029000 : dfAccumulatorReal +=
4434 1029000 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4435 1029000 : pSrc += 2 * nSrcXSize;
4436 : }
4437 346854 : if (j == jMax)
4438 : {
4439 : // Process last line if there's an odd number of them.
4440 :
4441 : const XMMReg4Double v_acc =
4442 86045 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4443 86045 : const double dfRowAcc = v_acc.GetHorizSum();
4444 86045 : const double dfRowAccEnd =
4445 86045 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4446 86045 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4447 86045 : dfAccumulatorReal +=
4448 86045 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4449 : }
4450 : }
4451 : else
4452 : #endif
4453 : {
4454 5463580 : for (int j = jMin; j <= jMax; ++j)
4455 : {
4456 5193900 : int i = iMin;
4457 5193900 : double dfRowAcc1 = 0.0;
4458 5193900 : double dfRowAcc2 = 0.0;
4459 : // A bit of loop unrolling
4460 62750600 : for (; i < iMax; i += 2)
4461 : {
4462 57556700 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4463 57556700 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4464 : }
4465 5193900 : if (i == iMax)
4466 : {
4467 : // Process last column if there's an odd number of them.
4468 426183 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4469 : }
4470 :
4471 5193900 : dfAccumulatorReal +=
4472 5193900 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4473 5193900 : pSrc += nSrcXSize;
4474 : }
4475 : }
4476 :
4477 : // Calculate the output taking into account weighting.
4478 616524 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4479 : {
4480 569230 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4481 569230 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4482 569230 : *pdfDensity = 1.0;
4483 : }
4484 : else
4485 : {
4486 47294 : *pdfReal = dfAccumulatorReal;
4487 47294 : *pdfDensity = 1.0;
4488 : }
4489 :
4490 616524 : return true;
4491 : }
4492 :
4493 620 : GPtrDiff_t iRowOffset =
4494 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4495 :
4496 620 : int nCountValid = 0;
4497 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4498 :
4499 3560 : for (int j = jMin; j <= jMax; ++j)
4500 : {
4501 2940 : iRowOffset += nSrcXSize;
4502 :
4503 : // Get pixel values.
4504 : // We can potentially read extra elements after the "normal" end of the
4505 : // source arrays, but the contract of papabySrcImage[iBand],
4506 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4507 : // is to have WARP_EXTRA_ELTS reserved at their end.
4508 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4509 : padfRowDensity, padfRowReal, padfRowImag))
4510 0 : continue;
4511 :
4512 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4513 :
4514 : // Iterate over pixels in row.
4515 2940 : if (padfRowDensity != nullptr)
4516 : {
4517 0 : for (int i = iMin; i <= iMax; ++i)
4518 : {
4519 : // Skip sampling if pixel has zero density.
4520 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4521 0 : continue;
4522 :
4523 0 : nCountValid++;
4524 :
4525 : // Use a cached set of weights for this row.
4526 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4527 :
4528 : // Accumulate!
4529 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4530 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4531 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4532 0 : dfAccumulatorWeight += dfWeight2;
4533 : }
4534 : }
4535 2940 : else if (bIsNonComplex)
4536 : {
4537 1764 : double dfRowAccReal = 0.0;
4538 10560 : for (int i = iMin; i <= iMax; ++i)
4539 : {
4540 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4541 :
4542 : // Accumulate!
4543 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4544 : }
4545 :
4546 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4547 : }
4548 : else
4549 : {
4550 1176 : double dfRowAccReal = 0.0;
4551 1176 : double dfRowAccImag = 0.0;
4552 7040 : for (int i = iMin; i <= iMax; ++i)
4553 : {
4554 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4555 :
4556 : // Accumulate!
4557 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4558 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4559 : }
4560 :
4561 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4562 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4563 : }
4564 : }
4565 :
4566 620 : if (dfAccumulatorWeight < 0.000001 ||
4567 0 : (padfRowDensity != nullptr &&
4568 0 : (dfAccumulatorDensity < 0.000001 ||
4569 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4570 : {
4571 0 : *pdfDensity = 0.0;
4572 0 : return false;
4573 : }
4574 :
4575 : // Calculate the output taking into account weighting.
4576 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4577 : {
4578 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4579 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4580 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4581 0 : if (padfRowDensity != nullptr)
4582 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4583 : else
4584 0 : *pdfDensity = 1.0;
4585 : }
4586 : else
4587 : {
4588 620 : *pdfReal = dfAccumulatorReal;
4589 620 : *pdfImag = dfAccumulatorImag;
4590 620 : if (padfRowDensity != nullptr)
4591 0 : *pdfDensity = dfAccumulatorDensity;
4592 : else
4593 620 : *pdfDensity = 1.0;
4594 : }
4595 :
4596 620 : return true;
4597 : }
4598 :
4599 : /************************************************************************/
4600 : /* GWKComputeWeights() */
4601 : /************************************************************************/
4602 :
4603 3881980 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4604 : double dfDeltaX, double dfXScale, int jMin,
4605 : int jMax, double dfDeltaY, double dfYScale,
4606 : double *padfWeightsHorizontal,
4607 : double *padfWeightsVertical, double &dfInvWeights)
4608 : {
4609 :
4610 3881980 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4611 3881980 : CPLAssert(pfnGetWeight);
4612 3881980 : const FilterFunc4ValuesType pfnGetWeight4Values =
4613 3881980 : apfGWKFilter4Values[eResample];
4614 3881980 : CPLAssert(pfnGetWeight4Values);
4615 :
4616 3881980 : int i = iMin; // Used after for.
4617 3881980 : int iC = 0; // Used after for.
4618 : // Not zero, but as close as possible to it, to avoid potential division by
4619 : // zero at end of function
4620 3881980 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
4621 8714600 : for (; i + 2 < iMax; i += 4, iC += 4)
4622 : {
4623 4832620 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4624 4832620 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4625 4832620 : padfWeightsHorizontal[iC + 2] =
4626 4832620 : padfWeightsHorizontal[iC + 1] + dfXScale;
4627 4832620 : padfWeightsHorizontal[iC + 3] =
4628 4832620 : padfWeightsHorizontal[iC + 2] + dfXScale;
4629 4832620 : dfAccumulatorWeightHorizontal +=
4630 4832620 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4631 : }
4632 4105160 : for (; i <= iMax; ++i, ++iC)
4633 : {
4634 223187 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4635 223187 : padfWeightsHorizontal[iC] = dfWeight;
4636 223187 : dfAccumulatorWeightHorizontal += dfWeight;
4637 : }
4638 :
4639 3881980 : int j = jMin; // Used after for.
4640 3881980 : int jC = 0; // Used after for.
4641 : // Not zero, but as close as possible to it, to avoid potential division by
4642 : // zero at end of function
4643 3881980 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4644 8166130 : for (; j + 2 < jMax; j += 4, jC += 4)
4645 : {
4646 4284160 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4647 4284160 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4648 4284160 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4649 4284160 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4650 4284160 : dfAccumulatorWeightVertical +=
4651 4284160 : pfnGetWeight4Values(padfWeightsVertical + jC);
4652 : }
4653 8411130 : for (; j <= jMax; ++j, ++jC)
4654 : {
4655 4529160 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4656 4529160 : padfWeightsVertical[jC] = dfWeight;
4657 4529160 : dfAccumulatorWeightVertical += dfWeight;
4658 : }
4659 :
4660 3881980 : dfInvWeights =
4661 3881980 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4662 3881980 : }
4663 :
4664 : /************************************************************************/
4665 : /* GWKResampleNoMasksT() */
4666 : /************************************************************************/
4667 :
4668 : template <class T>
4669 : static bool
4670 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4671 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4672 : double *padfWeightsVertical, double &dfInvWeights)
4673 :
4674 : {
4675 : // Commonly used; save locally.
4676 : const int nSrcXSize = poWK->nSrcXSize;
4677 : const int nSrcYSize = poWK->nSrcYSize;
4678 :
4679 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4680 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4681 : const GPtrDiff_t iSrcOffset =
4682 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4683 :
4684 : const int nXRadius = poWK->nXRadius;
4685 : const int nYRadius = poWK->nYRadius;
4686 :
4687 : // Politely refuse to process invalid coordinates or obscenely small image.
4688 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4689 : nYRadius > nSrcYSize)
4690 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4691 : pValue);
4692 :
4693 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4694 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4695 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4696 :
4697 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4698 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4699 :
4700 : int iMin = 1 - nXRadius;
4701 : if (iSrcX + iMin < 0)
4702 : iMin = -iSrcX;
4703 : int iMax = nXRadius;
4704 : if (iSrcX + iMax >= nSrcXSize - 1)
4705 : iMax = nSrcXSize - 1 - iSrcX;
4706 :
4707 : int jMin = 1 - nYRadius;
4708 : if (iSrcY + jMin < 0)
4709 : jMin = -iSrcY;
4710 : int jMax = nYRadius;
4711 : if (iSrcY + jMax >= nSrcYSize - 1)
4712 : jMax = nSrcYSize - 1 - iSrcY;
4713 :
4714 : if (iBand == 0)
4715 : {
4716 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4717 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4718 : padfWeightsVertical, dfInvWeights);
4719 : }
4720 :
4721 : // Loop over all rows in the kernel.
4722 : double dfAccumulator = 0.0;
4723 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4724 : {
4725 : const GPtrDiff_t iSampJ =
4726 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4727 :
4728 : // Loop over all pixels in the row.
4729 : double dfAccumulatorLocal = 0.0;
4730 : double dfAccumulatorLocal2 = 0.0;
4731 : int iC = 0;
4732 : int i = iMin;
4733 : // Process by chunk of 4 cols.
4734 : for (; i + 2 < iMax; i += 4, iC += 4)
4735 : {
4736 : // Retrieve the pixel & accumulate.
4737 : dfAccumulatorLocal +=
4738 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4739 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4740 : padfWeightsHorizontal[iC + 1];
4741 : dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4742 : padfWeightsHorizontal[iC + 2];
4743 : dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4744 : padfWeightsHorizontal[iC + 3];
4745 : }
4746 : dfAccumulatorLocal += dfAccumulatorLocal2;
4747 : if (i < iMax)
4748 : {
4749 : dfAccumulatorLocal +=
4750 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4751 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4752 : padfWeightsHorizontal[iC + 1];
4753 : i += 2;
4754 : iC += 2;
4755 : }
4756 : if (i == iMax)
4757 : {
4758 : dfAccumulatorLocal +=
4759 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4760 : }
4761 :
4762 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4763 : }
4764 :
4765 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4766 :
4767 : return true;
4768 : }
4769 :
4770 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4771 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4772 : #if defined(USE_SSE2)
4773 :
4774 : /************************************************************************/
4775 : /* GWKResampleNoMasks_SSE2_T() */
4776 : /************************************************************************/
4777 :
4778 : template <class T>
4779 9589853 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4780 : double dfSrcX, double dfSrcY, T *pValue,
4781 : double *padfWeightsHorizontal,
4782 : double *padfWeightsVertical,
4783 : double &dfInvWeights)
4784 : {
4785 : // Commonly used; save locally.
4786 9589853 : const int nSrcXSize = poWK->nSrcXSize;
4787 9589853 : const int nSrcYSize = poWK->nSrcYSize;
4788 :
4789 9589853 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4790 9589853 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4791 9589853 : const GPtrDiff_t iSrcOffset =
4792 9589853 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4793 9589853 : const int nXRadius = poWK->nXRadius;
4794 9589853 : const int nYRadius = poWK->nYRadius;
4795 :
4796 : // Politely refuse to process invalid coordinates or obscenely small image.
4797 9589853 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4798 : nYRadius > nSrcYSize)
4799 3 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4800 3 : pValue);
4801 :
4802 9589851 : const T *pSrcBand =
4803 9589851 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4804 :
4805 9589851 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4806 9589851 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4807 9589851 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4808 9589851 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4809 :
4810 9589851 : int iMin = 1 - nXRadius;
4811 9589851 : if (iSrcX + iMin < 0)
4812 46218 : iMin = -iSrcX;
4813 9589851 : int iMax = nXRadius;
4814 9589851 : if (iSrcX + iMax >= nSrcXSize - 1)
4815 42714 : iMax = nSrcXSize - 1 - iSrcX;
4816 :
4817 9589851 : int jMin = 1 - nYRadius;
4818 9589851 : if (iSrcY + jMin < 0)
4819 49554 : jMin = -iSrcY;
4820 9589851 : int jMax = nYRadius;
4821 9589851 : if (iSrcY + jMax >= nSrcYSize - 1)
4822 35683 : jMax = nSrcYSize - 1 - iSrcY;
4823 :
4824 9589851 : if (iBand == 0)
4825 : {
4826 3881981 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4827 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4828 : padfWeightsVertical, dfInvWeights);
4829 : }
4830 :
4831 9589851 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4832 : // Process by chunk of 4 rows.
4833 9589851 : int jC = 0;
4834 9589851 : int j = jMin;
4835 9589851 : double dfAccumulator = 0.0;
4836 20264593 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4837 : {
4838 : // Loop over all pixels in the row.
4839 10674692 : int iC = 0;
4840 10674692 : int i = iMin;
4841 : // Process by chunk of 4 cols.
4842 10674692 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4843 10674692 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4844 10674692 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4845 10674692 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4846 27984580 : for (; i + 2 < iMax; i += 4, iC += 4)
4847 : {
4848 : // Retrieve the pixel & accumulate.
4849 17309788 : XMMReg4Double v_pixels_1 =
4850 17309788 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4851 17309788 : XMMReg4Double v_pixels_2 =
4852 17309788 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4853 17309788 : XMMReg4Double v_pixels_3 =
4854 17309788 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4855 17309788 : XMMReg4Double v_pixels_4 =
4856 17309788 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4857 :
4858 17309788 : XMMReg4Double v_padfWeight =
4859 17309788 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4860 :
4861 17309788 : v_acc_1 += v_pixels_1 * v_padfWeight;
4862 17309788 : v_acc_2 += v_pixels_2 * v_padfWeight;
4863 17309788 : v_acc_3 += v_pixels_3 * v_padfWeight;
4864 17309788 : v_acc_4 += v_pixels_4 * v_padfWeight;
4865 : }
4866 :
4867 10674692 : if (i < iMax)
4868 : {
4869 145982 : XMMReg2Double v_pixels_1 =
4870 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4871 145982 : XMMReg2Double v_pixels_2 =
4872 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4873 145982 : XMMReg2Double v_pixels_3 =
4874 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4875 145982 : XMMReg2Double v_pixels_4 =
4876 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4877 :
4878 145982 : XMMReg2Double v_padfWeight =
4879 145982 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4880 :
4881 145982 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4882 145982 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4883 145982 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4884 145982 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4885 :
4886 145982 : i += 2;
4887 145982 : iC += 2;
4888 : }
4889 :
4890 10674692 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4891 10674692 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4892 10674692 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4893 10674692 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4894 :
4895 10674692 : if (i == iMax)
4896 : {
4897 52267 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4898 52267 : padfWeightsHorizontal[iC];
4899 52267 : dfAccumulatorLocal_2 +=
4900 52267 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4901 52267 : padfWeightsHorizontal[iC];
4902 52267 : dfAccumulatorLocal_3 +=
4903 52267 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4904 52267 : padfWeightsHorizontal[iC];
4905 52267 : dfAccumulatorLocal_4 +=
4906 52267 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4907 52267 : padfWeightsHorizontal[iC];
4908 : }
4909 :
4910 10674692 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4911 10674692 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4912 10674692 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4913 10674692 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4914 : }
4915 22749841 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4916 : {
4917 : // Loop over all pixels in the row.
4918 13159940 : int iC = 0;
4919 13159940 : int i = iMin;
4920 : // Process by chunk of 4 cols.
4921 13159940 : XMMReg4Double v_acc = XMMReg4Double::Zero();
4922 26355663 : for (; i + 2 < iMax; i += 4, iC += 4)
4923 : {
4924 : // Retrieve the pixel & accumulate.
4925 13195723 : XMMReg4Double v_pixels =
4926 13195723 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4927 13195723 : XMMReg4Double v_padfWeight =
4928 13195723 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4929 :
4930 13195723 : v_acc += v_pixels * v_padfWeight;
4931 : }
4932 :
4933 13159940 : double dfAccumulatorLocal = v_acc.GetHorizSum();
4934 :
4935 13159940 : if (i < iMax)
4936 : {
4937 173976 : dfAccumulatorLocal +=
4938 173976 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4939 173976 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4940 173976 : padfWeightsHorizontal[iC + 1];
4941 173976 : i += 2;
4942 173976 : iC += 2;
4943 : }
4944 13159940 : if (i == iMax)
4945 : {
4946 33032 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4947 33032 : padfWeightsHorizontal[iC];
4948 : }
4949 :
4950 13159940 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4951 : }
4952 :
4953 9589851 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4954 :
4955 9589851 : return true;
4956 : }
4957 :
4958 : /************************************************************************/
4959 : /* GWKResampleNoMasksT<GByte>() */
4960 : /************************************************************************/
4961 :
4962 : template <>
4963 8991350 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4964 : double dfSrcX, double dfSrcY, GByte *pValue,
4965 : double *padfWeightsHorizontal,
4966 : double *padfWeightsVertical,
4967 : double &dfInvWeights)
4968 : {
4969 8991350 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4970 : padfWeightsHorizontal, padfWeightsVertical,
4971 8991350 : dfInvWeights);
4972 : }
4973 :
4974 : /************************************************************************/
4975 : /* GWKResampleNoMasksT<GInt16>() */
4976 : /************************************************************************/
4977 :
4978 : template <>
4979 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4980 : double dfSrcX, double dfSrcY, GInt16 *pValue,
4981 : double *padfWeightsHorizontal,
4982 : double *padfWeightsVertical,
4983 : double &dfInvWeights)
4984 : {
4985 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4986 : padfWeightsHorizontal, padfWeightsVertical,
4987 252563 : dfInvWeights);
4988 : }
4989 :
4990 : /************************************************************************/
4991 : /* GWKResampleNoMasksT<GUInt16>() */
4992 : /************************************************************************/
4993 :
4994 : template <>
4995 343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4996 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
4997 : double *padfWeightsHorizontal,
4998 : double *padfWeightsVertical,
4999 : double &dfInvWeights)
5000 : {
5001 343440 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5002 : padfWeightsHorizontal, padfWeightsVertical,
5003 343440 : dfInvWeights);
5004 : }
5005 :
5006 : /************************************************************************/
5007 : /* GWKResampleNoMasksT<float>() */
5008 : /************************************************************************/
5009 :
5010 : template <>
5011 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
5012 : double dfSrcX, double dfSrcY, float *pValue,
5013 : double *padfWeightsHorizontal,
5014 : double *padfWeightsVertical,
5015 : double &dfInvWeights)
5016 : {
5017 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5018 : padfWeightsHorizontal, padfWeightsVertical,
5019 2500 : dfInvWeights);
5020 : }
5021 :
5022 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5023 :
5024 : /************************************************************************/
5025 : /* GWKResampleNoMasksT<double>() */
5026 : /************************************************************************/
5027 :
5028 : template <>
5029 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
5030 : double dfSrcX, double dfSrcY, double *pValue,
5031 : double *padfWeightsHorizontal,
5032 : double *padfWeightsVertical,
5033 : double &dfInvWeights)
5034 : {
5035 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5036 : padfWeightsHorizontal, padfWeightsVertical,
5037 : dfInvWeights);
5038 : }
5039 :
5040 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
5041 :
5042 : #endif /* defined(USE_SSE2) */
5043 :
5044 : /************************************************************************/
5045 : /* GWKRoundSourceCoordinates() */
5046 : /************************************************************************/
5047 :
5048 1000 : static void GWKRoundSourceCoordinates(
5049 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
5050 : double dfSrcCoordPrecision, double dfErrorThreshold,
5051 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
5052 : double dfDstY)
5053 : {
5054 1000 : double dfPct = 0.8;
5055 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
5056 : {
5057 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
5058 : }
5059 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
5060 :
5061 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5062 : {
5063 500000 : const double dfXBefore = padfX[iDstX];
5064 500000 : const double dfYBefore = padfY[iDstX];
5065 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5066 : dfSrcCoordPrecision;
5067 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5068 : dfSrcCoordPrecision;
5069 :
5070 : // If we are in an uncertainty zone, go to non-approximated
5071 : // transformation.
5072 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
5073 : // be at least 10 times greater than the approximation error.
5074 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
5075 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
5076 : {
5077 180090 : padfX[iDstX] = iDstX + dfDstXOff;
5078 180090 : padfY[iDstX] = dfDstY;
5079 180090 : padfZ[iDstX] = 0.0;
5080 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
5081 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
5082 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5083 : dfSrcCoordPrecision;
5084 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5085 : dfSrcCoordPrecision;
5086 : }
5087 : }
5088 1000 : }
5089 :
5090 : /************************************************************************/
5091 : /* GWKCheckAndComputeSrcOffsets() */
5092 : /************************************************************************/
5093 : static CPL_INLINE bool
5094 152624000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5095 : int _iDstY, double *_padfX, double *_padfY,
5096 : int _nSrcXSize, int _nSrcYSize,
5097 : GPtrDiff_t &iSrcOffset)
5098 : {
5099 152624000 : const GDALWarpKernel *_poWK = psJob->poWK;
5100 152829000 : for (int iTry = 0; iTry < 2; ++iTry)
5101 : {
5102 152829000 : if (iTry == 1)
5103 : {
5104 : // If the source coordinate is slightly outside of the source raster
5105 : // retry to transform it alone, so that the exact coordinate
5106 : // transformer is used.
5107 :
5108 205524 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5109 205524 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5110 205524 : double dfZ = 0;
5111 205524 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5112 205524 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5113 205524 : _pabSuccess + _iDstX);
5114 : }
5115 152829000 : if (!_pabSuccess[_iDstX])
5116 3614790 : return false;
5117 :
5118 : // If this happens this is likely the symptom of a bug somewhere.
5119 149214000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5120 : {
5121 : static bool bNanCoordFound = false;
5122 0 : if (!bNanCoordFound)
5123 : {
5124 0 : CPLDebug("WARP",
5125 : "GWKCheckAndComputeSrcOffsets(): "
5126 : "NaN coordinate found on point %d.",
5127 : _iDstX);
5128 0 : bNanCoordFound = true;
5129 : }
5130 0 : return false;
5131 : }
5132 :
5133 : /* --------------------------------------------------------------------
5134 : */
5135 : /* Figure out what pixel we want in our source raster, and skip */
5136 : /* further processing if it is well off the source image. */
5137 : /* --------------------------------------------------------------------
5138 : */
5139 : /* We test against the value before casting to avoid the */
5140 : /* problem of asymmetric truncation effects around zero. That is */
5141 : /* -0.5 will be 0 when cast to an int. */
5142 149214000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5143 : {
5144 : // If the source coordinate is slightly outside of the source raster
5145 : // retry to transform it alone, so that the exact coordinate
5146 : // transformer is used.
5147 6006460 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5148 42249 : continue;
5149 5964210 : return false;
5150 : }
5151 :
5152 143208000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5153 : {
5154 : // If the source coordinate is slightly outside of the source raster
5155 : // retry to transform it alone, so that the exact coordinate
5156 : // transformer is used.
5157 6203470 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5158 64466 : continue;
5159 6139010 : return false;
5160 : }
5161 :
5162 : // Check for potential overflow when casting from float to int, (if
5163 : // operating outside natural projection area, padfX/Y can be a very huge
5164 : // positive number before doing the actual conversion), as such cast is
5165 : // undefined behavior that can trigger exception with some compilers
5166 : // (see #6753)
5167 137004000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5168 : {
5169 : // If the source coordinate is slightly outside of the source raster
5170 : // retry to transform it alone, so that the exact coordinate
5171 : // transformer is used.
5172 3932310 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5173 47544 : continue;
5174 3884760 : return false;
5175 : }
5176 133072000 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5177 : {
5178 : // If the source coordinate is slightly outside of the source raster
5179 : // retry to transform it alone, so that the exact coordinate
5180 : // transformer is used.
5181 4488370 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5182 51265 : continue;
5183 4437110 : return false;
5184 : }
5185 :
5186 128584000 : break;
5187 : }
5188 :
5189 128584000 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5190 128584000 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5191 128584000 : if (iSrcX == _nSrcXSize)
5192 0 : iSrcX--;
5193 128584000 : if (iSrcY == _nSrcYSize)
5194 0 : iSrcY--;
5195 :
5196 : // Those checks should normally be OK given the previous ones.
5197 128584000 : CPLAssert(iSrcX >= 0);
5198 128584000 : CPLAssert(iSrcY >= 0);
5199 128584000 : CPLAssert(iSrcX < _nSrcXSize);
5200 128584000 : CPLAssert(iSrcY < _nSrcYSize);
5201 :
5202 128584000 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5203 :
5204 128584000 : return true;
5205 : }
5206 :
5207 : /************************************************************************/
5208 : /* GWKOneSourceCornerFailsToReproject() */
5209 : /************************************************************************/
5210 :
// Returns true as soon as one of the four corners of the source window fails
// to go through poWK->pfnTransformer, false if all four succeed.
//
// The corners are (nSrcXOff + iX * nSrcXSize, nSrcYOff + iY * nSrcYSize) for
// iX, iY in {0, 1}. Each corner is transformed on its own (point count 1) with
// FALSE as the second transformer argument, whereas the destination-to-source
// transforms in this file pass TRUE -- so presumably this is the
// source-to-destination direction (see GDALTransformerFunc; confirm).
//
// psJob: job descriptor; only psJob->poWK and psJob->pTransformerArg are read.
5211 917 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5212 : {
5213 917 : GDALWarpKernel *poWK = psJob->poWK;
5214 2741 : for (int iY = 0; iY <= 1; ++iY)
5215 : {
5216 5478 : for (int iX = 0; iX <= 1; ++iX)
5217 : {
5218 3654 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5219 3654 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5220 3654 : double dfZTmp = 0;
// Pre-set to FALSE so that a transformer that leaves the flag untouched is
// treated as a failure.
5221 3654 : int nSuccess = FALSE;
5222 3654 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5223 : &dfYTmp, &dfZTmp, &nSuccess);
5224 3654 : if (!nSuccess)
5225 6 : return true;
5226 : }
5227 : }
5228 911 : return false;
5229 : }
5230 :
5231 : /************************************************************************/
5232 : /* GWKAdjustSrcOffsetOnEdge() */
5233 : /************************************************************************/
5234 :
// Tries to replace an invalid source pixel by a valid direct neighbour when
// the pixel sits where the transformer starts failing.
//
// iSrcOffset is decomposed as column = iSrcOffset % nSrcXSize and
// row = iSrcOffset / nSrcXSize. Up to three positions are run through
// poWK->pfnTransformer (second argument FALSE, one point at a time):
// (column, row), then (column, row + 1), then (column + 1, row). Probing stops
// at the first failure; the (column + 1, row + 1) position is not probed.
//
// If one probe failed, the right, bottom, left and top neighbours are examined
// in that order; the first one that lies inside the source window and is set
// in poWK->panUnifiedSrcValid replaces iSrcOffset and true is returned.
// Returns false (iSrcOffset untouched) if all probes succeeded or if no such
// neighbour exists.
//
// psJob:      job descriptor (poWK and pTransformerArg are read).
// iSrcOffset: in/out linear offset into the source window.
5235 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5236 : GPtrDiff_t &iSrcOffset)
5237 : {
5238 9714 : GDALWarpKernel *poWK = psJob->poWK;
5239 9714 : const int nSrcXSize = poWK->nSrcXSize;
5240 9714 : const int nSrcYSize = poWK->nSrcYSize;
5241 :
5242 : // Check if the computed source position slightly altered
5243 : // fails to reproject. If so, then we are at the edge of
5244 : // the validity area, and it is worth checking neighbour
5245 : // source pixels for validity.
5246 9714 : int nSuccess = FALSE;
// Probe 1: the pixel itself, at (column, row).
5247 : {
5248 9714 : double dfXTmp =
5249 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5250 9714 : double dfYTmp =
5251 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5252 9714 : double dfZTmp = 0;
5253 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5254 : &dfZTmp, &nSuccess);
5255 : }
// Probe 2 (only if probe 1 succeeded): one row further, at (column, row + 1).
5256 9714 : if (nSuccess)
5257 : {
5258 6996 : double dfXTmp =
5259 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5260 6996 : double dfYTmp =
5261 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5262 6996 : double dfZTmp = 0;
5263 6996 : nSuccess = FALSE;
5264 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5265 : &dfZTmp, &nSuccess);
5266 : }
// Probe 3 (only if probes 1 and 2 succeeded): one column further, at
// (column + 1, row).
5267 9714 : if (nSuccess)
5268 : {
5269 5624 : double dfXTmp =
5270 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5271 5624 : double dfYTmp =
5272 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5273 5624 : double dfZTmp = 0;
5274 5624 : nSuccess = FALSE;
5275 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5276 : &dfZTmp, &nSuccess);
5277 : }
5278 :
// From here on nSuccess is false iff one of the probes failed; every branch
// re-tests it. Neighbour priority: right, bottom, left, top. Each branch first
// checks that the neighbour stays inside the source window before reading the
// validity mask.
5279 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5280 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5281 : {
5282 1860 : iSrcOffset++;
5283 1860 : return true;
5284 : }
5285 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5286 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5287 : {
5288 1334 : iSrcOffset += nSrcXSize;
5289 1334 : return true;
5290 : }
5291 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5292 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5293 : {
5294 956 : iSrcOffset--;
5295 956 : return true;
5296 : }
5297 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5298 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5299 : {
5300 340 : iSrcOffset -= nSrcXSize;
5301 340 : return true;
5302 : }
5303 :
5304 5224 : return false;
5305 : }
5306 :
5307 : /************************************************************************/
5308 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5309 : /************************************************************************/
5310 :
// Density-mask counterpart of GWKAdjustSrcOffsetOnEdge(): same probing and
// same neighbour order, but a neighbour is accepted when its value in
// poWK->pafUnifiedSrcDensity is >= SRC_DENSITY_THRESHOLD_FLOAT instead of
// being looked up in the validity bit mask.
//
// iSrcOffset is decomposed as column = iSrcOffset % nSrcXSize and
// row = iSrcOffset / nSrcXSize. (column, row), then (column, row + 1), then
// (column + 1, row) are run through poWK->pfnTransformer, stopping at the
// first failure. If a probe failed, the right, bottom, left and top
// neighbours are tried in that order; the first in-window neighbour with
// sufficient density replaces iSrcOffset and true is returned. Otherwise
// returns false with iSrcOffset untouched.
//
// psJob:      job descriptor (poWK and pTransformerArg are read).
// iSrcOffset: in/out linear offset into the source window.
5311 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5312 : GPtrDiff_t &iSrcOffset)
5313 : {
5314 0 : GDALWarpKernel *poWK = psJob->poWK;
5315 0 : const int nSrcXSize = poWK->nSrcXSize;
5316 0 : const int nSrcYSize = poWK->nSrcYSize;
5317 :
5318 : // Check if the computed source position slightly altered
5319 : // fails to reproject. If so, then we are at the edge of
5320 : // the validity area, and it is worth checking neighbour
5321 : // source pixels for validity.
5322 0 : int nSuccess = FALSE;
// Probe 1: the pixel itself, at (column, row).
5323 : {
5324 0 : double dfXTmp =
5325 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5326 0 : double dfYTmp =
5327 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5328 0 : double dfZTmp = 0;
5329 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5330 : &dfZTmp, &nSuccess);
5331 : }
// Probe 2 (only if probe 1 succeeded): (column, row + 1).
5332 0 : if (nSuccess)
5333 : {
5334 0 : double dfXTmp =
5335 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5336 0 : double dfYTmp =
5337 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5338 0 : double dfZTmp = 0;
5339 0 : nSuccess = FALSE;
5340 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5341 : &dfZTmp, &nSuccess);
5342 : }
// Probe 3 (only if probes 1 and 2 succeeded): (column + 1, row).
5343 0 : if (nSuccess)
5344 : {
5345 0 : double dfXTmp =
5346 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5347 0 : double dfYTmp =
5348 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5349 0 : double dfZTmp = 0;
5350 0 : nSuccess = FALSE;
5351 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5352 : &dfZTmp, &nSuccess);
5353 : }
5354 :
// nSuccess is false here iff one of the probes failed. Neighbour priority:
// right, bottom, left, top; the bounds test in each branch runs before the
// density array is indexed.
5355 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5356 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5357 : SRC_DENSITY_THRESHOLD_FLOAT)
5358 : {
5359 0 : iSrcOffset++;
5360 0 : return true;
5361 : }
5362 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5363 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5364 : SRC_DENSITY_THRESHOLD_FLOAT)
5365 : {
5366 0 : iSrcOffset += nSrcXSize;
5367 0 : return true;
5368 : }
5369 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5370 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5371 : SRC_DENSITY_THRESHOLD_FLOAT)
5372 : {
5373 0 : iSrcOffset--;
5374 0 : return true;
5375 : }
5376 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5377 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5378 : SRC_DENSITY_THRESHOLD_FLOAT)
5379 : {
5380 0 : iSrcOffset -= nSrcXSize;
5381 0 : return true;
5382 : }
5383 :
5384 0 : return false;
5385 : }
5386 :
5387 : /************************************************************************/
5388 : /* GWKGeneralCase() */
5389 : /* */
5390 : /* This is the most general case. It attempts to handle all */
5391 : /* possible features with relatively little concern for */
5392 : /* efficiency. */
5393 : /************************************************************************/
5394 :
// Worker routine handed to GWKRun() by GWKGeneralCase().
//
// pData is a GWKJobStruct. The routine warps destination lines
// [psJob->iYMin, psJob->iYMax) of psJob->poWK, carrying both a real and an
// imaginary component per value.
//
// Per destination line:
//   1. the pixel centres (iDstX + 0.5 + nDstXOff, iDstY + 0.5 + nDstYOff) are
//      transformed to source coordinates with poWK->pfnTransformer;
//   2. for each pixel, GWKCheckAndComputeSrcOffsets() yields the source offset
//      or rejects the pixel;
//   3. pixels whose source is below the density threshold or not set in the
//      validity mask are skipped, unless a source corner fails to reproject
//      and a neighbouring source pixel can be substituted;
//   4. each band is sampled (nearest, 4-sample bilinear/cubic, or the generic
//      psWrkStruct->pfnGWKResample), optionally vertically shifted, and
//      written with GWKSetPixelValue();
//   5. destination density/validity masks are updated.
// After each line psJob->pfnProgress (if set) is called; a non-zero return
// stops the loop. All scratch buffers are released before returning.
5395 239 : static void GWKGeneralCaseThread(void *pData)
5396 : {
// NOTE(review): GWKRealCaseThread uses static_cast for the same void*
// conversion; reinterpret_cast is not needed here.
5397 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5398 239 : GDALWarpKernel *poWK = psJob->poWK;
5399 239 : const int iYMin = psJob->iYMin;
5400 239 : const int iYMax = psJob->iYMax;
// The MULT_FACTOR_VERTICAL_SHIFT_PIPELINE warp option (default "1.0") is only
// parsed when a vertical shift is applied; the 0.0 fallback is never used
// because the factor is only read under poWK->bApplyVerticalShift below.
5401 : const double dfMultFactorVerticalShiftPipeline =
5402 239 : poWK->bApplyVerticalShift
5403 239 : ? CPLAtof(CSLFetchNameValueDef(
5404 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5405 : "1.0"))
5406 239 : : 0.0;
// True for single-band kernels or when UNIFIED_SRC_NODATA does not evaluate
// to true. It is forwarded to GWKSetPixelValue(); when false,
// GWKAvoidNoDataMultiBand() is called once per pixel after the band loop.
5407 : const bool bAvoidNoDataSingleBand =
5408 239 : poWK->nBands == 1 ||
5409 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5410 239 : "UNIFIED_SRC_NODATA", "FALSE"));
5411 :
5412 239 : int nDstXSize = poWK->nDstXSize;
5413 239 : int nSrcXSize = poWK->nSrcXSize;
5414 239 : int nSrcYSize = poWK->nSrcYSize;
5415 :
5416 : /* -------------------------------------------------------------------- */
5417 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5418 : /* scanlines worth of positions. */
5419 : /* -------------------------------------------------------------------- */
5420 : // For x, 2 *, because we cache the precomputed values at the end.
5421 : double *padfX =
5422 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5423 : double *padfY =
5424 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5425 : double *padfZ =
5426 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5427 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5428 :
// The 4-sample bilinear/cubic shortcuts are only taken when both scale
// factors are at least 0.95; otherwise the generic resampler is used.
5429 239 : const bool bUse4SamplesFormula =
5430 239 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5431 :
// The resampling work structure is only needed (and only created) for
// resampling methods other than nearest neighbour.
5432 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5433 239 : if (poWK->eResample != GRA_NearestNeighbour)
5434 : {
5435 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5436 : }
5437 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5438 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5439 239 : const double dfErrorThreshold = CPLAtof(
5440 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5441 :
// Evaluated once per job: it decides whether masked source pixels may be
// replaced by a neighbour (see the mask tests in the pixel loop).
5442 : const bool bOneSourceCornerFailsToReproject =
5443 239 : GWKOneSourceCornerFailsToReproject(psJob);
5444 :
5445 : // Precompute values.
// The second half of padfX holds the untransformed destination X centres; it
// is copied into the first half at the start of every line.
5446 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5447 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5448 :
5449 : /* ==================================================================== */
5450 : /* Loop over output lines. */
5451 : /* ==================================================================== */
5452 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5453 : {
5454 : /* --------------------------------------------------------------------
5455 : */
5456 : /* Setup points to transform to source image space. */
5457 : /* --------------------------------------------------------------------
5458 : */
5459 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5460 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5461 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5462 236160 : padfY[iDstX] = dfY;
5463 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5464 :
5465 : /* --------------------------------------------------------------------
5466 : */
5467 : /* Transform the points from destination pixel/line coordinates */
5468 : /* to source pixel/line coordinates. */
5469 : /* --------------------------------------------------------------------
5470 : */
5471 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5472 : padfY, padfZ, pabSuccess);
// Optional post-processing of the transformed coordinates, enabled by a
// strictly positive SRC_COORD_PRECISION warp option.
5473 6230 : if (dfSrcCoordPrecision > 0.0)
5474 : {
5475 0 : GWKRoundSourceCoordinates(
5476 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5477 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5478 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5479 : }
5480 :
5481 : /* ====================================================================
5482 : */
5483 : /* Loop over pixels in output scanline. */
5484 : /* ====================================================================
5485 : */
5486 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5487 : {
5488 236160 : GPtrDiff_t iSrcOffset = 0;
5489 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5490 : padfX, padfY, nSrcXSize,
5491 : nSrcYSize, iSrcOffset))
5492 0 : continue;
5493 :
5494 : /* --------------------------------------------------------------------
5495 : */
5496 : /* Do not try to apply transparent/invalid source pixels to the
5497 : */
5498 : /* destination. This currently ignores the multi-pixel input
5499 : */
5500 : /* of bilinear and cubic resamples. */
5501 : /* --------------------------------------------------------------------
5502 : */
5503 236160 : double dfDensity = 1.0;
5504 :
// Density mask test: below the threshold the pixel is skipped, unless a
// source corner fails to reproject and a neighbour with sufficient density is
// found, in which case that neighbour's density is used.
5505 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5506 : {
5507 1200 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5508 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5509 : {
5510 0 : if (!bOneSourceCornerFailsToReproject)
5511 : {
5512 0 : continue;
5513 : }
5514 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5515 : psJob, iSrcOffset))
5516 : {
5517 0 : dfDensity =
5518 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5519 : }
5520 : else
5521 : {
5522 0 : continue;
5523 : }
5524 : }
5525 : }
5526 :
// Validity mask test, with the same neighbour fallback as above.
5527 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5528 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5529 : {
5530 0 : if (!bOneSourceCornerFailsToReproject)
5531 : {
5532 0 : continue;
5533 : }
5534 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5535 : {
5536 0 : continue;
5537 : }
5538 : }
5539 :
5540 : /* ====================================================================
5541 : */
5542 : /* Loop processing each band. */
5543 : /* ====================================================================
5544 : */
// Becomes true once at least one band produced a usable value; otherwise the
// destination masks are left untouched for this pixel.
5545 236160 : bool bHasFoundDensity = false;
5546 :
5547 236160 : const GPtrDiff_t iDstOffset =
5548 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5549 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5550 : {
5551 236160 : double dfBandDensity = 0.0;
5552 236160 : double dfValueReal = 0.0;
5553 236160 : double dfValueImag = 0.0;
5554 :
5555 : /* --------------------------------------------------------------------
5556 : */
5557 : /* Collect the source value. */
5558 : /* --------------------------------------------------------------------
5559 : */
// A direct pixel fetch is used for nearest neighbour, and also whenever the
// source window is only one pixel wide or high.
5560 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5561 : nSrcYSize == 1)
5562 : {
5563 : // FALSE is returned if dfBandDensity == 0, which is
5564 : // checked below.
5565 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5566 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5567 : &dfValueImag));
5568 : }
5569 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5570 : {
5571 248 : GWKBilinearResample4Sample(
5572 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5573 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5574 : &dfValueReal, &dfValueImag);
5575 : }
5576 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5577 : {
5578 248 : GWKCubicResample4Sample(
5579 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5580 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5581 : &dfValueReal, &dfValueImag);
5582 : }
5583 : else
5584 : #ifdef DEBUG
5585 : // Only useful for clang static analyzer.
5586 235096 : if (psWrkStruct != nullptr)
5587 : #endif
5588 : {
5589 235096 : psWrkStruct->pfnGWKResample(
5590 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5591 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5592 : &dfValueReal, &dfValueImag, psWrkStruct);
5593 : }
5594 :
5595 : // If we didn't find any valid inputs skip to next band.
5596 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5597 0 : continue;
5598 :
5599 236160 : if (poWK->bApplyVerticalShift)
5600 : {
// A non-finite Z from the transformer makes this band's value unusable.
5601 0 : if (!std::isfinite(padfZ[iDstX]))
5602 0 : continue;
5603 : // Subtract padfZ[] since the coordinate transformation is
5604 : // from target to source
5605 0 : dfValueReal =
5606 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5607 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5608 : }
5609 :
5610 236160 : bHasFoundDensity = true;
5611 :
5612 : /* --------------------------------------------------------------------
5613 : */
5614 : /* We have a computed value from the source. Now apply it
5615 : * to */
5616 : /* the destination pixel. */
5617 : /* --------------------------------------------------------------------
5618 : */
5619 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5620 : dfValueReal, dfValueImag,
5621 : bAvoidNoDataSingleBand);
5622 : }
5623 :
5624 236160 : if (!bHasFoundDensity)
5625 0 : continue;
5626 :
5627 236160 : if (!bAvoidNoDataSingleBand)
5628 : {
5629 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
5630 : }
5631 :
5632 : /* --------------------------------------------------------------------
5633 : */
5634 : /* Update destination density/validity masks. */
5635 : /* --------------------------------------------------------------------
5636 : */
5637 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5638 :
5639 236160 : if (poWK->panDstValid != nullptr)
5640 : {
5641 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5642 : }
5643 : } /* Next iDstX */
5644 :
5645 : /* --------------------------------------------------------------------
5646 : */
5647 : /* Report progress to the user, and optionally cancel out. */
5648 : /* --------------------------------------------------------------------
5649 : */
5650 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5651 0 : break;
5652 : }
5653 :
5654 : /* -------------------------------------------------------------------- */
5655 : /* Cleanup and return. */
5656 : /* -------------------------------------------------------------------- */
5657 239 : CPLFree(padfX);
5658 239 : CPLFree(padfY);
5659 239 : CPLFree(padfZ);
5660 239 : CPLFree(pabSuccess);
5661 239 : if (psWrkStruct)
5662 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5663 239 : }
5664 :
// Entry point of the general-case warper: hands GWKGeneralCaseThread to
// GWKRun() under the name "GWKGeneralCase" and returns GWKRun()'s CPLErr
// unchanged. How GWKRun() splits the work across jobs is not visible here.
5665 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5666 : {
5667 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5668 : }
5669 :
5670 : /************************************************************************/
5671 : /* GWKRealCase() */
5672 : /* */
5673 : /* General case for non-complex data types. */
5674 : /************************************************************************/
5675 :
// Worker routine handed to GWKRun() by GWKRealCase().
//
// pData is a GWKJobStruct. The routine warps destination lines
// [psJob->iYMin, psJob->iYMax) of psJob->poWK and only carries a real
// component per value: imaginary outputs of the shared resamplers are
// received in throw-away locals, and pixels are read/written with
// GWKGetPixelValueReal() / GWKSetPixelValueReal().
//
// Per destination line:
//   1. the pixel centres (iDstX + 0.5 + nDstXOff, iDstY + 0.5 + nDstYOff) are
//      transformed to source coordinates with poWK->pfnTransformer;
//   2. for each pixel, GWKCheckAndComputeSrcOffsets() yields the source offset
//      or rejects the pixel;
//   3. pixels whose source is below the density threshold or not set in the
//      validity mask are skipped, unless a source corner fails to reproject
//      and a neighbouring source pixel can be substituted;
//   4. each band is sampled (nearest, 4-sample bilinear, 4-sample cubic with
//      a dedicated path when the only source mask is the density mask, or the
//      generic psWrkStruct->pfnGWKResample), optionally vertically shifted,
//      and written;
//   5. destination density/validity masks are updated.
// After each line psJob->pfnProgress (if set) is called; a non-zero return
// stops the loop. All scratch buffers are released before returning.
5676 219 : static void GWKRealCaseThread(void *pData)
5677 :
5678 : {
5679 219 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5680 219 : GDALWarpKernel *poWK = psJob->poWK;
5681 219 : const int iYMin = psJob->iYMin;
5682 219 : const int iYMax = psJob->iYMax;
5683 :
5684 219 : const int nDstXSize = poWK->nDstXSize;
5685 219 : const int nSrcXSize = poWK->nSrcXSize;
5686 219 : const int nSrcYSize = poWK->nSrcYSize;
// The MULT_FACTOR_VERTICAL_SHIFT_PIPELINE warp option (default "1.0") is only
// parsed when a vertical shift is applied; the 0.0 fallback is never used
// because the factor is only read under poWK->bApplyVerticalShift below.
5687 : const double dfMultFactorVerticalShiftPipeline =
5688 219 : poWK->bApplyVerticalShift
5689 219 : ? CPLAtof(CSLFetchNameValueDef(
5690 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5691 : "1.0"))
5692 219 : : 0.0;
// True for single-band kernels or when UNIFIED_SRC_NODATA does not evaluate
// to true. It is forwarded to GWKSetPixelValueReal(); when false,
// GWKAvoidNoDataMultiBand() is called once per pixel after the band loop.
5693 : const bool bAvoidNoDataSingleBand =
5694 297 : poWK->nBands == 1 ||
5695 78 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5696 219 : "UNIFIED_SRC_NODATA", "FALSE"));
5697 :
5698 : /* -------------------------------------------------------------------- */
5699 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5700 : /* scanlines worth of positions. */
5701 : /* -------------------------------------------------------------------- */
5702 :
5703 : // For x, 2 *, because we cache the precomputed values at the end.
5704 : double *padfX =
5705 219 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5706 : double *padfY =
5707 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5708 : double *padfZ =
5709 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5710 219 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5711 :
// The 4-sample bilinear/cubic shortcuts are only taken when both scale
// factors are at least 0.95; otherwise the generic resampler is used.
5712 219 : const bool bUse4SamplesFormula =
5713 219 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5714 :
// The resampling work structure is only needed (and only created) for
// resampling methods other than nearest neighbour.
5715 219 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5716 219 : if (poWK->eResample != GRA_NearestNeighbour)
5717 : {
5718 177 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5719 : }
5720 219 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5721 219 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5722 219 : const double dfErrorThreshold = CPLAtof(
5723 219 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5724 :
// True when the unified density mask is the only source mask present (no
// unified validity mask and no per-band validity masks). It selects the
// "SrcMaskIsDensity" cubic variants in the band loop.
5725 626 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5726 407 : poWK->papanBandSrcValid == nullptr &&
5727 188 : poWK->pafUnifiedSrcDensity != nullptr;
5728 :
// Evaluated once per job: it decides whether masked source pixels may be
// replaced by a neighbour (see the mask tests in the pixel loop).
5729 : const bool bOneSourceCornerFailsToReproject =
5730 219 : GWKOneSourceCornerFailsToReproject(psJob);
5731 :
5732 : // Precompute values.
// The second half of padfX holds the untransformed destination X centres; it
// is copied into the first half at the start of every line.
5733 22605 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5734 22386 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5735 :
5736 : /* ==================================================================== */
5737 : /* Loop over output lines. */
5738 : /* ==================================================================== */
5739 25393 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5740 : {
5741 : /* --------------------------------------------------------------------
5742 : */
5743 : /* Setup points to transform to source image space. */
5744 : /* --------------------------------------------------------------------
5745 : */
5746 25174 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5747 25174 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5748 44331500 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5749 44306300 : padfY[iDstX] = dfY;
5750 25174 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5751 :
5752 : /* --------------------------------------------------------------------
5753 : */
5754 : /* Transform the points from destination pixel/line coordinates */
5755 : /* to source pixel/line coordinates. */
5756 : /* --------------------------------------------------------------------
5757 : */
5758 25174 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5759 : padfY, padfZ, pabSuccess);
// Optional post-processing of the transformed coordinates, enabled by a
// strictly positive SRC_COORD_PRECISION warp option.
5760 25174 : if (dfSrcCoordPrecision > 0.0)
5761 : {
5762 0 : GWKRoundSourceCoordinates(
5763 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5764 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5765 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5766 : }
5767 :
5768 : /* ====================================================================
5769 : */
5770 : /* Loop over pixels in output scanline. */
5771 : /* ====================================================================
5772 : */
5773 44331500 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5774 : {
5775 44306300 : GPtrDiff_t iSrcOffset = 0;
5776 44306300 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5777 : padfX, padfY, nSrcXSize,
5778 : nSrcYSize, iSrcOffset))
5779 43567600 : continue;
5780 :
5781 : /* --------------------------------------------------------------------
5782 : */
5783 : /* Do not try to apply transparent/invalid source pixels to the
5784 : */
5785 : /* destination. This currently ignores the multi-pixel input
5786 : */
5787 : /* of bilinear and cubic resamples. */
5788 : /* --------------------------------------------------------------------
5789 : */
5790 31793100 : double dfDensity = 1.0;
5791 :
// Density mask test: below the threshold the pixel is skipped, unless a
// source corner fails to reproject and a neighbour with sufficient density is
// found, in which case that neighbour's density is used.
5792 31793100 : if (poWK->pafUnifiedSrcDensity != nullptr)
5793 : {
5794 1656100 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5795 1656100 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5796 : {
5797 1525010 : if (!bOneSourceCornerFailsToReproject)
5798 : {
5799 1525010 : continue;
5800 : }
5801 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5802 : psJob, iSrcOffset))
5803 : {
5804 0 : dfDensity =
5805 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5806 : }
5807 : else
5808 : {
5809 0 : continue;
5810 : }
5811 : }
5812 : }
5813 :
// Validity mask test, with the same neighbour fallback as above.
5814 59897300 : if (poWK->panUnifiedSrcValid != nullptr &&
5815 29629200 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5816 : {
5817 29531600 : if (!bOneSourceCornerFailsToReproject)
5818 : {
5819 29529300 : continue;
5820 : }
5821 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5822 : {
5823 0 : continue;
5824 : }
5825 : }
5826 :
5827 : /* ====================================================================
5828 : */
5829 : /* Loop processing each band. */
5830 : /* ====================================================================
5831 : */
// Becomes true once at least one band produced a usable value; otherwise the
// destination masks are left untouched for this pixel.
5832 738768 : bool bHasFoundDensity = false;
5833 :
5834 738768 : const GPtrDiff_t iDstOffset =
5835 738768 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5836 2069310 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5837 : {
5838 1330540 : double dfBandDensity = 0.0;
5839 1330540 : double dfValueReal = 0.0;
5840 :
5841 : /* --------------------------------------------------------------------
5842 : */
5843 : /* Collect the source value. */
5844 : /* --------------------------------------------------------------------
5845 : */
// A direct pixel fetch is used for nearest neighbour, and also whenever the
// source window is only one pixel wide or high.
5846 1330540 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5847 : nSrcYSize == 1)
5848 : {
5849 : // FALSE is returned if dfBandDensity == 0, which is
5850 : // checked below.
5851 15516 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5852 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5853 : }
5854 1315030 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5855 : {
5856 2046 : double dfValueImagIgnored = 0.0;
5857 2046 : GWKBilinearResample4Sample(
5858 2046 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5859 2046 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5860 2046 : &dfValueReal, &dfValueImagIgnored);
5861 : }
5862 1312980 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5863 : {
// When the density mask is the only source mask, use the dedicated cubic
// variants: typed instantiations for GDT_UInt8 and GDT_UInt16 working types,
// and the non-template version for every other type.
5864 691552 : if (bSrcMaskIsDensity)
5865 : {
5866 389755 : if (poWK->eWorkingDataType == GDT_UInt8)
5867 : {
5868 389755 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5869 389755 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5870 389755 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5871 : &dfValueReal);
5872 : }
5873 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
5874 : {
5875 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
5876 0 : GUInt16>(poWK, iBand,
5877 0 : padfX[iDstX] - poWK->nSrcXOff,
5878 0 : padfY[iDstX] - poWK->nSrcYOff,
5879 : &dfBandDensity, &dfValueReal);
5880 : }
5881 : else
5882 : {
5883 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
5884 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5885 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5886 : &dfValueReal);
5887 : }
5888 : }
5889 : else
5890 : {
5891 301797 : double dfValueImagIgnored = 0.0;
5892 301797 : GWKCubicResample4Sample(
5893 301797 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5894 301797 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5895 : &dfValueReal, &dfValueImagIgnored);
5896 691552 : }
5897 : }
5898 : else
5899 : #ifdef DEBUG
5900 : // Only useful for clang static analyzer.
5901 621431 : if (psWrkStruct != nullptr)
5902 : #endif
5903 : {
5904 621431 : double dfValueImagIgnored = 0.0;
5905 621431 : psWrkStruct->pfnGWKResample(
5906 621431 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5907 621431 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5908 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
5909 : }
5910 :
5911 : // If we didn't find any valid inputs skip to next band.
5912 1330540 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5913 0 : continue;
5914 :
5915 1330540 : if (poWK->bApplyVerticalShift)
5916 : {
// A non-finite Z from the transformer makes this band's value unusable.
5917 0 : if (!std::isfinite(padfZ[iDstX]))
5918 0 : continue;
5919 : // Subtract padfZ[] since the coordinate transformation is
5920 : // from target to source
5921 0 : dfValueReal =
5922 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5923 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5924 : }
5925 :
5926 1330540 : bHasFoundDensity = true;
5927 :
5928 : /* --------------------------------------------------------------------
5929 : */
5930 : /* We have a computed value from the source. Now apply it
5931 : * to */
5932 : /* the destination pixel. */
5933 : /* --------------------------------------------------------------------
5934 : */
5935 1330540 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
5936 : dfValueReal, bAvoidNoDataSingleBand);
5937 : }
5938 :
5939 738768 : if (!bHasFoundDensity)
5940 0 : continue;
5941 :
5942 738768 : if (!bAvoidNoDataSingleBand)
5943 : {
5944 100295 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
5945 : }
5946 :
5947 : /* --------------------------------------------------------------------
5948 : */
5949 : /* Update destination density/validity masks. */
5950 : /* --------------------------------------------------------------------
5951 : */
5952 738768 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5953 :
5954 738768 : if (poWK->panDstValid != nullptr)
5955 : {
5956 104586 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5957 : }
5958 : } // Next iDstX.
5959 :
5960 : /* --------------------------------------------------------------------
5961 : */
5962 : /* Report progress to the user, and optionally cancel out. */
5963 : /* --------------------------------------------------------------------
5964 : */
5965 25174 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5966 0 : break;
5967 : }
5968 :
5969 : /* -------------------------------------------------------------------- */
5970 : /* Cleanup and return. */
5971 : /* -------------------------------------------------------------------- */
5972 219 : CPLFree(padfX);
5973 219 : CPLFree(padfY);
5974 219 : CPLFree(padfZ);
5975 219 : CPLFree(pabSuccess);
5976 219 : if (psWrkStruct)
5977 177 : GWKResampleDeleteWrkStruct(psWrkStruct);
5978 219 : }
5979 :
// Entry point of the real-valued (non-complex) warper: hands
// GWKRealCaseThread to GWKRun() under the name "GWKRealCase" and returns
// GWKRun()'s CPLErr unchanged. How GWKRun() splits the work across jobs is
// not visible here.
5980 219 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
5981 : {
5982 219 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
5983 : }
5984 :
5985 : /************************************************************************/
5986 : /* GWKCubicResampleNoMasks4MultiBandT() */
5987 : /************************************************************************/
5988 :
5989 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
5990 : /* and enough SSE registers */
5991 : #if defined(USE_SSE2)
5992 :
// Weighted sum of a 4x4 block of samples.
// row0..row3 each hold four horizontally adjacent samples as floats, and
// weightsXY0..weightsXY3 hold the matching per-sample weights. Each row is
// multiplied lane-wise by its weights, the four products are added lane-wise,
// and the resulting vector is reduced to a single float by XMMHorizontalAdd
// (defined elsewhere; presumably the sum of the four lanes - confirm).
5993 108610000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
5994 : const __m128 row2, const __m128 row3,
5995 : const __m128 weightsXY0,
5996 : const __m128 weightsXY1,
5997 : const __m128 weightsXY2,
5998 : const __m128 weightsXY3)
5999 : {
6000 760270000 : return XMMHorizontalAdd(_mm_add_ps(
6001 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6002 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6003 108610000 : _mm_mul_ps(row3, weightsXY3))));
6004 : }
6005 :
// Cubic resampling of one destination pixel for every band of poWK, using a
// 4x4 source neighbourhood and no mask handling.
//
// T          : pixel type of both the source and destination band buffers.
// poWK       : warp kernel; papabySrcImage / papabyDstImage are read and
//              written as arrays of T.
// dfSrcX/Y   : position of the sample in source-buffer pixel coordinates (the
//              code bounds-checks them against nSrcXSize / nSrcYSize).
// iDstOffset : linear index of the destination pixel in each band buffer.
//
// When the 4x4 window would reach outside the source buffer, every band falls
// back to GWKBilinearResampleNoMasks4SampleT. Otherwise the 16 weights are
// built once (outer product of the X and Y cubic weights) and reused for all
// bands, which are processed two at a time with one trailing band if nBands
// is odd. Finally, the destination density is set to 1.0 when that buffer
// exists.
6006 : template <class T>
6007 37174677 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6008 : double dfSrcX, double dfSrcY,
6009 : const GPtrDiff_t iDstOffset)
6010 : {
// Shift by half a pixel, then truncate (static_cast<int> rounds toward zero)
// to get the integer sample position used as the centre of the window.
6011 37174677 : const double dfSrcXShifted = dfSrcX - 0.5;
6012 37174677 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6013 37174677 : const double dfSrcYShifted = dfSrcY - 0.5;
6014 37174677 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6015 37174677 : const GPtrDiff_t iSrcOffset =
6016 37174677 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6017 :
6018 : // Get the bilinear interpolation at the image borders.
6019 37174677 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6020 36214777 : iSrcY + 2 >= poWK->nSrcYSize)
6021 : {
6022 3885370 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6023 : {
6024 : T value;
6025 2914030 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6026 : &value);
6027 2914030 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6028 : value;
6029 971343 : }
6030 : }
6031 : else
6032 : {
// Fractional position inside the pixel, used to derive the cubic weights.
6033 36203377 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6034 36203377 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6035 :
6036 : float afCoeffsX[4];
6037 : float afCoeffsY[4];
6038 36203377 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6039 36203377 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
// weightsXYn = afCoeffsY[n] broadcast to all lanes, times the four X weights:
// one vector of combined weights per row of the 4x4 window.
6040 36203377 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6041 : const auto weightsXY0 =
6042 72406754 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6043 : const auto weightsXY1 =
6044 72406754 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6045 : const auto weightsXY2 =
6046 72406754 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6047 : const auto weightsXY3 =
6048 36203377 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6049 :
// Linear index of sample (iSrcX - 1, iSrcY - 1), the first sample of the
// window; the following rows are reached by adding multiples of nSrcXSize.
6050 36203377 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6051 :
6052 36203377 : int iBand = 0;
6053 : // Process 2 bands at a time
6054 72406754 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6055 : {
6056 36203377 : const T *CPL_RESTRICT pBand0 =
6057 36203377 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6058 36203377 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6059 : const auto row1_0 =
6060 36203377 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6061 : const auto row2_0 =
6062 36203377 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6063 : const auto row3_0 =
6064 36203377 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6065 :
6066 36203377 : const T *CPL_RESTRICT pBand1 =
6067 36203377 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6068 36203377 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6069 : const auto row1_1 =
6070 36203377 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6071 : const auto row2_1 =
6072 36203377 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6073 : const auto row3_1 =
6074 36203377 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6075 :
6076 : const float fValue_0 =
6077 36203377 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6078 : weightsXY1, weightsXY2, weightsXY3);
6079 :
6080 : const float fValue_1 =
6081 36203377 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6082 : weightsXY1, weightsXY2, weightsXY3);
6083 :
// GWKClampValueT<T> (defined elsewhere) converts the float result to T;
// presumably with rounding/clamping to T's range - confirm.
6084 36203377 : T *CPL_RESTRICT pDstBand0 =
6085 36203377 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6086 36203377 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6087 :
6088 36203377 : T *CPL_RESTRICT pDstBand1 =
6089 36203377 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6090 36203377 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6091 : }
// Odd band count: one band is left over after the pairwise loop.
6092 36203377 : if (iBand < poWK->nBands)
6093 : {
6094 36203377 : const T *CPL_RESTRICT pBand0 =
6095 36203377 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6096 36203377 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6097 : const auto row1 =
6098 36203377 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6099 : const auto row2 =
6100 36203377 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6101 : const auto row3 =
6102 36203377 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6103 :
6104 : const float fValue =
6105 36203377 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6106 : weightsXY2, weightsXY3);
6107 :
6108 36203377 : T *CPL_RESTRICT pDstBand =
6109 36203377 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6110 36203377 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6111 : }
6112 : }
6113 :
// Both the border fallback and the 4x4 path mark the pixel as fully dense.
6114 37174677 : if (poWK->pafDstDensity)
6115 37093836 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6116 37174677 : }
6117 :
6118 : #endif // defined(USE_SSE2)
6119 :
6120 : /************************************************************************/
6121 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6122 : /************************************************************************/
6123 :
// Worker body that warps destination rows [psJob->iYMin, psJob->iYMax).
// This function itself reads no source validity or density mask; the only
// mask it writes is pafDstDensity (set to 1.0 when allocated).
//
// Template parameters:
//   T                   : pixel type of the source and destination buffers.
//   eResample           : GRA_NearestNeighbour copies the source pixel at
//                         iSrcOffset; other values go through a resampler.
//   bUse4SamplesFormula : when non-zero (and not nearest neighbour), use the
//                         4-sample bilinear kernel for GRA_Bilinear and the
//                         4-sample cubic kernel otherwise; when zero, use
//                         GWKResampleNoMasksT with the weight buffers.
//
// pData is a GWKJobStruct*. For each row, the destination pixel centres are
// transformed to source coordinates with poWK->pfnTransformer, then each
// pixel accepted by GWKCheckAndComputeSrcOffsets is resampled band by band.
// A non-zero return from psJob->pfnProgress stops processing after the
// current row.
6124 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6125 1842 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6126 :
6127 : {
6128 1842 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6129 1842 : GDALWarpKernel *poWK = psJob->poWK;
6130 1842 : const int iYMin = psJob->iYMin;
6131 1842 : const int iYMax = psJob->iYMax;
// The option is only parsed when a vertical shift is applied; otherwise the
// value is 0.0 and is never used (its only use is guarded by
// poWK->bApplyVerticalShift below).
6132 1824 : const double dfMultFactorVerticalShiftPipeline =
6133 1842 : poWK->bApplyVerticalShift
6134 18 : ? CPLAtof(CSLFetchNameValueDef(
6135 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6136 : "1.0"))
6137 : : 0.0;
6138 :
6139 1842 : const int nDstXSize = poWK->nDstXSize;
6140 1842 : const int nSrcXSize = poWK->nSrcXSize;
6141 1842 : const int nSrcYSize = poWK->nSrcYSize;
6142 :
6143 : /* -------------------------------------------------------------------- */
6144 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6145 : /* scanlines worth of positions. */
6146 : /* -------------------------------------------------------------------- */
6147 :
6148 : // For x, 2 *, because we cache the precomputed values at the end.
6149 : double *padfX =
6150 1842 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6151 : double *padfY =
6152 1842 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6153 : double *padfZ =
6154 1842 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6155 1842 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6156 :
// Scratch weight buffers (1 + 2 * radius doubles each) handed to
// GWKResampleNoMasksT in the general resampling branch.
6157 1842 : const int nXRadius = poWK->nXRadius;
6158 : double *padfWeightsX =
6159 1842 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6160 : double *padfWeightsY = static_cast<double *>(
6161 1842 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6162 1842 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6163 1842 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6164 1842 : const double dfErrorThreshold = CPLAtof(
6165 1842 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6166 :
6167 : // Precompute values.
6168 418872 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6169 417030 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6170 :
6171 : /* ==================================================================== */
6172 : /* Loop over output lines. */
6173 : /* ==================================================================== */
6174 293317 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6175 : {
6176 : /* --------------------------------------------------------------------
6177 : */
6178 : /* Setup points to transform to source image space. */
6179 : /* --------------------------------------------------------------------
6180 : */
// Restore the destination X coordinates from the cache in the second half of
// padfX; the first half is overwritten with source coordinates on each row.
6181 291476 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6182 291476 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6183 98586759 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6184 98295220 : padfY[iDstX] = dfY;
6185 291476 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6186 :
6187 : /* --------------------------------------------------------------------
6188 : */
6189 : /* Transform the points from destination pixel/line coordinates */
6190 : /* to source pixel/line coordinates. */
6191 : /* --------------------------------------------------------------------
6192 : */
6193 291476 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6194 : padfY, padfZ, pabSuccess);
// Optional rounding of the source coordinates, enabled by a positive
// SRC_COORD_PRECISION warp option.
6195 291476 : if (dfSrcCoordPrecision > 0.0)
6196 : {
6197 1000 : GWKRoundSourceCoordinates(
6198 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6199 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6200 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6201 : }
6202 :
6203 : /* ====================================================================
6204 : */
6205 : /* Loop over pixels in output scanline. */
6206 : /* ====================================================================
6207 : */
6208 98586759 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6209 : {
// Skip pixels that GWKCheckAndComputeSrcOffsets rejects; on success it
// fills iSrcOffset (passed by reference).
6210 98295220 : GPtrDiff_t iSrcOffset = 0;
6211 98295220 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6212 : padfX, padfY, nSrcXSize,
6213 : nSrcYSize, iSrcOffset))
6214 47394279 : continue;
6215 :
6216 : /* ====================================================================
6217 : */
6218 : /* Loop processing each band. */
6219 : /* ====================================================================
6220 : */
6221 88075783 : const GPtrDiff_t iDstOffset =
6222 88075783 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6223 :
6224 : #if defined(USE_SSE2)
// Fast path: 4-sample cubic on GByte/GUInt16 with more than one band and no
// vertical shift handles all bands (and the destination density) in one call,
// so the per-band loop below is skipped for this pixel.
6225 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6226 : (std::is_same<T, GByte>::value ||
6227 : std::is_same<T, GUInt16>::value))
6228 : {
6229 38170876 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6230 : {
6231 37174677 : GWKCubicResampleNoMasks4MultiBandT<T>(
6232 37174677 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6233 37174677 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6234 :
6235 37174677 : continue;
6236 : }
6237 : }
6238 : #endif // defined(USE_SSE2)
6239 :
// dfInvWeights lives outside the band loop and is passed to
// GWKResampleNoMasksT for every band; presumably so state computed for one
// band can be reused for the next - confirm in GWKResampleNoMasksT.
6240 50901040 : [[maybe_unused]] double dfInvWeights = 0;
6241 144559858 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6242 : {
6243 93659054 : T value = 0;
6244 : if constexpr (eResample == GRA_NearestNeighbour)
6245 : {
6246 78474530 : value = reinterpret_cast<T *>(
6247 78474530 : poWK->papabySrcImage[iBand])[iSrcOffset];
6248 : }
6249 : else if constexpr (bUse4SamplesFormula)
6250 : {
6251 : if constexpr (eResample == GRA_Bilinear)
6252 3363189 : GWKBilinearResampleNoMasks4SampleT(
6253 3363189 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6254 3363189 : padfY[iDstX] - poWK->nSrcYOff, &value);
6255 : else
6256 2231485 : GWKCubicResampleNoMasks4SampleT(
6257 2231485 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6258 2231485 : padfY[iDstX] - poWK->nSrcYOff, &value);
6259 : }
6260 : else
6261 : {
6262 9589850 : GWKResampleNoMasksT(
6263 9589850 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6264 9589850 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6265 : padfWeightsY, dfInvWeights);
6266 : }
6267 :
6268 93659054 : if (poWK->bApplyVerticalShift)
6269 : {
// A non-finite Z skips this band only: neither the destination value nor the
// destination density is written for it.
6270 818 : if (!std::isfinite(padfZ[iDstX]))
6271 0 : continue;
6272 : // Subtract padfZ[] since the coordinate transformation is
6273 : // from target to source
6274 818 : value = GWKClampValueT<T>(
6275 818 : double(value) * poWK->dfMultFactorVerticalShift -
6276 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6277 : }
6278 :
6279 93659054 : if (poWK->pafDstDensity)
6280 14049274 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6281 :
6282 93659054 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6283 : value;
6284 : }
6285 : }
6286 :
6287 : /* --------------------------------------------------------------------
6288 : */
6289 : /* Report progress to the user, and optionally cancel out. */
6290 : /* --------------------------------------------------------------------
6291 : */
6292 291476 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6293 1 : break;
6294 : }
6295 :
6296 : /* -------------------------------------------------------------------- */
6297 : /* Cleanup and return. */
6298 : /* -------------------------------------------------------------------- */
6299 1842 : CPLFree(padfX);
6300 1842 : CPLFree(padfY);
6301 1842 : CPLFree(padfZ);
6302 1842 : CPLFree(pabSuccess);
6303 1842 : CPLFree(padfWeightsX);
6304 1842 : CPLFree(padfWeightsY);
6305 1842 : }
6306 :
// Thread function for resamplers that never use the 4-sample kernels:
// forwards pData to the internal worker with bUse4SamplesFormula = FALSE.
6307 : template <class T, GDALResampleAlg eResample>
6308 994 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6309 : {
6310 994 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6311 : pData);
6312 994 : }
6313 :
// Thread function for bilinear and cubic resampling (enforced by the
// static_assert). Chooses at run time between the 4-sample kernels and the
// general resampler: the 4-sample variant is used only when both
// poWK->dfXScale and poWK->dfYScale are at least 0.95.
// NOTE(review): presumably a scale below 0.95 means downsampling, where four
// samples no longer cover the source footprint - confirm.
6314 : template <class T, GDALResampleAlg eResample>
6315 848 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6316 :
6317 : {
6318 848 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6319 848 : GDALWarpKernel *poWK = psJob->poWK;
6320 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6321 848 : const bool bUse4SamplesFormula =
6322 848 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6323 848 : if (bUse4SamplesFormula)
6324 746 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6325 : pData);
6326 : else
6327 102 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6328 : pData);
6329 848 : }
6330 :
// Nearest neighbour, GByte pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>
// through GWKRun and returns its CPLErr.
6331 943 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6332 : {
6333 943 : return GWKRun(
6334 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6335 943 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6336 : }
6337 :
// Bilinear, GByte pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Bilinear>
// through GWKRun and returns its CPLErr.
6338 126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6339 : {
6340 126 : return GWKRun(
6341 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6342 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6343 126 : GRA_Bilinear>);
6344 : }
6345 :
// Cubic, GByte pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>
// through GWKRun and returns its CPLErr.
6346 676 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6347 : {
6348 676 : return GWKRun(
6349 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6350 676 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6351 : }
6352 :
// Cubic, float pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>
// through GWKRun and returns its CPLErr.
6353 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6354 : {
6355 9 : return GWKRun(
6356 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6357 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6358 : }
6359 :
6360 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6361 :
// Cubic, double pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>
// through GWKRun. Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is
// defined.
6362 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6363 : {
6364 : return GWKRun(
6365 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6366 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6367 : }
6368 : #endif
6369 :
// Cubic spline, GByte pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline> (the
// variant that never uses the 4-sample kernels) through GWKRun.
6370 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6371 : {
6372 12 : return GWKRun(
6373 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6374 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6375 : }
6376 :
6377 : /************************************************************************/
6378 : /* GWKNearestThread() */
6379 : /* */
6380 : /* Nearest neighbour resampling that honours the source validity */
6381 : /* and density masks, templated on the pixel type. Should be as */
6382 : /* fast as possible for this particular transformation type. */
6383 : /************************************************************************/
6384 :
// Worker body for nearest neighbour warping of destination rows
// [psJob->iYMin, psJob->iYMax) with pixel type T, honouring the source masks.
//
// pData is a GWKJobStruct*. For each row, the destination pixel centres are
// transformed to source coordinates with poWK->pfnTransformer. A pixel is
// skipped when GWKCheckAndComputeSrcOffsets rejects it, when the unified
// source validity mask marks it invalid (unless its offset can be adjusted on
// the edge, see below), or when the unified source density is below
// SRC_DENSITY_THRESHOLD_DOUBLE. Otherwise each band is fetched with
// GWKGetPixelT and written with GWKSetPixelValueRealT, after which the
// destination density and validity masks are updated.
// A non-zero return from psJob->pfnProgress stops processing after the
// current row.
6385 459 : template <class T> static void GWKNearestThread(void *pData)
6386 :
6387 : {
6388 459 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6389 459 : GDALWarpKernel *poWK = psJob->poWK;
6390 459 : const int iYMin = psJob->iYMin;
6391 459 : const int iYMax = psJob->iYMax;
// The option is only parsed when a vertical shift is applied; otherwise the
// value is 0.0 and is never used.
6392 459 : const double dfMultFactorVerticalShiftPipeline =
6393 459 : poWK->bApplyVerticalShift
6394 0 : ? CPLAtof(CSLFetchNameValueDef(
6395 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6396 : "1.0"))
6397 : : 0.0;
// True for single-band warps, or when the UNIFIED_SRC_NODATA option is not
// set to a true value (default "FALSE"). When false, GWKAvoidNoDataMultiBand
// is called once per pixel after all bands have been written.
6398 459 : const bool bAvoidNoDataSingleBand =
6399 525 : poWK->nBands == 1 ||
6400 66 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
6401 : "UNIFIED_SRC_NODATA", "FALSE"));
6402 :
6403 459 : const int nDstXSize = poWK->nDstXSize;
6404 459 : const int nSrcXSize = poWK->nSrcXSize;
6405 459 : const int nSrcYSize = poWK->nSrcYSize;
6406 :
6407 : /* -------------------------------------------------------------------- */
6408 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6409 : /* scanlines worth of positions. */
6410 : /* -------------------------------------------------------------------- */
6411 :
6412 : // For x, 2 *, because we cache the precomputed values at the end.
6413 : double *padfX =
6414 459 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6415 : double *padfY =
6416 459 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6417 : double *padfZ =
6418 459 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6419 459 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6420 :
6421 459 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6422 459 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6423 459 : const double dfErrorThreshold = CPLAtof(
6424 459 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6425 :
// Computed once per job; decides below whether an invalid source pixel is
// dropped outright or first given a chance via GWKAdjustSrcOffsetOnEdge.
6426 : const bool bOneSourceCornerFailsToReproject =
6427 459 : GWKOneSourceCornerFailsToReproject(psJob);
6428 :
6429 : // Precompute values.
6430 62854 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6431 62395 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6432 :
6433 : /* ==================================================================== */
6434 : /* Loop over output lines. */
6435 : /* ==================================================================== */
6436 48162 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6437 : {
6438 :
6439 : /* --------------------------------------------------------------------
6440 : */
6441 : /* Setup points to transform to source image space. */
6442 : /* --------------------------------------------------------------------
6443 : */
// Restore the destination X coordinates from the cache in the second half of
// padfX; the first half is overwritten with source coordinates on each row.
6444 47703 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6445 47703 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6446 9833535 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6447 9785833 : padfY[iDstX] = dfY;
6448 47703 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6449 :
6450 : /* --------------------------------------------------------------------
6451 : */
6452 : /* Transform the points from destination pixel/line coordinates */
6453 : /* to source pixel/line coordinates. */
6454 : /* --------------------------------------------------------------------
6455 : */
6456 47703 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6457 : padfY, padfZ, pabSuccess);
6458 47703 : if (dfSrcCoordPrecision > 0.0)
6459 : {
6460 0 : GWKRoundSourceCoordinates(
6461 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6462 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6463 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6464 : }
6465 : /* ====================================================================
6466 : */
6467 : /* Loop over pixels in output scanline. */
6468 : /* ====================================================================
6469 : */
6470 9833535 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6471 : {
6472 9785833 : GPtrDiff_t iSrcOffset = 0;
6473 9785833 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6474 : padfX, padfY, nSrcXSize,
6475 : nSrcYSize, iSrcOffset))
6476 2358945 : continue;
6477 :
6478 : /* --------------------------------------------------------------------
6479 : */
6480 : /* Do not try to apply invalid source pixels to the dest. */
6481 : /* --------------------------------------------------------------------
6482 : */
6483 9606143 : if (poWK->panUnifiedSrcValid != nullptr &&
6484 1127399 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6485 : {
6486 49043 : if (!bOneSourceCornerFailsToReproject)
6487 : {
6488 41558 : continue;
6489 : }
// iSrcOffset is passed by name and used afterwards, so the helper may
// replace it; the pixel is kept only if the helper returns true.
6490 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6491 : {
6492 5224 : continue;
6493 : }
6494 : }
6495 :
6496 : /* --------------------------------------------------------------------
6497 : */
6498 : /* Do not try to apply transparent source pixels to the
6499 : * destination.*/
6500 : /* --------------------------------------------------------------------
6501 : */
6502 8431960 : double dfDensity = 1.0;
6503 :
6504 8431960 : if (poWK->pafUnifiedSrcDensity != nullptr)
6505 : {
6506 1557335 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6507 1557335 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6508 1005075 : continue;
6509 : }
6510 :
6511 : /* ====================================================================
6512 : */
6513 : /* Loop processing each band. */
6514 : /* ====================================================================
6515 : */
6516 :
6517 7426888 : const GPtrDiff_t iDstOffset =
6518 7426888 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6519 :
6520 17415958 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6521 : {
6522 9989020 : T value = 0;
6523 9989020 : double dfBandDensity = 0.0;
6524 :
6525 : /* --------------------------------------------------------------------
6526 : */
6527 : /* Collect the source value. */
6528 : /* --------------------------------------------------------------------
6529 : */
6530 9989020 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6531 : &value))
6532 : {
6533 :
6534 9989010 : if (poWK->bApplyVerticalShift)
6535 : {
// A non-finite Z skips writing this band only; the density/validity update
// after the band loop still runs for the pixel.
6536 0 : if (!std::isfinite(padfZ[iDstX]))
6537 0 : continue;
6538 : // Subtract padfZ[] since the coordinate transformation
6539 : // is from target to source
6540 0 : value = GWKClampValueT<T>(
6541 0 : double(value) * poWK->dfMultFactorVerticalShift -
6542 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6543 : }
6544 :
6545 9989010 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6546 : dfBandDensity, value,
6547 : bAvoidNoDataSingleBand);
6548 : }
6549 : }
6550 :
6551 : /* --------------------------------------------------------------------
6552 : */
6553 : /* Mark this pixel valid/opaque in the output. */
6554 : /* --------------------------------------------------------------------
6555 : */
6556 :
6557 7426888 : if (!bAvoidNoDataSingleBand)
6558 : {
6559 424278 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6560 : }
6561 :
6562 7426888 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6563 :
6564 7426888 : if (poWK->panDstValid != nullptr)
6565 : {
6566 6156885 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6567 : }
6568 : } /* Next iDstX */
6569 :
6570 : /* --------------------------------------------------------------------
6571 : */
6572 : /* Report progress to the user, and optionally cancel out. */
6573 : /* --------------------------------------------------------------------
6574 : */
6575 47703 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6576 0 : break;
6577 : }
6578 :
6579 : /* -------------------------------------------------------------------- */
6580 : /* Cleanup and return. */
6581 : /* -------------------------------------------------------------------- */
6582 459 : CPLFree(padfX);
6583 459 : CPLFree(padfY);
6584 459 : CPLFree(padfZ);
6585 459 : CPLFree(pabSuccess);
6586 459 : }
6587 :
// Nearest neighbour with source masks, GByte pixels: runs
// GWKNearestThread<GByte> through GWKRun and returns its CPLErr.
6588 350 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6589 : {
6590 350 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6591 : }
6592 :
// Nearest neighbour, GInt16 pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>
// through GWKRun and returns its CPLErr.
6593 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6594 : {
6595 14 : return GWKRun(
6596 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6597 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6598 : }
6599 :
// Bilinear, GInt16 pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Bilinear>
// through GWKRun and returns its CPLErr.
6600 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6601 : {
6602 5 : return GWKRun(
6603 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6604 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6605 5 : GRA_Bilinear>);
6606 : }
6607 :
// Bilinear, GUInt16 pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Bilinear>
// through GWKRun and returns its CPLErr.
6608 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6609 : {
6610 6 : return GWKRun(
6611 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6612 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6613 6 : GRA_Bilinear>);
6614 : }
6615 :
// Bilinear, float pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Bilinear>
// through GWKRun and returns its CPLErr.
6616 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6617 : {
6618 4 : return GWKRun(
6619 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6620 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6621 4 : GRA_Bilinear>);
6622 : }
6623 :
6624 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6625 :
// Bilinear, double pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Bilinear>
// through GWKRun. Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is
// defined.
6626 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6627 : {
6628 : return GWKRun(
6629 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6630 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6631 : GRA_Bilinear>);
6632 : }
6633 : #endif
6634 :
// Cubic, GInt16 pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>
// through GWKRun and returns its CPLErr.
6635 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6636 : {
6637 5 : return GWKRun(
6638 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6639 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6640 : }
6641 :
// Cubic, GUInt16 pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>
// through GWKRun and returns its CPLErr.
6642 14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6643 : {
6644 14 : return GWKRun(
6645 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6646 14 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6647 : }
6648 :
// Cubic spline, GInt16 pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline> through
// GWKRun and returns its CPLErr.
6649 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6650 : {
6651 6 : return GWKRun(
6652 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6653 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6654 : }
6655 :
// Cubic spline, GUInt16 pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline> through
// GWKRun and returns its CPLErr.
6656 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6657 : {
6658 5 : return GWKRun(
6659 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6660 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6661 : }
6662 :
// Nearest neighbour with source masks, GInt16 pixels: runs
// GWKNearestThread<GInt16> through GWKRun and returns its CPLErr.
6663 45 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6664 : {
6665 45 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6666 : }
6667 :
// Nearest neighbour with source masks, GUInt16 pixels: runs
// GWKNearestThread<GUInt16> through GWKRun and returns its CPLErr.
6668 10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6669 : {
6670 10 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6671 : }
6672 :
// Nearest neighbour, float pixels: runs
// GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>
// through GWKRun and returns its CPLErr.
6673 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6674 : {
6675 11 : return GWKRun(
6676 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6677 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6678 : }
6679 :
// Nearest neighbour with source masks, float pixels: runs
// GWKNearestThread<float> through GWKRun and returns its CPLErr.
6680 50 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6681 : {
6682 50 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6683 : }
6684 :
6685 : /************************************************************************/
6686 : /* GWKAverageOrMode() */
6687 : /* */
6688 : /************************************************************************/
6689 :
// Weight of source row iSrcY within the span [dfYMin, dfYMax]:
//   - first row (iSrcY == iSrcYMin): 1.0 if it is also the only row,
//     otherwise 1 - (dfYMin - iSrcYMin);
//   - last row (iSrcY + 1 == iSrcYMax): 1 - (iSrcYMax - dfYMax);
//   - any other row: 1.0.
// Reads iSrcYMin, iSrcYMax, dfYMin and dfYMax from the scope where the macro
// is expanded. The argument is not parenthesized and is evaluated more than
// once, so pass a plain variable.
6690 : #define COMPUTE_WEIGHT_Y(iSrcY) \
6691 : ((iSrcY == iSrcYMin) \
6692 : ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin)) \
6693 : : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax) \
6694 : : 1.0)
6695 :
// Weight of source pixel in column iSrcX, given its row weight dfWeightY:
//   - first column (iSrcX == iSrcXMin): dfWeightY if it is also the only
//     column, otherwise dfWeightY * (1 - (dfXMin - iSrcXMin));
//   - last column (iSrcX + 1 == iSrcXMax): dfWeightY * (1 - (iSrcXMax - dfXMax));
//   - any other column: dfWeightY.
// Reads iSrcXMin, iSrcXMax, dfXMin and dfXMax from the scope where the macro
// is expanded. The arguments are not parenthesized and are evaluated more
// than once, so pass plain variables.
6696 : #define COMPUTE_WEIGHT(iSrcX, dfWeightY) \
6697 : ((iSrcX == iSrcXMin) ? ((iSrcXMin + 1 == iSrcXMax) \
6698 : ? dfWeightY \
6699 : : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
6700 : : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax)) \
6701 : : dfWeightY)
6702 :
// Forward declaration: the worker is defined later in the file but is needed
// by the entry point just below.
6703 : static void GWKAverageOrModeThread(void *pData);
6704 :
// Entry point for the warp variant implemented by GWKAverageOrModeThread:
// passes that worker and the label "GWKAverageOrMode" to GWKRun and returns
// GWKRun's CPLErr unchanged.
6705 163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6706 : {
6707 163 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6708 : }
6709 :
6710 : /************************************************************************/
6711 : /* GWKAverageOrModeComputeLineCoords() */
6712 : /************************************************************************/
6713 :
// Computes, for every pixel of destination row iDstY, the source coordinates
// of two opposite corners of that pixel.
//
// padfX/padfY/padfZ receive the corner (iDstX, iDstY) and padfX2/padfY2/padfZ2
// the corner (iDstX + 1, iDstY + 1), both offset by nDstXOff / nDstYOff, with
// Z initialised to 0. Both sets are then passed in place through
// poWK->pfnTransformer (destination to source), which also fills pabSuccess
// and pabSuccess2. All arrays must hold at least poWK->nDstXSize elements.
// When dfSrcCoordPrecision is positive, both sets are additionally passed
// through GWKRoundSourceCoordinates together with dfErrorThreshold.
6714 8183 : static void GWKAverageOrModeComputeLineCoords(
6715 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6716 : double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6717 : int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6718 : double dfErrorThreshold)
6719 : {
6720 8183 : const GDALWarpKernel *poWK = psJob->poWK;
6721 8183 : const int nDstXSize = poWK->nDstXSize;
6722 :
6723 : // Setup points to transform to source image space.
6724 2097530 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6725 : {
6726 2089340 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6727 2089340 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6728 2089340 : padfZ[iDstX] = 0.0;
6729 2089340 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6730 2089340 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6731 2089340 : padfZ2[iDstX] = 0.0;
6732 : }
6733 :
6734 : /* ----------------------------------------------------------------- */
6735 : /* Transform the points from destination pixel/line coordinates */
6736 : /* to source pixel/line coordinates. */
6737 : /* ----------------------------------------------------------------- */
6738 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6739 : padfZ, pabSuccess);
6740 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6741 : padfY2, padfZ2, pabSuccess2);
6742 :
// The last two arguments of each call are the destination coordinates of the
// first point of the corresponding set (column 0 of this row).
6743 8183 : if (dfSrcCoordPrecision > 0.0)
6744 : {
6745 0 : GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6746 : dfSrcCoordPrecision, dfErrorThreshold,
6747 0 : poWK->pfnTransformer, psJob->pTransformerArg,
6748 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6749 0 : GWKRoundSourceCoordinates(
6750 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6751 0 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6752 0 : 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6753 : }
6754 8183 : }
6755 :
6756 : /************************************************************************/
6757 : /* GWKAverageOrModeComputeSourceCoords() */
6758 : /************************************************************************/
6759 :
6760 2089340 : static bool GWKAverageOrModeComputeSourceCoords(
6761 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6762 : double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
6763 : // Output:
6764 : bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
6765 : double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
6766 : {
6767 2089340 : const GDALWarpKernel *poWK = psJob->poWK;
6768 2089340 : const int nSrcXSize = poWK->nSrcXSize;
6769 2089340 : const int nSrcYSize = poWK->nSrcYSize;
6770 :
6771 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6772 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6773 2089340 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6774 1991690 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6775 1991690 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6776 1965300 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6777 1965300 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6778 1911930 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6779 1911430 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6780 1910040 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6781 : {
6782 179362 : return false;
6783 : }
6784 :
6785 : // Compute corners in source crs.
6786 :
6787 : // The transformation might not have preserved ordering of
6788 : // coordinates so do the necessary swapping (#5433).
6789 : // NOTE: this is really an approximative fix. To do something
6790 : // more precise we would for example need to compute the
6791 : // transformation of coordinates in the
6792 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6793 : // coordinates, and take the bounding box of the got source
6794 : // coordinates.
6795 :
6796 1909980 : if (padfX[iDstX] > padfX2[iDstX])
6797 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
6798 :
6799 : // Detect situations where the target pixel is close to the
6800 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6801 : // close to the left-most and right-most columns of the source
6802 : // raster. The 2 value below was experimentally determined to
6803 : // avoid false-positives and false-negatives.
6804 : // Addresses https://github.com/OSGeo/gdal/issues/6478
6805 1909980 : bWrapOverX = false;
6806 1909980 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6807 1909980 : if (poWK->nSrcXOff == 0 &&
6808 1909980 : padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
6809 16499 : (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale < nThresholdWrapOverX)
6810 : {
6811 : // Check there is a discontinuity by checking at mid-pixel.
6812 : // NOTE: all this remains fragile. To confidently
6813 : // detect antimeridian warping we should probably try to access
6814 : // georeferenced coordinates, and not rely only on tests on
6815 : // image space coordinates. But accessing georeferenced
6816 : // coordinates from here is not trivial, and we would for example
6817 : // have to handle both geographic, Mercator, etc.
6818 : // Let's hope this heuristics is good enough for now.
6819 1041 : double x = iDstX + 0.5 + poWK->nDstXOff;
6820 1041 : double y = iDstY + poWK->nDstYOff;
6821 1041 : double z = 0;
6822 1041 : int bSuccess = FALSE;
6823 1041 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
6824 : &bSuccess);
6825 1041 : if (bSuccess && x < padfX[iDstX])
6826 : {
6827 1008 : bWrapOverX = true;
6828 1008 : std::swap(padfX[iDstX], padfX2[iDstX]);
6829 1008 : padfX2[iDstX] += nSrcXSize;
6830 : }
6831 : }
6832 :
6833 1909980 : dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6834 1909980 : dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6835 1909980 : constexpr double EPSILON = 1e-10;
6836 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6837 1909980 : if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
6838 156 : return false;
6839 1909830 : iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
6840 1909830 : iSrcXMax = static_cast<int>(
6841 1909830 : std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
6842 1909830 : if (!bWrapOverX)
6843 1908820 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6844 1909830 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6845 472 : iSrcXMax++;
6846 :
6847 1909830 : if (padfY[iDstX] > padfY2[iDstX])
6848 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
6849 1909830 : dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6850 1909830 : dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6851 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6852 1909830 : if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
6853 78 : return false;
6854 1909750 : iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
6855 1909750 : iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
6856 1909750 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6857 0 : iSrcYMax++;
6858 :
6859 1909750 : return true;
6860 : }
6861 :
6862 : /************************************************************************/
6863 : /* GWKModeRealType() */
6864 : /************************************************************************/
6865 :
// Generic equality test used when looking up a value among the mode bins.
// Relies solely on the type's operator==.
template <class T> static inline bool IsSame(T a, T b)
{
    const bool bEqual = (a == b);
    return bEqual;
}
6870 :
6871 0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
6872 : {
6873 0 : return a == b || (CPLIsNan(a) && CPLIsNan(b));
6874 : }
6875 :
6876 18 : template <> bool IsSame<float>(float a, float b)
6877 : {
6878 18 : return a == b || (std::isnan(a) && std::isnan(b));
6879 : }
6880 :
6881 56 : template <> bool IsSame<double>(double a, double b)
6882 : {
6883 56 : return a == b || (std::isnan(a) && std::isnan(b));
6884 : }
6885 :
6886 19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
6887 : {
6888 19 : const GDALWarpKernel *poWK = psJob->poWK;
6889 19 : const int iYMin = psJob->iYMin;
6890 19 : const int iYMax = psJob->iYMax;
6891 19 : const int nDstXSize = poWK->nDstXSize;
6892 19 : const int nSrcXSize = poWK->nSrcXSize;
6893 19 : const int nSrcYSize = poWK->nSrcYSize;
6894 19 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
6895 :
6896 19 : T *pVals = nullptr;
6897 19 : float *pafCounts = nullptr;
6898 :
6899 19 : if (nSrcXSize > 0 && nSrcYSize > 0)
6900 : {
6901 : pVals = static_cast<T *>(
6902 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
6903 : pafCounts = static_cast<float *>(
6904 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6905 19 : if (pVals == nullptr || pafCounts == nullptr)
6906 : {
6907 0 : VSIFree(pVals);
6908 0 : VSIFree(pafCounts);
6909 0 : return;
6910 : }
6911 : }
6912 :
6913 : /* -------------------------------------------------------------------- */
6914 : /* Allocate x,y,z coordinate arrays for transformation ... two */
6915 : /* scanlines worth of positions. */
6916 : /* -------------------------------------------------------------------- */
6917 :
6918 : double *padfX =
6919 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6920 : double *padfY =
6921 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6922 : double *padfZ =
6923 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6924 : double *padfX2 =
6925 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6926 : double *padfY2 =
6927 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6928 : double *padfZ2 =
6929 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6930 19 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6931 19 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6932 :
6933 19 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6934 19 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6935 19 : const double dfErrorThreshold = CPLAtof(
6936 19 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6937 19 : const bool bAvoidNoDataSingleBand =
6938 19 : poWK->nBands == 1 ||
6939 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
6940 : "UNIFIED_SRC_NODATA", "FALSE"));
6941 :
6942 19 : const int nXMargin =
6943 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
6944 19 : const int nYMargin =
6945 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
6946 :
6947 : /* ==================================================================== */
6948 : /* Loop over output lines. */
6949 : /* ==================================================================== */
6950 116 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6951 : {
6952 97 : GWKAverageOrModeComputeLineCoords(
6953 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
6954 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
6955 :
6956 : // Loop over pixels in output scanline.
6957 3514 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6958 : {
6959 3417 : GPtrDiff_t iSrcOffset = 0;
6960 3417 : double dfDensity = 1.0;
6961 3417 : bool bHasFoundDensity = false;
6962 :
6963 3417 : bool bWrapOverX = false;
6964 3417 : double dfXMin = 0;
6965 3417 : double dfYMin = 0;
6966 3417 : double dfXMax = 0;
6967 3417 : double dfYMax = 0;
6968 3417 : int iSrcXMin = 0;
6969 3417 : int iSrcYMin = 0;
6970 3417 : int iSrcXMax = 0;
6971 3417 : int iSrcYMax = 0;
6972 3417 : if (!GWKAverageOrModeComputeSourceCoords(
6973 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
6974 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
6975 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
6976 : {
6977 0 : continue;
6978 : }
6979 :
6980 3417 : const GPtrDiff_t iDstOffset =
6981 3417 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6982 :
6983 : // Loop processing each band.
6984 6834 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6985 : {
6986 3417 : double dfBandDensity = 0.0;
6987 :
6988 3417 : int nBins = 0;
6989 3417 : int iModeIndex = -1;
6990 3417 : T nVal{};
6991 :
6992 10248 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
6993 : {
6994 6831 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
6995 6831 : iSrcOffset =
6996 6831 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
6997 20530 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
6998 : iSrcX++, iSrcOffset++)
6999 : {
7000 13699 : if (bWrapOverX)
7001 0 : iSrcOffset =
7002 0 : (iSrcX % nSrcXSize) +
7003 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7004 :
7005 13699 : if (poWK->panUnifiedSrcValid != nullptr &&
7006 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7007 0 : continue;
7008 :
7009 13699 : if (GWKGetPixelT(poWK, iBand, iSrcOffset,
7010 27398 : &dfBandDensity, &nVal) &&
7011 13699 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7012 : {
7013 13699 : const double dfWeight =
7014 13699 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7015 :
7016 : // Check array for existing entry.
7017 13699 : int i = 0;
7018 29194 : for (i = 0; i < nBins; ++i)
7019 : {
7020 17807 : if (IsSame(pVals[i], nVal))
7021 : {
7022 :
7023 2312 : pafCounts[i] +=
7024 2312 : static_cast<float>(dfWeight);
7025 2312 : bool bValIsMaxCount =
7026 2312 : (pafCounts[i] > pafCounts[iModeIndex]);
7027 :
7028 2312 : if (!bValIsMaxCount &&
7029 1498 : pafCounts[i] == pafCounts[iModeIndex])
7030 : {
7031 1490 : switch (eTieStrategy)
7032 : {
7033 1477 : case GWKTS_First:
7034 1477 : break;
7035 6 : case GWKTS_Min:
7036 6 : bValIsMaxCount =
7037 6 : nVal < pVals[iModeIndex];
7038 6 : break;
7039 7 : case GWKTS_Max:
7040 7 : bValIsMaxCount =
7041 7 : nVal > pVals[iModeIndex];
7042 7 : break;
7043 : }
7044 : }
7045 :
7046 2312 : if (bValIsMaxCount)
7047 : {
7048 817 : iModeIndex = i;
7049 : }
7050 :
7051 2312 : break;
7052 : }
7053 : }
7054 :
7055 : // Add to arr if entry not already there.
7056 13699 : if (i == nBins)
7057 : {
7058 11387 : pVals[i] = nVal;
7059 11387 : pafCounts[i] = static_cast<float>(dfWeight);
7060 :
7061 11387 : if (iModeIndex < 0)
7062 3417 : iModeIndex = i;
7063 :
7064 11387 : ++nBins;
7065 : }
7066 : }
7067 : }
7068 : }
7069 :
7070 3417 : if (iModeIndex != -1)
7071 : {
7072 3417 : nVal = pVals[iModeIndex];
7073 3417 : dfBandDensity = 1;
7074 3417 : bHasFoundDensity = true;
7075 : }
7076 :
7077 : // We have a computed value from the source. Now apply it
7078 : // to the destination pixel
7079 3417 : if (bHasFoundDensity)
7080 : {
7081 3417 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
7082 : dfBandDensity, nVal,
7083 : bAvoidNoDataSingleBand);
7084 : }
7085 : }
7086 :
7087 3417 : if (!bHasFoundDensity)
7088 0 : continue;
7089 :
7090 3417 : if (!bAvoidNoDataSingleBand)
7091 : {
7092 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7093 : }
7094 :
7095 : /* --------------------------------------------------------------------
7096 : */
7097 : /* Update destination density/validity masks. */
7098 : /* --------------------------------------------------------------------
7099 : */
7100 3417 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7101 :
7102 3417 : if (poWK->panDstValid != nullptr)
7103 : {
7104 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7105 : }
7106 : } /* Next iDstX */
7107 :
7108 : /* --------------------------------------------------------------------
7109 : */
7110 : /* Report progress to the user, and optionally cancel out. */
7111 : /* --------------------------------------------------------------------
7112 : */
7113 97 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7114 0 : break;
7115 : }
7116 :
7117 : /* -------------------------------------------------------------------- */
7118 : /* Cleanup and return. */
7119 : /* -------------------------------------------------------------------- */
7120 19 : CPLFree(padfX);
7121 19 : CPLFree(padfY);
7122 19 : CPLFree(padfZ);
7123 19 : CPLFree(padfX2);
7124 19 : CPLFree(padfY2);
7125 19 : CPLFree(padfZ2);
7126 19 : CPLFree(pabSuccess);
7127 19 : CPLFree(pabSuccess2);
7128 19 : VSIFree(pVals);
7129 19 : VSIFree(pafCounts);
7130 : }
7131 :
7132 : /************************************************************************/
7133 : /* GWKModeComplexType() */
7134 : /************************************************************************/
7135 :
7136 8 : static void GWKModeComplexType(GWKJobStruct *psJob)
7137 : {
7138 8 : const GDALWarpKernel *poWK = psJob->poWK;
7139 8 : const int iYMin = psJob->iYMin;
7140 8 : const int iYMax = psJob->iYMax;
7141 8 : const int nDstXSize = poWK->nDstXSize;
7142 8 : const int nSrcXSize = poWK->nSrcXSize;
7143 8 : const int nSrcYSize = poWK->nSrcYSize;
7144 8 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7145 : const double dfMultFactorVerticalShiftPipeline =
7146 8 : poWK->bApplyVerticalShift
7147 8 : ? CPLAtof(CSLFetchNameValueDef(
7148 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7149 : "1.0"))
7150 8 : : 0.0;
7151 : const bool bAvoidNoDataSingleBand =
7152 8 : poWK->nBands == 1 ||
7153 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7154 8 : "UNIFIED_SRC_NODATA", "FALSE"));
7155 :
7156 8 : double *padfRealVals = nullptr;
7157 8 : double *padfImagVals = nullptr;
7158 8 : float *pafCounts = nullptr;
7159 :
7160 8 : if (nSrcXSize > 0 && nSrcYSize > 0)
7161 : {
7162 : padfRealVals = static_cast<double *>(
7163 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7164 : padfImagVals = static_cast<double *>(
7165 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7166 : pafCounts = static_cast<float *>(
7167 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7168 8 : if (padfRealVals == nullptr || padfImagVals == nullptr ||
7169 : pafCounts == nullptr)
7170 : {
7171 0 : VSIFree(padfRealVals);
7172 0 : VSIFree(padfImagVals);
7173 0 : VSIFree(pafCounts);
7174 0 : return;
7175 : }
7176 : }
7177 :
7178 : /* -------------------------------------------------------------------- */
7179 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7180 : /* scanlines worth of positions. */
7181 : /* -------------------------------------------------------------------- */
7182 :
7183 : double *padfX =
7184 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7185 : double *padfY =
7186 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7187 : double *padfZ =
7188 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7189 : double *padfX2 =
7190 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7191 : double *padfY2 =
7192 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7193 : double *padfZ2 =
7194 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7195 8 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7196 8 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7197 :
7198 8 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7199 8 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7200 8 : const double dfErrorThreshold = CPLAtof(
7201 8 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7202 :
7203 : const int nXMargin =
7204 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7205 : const int nYMargin =
7206 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7207 :
7208 : /* ==================================================================== */
7209 : /* Loop over output lines. */
7210 : /* ==================================================================== */
7211 16 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7212 : {
7213 8 : GWKAverageOrModeComputeLineCoords(
7214 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7215 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7216 :
7217 : // Loop over pixels in output scanline.
7218 16 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7219 : {
7220 8 : GPtrDiff_t iSrcOffset = 0;
7221 8 : double dfDensity = 1.0;
7222 8 : bool bHasFoundDensity = false;
7223 :
7224 8 : bool bWrapOverX = false;
7225 8 : double dfXMin = 0;
7226 8 : double dfYMin = 0;
7227 8 : double dfXMax = 0;
7228 8 : double dfYMax = 0;
7229 8 : int iSrcXMin = 0;
7230 8 : int iSrcYMin = 0;
7231 8 : int iSrcXMax = 0;
7232 8 : int iSrcYMax = 0;
7233 8 : if (!GWKAverageOrModeComputeSourceCoords(
7234 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7235 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7236 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7237 : {
7238 0 : continue;
7239 : }
7240 :
7241 8 : const GPtrDiff_t iDstOffset =
7242 8 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7243 :
7244 : // Loop processing each band.
7245 16 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7246 : {
7247 8 : double dfBandDensity = 0.0;
7248 :
7249 8 : int nBins = 0;
7250 8 : int iModeIndex = -1;
7251 8 : double dfValueReal = 0;
7252 8 : double dfValueImag = 0;
7253 :
7254 16 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7255 : {
7256 8 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7257 8 : iSrcOffset =
7258 8 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7259 38 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7260 : iSrcX++, iSrcOffset++)
7261 : {
7262 30 : if (bWrapOverX)
7263 0 : iSrcOffset =
7264 0 : (iSrcX % nSrcXSize) +
7265 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7266 :
7267 30 : if (poWK->panUnifiedSrcValid != nullptr &&
7268 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7269 0 : continue;
7270 :
7271 30 : if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
7272 : &dfBandDensity, &dfValueReal,
7273 60 : &dfValueImag) &&
7274 30 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7275 : {
7276 30 : const double dfWeight =
7277 30 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7278 :
7279 : // Check array for existing entry.
7280 30 : int i = 0;
7281 49 : for (i = 0; i < nBins; ++i)
7282 : {
7283 47 : if (IsSame(padfRealVals[i], dfValueReal) &&
7284 14 : IsSame(padfImagVals[i], dfValueImag))
7285 : {
7286 :
7287 14 : pafCounts[i] +=
7288 14 : static_cast<float>(dfWeight);
7289 14 : bool bValIsMaxCount =
7290 14 : (pafCounts[i] > pafCounts[iModeIndex]);
7291 :
7292 14 : if (!bValIsMaxCount &&
7293 6 : pafCounts[i] == pafCounts[iModeIndex])
7294 : {
7295 3 : switch (eTieStrategy)
7296 : {
7297 3 : case GWKTS_First:
7298 3 : break;
7299 0 : case GWKTS_Min:
7300 0 : bValIsMaxCount =
7301 0 : dfValueReal <
7302 0 : padfRealVals[iModeIndex];
7303 0 : break;
7304 0 : case GWKTS_Max:
7305 0 : bValIsMaxCount =
7306 0 : dfValueReal >
7307 0 : padfRealVals[iModeIndex];
7308 0 : break;
7309 : }
7310 : }
7311 :
7312 14 : if (bValIsMaxCount)
7313 : {
7314 8 : iModeIndex = i;
7315 : }
7316 :
7317 14 : break;
7318 : }
7319 : }
7320 :
7321 : // Add to arr if entry not already there.
7322 30 : if (i == nBins)
7323 : {
7324 16 : padfRealVals[i] = dfValueReal;
7325 16 : padfImagVals[i] = dfValueImag;
7326 16 : pafCounts[i] = static_cast<float>(dfWeight);
7327 :
7328 16 : if (iModeIndex < 0)
7329 8 : iModeIndex = i;
7330 :
7331 16 : ++nBins;
7332 : }
7333 : }
7334 : }
7335 : }
7336 :
7337 8 : if (iModeIndex != -1)
7338 : {
7339 8 : dfValueReal = padfRealVals[iModeIndex];
7340 8 : dfValueImag = padfImagVals[iModeIndex];
7341 8 : dfBandDensity = 1;
7342 :
7343 8 : if (poWK->bApplyVerticalShift)
7344 : {
7345 0 : if (!std::isfinite(padfZ[iDstX]))
7346 0 : continue;
7347 : // Subtract padfZ[] since the coordinate
7348 : // transformation is from target to source
7349 0 : dfValueReal =
7350 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7351 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
7352 : }
7353 :
7354 8 : bHasFoundDensity = true;
7355 : }
7356 :
7357 : // We have a computed value from the source. Now apply it
7358 : // to the destination pixel
7359 8 : if (bHasFoundDensity)
7360 : {
7361 8 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7362 : dfValueReal, dfValueImag,
7363 : bAvoidNoDataSingleBand);
7364 : }
7365 : }
7366 :
7367 8 : if (!bHasFoundDensity)
7368 0 : continue;
7369 :
7370 8 : if (!bAvoidNoDataSingleBand)
7371 : {
7372 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7373 : }
7374 :
7375 : /* --------------------------------------------------------------------
7376 : */
7377 : /* Update destination density/validity masks. */
7378 : /* --------------------------------------------------------------------
7379 : */
7380 8 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7381 :
7382 8 : if (poWK->panDstValid != nullptr)
7383 : {
7384 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7385 : }
7386 : } /* Next iDstX */
7387 :
7388 : /* --------------------------------------------------------------------
7389 : */
7390 : /* Report progress to the user, and optionally cancel out. */
7391 : /* --------------------------------------------------------------------
7392 : */
7393 8 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7394 0 : break;
7395 : }
7396 :
7397 : /* -------------------------------------------------------------------- */
7398 : /* Cleanup and return. */
7399 : /* -------------------------------------------------------------------- */
7400 8 : CPLFree(padfX);
7401 8 : CPLFree(padfY);
7402 8 : CPLFree(padfZ);
7403 8 : CPLFree(padfX2);
7404 8 : CPLFree(padfY2);
7405 8 : CPLFree(padfZ2);
7406 8 : CPLFree(pabSuccess);
7407 8 : CPLFree(pabSuccess2);
7408 8 : VSIFree(padfRealVals);
7409 8 : VSIFree(padfImagVals);
7410 8 : VSIFree(pafCounts);
7411 : }
7412 :
7413 : /************************************************************************/
7414 : /* GWKAverageOrModeThread() */
7415 : /************************************************************************/
7416 :
7417 : // Overall logic based on GWKGeneralCaseThread().
7418 163 : static void GWKAverageOrModeThread(void *pData)
7419 : {
7420 163 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7421 163 : const GDALWarpKernel *poWK = psJob->poWK;
7422 163 : const int iYMin = psJob->iYMin;
7423 163 : const int iYMax = psJob->iYMax;
7424 : const double dfMultFactorVerticalShiftPipeline =
7425 163 : poWK->bApplyVerticalShift
7426 163 : ? CPLAtof(CSLFetchNameValueDef(
7427 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7428 : "1.0"))
7429 163 : : 0.0;
7430 : const bool bAvoidNoDataSingleBand =
7431 194 : poWK->nBands == 1 ||
7432 31 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7433 163 : "UNIFIED_SRC_NODATA", "FALSE"));
7434 :
7435 163 : const int nDstXSize = poWK->nDstXSize;
7436 163 : const int nSrcXSize = poWK->nSrcXSize;
7437 :
7438 : /* -------------------------------------------------------------------- */
7439 : /* Find out which algorithm to use (small optim.) */
7440 : /* -------------------------------------------------------------------- */
7441 :
7442 : // Only used for GRA_Mode
7443 163 : float *pafCounts = nullptr;
7444 163 : int nBins = 0;
7445 163 : int nBinsOffset = 0;
7446 163 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7447 :
7448 : // Only used with Q1, Med and Q3
7449 163 : float quant = 0.0f;
7450 :
7451 : // To control array allocation only when data type is complex
7452 163 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7453 :
7454 163 : if (poWK->eResample == GRA_Mode)
7455 : {
7456 45 : if (poWK->bApplyVerticalShift)
7457 : {
7458 0 : return GWKModeComplexType(psJob);
7459 : }
7460 :
7461 45 : switch (poWK->eWorkingDataType)
7462 : {
7463 7 : case GDT_UInt8:
7464 7 : nBins = 256;
7465 7 : break;
7466 :
7467 0 : case GDT_Int8:
7468 0 : nBins = 256;
7469 0 : nBinsOffset = nBins / 2;
7470 0 : break;
7471 :
7472 1 : case GDT_UInt16:
7473 1 : nBins = 65536;
7474 1 : break;
7475 :
7476 10 : case GDT_Int16:
7477 10 : nBins = 65536;
7478 10 : nBinsOffset = nBins / 2;
7479 10 : break;
7480 :
7481 10 : case GDT_Int32:
7482 10 : return GWKModeRealType<int32_t>(psJob);
7483 :
7484 1 : case GDT_UInt32:
7485 1 : return GWKModeRealType<uint32_t>(psJob);
7486 :
7487 1 : case GDT_Int64:
7488 1 : return GWKModeRealType<int64_t>(psJob);
7489 :
7490 1 : case GDT_UInt64:
7491 1 : return GWKModeRealType<uint64_t>(psJob);
7492 :
7493 0 : case GDT_Float16:
7494 0 : return GWKModeRealType<GFloat16>(psJob);
7495 :
7496 4 : case GDT_Float32:
7497 4 : return GWKModeRealType<float>(psJob);
7498 :
7499 2 : case GDT_Float64:
7500 2 : return GWKModeRealType<double>(psJob);
7501 :
7502 8 : case GDT_CInt16:
7503 : case GDT_CInt32:
7504 : case GDT_CFloat16:
7505 : case GDT_CFloat32:
7506 : case GDT_CFloat64:
7507 8 : return GWKModeComplexType(psJob);
7508 :
7509 0 : case GDT_Unknown:
7510 : case GDT_TypeCount:
7511 0 : CPLAssert(false);
7512 : return;
7513 : }
7514 :
7515 18 : if (nBins)
7516 : {
7517 : pafCounts =
7518 18 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7519 18 : if (pafCounts == nullptr)
7520 0 : return;
7521 : }
7522 : }
7523 118 : else if (poWK->eResample == GRA_Med)
7524 : {
7525 6 : quant = 0.5f;
7526 : }
7527 112 : else if (poWK->eResample == GRA_Q1)
7528 : {
7529 10 : quant = 0.25f;
7530 : }
7531 102 : else if (poWK->eResample == GRA_Q3)
7532 : {
7533 5 : quant = 0.75f;
7534 : }
7535 97 : else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7536 11 : poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7537 : {
7538 : // Other resample algorithms not permitted here.
7539 0 : CPLError(CE_Fatal, CPLE_AppDefined,
7540 : "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7541 : "illegal resample");
7542 : }
7543 :
7544 136 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7545 :
7546 : /* -------------------------------------------------------------------- */
7547 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7548 : /* scanlines worth of positions. */
7549 : /* -------------------------------------------------------------------- */
7550 :
7551 : double *padfX =
7552 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7553 : double *padfY =
7554 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7555 : double *padfZ =
7556 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7557 : double *padfX2 =
7558 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7559 : double *padfY2 =
7560 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7561 : double *padfZ2 =
7562 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7563 136 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7564 136 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7565 :
7566 136 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7567 136 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7568 136 : const double dfErrorThreshold = CPLAtof(
7569 136 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7570 :
7571 : const double dfExcludedValuesThreshold =
7572 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7573 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7574 136 : 100.0;
7575 : const double dfNodataValuesThreshold =
7576 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7577 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7578 136 : 100.0;
7579 :
7580 : const int nXMargin =
7581 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7582 : const int nYMargin =
7583 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7584 :
7585 : /* ==================================================================== */
7586 : /* Loop over output lines. */
7587 : /* ==================================================================== */
7588 8214 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7589 : {
7590 8078 : GWKAverageOrModeComputeLineCoords(
7591 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7592 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7593 :
7594 : /* ====================================================================
7595 : */
7596 : /* Loop over pixels in output scanline. */
7597 : /* ====================================================================
7598 : */
7599 2094000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7600 : {
7601 2085920 : GPtrDiff_t iSrcOffset = 0;
7602 2085920 : double dfDensity = 1.0;
7603 2085920 : bool bHasFoundDensity = false;
7604 :
7605 2085920 : bool bWrapOverX = false;
7606 2085920 : double dfXMin = 0;
7607 2085920 : double dfYMin = 0;
7608 2085920 : double dfXMax = 0;
7609 2085920 : double dfYMax = 0;
7610 2085920 : int iSrcXMin = 0;
7611 2085920 : int iSrcYMin = 0;
7612 2085920 : int iSrcXMax = 0;
7613 2085920 : int iSrcYMax = 0;
7614 2085920 : if (!GWKAverageOrModeComputeSourceCoords(
7615 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7616 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7617 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7618 : {
7619 687183 : continue;
7620 : }
7621 :
7622 1906320 : const GPtrDiff_t iDstOffset =
7623 1906320 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7624 :
7625 1906320 : bool bDone = false;
7626 :
7627 : // Special Average mode where we process all bands together,
7628 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7629 1906320 : constexpr double EPSILON = 1e-10;
7630 4613330 : if (poWK->eResample == GRA_Average &&
7631 800681 : (!poWK->m_aadfExcludedValues.empty() ||
7632 589832 : dfNodataValuesThreshold < 1 - EPSILON) &&
7633 2707000 : !poWK->bApplyVerticalShift && !bIsComplex)
7634 : {
7635 589832 : double dfTotalWeightInvalid = 0.0;
7636 589832 : double dfTotalWeightExcluded = 0.0;
7637 589832 : double dfTotalWeightRegular = 0.0;
7638 1179660 : std::vector<double> adfValueReal(poWK->nBands, 0);
7639 1179660 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7640 : std::vector<int> anCountExcludedValues(
7641 589832 : poWK->m_aadfExcludedValues.size(), 0);
7642 :
7643 2162710 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7644 : {
7645 1572880 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7646 1572880 : iSrcOffset =
7647 1572880 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7648 6291500 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7649 : iSrcX++, iSrcOffset++)
7650 : {
7651 4718620 : if (bWrapOverX)
7652 0 : iSrcOffset =
7653 0 : (iSrcX % nSrcXSize) +
7654 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7655 :
7656 4718620 : const double dfWeight =
7657 4718620 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7658 4718620 : if (dfWeight <= 0)
7659 0 : continue;
7660 :
7661 4718640 : if (poWK->panUnifiedSrcValid != nullptr &&
7662 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7663 : {
7664 3 : dfTotalWeightInvalid += dfWeight;
7665 3 : continue;
7666 : }
7667 :
7668 4718620 : bool bAllValid = true;
7669 8651150 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7670 : {
7671 7340300 : double dfBandDensity = 0;
7672 7340300 : double dfValueImagTmp = 0;
7673 11272800 : if (!(GWKGetPixelValue(
7674 : poWK, iBand, iSrcOffset, &dfBandDensity,
7675 7340300 : &adfValueReal[iBand], &dfValueImagTmp) &&
7676 3932530 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7677 : {
7678 3407770 : bAllValid = false;
7679 3407770 : break;
7680 : }
7681 : }
7682 :
7683 4718620 : if (!bAllValid)
7684 : {
7685 3407770 : dfTotalWeightInvalid += dfWeight;
7686 3407770 : continue;
7687 : }
7688 :
7689 1310850 : bool bExcludedValueFound = false;
7690 2490500 : for (size_t i = 0;
7691 2490500 : i < poWK->m_aadfExcludedValues.size(); ++i)
7692 : {
7693 1179670 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7694 : {
7695 22 : bExcludedValueFound = true;
7696 22 : ++anCountExcludedValues[i];
7697 22 : dfTotalWeightExcluded += dfWeight;
7698 22 : break;
7699 : }
7700 : }
7701 1310850 : if (!bExcludedValueFound)
7702 : {
7703 : // Weighted incremental algorithm mean
7704 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7705 1310830 : dfTotalWeightRegular += dfWeight;
7706 5243290 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7707 : {
7708 3932460 : adfValueAveraged[iBand] +=
7709 7864930 : (dfWeight / dfTotalWeightRegular) *
7710 7864930 : (adfValueReal[iBand] -
7711 3932460 : adfValueAveraged[iBand]);
7712 : }
7713 : }
7714 : }
7715 : }
7716 :
7717 589832 : const double dfTotalWeight = dfTotalWeightInvalid +
7718 : dfTotalWeightExcluded +
7719 : dfTotalWeightRegular;
7720 589832 : if (dfTotalWeightInvalid > 0 &&
7721 : dfTotalWeightInvalid >=
7722 458751 : dfNodataValuesThreshold * dfTotalWeight)
7723 : {
7724 : // Do nothing. Let bHasFoundDensity to false.
7725 : }
7726 131085 : else if (dfTotalWeightExcluded > 0 &&
7727 : dfTotalWeightExcluded >=
7728 7 : dfExcludedValuesThreshold * dfTotalWeight)
7729 : {
7730 : // Find the most represented excluded value tuple
7731 3 : size_t iExcludedValue = 0;
7732 3 : int nExcludedValueCount = 0;
7733 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7734 : ++i)
7735 : {
7736 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
7737 : {
7738 3 : iExcludedValue = i;
7739 3 : nExcludedValueCount = anCountExcludedValues[i];
7740 : }
7741 : }
7742 :
7743 3 : bHasFoundDensity = true;
7744 :
7745 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7746 : {
7747 9 : GWKSetPixelValue(
7748 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7749 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7750 : 0, bAvoidNoDataSingleBand);
7751 : }
7752 :
7753 3 : if (!bAvoidNoDataSingleBand)
7754 : {
7755 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7756 3 : }
7757 : }
7758 131082 : else if (dfTotalWeightRegular > 0)
7759 : {
7760 131082 : bHasFoundDensity = true;
7761 :
7762 524324 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7763 : {
7764 393242 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7765 : /* dfBandDensity = */ 1.0,
7766 393242 : adfValueAveraged[iBand], 0,
7767 : bAvoidNoDataSingleBand);
7768 : }
7769 :
7770 131082 : if (!bAvoidNoDataSingleBand)
7771 : {
7772 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7773 : }
7774 : }
7775 :
7776 : // Skip below loop on bands
7777 589832 : bDone = true;
7778 : }
7779 :
7780 : /* ====================================================================
7781 : */
7782 : /* Loop processing each band. */
7783 : /* ====================================================================
7784 : */
7785 :
7786 4729250 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7787 : {
7788 2822920 : double dfBandDensity = 0.0;
7789 2822920 : double dfValueReal = 0.0;
7790 2822920 : double dfValueImag = 0.0;
7791 2822920 : double dfValueRealTmp = 0.0;
7792 2822920 : double dfValueImagTmp = 0.0;
7793 :
7794 : /* --------------------------------------------------------------------
7795 : */
7796 : /* Collect the source value. */
7797 : /* --------------------------------------------------------------------
7798 : */
7799 :
7800 : // Loop over source lines and pixels - 3 possible algorithms.
7801 :
7802 2822920 : if (poWK->eResample == GRA_Average)
7803 : {
7804 300849 : double dfTotalWeight = 0.0;
7805 :
7806 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7807 : // in gcore/overview.cpp.
7808 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7809 : {
7810 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7811 330459 : iSrcOffset = iSrcXMin +
7812 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7813 803200 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7814 : iSrcX++, iSrcOffset++)
7815 : {
7816 472741 : if (bWrapOverX)
7817 630 : iSrcOffset =
7818 630 : (iSrcX % nSrcXSize) +
7819 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7820 :
7821 472745 : if (poWK->panUnifiedSrcValid != nullptr &&
7822 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7823 : iSrcOffset))
7824 : {
7825 1 : continue;
7826 : }
7827 :
7828 472740 : if (GWKGetPixelValue(
7829 : poWK, iBand, iSrcOffset, &dfBandDensity,
7830 945480 : &dfValueRealTmp, &dfValueImagTmp) &&
7831 472740 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7832 : {
7833 472740 : const double dfWeight =
7834 472740 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7835 472740 : if (dfWeight > 0)
7836 : {
7837 : // Weighted incremental algorithm mean
7838 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7839 472740 : dfTotalWeight += dfWeight;
7840 472740 : dfValueReal +=
7841 472740 : (dfWeight / dfTotalWeight) *
7842 472740 : (dfValueRealTmp - dfValueReal);
7843 472740 : if (bIsComplex)
7844 : {
7845 252 : dfValueImag +=
7846 252 : (dfWeight / dfTotalWeight) *
7847 252 : (dfValueImagTmp - dfValueImag);
7848 : }
7849 : }
7850 : }
7851 : }
7852 : }
7853 :
7854 300849 : if (dfTotalWeight > 0)
7855 : {
7856 300849 : if (poWK->bApplyVerticalShift)
7857 : {
7858 0 : if (!std::isfinite(padfZ[iDstX]))
7859 0 : continue;
7860 : // Subtract padfZ[] since the coordinate
7861 : // transformation is from target to source
7862 0 : dfValueReal =
7863 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7864 0 : padfZ[iDstX] *
7865 : dfMultFactorVerticalShiftPipeline;
7866 : }
7867 :
7868 300849 : dfBandDensity = 1;
7869 300849 : bHasFoundDensity = true;
7870 : }
7871 : } // GRA_Average.
7872 :
7873 2522070 : else if (poWK->eResample == GRA_RMS)
7874 : {
7875 300416 : double dfTotalReal = 0.0;
7876 300416 : double dfTotalImag = 0.0;
7877 300416 : double dfTotalWeight = 0.0;
7878 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7879 : // in gcore/overview.cpp.
7880 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7881 : {
7882 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7883 330162 : iSrcOffset = iSrcXMin +
7884 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7885 802723 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7886 : iSrcX++, iSrcOffset++)
7887 : {
7888 472561 : if (bWrapOverX)
7889 630 : iSrcOffset =
7890 630 : (iSrcX % nSrcXSize) +
7891 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7892 :
7893 472561 : if (poWK->panUnifiedSrcValid != nullptr &&
7894 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7895 : iSrcOffset))
7896 : {
7897 0 : continue;
7898 : }
7899 :
7900 472561 : if (GWKGetPixelValue(
7901 : poWK, iBand, iSrcOffset, &dfBandDensity,
7902 945122 : &dfValueRealTmp, &dfValueImagTmp) &&
7903 472561 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7904 : {
7905 472561 : const double dfWeight =
7906 472561 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7907 472561 : dfTotalWeight += dfWeight;
7908 472561 : dfTotalReal +=
7909 472561 : dfValueRealTmp * dfValueRealTmp * dfWeight;
7910 472561 : if (bIsComplex)
7911 48 : dfTotalImag += dfValueImagTmp *
7912 48 : dfValueImagTmp * dfWeight;
7913 : }
7914 : }
7915 : }
7916 :
7917 300416 : if (dfTotalWeight > 0)
7918 : {
7919 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7920 :
7921 300416 : if (poWK->bApplyVerticalShift)
7922 : {
7923 0 : if (!std::isfinite(padfZ[iDstX]))
7924 0 : continue;
7925 : // Subtract padfZ[] since the coordinate
7926 : // transformation is from target to source
7927 0 : dfValueReal =
7928 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7929 0 : padfZ[iDstX] *
7930 : dfMultFactorVerticalShiftPipeline;
7931 : }
7932 :
7933 300416 : if (bIsComplex)
7934 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7935 :
7936 300416 : dfBandDensity = 1;
7937 300416 : bHasFoundDensity = true;
7938 : }
7939 : } // GRA_RMS.
7940 :
7941 2221660 : else if (poWK->eResample == GRA_Mode)
7942 : {
7943 496623 : float fMaxCount = 0.0f;
7944 496623 : int nMode = -1;
7945 496623 : bool bHasSourceValues = false;
7946 :
7947 496623 : memset(pafCounts, 0, nBins * sizeof(float));
7948 :
7949 1612560 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7950 : {
7951 1115940 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7952 1115940 : iSrcOffset = iSrcXMin +
7953 1115940 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7954 4733160 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7955 : iSrcX++, iSrcOffset++)
7956 : {
7957 3617230 : if (bWrapOverX)
7958 630 : iSrcOffset =
7959 630 : (iSrcX % nSrcXSize) +
7960 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7961 :
7962 3617230 : if (poWK->panUnifiedSrcValid != nullptr &&
7963 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7964 : iSrcOffset))
7965 0 : continue;
7966 :
7967 3617230 : if (GWKGetPixelValue(
7968 : poWK, iBand, iSrcOffset, &dfBandDensity,
7969 7234450 : &dfValueRealTmp, &dfValueImagTmp) &&
7970 3617230 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7971 : {
7972 3617230 : bHasSourceValues = true;
7973 3617230 : const int nVal =
7974 3617230 : static_cast<int>(dfValueRealTmp);
7975 3617230 : const int iBin = nVal + nBinsOffset;
7976 3617230 : const double dfWeight =
7977 3617230 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7978 :
7979 : // Sum the density.
7980 3617230 : pafCounts[iBin] += static_cast<float>(dfWeight);
7981 : // Is it the most common value so far?
7982 3617230 : bool bUpdateMode = pafCounts[iBin] > fMaxCount;
7983 3617230 : if (!bUpdateMode &&
7984 778316 : pafCounts[iBin] == fMaxCount)
7985 : {
7986 218628 : switch (eTieStrategy)
7987 : {
7988 218620 : case GWKTS_First:
7989 218620 : break;
7990 4 : case GWKTS_Min:
7991 4 : bUpdateMode = nVal < nMode;
7992 4 : break;
7993 4 : case GWKTS_Max:
7994 4 : bUpdateMode = nVal > nMode;
7995 4 : break;
7996 : }
7997 : }
7998 3617230 : if (bUpdateMode)
7999 : {
8000 2838920 : nMode = nVal;
8001 2838920 : fMaxCount = pafCounts[iBin];
8002 : }
8003 : }
8004 : }
8005 : }
8006 :
8007 496623 : if (bHasSourceValues)
8008 : {
8009 496623 : dfValueReal = nMode;
8010 496623 : dfBandDensity = 1;
8011 496623 : bHasFoundDensity = true;
8012 : }
8013 : } // GRA_Mode.
8014 :
8015 1725040 : else if (poWK->eResample == GRA_Max)
8016 : {
8017 335037 : bool bFoundValid = false;
8018 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
8019 : // This code adapted from nAlgo 1 method, GRA_Average.
8020 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8021 : {
8022 952975 : iSrcOffset = iSrcXMin +
8023 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8024 4406540 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8025 : iSrcX++, iSrcOffset++)
8026 : {
8027 3453560 : if (bWrapOverX)
8028 630 : iSrcOffset =
8029 630 : (iSrcX % nSrcXSize) +
8030 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8031 :
8032 3456370 : if (poWK->panUnifiedSrcValid != nullptr &&
8033 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8034 : iSrcOffset))
8035 : {
8036 2446 : continue;
8037 : }
8038 :
8039 : // Returns pixel value if it is not no data.
8040 3451120 : if (GWKGetPixelValue(
8041 : poWK, iBand, iSrcOffset, &dfBandDensity,
8042 6902230 : &dfValueRealTmp, &dfValueImagTmp) &&
8043 3451120 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8044 : {
8045 3451120 : bFoundValid = true;
8046 3451120 : if (dfTotalReal < dfValueRealTmp)
8047 : {
8048 442642 : dfTotalReal = dfValueRealTmp;
8049 : }
8050 : }
8051 : }
8052 : }
8053 :
8054 335037 : if (bFoundValid)
8055 : {
8056 335037 : dfValueReal = dfTotalReal;
8057 :
8058 335037 : if (poWK->bApplyVerticalShift)
8059 : {
8060 0 : if (!std::isfinite(padfZ[iDstX]))
8061 0 : continue;
8062 : // Subtract padfZ[] since the coordinate
8063 : // transformation is from target to source
8064 0 : dfValueReal =
8065 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8066 0 : padfZ[iDstX] *
8067 : dfMultFactorVerticalShiftPipeline;
8068 : }
8069 :
8070 335037 : dfBandDensity = 1;
8071 335037 : bHasFoundDensity = true;
8072 : }
8073 : }
8074 :
8075 1390000 : else if (poWK->eResample == GRA_Min)
8076 : {
8077 335012 : bool bFoundValid = false;
8078 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
8079 : // This code adapted from nAlgo 1 method, GRA_Average.
8080 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8081 : {
8082 952710 : iSrcOffset = iSrcXMin +
8083 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8084 4403460 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8085 : iSrcX++, iSrcOffset++)
8086 : {
8087 3450750 : if (bWrapOverX)
8088 630 : iSrcOffset =
8089 630 : (iSrcX % nSrcXSize) +
8090 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8091 :
8092 3450750 : if (poWK->panUnifiedSrcValid != nullptr &&
8093 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8094 : iSrcOffset))
8095 : {
8096 0 : continue;
8097 : }
8098 :
8099 : // Returns pixel value if it is not no data.
8100 3450750 : if (GWKGetPixelValue(
8101 : poWK, iBand, iSrcOffset, &dfBandDensity,
8102 6901500 : &dfValueRealTmp, &dfValueImagTmp) &&
8103 3450750 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8104 : {
8105 3450750 : bFoundValid = true;
8106 3450750 : if (dfTotalReal > dfValueRealTmp)
8107 : {
8108 443069 : dfTotalReal = dfValueRealTmp;
8109 : }
8110 : }
8111 : }
8112 : }
8113 :
8114 335012 : if (bFoundValid)
8115 : {
8116 335012 : dfValueReal = dfTotalReal;
8117 :
8118 335012 : if (poWK->bApplyVerticalShift)
8119 : {
8120 0 : if (!std::isfinite(padfZ[iDstX]))
8121 0 : continue;
8122 : // Subtract padfZ[] since the coordinate
8123 : // transformation is from target to source
8124 0 : dfValueReal =
8125 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8126 0 : padfZ[iDstX] *
8127 : dfMultFactorVerticalShiftPipeline;
8128 : }
8129 :
8130 335012 : dfBandDensity = 1;
8131 335012 : bHasFoundDensity = true;
8132 : }
8133 : } // GRA_Min.
8134 :
8135 : else
8136 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
8137 : {
8138 1054990 : CPLAssert(quant > 0.0f);
8139 :
8140 1054990 : bool bFoundValid = false;
8141 1054990 : std::vector<double> dfRealValuesTmp;
8142 :
8143 : // This code adapted from nAlgo 1 method, GRA_Average.
8144 4012980 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8145 : {
8146 2957990 : iSrcOffset = iSrcXMin +
8147 2957990 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8148 13509900 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8149 : iSrcX++, iSrcOffset++)
8150 : {
8151 10551900 : if (bWrapOverX)
8152 1890 : iSrcOffset =
8153 1890 : (iSrcX % nSrcXSize) +
8154 1890 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8155 :
8156 10748500 : if (poWK->panUnifiedSrcValid != nullptr &&
8157 196608 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8158 : iSrcOffset))
8159 : {
8160 195449 : continue;
8161 : }
8162 :
8163 : // Returns pixel value if it is not no data.
8164 10356400 : if (GWKGetPixelValue(
8165 : poWK, iBand, iSrcOffset, &dfBandDensity,
8166 20712900 : &dfValueRealTmp, &dfValueImagTmp) &&
8167 10356400 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8168 : {
8169 10356400 : bFoundValid = true;
8170 10356400 : dfRealValuesTmp.push_back(dfValueRealTmp);
8171 : }
8172 : }
8173 : }
8174 :
8175 1054990 : if (bFoundValid)
8176 : {
8177 1006150 : std::sort(dfRealValuesTmp.begin(),
8178 : dfRealValuesTmp.end());
8179 : int quantIdx = static_cast<int>(
8180 1006150 : std::ceil(quant * dfRealValuesTmp.size() - 1));
8181 1006150 : dfValueReal = dfRealValuesTmp[quantIdx];
8182 :
8183 1006150 : if (poWK->bApplyVerticalShift)
8184 : {
8185 0 : if (!std::isfinite(padfZ[iDstX]))
8186 0 : continue;
8187 : // Subtract padfZ[] since the coordinate
8188 : // transformation is from target to source
8189 0 : dfValueReal =
8190 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8191 0 : padfZ[iDstX] *
8192 : dfMultFactorVerticalShiftPipeline;
8193 : }
8194 :
8195 1006150 : dfBandDensity = 1;
8196 1006150 : bHasFoundDensity = true;
8197 1006150 : dfRealValuesTmp.clear();
8198 : }
8199 : } // Quantile.
8200 :
8201 : /* --------------------------------------------------------------------
8202 : */
8203 : /* We have a computed value from the source. Now apply it
8204 : * to */
8205 : /* the destination pixel. */
8206 : /* --------------------------------------------------------------------
8207 : */
8208 2822920 : if (bHasFoundDensity)
8209 : {
8210 : // TODO: Should we compute dfBandDensity in fct of
8211 : // nCount/nCount2, or use as a threshold to set the dest
8212 : // value?
8213 : // dfBandDensity = (float) nCount / nCount2;
8214 : // if( (float) nCount / nCount2 > 0.1 )
8215 : // or fix gdalwarp crop_to_cutline to crop partially
8216 : // overlapping pixels.
8217 2774080 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8218 : dfValueReal, dfValueImag,
8219 : bAvoidNoDataSingleBand);
8220 : }
8221 : }
8222 :
8223 1906320 : if (!bHasFoundDensity)
8224 507587 : continue;
8225 :
8226 1398740 : if (!bAvoidNoDataSingleBand)
8227 : {
8228 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
8229 : }
8230 :
8231 : /* --------------------------------------------------------------------
8232 : */
8233 : /* Update destination density/validity masks. */
8234 : /* --------------------------------------------------------------------
8235 : */
8236 1398740 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8237 :
8238 1398740 : if (poWK->panDstValid != nullptr)
8239 : {
8240 1184 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8241 : }
8242 : } /* Next iDstX */
8243 :
8244 : /* --------------------------------------------------------------------
8245 : */
8246 : /* Report progress to the user, and optionally cancel out. */
8247 : /* --------------------------------------------------------------------
8248 : */
8249 8078 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8250 0 : break;
8251 : }
8252 :
8253 : /* -------------------------------------------------------------------- */
8254 : /* Cleanup and return. */
8255 : /* -------------------------------------------------------------------- */
8256 136 : CPLFree(padfX);
8257 136 : CPLFree(padfY);
8258 136 : CPLFree(padfZ);
8259 136 : CPLFree(padfX2);
8260 136 : CPLFree(padfY2);
8261 136 : CPLFree(padfZ2);
8262 136 : CPLFree(pabSuccess);
8263 136 : CPLFree(pabSuccess2);
8264 136 : VSIFree(pafCounts);
8265 : }
8266 :
8267 : /************************************************************************/
8268 : /* getOrientation() */
8269 : /************************************************************************/
8270 :
typedef std::pair<double, double> XYPair;

// Classifies the turn made when going from p1 to p2 to p3.
// Returns 1 if (p1,p2,p3) is clockwise oriented,
// -1 if it is counter-clockwise oriented,
// or 0 if the points are colinear (cross product magnitude below 1e-20).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    // Cross product of edge p1->p2 with edge p2->p3; its sign gives the
    // turn direction.
    const double dfCross = (p2.second - p1.second) * (p3.first - p2.first) -
                           (p2.first - p1.first) * (p3.second - p2.second);

    if (std::abs(dfCross) < 1e-20)
        return 0;
    return dfCross > 0 ? 1 : -1;
}
8292 :
8293 : /************************************************************************/
8294 : /* isConvex() */
8295 : /************************************************************************/
8296 :
8297 : typedef std::vector<XYPair> XYPoly;
8298 :
8299 : // poly must be closed
8300 785302 : static bool isConvex(const XYPoly &poly)
8301 : {
8302 785302 : const size_t n = poly.size();
8303 785302 : size_t i = 0;
8304 785302 : int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8305 785302 : ++i;
8306 2355910 : for (; i < n - 2; ++i)
8307 : {
8308 : const int orientation =
8309 1570600 : getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8310 1570600 : if (orientation != 0)
8311 : {
8312 1567910 : if (last_orientation == 0)
8313 0 : last_orientation = orientation;
8314 1567910 : else if (orientation != last_orientation)
8315 0 : return false;
8316 : }
8317 : }
8318 785302 : return true;
8319 : }
8320 :
8321 : /************************************************************************/
8322 : /* pointIntersectsConvexPoly() */
8323 : /************************************************************************/
8324 :
// Returns whether xy intersects poly, that must be closed and convex.
//
// The point is accepted when it never lies on strictly opposite sides of two
// edges of the ring. A side value whose magnitude is below 1e-20 (point on
// the edge's supporting line) is not used as the reference side.
static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
{
    // 2D cross product telling on which side of the directed edge a -> b
    // the point xy lies.
    const auto sideOfEdge = [&xy](const XYPair &a, const XYPair &b)
    {
        const double dfPtX = xy.first - a.first;
        const double dfPtY = xy.second - a.second;
        const double dfEdgeX = b.first - a.first;
        const double dfEdgeY = b.second - a.second;
        return dfPtX * dfEdgeY - dfEdgeX * dfPtY;
    };

    const size_t nPoints = poly.size();
    double dfRefSide = sideOfEdge(poly[0], poly[1]);

    for (size_t iEnd = 2; iEnd < nPoints; ++iEnd)
    {
        const double dfSide = sideOfEdge(poly[iEnd - 1], poly[iEnd]);
        if (std::abs(dfRefSide) < 1e-20)
        {
            // No usable reference side yet: adopt the current one.
            dfRefSide = dfSide;
            continue;
        }
        if (dfRefSide * dfSide < 0)
            return false;
    }

    return true;
}
8353 :
8354 : /************************************************************************/
8355 : /* getIntersection() */
8356 : /************************************************************************/
8357 :
/* Computes the intersection of segment [p1,p2] with segment [p3,p4].
 * Returns true and stores the point in xy if the intersection is a single
 * point and the 2 segments are not colinear. Returns false, leaving xy
 * untouched, otherwise.
 */
static bool getIntersection(const XYPair &p1, const XYPair &p2,
                            const XYPair &p3, const XYPair &p4, XYPair &xy)
{
    const double dx12 = p1.first - p2.first;
    const double dy12 = p1.second - p2.second;
    const double dx34 = p3.first - p4.first;
    const double dy34 = p3.second - p4.second;
    const double dx13 = p1.first - p3.first;
    const double dy13 = p1.second - p3.second;

    const double denom = dx12 * dy34 - dy12 * dx34;

    // True when num / denom would fall outside [0,1]; evaluated without
    // dividing.
    const auto outsideUnitRange = [denom](double num)
    { return num * denom < 0 || std::abs(num) > std::abs(denom); };

    // Position along [p1,p2]
    const double t_num = dx13 * dy34 - dy13 * dx34;
    if (denom == 0 || outsideUnitRange(t_num))
        return false;

    // Position along [p3,p4]
    const double u_num = dx13 * dy12 - dy13 * dx12;
    if (outsideUnitRange(u_num))
        return false;

    const double t = t_num / denom;
    const double dfX = p1.first + t * (p2.first - p1.first);
    const double dfY = p1.second + t * (p2.second - p1.second);
    xy.first = dfX;
    xy.second = dfY;
    return true;
}
8386 :
8387 : /************************************************************************/
8388 : /* getConvexPolyIntersection() */
8389 : /************************************************************************/
8390 :
8391 : // poly1 and poly2 must be closed and convex.
8392 : // The returned intersection will not necessary be closed.
8393 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8394 : XYPoly &intersection)
8395 : {
8396 785302 : intersection.clear();
8397 :
8398 : // Add all points of poly1 inside poly2
8399 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8400 : {
8401 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8402 1187430 : intersection.push_back(poly1[i]);
8403 : }
8404 785302 : if (intersection.size() == poly1.size() - 1)
8405 : {
8406 : // poly1 is inside poly2
8407 119100 : return;
8408 : }
8409 :
8410 : // Add all points of poly2 inside poly1
8411 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8412 : {
8413 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8414 575904 : intersection.push_back(poly2[i]);
8415 : }
8416 :
8417 : // Compute the intersection of all edges of both polygons
8418 726972 : XYPair xy;
8419 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8420 : {
8421 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8422 : {
8423 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8424 11631600 : poly2[i2 + 1], xy))
8425 : {
8426 1576230 : intersection.push_back(xy);
8427 : }
8428 : }
8429 : }
8430 :
8431 726972 : if (intersection.empty())
8432 60770 : return;
8433 :
8434 : // Find lowest-left point in intersection set
8435 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8436 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8437 3772450 : for (const auto &pair : intersection)
8438 : {
8439 3106240 : const double x = pair.first;
8440 3106240 : const double y = pair.second;
8441 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8442 : {
8443 1096040 : lowest_x = x;
8444 1096040 : lowest_y = y;
8445 : }
8446 : }
8447 :
8448 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8449 : {
8450 5737980 : const double p1x_diff = p1.first - lowest_x;
8451 5737980 : const double p1y_diff = p1.second - lowest_y;
8452 5737980 : const double p2x_diff = p2.first - lowest_x;
8453 5737980 : const double p2y_diff = p2.second - lowest_y;
8454 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8455 : {
8456 2655420 : if (p1x_diff >= 0)
8457 : {
8458 2655420 : if (p2x_diff >= 0)
8459 2655420 : return p1.first < p2.first;
8460 0 : return true;
8461 : }
8462 : else
8463 : {
8464 0 : if (p2x_diff >= 0)
8465 0 : return false;
8466 0 : return p1.first < p2.first;
8467 : }
8468 : }
8469 :
8470 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8471 1046960 : return p1.second < p2.second;
8472 :
8473 : double tan_p1;
8474 2035600 : if (p1x_diff == 0.0)
8475 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8476 : else
8477 1570980 : tan_p1 = p1y_diff / p1x_diff;
8478 :
8479 : double tan_p2;
8480 2035600 : if (p2x_diff == 0.0)
8481 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8482 : else
8483 1196080 : tan_p2 = p2y_diff / p2x_diff;
8484 :
8485 2035600 : if (tan_p1 >= 0)
8486 : {
8487 1904790 : if (tan_p2 >= 0)
8488 1881590 : return tan_p1 < tan_p2;
8489 : else
8490 23199 : return true;
8491 : }
8492 : else
8493 : {
8494 130806 : if (tan_p2 >= 0)
8495 103900 : return false;
8496 : else
8497 26906 : return tan_p1 < tan_p2;
8498 : }
8499 666202 : };
8500 :
8501 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8502 : // hull
8503 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8504 :
8505 : // Remove duplicated points
8506 666202 : size_t j = 1;
8507 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8508 : {
8509 2440040 : if (intersection[i] != intersection[i - 1])
8510 : {
8511 1452560 : if (j < i)
8512 545275 : intersection[j] = intersection[i];
8513 1452560 : ++j;
8514 : }
8515 : }
8516 666202 : intersection.resize(j);
8517 : }
8518 :
8519 : /************************************************************************/
8520 : /* getArea() */
8521 : /************************************************************************/
8522 :
// Returns the (unsigned) area enclosed by poly, computed as
// 0.5 * |sum of x[i] * (y[i+1] - y[i-1])| with indices wrapping around.
// poly may or may not be closed.
// Returns 0 when poly has fewer than 2 points.
static double getArea(const XYPoly &poly)
{
    const size_t nPointCount = poly.size();

    // The formula below reads poly[1] and poly[nPointCount - 2], so shorter
    // inputs must not reach it.
    if (nPointCount < 2)
        return 0.0;

    double dfAreaSum = 0.0;
    for (size_t i = 0; i < nPointCount; i++)
    {
        // Neighbours of vertex i, wrapping at both ends of the array
        const size_t iPrev = (i == 0) ? nPointCount - 1 : i - 1;
        const size_t iNext = (i + 1 == nPointCount) ? 0 : i + 1;
        dfAreaSum += poly[i].first * (poly[iNext].second - poly[iPrev].second);
    }

    return 0.5 * std::fabs(dfAreaSum);
}
8541 :
8542 : /************************************************************************/
8543 : /* GWKSumPreserving() */
8544 : /************************************************************************/
8545 :
8546 : static void GWKSumPreservingThread(void *pData);
8547 :
8548 19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8549 : {
8550 19 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8551 : }
8552 :
8553 19 : static void GWKSumPreservingThread(void *pData)
8554 : {
8555 19 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8556 19 : GDALWarpKernel *poWK = psJob->poWK;
8557 19 : const int iYMin = psJob->iYMin;
8558 19 : const int iYMax = psJob->iYMax;
8559 : const bool bIsAffineNoRotation =
8560 19 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8561 28 : poWK->pTransformerArg) &&
8562 : // for debug/testing purposes
8563 9 : CPLTestBool(
8564 19 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8565 : const bool bAvoidNoDataSingleBand =
8566 21 : poWK->nBands == 1 ||
8567 2 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
8568 19 : "UNIFIED_SRC_NODATA", "FALSE"));
8569 :
8570 19 : const int nDstXSize = poWK->nDstXSize;
8571 19 : const int nSrcXSize = poWK->nSrcXSize;
8572 19 : const int nSrcYSize = poWK->nSrcYSize;
8573 :
8574 38 : std::vector<double> adfX0(nSrcXSize + 1);
8575 38 : std::vector<double> adfY0(nSrcXSize + 1);
8576 38 : std::vector<double> adfZ0(nSrcXSize + 1);
8577 38 : std::vector<double> adfX1(nSrcXSize + 1);
8578 38 : std::vector<double> adfY1(nSrcXSize + 1);
8579 38 : std::vector<double> adfZ1(nSrcXSize + 1);
8580 38 : std::vector<int> abSuccess0(nSrcXSize + 1);
8581 38 : std::vector<int> abSuccess1(nSrcXSize + 1);
8582 :
8583 : CPLRectObj sGlobalBounds;
8584 19 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8585 19 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8586 19 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8587 19 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8588 19 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8589 :
8590 : struct SourcePixel
8591 : {
8592 : int iSrcX;
8593 : int iSrcY;
8594 :
8595 : // Coordinates of source pixel in target pixel coordinates
8596 : double dfDstX0;
8597 : double dfDstY0;
8598 : double dfDstX1;
8599 : double dfDstY1;
8600 : double dfDstX2;
8601 : double dfDstY2;
8602 : double dfDstX3;
8603 : double dfDstY3;
8604 :
8605 : // Source pixel total area (might be larger than the one described
8606 : // by above coordinates, if the pixel was crossing the antimeridian
8607 : // and split)
8608 : double dfArea;
8609 : };
8610 :
8611 38 : std::vector<SourcePixel> sourcePixels;
8612 :
8613 38 : XYPoly discontinuityLeft(5);
8614 38 : XYPoly discontinuityRight(5);
8615 :
8616 : /* ==================================================================== */
8617 : /* First pass: transform the 4 corners of each potential */
8618 : /* contributing source pixel to target pixel coordinates. */
8619 : /* ==================================================================== */
8620 :
8621 : // Special case for top line
8622 : {
8623 19 : int iY = 0;
8624 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8625 : {
8626 3345 : adfX1[iX] = iX + poWK->nSrcXOff;
8627 3345 : adfY1[iX] = iY + poWK->nSrcYOff;
8628 3345 : adfZ1[iX] = 0;
8629 : }
8630 :
8631 19 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8632 : adfX1.data(), adfY1.data(), adfZ1.data(),
8633 : abSuccess1.data());
8634 :
8635 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8636 : {
8637 3345 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8638 0 : abSuccess1[iX] = FALSE;
8639 : else
8640 : {
8641 3345 : adfX1[iX] -= poWK->nDstXOff;
8642 3345 : adfY1[iX] -= poWK->nDstYOff;
8643 : }
8644 : }
8645 : }
8646 :
8647 22624 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8648 : {
8649 22624 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8650 10966 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8651 22624 : ? 1
8652 11658 : : -1;
8653 19 : };
8654 :
8655 : const auto FindDiscontinuity =
8656 80 : [poWK, psJob, getInsideXSign](
8657 : double dfXLeft, double dfXRight, double dfY,
8658 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8659 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8660 : {
8661 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8662 : {
8663 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8664 800 : double dfXMidReprojected = dfXMid;
8665 800 : dfYMidReprojected = dfY;
8666 800 : double dfZ = 0;
8667 800 : int nSuccess = 0;
8668 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8669 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8670 : &nSuccess);
8671 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8672 : {
8673 456 : dfXRight = dfXMid;
8674 456 : dfXMidReprojectedRight = dfXMidReprojected;
8675 : }
8676 : else
8677 : {
8678 344 : dfXLeft = dfXMid;
8679 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8680 : }
8681 : }
8682 80 : };
8683 :
8684 2685 : for (int iY = 0; iY < nSrcYSize; ++iY)
8685 : {
8686 2666 : std::swap(adfX0, adfX1);
8687 2666 : std::swap(adfY0, adfY1);
8688 2666 : std::swap(adfZ0, adfZ1);
8689 2666 : std::swap(abSuccess0, abSuccess1);
8690 :
8691 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8692 : {
8693 4833460 : adfX1[iX] = iX + poWK->nSrcXOff;
8694 4833460 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8695 4833460 : adfZ1[iX] = 0;
8696 : }
8697 :
8698 2666 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8699 : adfX1.data(), adfY1.data(), adfZ1.data(),
8700 : abSuccess1.data());
8701 :
8702 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8703 : {
8704 4833460 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8705 0 : abSuccess1[iX] = FALSE;
8706 : else
8707 : {
8708 4833460 : adfX1[iX] -= poWK->nDstXOff;
8709 4833460 : adfY1[iX] -= poWK->nDstYOff;
8710 : }
8711 : }
8712 :
8713 4833460 : for (int iX = 0; iX < nSrcXSize; ++iX)
8714 : {
8715 9661580 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8716 4830790 : abSuccess1[iX + 1])
8717 : {
8718 : /* --------------------------------------------------------------------
8719 : */
8720 : /* Do not try to apply transparent source pixels to the
8721 : * destination.*/
8722 : /* --------------------------------------------------------------------
8723 : */
8724 4830790 : const auto iSrcOffset =
8725 4830790 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8726 9560570 : if (poWK->panUnifiedSrcValid != nullptr &&
8727 4729780 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8728 : {
8729 4738340 : continue;
8730 : }
8731 :
8732 103415 : if (poWK->pafUnifiedSrcDensity != nullptr)
8733 : {
8734 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8735 : SRC_DENSITY_THRESHOLD_FLOAT)
8736 0 : continue;
8737 : }
8738 :
8739 : SourcePixel sp;
8740 103415 : sp.dfArea = 0;
8741 103415 : sp.dfDstX0 = adfX0[iX];
8742 103415 : sp.dfDstY0 = adfY0[iX];
8743 103415 : sp.dfDstX1 = adfX0[iX + 1];
8744 103415 : sp.dfDstY1 = adfY0[iX + 1];
8745 103415 : sp.dfDstX2 = adfX1[iX + 1];
8746 103415 : sp.dfDstY2 = adfY1[iX + 1];
8747 103415 : sp.dfDstX3 = adfX1[iX];
8748 103415 : sp.dfDstY3 = adfY1[iX];
8749 :
8750 : // Detect pixels that likely cross the anti-meridian and
8751 : // introduce a discontinuity when reprojected.
8752 :
8753 103415 : if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
8754 10766 : std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
8755 5241 : getInsideXSign(adfX0[iX]) !=
8756 5313 : getInsideXSign(adfX0[iX + 1]) &&
8757 128 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8758 56 : getInsideXSign(adfX0[iX + 1]) ==
8759 108996 : getInsideXSign(adfX1[iX + 1]) &&
8760 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8761 : 0)
8762 : {
8763 : #ifdef DEBUG_VERBOSE
8764 : CPLDebug(
8765 : "WARP",
8766 : "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
8767 : "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
8768 : "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
8769 : iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
8770 : adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
8771 : adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
8772 : #endif
8773 40 : double dfXMidReprojectedLeftTop = 0;
8774 40 : double dfXMidReprojectedRightTop = 0;
8775 40 : double dfYMidReprojectedTop = 0;
8776 40 : FindDiscontinuity(
8777 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8778 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8779 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8780 : dfYMidReprojectedTop);
8781 40 : double dfXMidReprojectedLeftBottom = 0;
8782 40 : double dfXMidReprojectedRightBottom = 0;
8783 40 : double dfYMidReprojectedBottom = 0;
8784 40 : FindDiscontinuity(
8785 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8786 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8787 : dfXMidReprojectedLeftBottom,
8788 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8789 :
8790 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8791 40 : discontinuityLeft[1] =
8792 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8793 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8794 40 : dfYMidReprojectedBottom);
8795 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8796 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8797 :
8798 40 : discontinuityRight[0] =
8799 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8800 40 : discontinuityRight[1] =
8801 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8802 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8803 40 : dfYMidReprojectedBottom);
8804 40 : discontinuityRight[3] =
8805 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8806 40 : discontinuityRight[4] =
8807 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8808 :
8809 40 : sp.dfArea = getArea(discontinuityLeft) +
8810 40 : getArea(discontinuityRight);
8811 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8812 : {
8813 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8814 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8815 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8816 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8817 : }
8818 : else
8819 : {
8820 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8821 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8822 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8823 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8824 : }
8825 : }
8826 :
8827 : // Bounding box of source pixel (expressed in target pixel
8828 : // coordinates)
8829 : CPLRectObj sRect;
8830 103415 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8831 103415 : std::min(sp.dfDstX2, sp.dfDstX3));
8832 103415 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8833 103415 : std::min(sp.dfDstY2, sp.dfDstY3));
8834 103415 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8835 103415 : std::max(sp.dfDstX2, sp.dfDstX3));
8836 103415 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8837 103415 : std::max(sp.dfDstY2, sp.dfDstY3));
8838 103415 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8839 101355 : sRect.miny < iYMax && sRect.maxy > iYMin))
8840 : {
8841 10852 : continue;
8842 : }
8843 :
8844 92563 : sp.iSrcX = iX;
8845 92563 : sp.iSrcY = iY;
8846 :
8847 92563 : if (!bIsAffineNoRotation)
8848 : {
8849 : // Check polygon validity (no self-crossing)
8850 89745 : XYPair xy;
8851 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8852 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8853 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8854 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8855 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8856 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8857 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8858 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8859 : {
8860 113 : continue;
8861 : }
8862 : }
8863 :
8864 92450 : CPLQuadTreeInsertWithBounds(
8865 : hQuadTree,
8866 : reinterpret_cast<void *>(
8867 92450 : static_cast<uintptr_t>(sourcePixels.size())),
8868 : &sRect);
8869 :
8870 92450 : sourcePixels.push_back(sp);
8871 : }
8872 : }
8873 : }
8874 :
8875 38 : std::vector<double> adfRealValue(poWK->nBands);
8876 38 : std::vector<double> adfImagValue(poWK->nBands);
8877 38 : std::vector<double> adfBandDensity(poWK->nBands);
8878 38 : std::vector<double> adfWeight(poWK->nBands);
8879 :
8880 : #ifdef CHECK_SUM_WITH_GEOS
8881 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8882 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8883 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8884 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8885 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8886 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8887 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8888 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8889 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8890 :
8891 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8892 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8893 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8894 : #endif
8895 :
8896 : const XYPoly xy1{
8897 38 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8898 38 : XYPoly xy2(5);
8899 38 : XYPoly xy2_triangle(4);
8900 38 : XYPoly intersection;
8901 :
8902 : /* ==================================================================== */
8903 : /* Loop over output lines. */
8904 : /* ==================================================================== */
8905 1951 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8906 : {
8907 : CPLRectObj sRect;
8908 1932 : sRect.miny = iDstY;
8909 1932 : sRect.maxy = iDstY + 1;
8910 :
8911 : /* ====================================================================
8912 : */
8913 : /* Loop over pixels in output scanline. */
8914 : /* ====================================================================
8915 : */
8916 1403940 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8917 : {
8918 1402010 : sRect.minx = iDstX;
8919 1402010 : sRect.maxx = iDstX + 1;
8920 1402010 : int nSourcePixels = 0;
8921 : void **pahSourcePixel =
8922 1402010 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8923 1402010 : if (nSourcePixels == 0)
8924 : {
8925 1183090 : CPLFree(pahSourcePixel);
8926 1183100 : continue;
8927 : }
8928 :
8929 218919 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8930 218919 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8931 218919 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8932 218919 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
8933 218919 : double dfDensity = 0;
8934 : // Just above zero to please Coverity Scan
8935 218919 : double dfTotalWeight = std::numeric_limits<double>::min();
8936 :
8937 : /* ====================================================================
8938 : */
8939 : /* Iterate over each contributing source pixel to add its
8940 : */
8941 : /* value weighed by the ratio of the area of its
8942 : * intersection */
8943 : /* with the target pixel divided by the area of the source
8944 : */
8945 : /* pixel. */
8946 : /* ====================================================================
8947 : */
8948 1020550 : for (int i = 0; i < nSourcePixels; ++i)
8949 : {
8950 801628 : const int iSourcePixel = static_cast<int>(
8951 801628 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8952 801628 : auto &sp = sourcePixels[iSourcePixel];
8953 :
8954 801628 : double dfWeight = 0.0;
8955 801628 : if (bIsAffineNoRotation)
8956 : {
8957 : // Optimization since the source pixel is a rectangle in
8958 : // target pixel coordinates
8959 16326 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8960 16326 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8961 16326 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8962 16326 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8963 16326 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8964 16326 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8965 16326 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8966 16326 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8967 16326 : dfWeight =
8968 16326 : ((dfIntersMaxX - dfIntersMinX) *
8969 16326 : (dfIntersMaxY - dfIntersMinY)) /
8970 16326 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8971 : }
8972 : else
8973 : {
8974 : // Compute the polygon of the source pixel in target pixel
8975 : // coordinates, and shifted to the target pixel (unit square
8976 : // coordinates)
8977 :
8978 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8979 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8980 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8981 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8982 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8983 :
8984 785302 : if (isConvex(xy2))
8985 : {
8986 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
8987 785302 : if (intersection.size() >= 3)
8988 : {
8989 468849 : dfWeight = getArea(intersection);
8990 : }
8991 : }
8992 : else
8993 : {
8994 : // Split xy2 into 2 triangles.
8995 0 : xy2_triangle[0] = xy2[0];
8996 0 : xy2_triangle[1] = xy2[1];
8997 0 : xy2_triangle[2] = xy2[2];
8998 0 : xy2_triangle[3] = xy2[0];
8999 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9000 : intersection);
9001 0 : if (intersection.size() >= 3)
9002 : {
9003 0 : dfWeight = getArea(intersection);
9004 : }
9005 :
9006 0 : xy2_triangle[1] = xy2[2];
9007 0 : xy2_triangle[2] = xy2[3];
9008 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9009 : intersection);
9010 0 : if (intersection.size() >= 3)
9011 : {
9012 0 : dfWeight += getArea(intersection);
9013 : }
9014 : }
9015 785302 : if (dfWeight > 0.0)
9016 : {
9017 468828 : if (sp.dfArea == 0)
9018 89592 : sp.dfArea = getArea(xy2);
9019 468828 : dfWeight /= sp.dfArea;
9020 : }
9021 :
9022 : #ifdef CHECK_SUM_WITH_GEOS
9023 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
9024 : sp.dfDstX0 - iDstX,
9025 : sp.dfDstY0 - iDstY);
9026 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
9027 : sp.dfDstX1 - iDstX,
9028 : sp.dfDstY1 - iDstY);
9029 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
9030 : sp.dfDstX2 - iDstX,
9031 : sp.dfDstY2 - iDstY);
9032 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
9033 : sp.dfDstX3 - iDstX,
9034 : sp.dfDstY3 - iDstY);
9035 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
9036 : sp.dfDstX0 - iDstX,
9037 : sp.dfDstY0 - iDstY);
9038 :
9039 : double dfWeightGEOS = 0.0;
9040 : auto hIntersection =
9041 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
9042 : if (hIntersection)
9043 : {
9044 : double dfIntersArea = 0.0;
9045 : if (GEOSArea_r(hGEOSContext, hIntersection,
9046 : &dfIntersArea) &&
9047 : dfIntersArea > 0)
9048 : {
9049 : double dfSourceArea = 0.0;
9050 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
9051 : {
9052 : dfWeightGEOS = dfIntersArea / dfSourceArea;
9053 : }
9054 : }
9055 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
9056 : }
9057 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
9058 : {
9059 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
9060 : dfWeight, dfWeightGEOS);
9061 : printf("xy2: "); // ok
9062 : for (const auto &xy : xy2)
9063 : printf("[%f, %f], ", xy.first, xy.second); // ok
9064 : printf("\n"); // ok
9065 : printf("intersection: "); // ok
9066 : for (const auto &xy : intersection)
9067 : printf("[%f, %f], ", xy.first, xy.second); // ok
9068 : printf("\n"); // ok
9069 : }
9070 : #endif
9071 : }
9072 801628 : if (dfWeight > 0.0)
9073 : {
9074 : #ifdef DEBUG_VERBOSE
9075 : #if defined(DST_X) && defined(DST_Y)
9076 : if (iDstX + poWK->nDstXOff == DST_X &&
9077 : iDstY + poWK->nDstYOff == DST_Y)
9078 : {
9079 : CPLDebug("WARP",
9080 : "iSrcX = %d, iSrcY = %d, weight =%.17g",
9081 : sp.iSrcX + poWK->nSrcXOff,
9082 : sp.iSrcY + poWK->nSrcYOff, dfWeight);
9083 : }
9084 : #endif
9085 : #endif
9086 :
9087 474104 : const GPtrDiff_t iSrcOffset =
9088 474104 : sp.iSrcX +
9089 474104 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
9090 474104 : dfTotalWeight += dfWeight;
9091 :
9092 474104 : if (poWK->pafUnifiedSrcDensity != nullptr)
9093 : {
9094 0 : dfDensity +=
9095 0 : dfWeight *
9096 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
9097 : }
9098 : else
9099 : {
9100 474104 : dfDensity += dfWeight;
9101 : }
9102 :
9103 1818730 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9104 : {
9105 : // Returns pixel value if it is not no data.
9106 : double dfBandDensity;
9107 : double dfRealValue;
9108 : double dfImagValue;
9109 2689250 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
9110 : &dfBandDensity, &dfRealValue,
9111 : &dfImagValue) &&
9112 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
9113 : {
9114 0 : continue;
9115 : }
9116 : #ifdef DEBUG_VERBOSE
9117 : #if defined(DST_X) && defined(DST_Y)
9118 : if (iDstX + poWK->nDstXOff == DST_X &&
9119 : iDstY + poWK->nDstYOff == DST_Y)
9120 : {
9121 : CPLDebug("WARP", "value * weight = %.17g",
9122 : dfRealValue * dfWeight);
9123 : }
9124 : #endif
9125 : #endif
9126 :
9127 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
9128 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
9129 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
9130 1344620 : adfWeight[iBand] += dfWeight;
9131 : }
9132 : }
9133 : }
9134 :
9135 218919 : CPLFree(pahSourcePixel);
9136 :
9137 : /* --------------------------------------------------------------------
9138 : */
9139 : /* Update destination pixel value. */
9140 : /* --------------------------------------------------------------------
9141 : */
9142 218919 : bool bHasFoundDensity = false;
9143 218919 : const GPtrDiff_t iDstOffset =
9144 218919 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
9145 827838 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9146 : {
9147 608919 : if (adfWeight[iBand] > 0)
9148 : {
9149 : const double dfBandDensity =
9150 608909 : adfBandDensity[iBand] / adfWeight[iBand];
9151 608909 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
9152 : {
9153 608909 : bHasFoundDensity = true;
9154 608909 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
9155 608909 : adfRealValue[iBand],
9156 608909 : adfImagValue[iBand],
9157 : bAvoidNoDataSingleBand);
9158 : }
9159 : }
9160 : }
9161 :
9162 218919 : if (!bHasFoundDensity)
9163 10 : continue;
9164 :
9165 218909 : if (!bAvoidNoDataSingleBand)
9166 : {
9167 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
9168 : }
9169 :
9170 : /* --------------------------------------------------------------------
9171 : */
9172 : /* Update destination density/validity masks. */
9173 : /* --------------------------------------------------------------------
9174 : */
9175 218909 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
9176 :
9177 218909 : if (poWK->panDstValid != nullptr)
9178 : {
9179 11752 : CPLMaskSet(poWK->panDstValid, iDstOffset);
9180 : }
9181 : }
9182 :
9183 : /* --------------------------------------------------------------------
9184 : */
9185 : /* Report progress to the user, and optionally cancel out. */
9186 : /* --------------------------------------------------------------------
9187 : */
9188 1932 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
9189 0 : break;
9190 : }
9191 :
9192 : #ifdef CHECK_SUM_WITH_GEOS
9193 : GEOSGeom_destroy_r(hGEOSContext, hP1);
9194 : GEOSGeom_destroy_r(hGEOSContext, hP2);
9195 : OGRGeometry::freeGEOSContext(hGEOSContext);
9196 : #endif
9197 19 : CPLQuadTreeDestroy(hQuadTree);
9198 19 : }
|