Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001; // 1e-10
79 : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f; // 1e-9, single-precision variant
80 : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001; // 1e-9, double-precision variant
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
84 : static const int anGWKFilterRadius[] = { // Filter radius per resampling algorithm; indexed by the GDALResampleAlg value (see GWKGetFilterRadius()).
85 : 0, // Nearest neighbour
86 : 1, // Bilinear
87 : 2, // Cubic Convolution (Catmull-Rom)
88 : 2, // Cubic B-Spline
89 : 3, // Lanczos windowed sinc
90 : 0, // Average
91 : 0, // Mode
92 : 0, // Reserved GRA_Gauss=7
93 : 0, // Max
94 : 0, // Min
95 : 0, // Med
96 : 0, // Q1
97 : 0, // Q3
98 : 0, // Sum
99 : 0, // RMS
100 : };
101 :
102 : static double GWKBilinear(double dfX); // Forward declarations of the filter functions referenced by apfGWKFilter below.
103 : static double GWKCubic(double dfX);
104 : static double GWKBSpline(double dfX);
105 : static double GWKLanczosSinc(double dfX);
106 :
107 : static const FilterFuncType apfGWKFilter[] = { // Filter function per resampling algorithm, indexed like anGWKFilterRadius; nullptr where no function is registered.
108 : nullptr, // Nearest neighbour
109 : GWKBilinear, // Bilinear
110 : GWKCubic, // Cubic Convolution (Catmull-Rom)
111 : GWKBSpline, // Cubic B-Spline
112 : GWKLanczosSinc, // Lanczos windowed sinc
113 : nullptr, // Average
114 : nullptr, // Mode
115 : nullptr, // Reserved GRA_Gauss=7
116 : nullptr, // Max
117 : nullptr, // Min
118 : nullptr, // Med
119 : nullptr, // Q1
120 : nullptr, // Q3
121 : nullptr, // Sum
122 : nullptr, // RMS
123 : };
124 :
125 : // TODO(schwehr): Can we make these functions have a const * const arg?
126 : static double GWKBilinear4Values(double *padfVals); // Forward declarations of the 4-values filter variants referenced by apfGWKFilter4Values below.
127 : static double GWKCubic4Values(double *padfVals);
128 : static double GWKBSpline4Values(double *padfVals);
129 : static double GWKLanczosSinc4Values(double *padfVals);
130 :
131 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = { // Same layout as apfGWKFilter, holding the 4-values variants; nullptr where no function is registered.
132 : nullptr, // Nearest neighbour
133 : GWKBilinear4Values, // Bilinear
134 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
135 : GWKBSpline4Values, // Cubic B-Spline
136 : GWKLanczosSinc4Values, // Lanczos windowed sinc
137 : nullptr, // Average
138 : nullptr, // Mode
139 : nullptr, // Reserved GRA_Gauss=7
140 : nullptr, // Max
141 : nullptr, // Min
142 : nullptr, // Med
143 : nullptr, // Q1
144 : nullptr, // Q3
145 : nullptr, // Sum
146 : nullptr, // RMS
147 : };
148 :
149 21284 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg) // Returns the anGWKFilterRadius entry for eResampleAlg.
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
152 : "Bad size of anGWKFilterRadius"); // compile-time check: one table entry per value up to GRA_LAST_VALUE
153 21284 : return anGWKFilterRadius[eResampleAlg]; // NOTE(review): no run-time range check; eResampleAlg must lie in [0, GRA_LAST_VALUE]
154 : }
155 :
156 9579 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter entry for eResampleAlg; may be nullptr.
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
159 : "Bad size of apfGWKFilter"); // compile-time check: one table entry per value up to GRA_LAST_VALUE
160 9579 : return apfGWKFilter[eResampleAlg]; // NOTE(review): no run-time range check on eResampleAlg
161 : }
162 :
163 9579 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg) // Returns the apfGWKFilter4Values entry for eResampleAlg; may be nullptr.
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
166 : "Bad size of apfGWKFilter4Values"); // compile-time check: one table entry per value up to GRA_LAST_VALUE
167 9579 : return apfGWKFilter4Values[eResampleAlg]; // NOTE(review): no run-time range check on eResampleAlg
168 : }
169 :
170 : static CPLErr GWKGeneralCase(GDALWarpKernel *); // Forward declarations of the warp implementations; judging by the names, one per resampling / data type / mask combination (definitions are not visible in this excerpt).
171 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK); // only declared when INSTANTIATE_FLOAT64_SSE2_IMPL is defined
178 : #endif
179 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK); // only declared when INSTANTIATE_FLOAT64_SSE2_IMPL is defined
186 : #endif
187 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
193 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
194 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
195 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
196 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
199 :
200 : /************************************************************************/
201 : /* GWKJobStruct */
202 : /************************************************************************/
203 :
204 : struct GWKJobStruct // Per-job state passed (as void *) to a warp worker function; GWKRun() uses one instance per thread, GWKGenericMonoThread() a single one.
205 : {
206 : std::mutex &mutex; // taken by GWKProgressThread() around accesses to counter and stopFlag
207 : std::condition_variable &cv; // notified by GWKProgressThread() after each counter increment
208 : int counterSingleThreaded = 0; // progress counter used by GWKProgressMonoThread() in place of counter
209 : int &counter; // shared progress counter (bound to GWKThreadData::counter)
210 : bool &stopFlag; // shared interruption flag (bound to GWKThreadData::stopFlag)
211 : GDALWarpKernel *poWK = nullptr;
212 : int iYMin = 0; // start of the destination line range given to this job
213 : int iYMax = 0; // end of that range; GWKRun() hands out contiguous [iYMin, iYMax) slices — presumably exclusive, confirm in the worker functions
214 : int (*pfnProgress)(GWKJobStruct *psJob) = nullptr; // progress hook; returns TRUE if the computation must be interrupted
215 : void *pTransformerArg = nullptr; // transformer argument this job must use
216 : // used by GWKRun() to assign the proper pTransformerArg
217 : void (*pfnFunc)(void *) = nullptr; // actual worker, called by ThreadFuncAdapter() once pTransformerArg is set
218 :
219 3231 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
220 : int &counter_, bool &stopFlag_)
221 3231 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_) // only binds the four references; other members keep their defaults
222 : {
223 3231 : }
224 : };
225 :
226 : struct GWKThreadData // State shared by the jobs of a warp; allocated by GWKThreadsCreate() and deleted by GWKThreadsEnd().
227 : {
228 : std::unique_ptr<CPLJobQueue> poJobQueue{}; // stays null when no thread pool is used; GWKRun() then runs single-threaded
229 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{}; // job slots reused by GWKRun(), sized to the thread count
230 : int nMaxThreads{0};
231 : int counter{0}; // progress counter incremented by GWKProgressThread() under mutex
232 : bool stopFlag{false}; // set when the computation must be interrupted
233 : std::mutex mutex{};
234 : std::condition_variable cv{};
235 : bool bTransformerArgInputAssignedToThread{false}; // true while a worker thread is borrowing pTransformerArgInput
236 : void *pTransformerArgInput{
237 : nullptr}; // owned by calling layer. Not to be destroyed
238 : std::map<GIntBig, void *> mapThreadToTransformerArg{}; // CPLGetPID() value -> transformer argument used by that thread
239 : int nTotalThreadCountForThisRun = 0; // number of jobs submitted by the current GWKRun() call
240 : int nCurThreadCountForThisRun = 0; // number of those jobs that have entered ThreadFuncAdapter() so far
241 : };
242 :
243 : /************************************************************************/
244 : /* GWKProgressThread() */
245 : /************************************************************************/
246 :
247 : // Return TRUE if the computation must be interrupted. Progress hook for multi-threaded runs: increments the shared counter, reads the stop flag, then wakes one thread waiting on the condition variable.
248 36 : static int GWKProgressThread(GWKJobStruct *psJob)
249 : {
250 36 : bool stop = false;
251 : {
252 36 : std::lock_guard<std::mutex> lock(psJob->mutex); // counter and stopFlag are accessed under the mutex
253 36 : psJob->counter++;
254 36 : stop = psJob->stopFlag;
255 : }
256 36 : psJob->cv.notify_one(); // signalled after the lock has been released
257 :
258 36 : return stop;
259 : }
260 :
261 : /************************************************************************/
262 : /* GWKProgressMonoThread() */
263 : /************************************************************************/
264 :
265 : // Return TRUE if the computation must be interrupted. Progress hook for single-threaded runs: calls the kernel's pfnProgress directly with dfProgressBase + dfProgressScale * (count / iYMax).
266 446697 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
267 : {
268 446697 : GDALWarpKernel *poWK = psJob->poWK;
269 446697 : if (!poWK->pfnProgress(poWK->dfProgressBase +
270 446697 : poWK->dfProgressScale *
271 446697 : (++psJob->counterSingleThreaded /
272 446697 : static_cast<double>(psJob->iYMax)), // iYMax is nDstYSize here (set by GWKGenericMonoThread())
273 : "", poWK->pProgress))
274 : {
275 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
276 1 : psJob->stopFlag = true; // no lock taken: this hook is only installed by the single-threaded path
277 1 : return TRUE;
278 : }
279 446696 : return FALSE;
280 : }
281 :
282 : /************************************************************************/
283 : /* GWKGenericMonoThread() */
284 : /************************************************************************/
285 :
286 3206 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
287 : void (*pfnFunc)(void *pUserData)) // Runs pfnFunc synchronously on a single job covering destination lines [0, nDstYSize); returns CE_Failure if the stop flag ends up set.
288 : {
289 3206 : GWKThreadData td; // local, throw-away shared state just to satisfy the job's reference members
290 :
291 : // NOTE: the mutex is not used.
292 3206 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
293 3206 : job.poWK = poWK;
294 3206 : job.iYMin = 0;
295 3206 : job.iYMax = poWK->nDstYSize;
296 3206 : job.pfnProgress = GWKProgressMonoThread;
297 3206 : job.pTransformerArg = poWK->pTransformerArg; // the kernel's own transformer argument is used directly, no clone
298 3206 : job.counterSingleThreaded = td.counter;
299 3206 : pfnFunc(&job);
300 3206 : td.counter = job.counterSingleThreaded;
301 :
302 6412 : return td.stopFlag ? CE_Failure : CE_None;
303 : }
304 :
305 : /************************************************************************/
306 : /* GWKThreadsCreate() */
307 : /************************************************************************/
308 :
309 1811 : void *GWKThreadsCreate(char **papszWarpOptions,
310 : GDALTransformerFunc /* pfnTransformer */,
311 : void *pTransformerArg) // Allocates a GWKThreadData and returns it as void *; the result is to be released with GWKThreadsEnd().
312 : {
313 1811 : const int nThreads = GDALGetNumThreads(papszWarpOptions, "NUM_THREADS",
314 : GDAL_DEFAULT_MAX_THREAD_COUNT,
315 : /* bDefaultAllCPUs = */ false);
316 1811 : GWKThreadData *psThreadData = new GWKThreadData();
317 : auto poThreadPool =
318 1811 : nThreads > 1 ? GDALGetGlobalThreadPool(nThreads) : nullptr; // no pool requested for a single thread
319 1811 : if (poThreadPool)
320 : {
321 25 : psThreadData->nMaxThreads = nThreads;
322 25 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
323 : nThreads,
324 25 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
325 50 : psThreadData->counter, psThreadData->stopFlag))); // nThreads job slots, all bound to the shared mutex/cv/counter/stopFlag
326 :
327 25 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
328 25 : psThreadData->pTransformerArgInput = pTransformerArg; // only recorded on the multi-threaded path
329 : }
330 :
331 1811 : return psThreadData; // when no pool was obtained, poJobQueue stays null and GWKRun() falls back to GWKGenericMonoThread()
332 : }
333 :
334 : /************************************************************************/
335 : /* GWKThreadsEnd() */
336 : /************************************************************************/
337 :
338 1811 : void GWKThreadsEnd(void *psThreadDataIn) // Releases a GWKThreadData returned by GWKThreadsCreate(), destroying the per-thread transformers stored in its map; nullptr is accepted and ignored.
339 : {
340 1811 : if (psThreadDataIn == nullptr)
341 0 : return;
342 :
343 1811 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
344 1811 : if (psThreadData->poJobQueue)
345 : {
346 : // cppcheck-suppress constVariableReference
347 35 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
348 : {
349 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput); // the caller-owned transformer must not be in the map at this point
350 10 : GDALDestroyTransformer(pair.second);
351 : }
352 25 : psThreadData->poJobQueue.reset();
353 : }
354 1811 : delete psThreadData;
355 : }
356 :
357 : /************************************************************************/
358 : /* ThreadFuncAdapter() */
359 : /************************************************************************/
360 :
361 34 : static void ThreadFuncAdapter(void *pData) // Job entry point submitted by GWKRun(): picks (or clones) a transformer argument for the calling thread, stores it in the job, then runs the job's pfnFunc.
362 : {
363 34 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
364 34 : GWKThreadData *psThreadData =
365 34 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
366 :
367 : // Look if we have already a per-thread transformer
368 34 : void *pTransformerArg = nullptr;
369 34 : const GIntBig nThreadId = CPLGetPID(); // key into mapThreadToTransformerArg
370 :
371 : {
372 68 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
373 34 : ++psThreadData->nCurThreadCountForThisRun;
374 :
375 34 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
376 34 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
377 : {
378 0 : pTransformerArg = oIter->second;
379 : }
380 34 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
381 34 : psThreadData->nCurThreadCountForThisRun ==
382 34 : psThreadData->nTotalThreadCountForThisRun)
383 : {
384 : // If we are the last thread to be started, temporarily borrow the
385 : // original transformer
386 24 : psThreadData->bTransformerArgInputAssignedToThread = true;
387 24 : pTransformerArg = psThreadData->pTransformerArgInput;
388 24 : psThreadData->mapThreadToTransformerArg[nThreadId] =
389 : pTransformerArg;
390 : }
391 :
392 34 : if (pTransformerArg == nullptr)
393 : {
394 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
395 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
396 : }
397 : }
398 :
399 : // If no transformer assigned to current thread, instantiate one
400 34 : if (pTransformerArg == nullptr)
401 : {
402 : // This somehow assumes that GDALCloneTransformer() is thread-safe
403 : // which should normally be the case.
404 : pTransformerArg =
405 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput); // cloning happens outside the mutex
406 :
407 : // Lock for the stop flag and the transformer map.
408 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
409 10 : if (!pTransformerArg)
410 : {
411 0 : psJob->stopFlag = true; // NOTE(review): the job returns without bumping the counter or notifying cv; check how GWKRun()'s progress wait loop terminates in this case
412 0 : return;
413 : }
414 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg; // clones stay in the map until GWKThreadsEnd() destroys them
415 : }
416 :
417 34 : psJob->pTransformerArg = pTransformerArg;
418 34 : psJob->pfnFunc(pData);
419 :
420 : // Give back original transformer, if borrowed.
421 : {
422 68 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
423 34 : if (psThreadData->bTransformerArgInputAssignedToThread &&
424 27 : pTransformerArg == psThreadData->pTransformerArgInput)
425 : {
426 : psThreadData->mapThreadToTransformerArg.erase(
427 24 : psThreadData->mapThreadToTransformerArg.find(nThreadId)); // removed so GWKThreadsEnd() never destroys the caller-owned transformer
428 24 : psThreadData->bTransformerArgInputAssignedToThread = false;
429 : }
430 : }
431 : }
432 :
433 : /************************************************************************/
434 : /* GWKRun() */
435 : /************************************************************************/
436 :
437 3230 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
438 : void (*pfnFunc)(void *pUserData)) // Runs pfnFunc over the destination lines, split across the job queue when one exists, otherwise via GWKGenericMonoThread(); pszFuncName is only used in the debug message. Returns CE_Failure when interrupted.
439 :
440 : {
441 3230 : const int nDstYSize = poWK->nDstYSize;
442 :
443 3230 : CPLDebug("GDAL",
444 : "GDALWarpKernel()::%s() "
445 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
446 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
447 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
448 : poWK->nDstYSize);
449 :
450 3230 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress)) // initial progress report; also lets the caller cancel before any work starts
451 : {
452 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
453 0 : return CE_Failure;
454 : }
455 :
456 3230 : GWKThreadData *psThreadData =
457 : static_cast<GWKThreadData *>(poWK->psThreadData);
458 3230 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
459 : {
460 3206 : return GWKGenericMonoThread(poWK, pfnFunc); // no job queue: single-threaded path
461 : }
462 :
463 24 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2); // at least two destination lines per thread
464 : // Config option mostly useful for tests to be able to test multithreading
465 : // with small rasters
466 : const int nWarpChunkSize =
467 24 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
468 24 : if (nWarpChunkSize > 0)
469 : {
470 22 : GIntBig nChunks =
471 22 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize; // destination pixel count divided by the chunk size, computed in 64 bits
472 22 : if (nThreads > nChunks)
473 17 : nThreads = static_cast<int>(nChunks);
474 : }
475 24 : if (nThreads <= 0)
476 20 : nThreads = 1;
477 :
478 24 : CPLDebug("WARP", "Using %d threads", nThreads);
479 :
480 24 : auto &jobs = *psThreadData->threadJobs;
481 24 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
482 : // Fill-in job structures.
483 58 : for (int i = 0; i < nThreads; ++i)
484 : {
485 34 : auto &job = jobs[i];
486 34 : job.poWK = poWK;
487 34 : job.iYMin =
488 34 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads); // 64-bit product before the division
489 34 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
490 34 : nThreads);
491 34 : if (poWK->pfnProgress != GDALDummyProgress)
492 2 : job.pfnProgress = GWKProgressThread; // NOTE(review): never reset to nullptr, so a reused job slot keeps the hook from an earlier run that had a real progress callback
493 34 : job.pfnFunc = pfnFunc;
494 : }
495 :
496 : bool bStopFlag;
497 : {
498 : {
499 : // Important: do not run the SubmitJob() loop under the mutex
500 : // because in some cases (typically if the current thread has been
501 : // created by the GDAL global thread pool), the task will actually
502 : // be run synchronously by SubmitJob(), and as it tries to acquire
503 : // the mutex, that would result in a dead-lock
504 24 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
505 :
506 24 : psThreadData->nTotalThreadCountForThisRun = nThreads;
507 24 : psThreadData->nCurThreadCountForThisRun = 0; // NOTE(review): counter and stopFlag are not reset here; confirm whether one GWKThreadData can serve several GWKRun() calls
508 : }
509 :
510 : // Start jobs.
511 58 : for (int i = 0; i < nThreads; ++i)
512 : {
513 34 : auto &job = jobs[i];
514 34 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
515 : static_cast<void *>(&job));
516 : }
517 :
518 : /* --------------------------------------------------------------------
519 : */
520 : /* Report progress. */
521 : /* --------------------------------------------------------------------
522 : */
523 24 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
524 24 : if (poWK->pfnProgress != GDALDummyProgress)
525 : {
526 3 : while (psThreadData->counter < nDstYSize) // NOTE(review): stopFlag is not tested here; if a job stops early without notifying (see the clone-failure path in ThreadFuncAdapter()), verify this wait cannot block forever
527 : {
528 1 : psThreadData->cv.wait(lock); // woken by GWKProgressThread() after each counter increment
529 1 : if (!poWK->pfnProgress(poWK->dfProgressBase +
530 1 : poWK->dfProgressScale *
531 1 : (psThreadData->counter /
532 1 : static_cast<double>(nDstYSize)),
533 : "", poWK->pProgress))
534 : {
535 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
536 0 : psThreadData->stopFlag = true; // workers read this through GWKProgressThread()
537 0 : break;
538 : }
539 : }
540 :
541 2 : if (!psThreadData->stopFlag)
542 : {
543 2 : if (!poWK->pfnProgress(poWK->dfProgressBase +
544 2 : poWK->dfProgressScale,
545 : "", poWK->pProgress)) // final report at dfProgressBase + dfProgressScale
546 : {
547 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
548 1 : psThreadData->stopFlag = true;
549 : }
550 : }
551 : }
552 :
553 24 : bStopFlag = psThreadData->stopFlag; // NOTE(review): sampled before WaitCompletion(); a stopFlag set by a job after this point is not reflected in the return value
554 : }
555 :
556 : /* -------------------------------------------------------------------- */
557 : /* Wait for all jobs to complete. */
558 : /* -------------------------------------------------------------------- */
559 24 : psThreadData->poJobQueue->WaitCompletion();
560 :
561 24 : return bStopFlag ? CE_Failure : CE_None;
562 : }
563 :
564 : /************************************************************************/
565 : /* ==================================================================== */
566 : /* GDALWarpKernel */
567 : /* ==================================================================== */
568 : /************************************************************************/
569 :
570 : /**
571 : * \class GDALWarpKernel "gdalwarper.h"
572 : *
573 : * Low level image warping class.
574 : *
575 : * This class is responsible for low level image warping for one
576 : * "chunk" of imagery. The class is essentially a structure with all
577 : * data members public - primarily so that new special-case functions
578 : * can be added without changing the class declaration.
579 : *
580 : * Applications are normally intended to interact with warping facilities
581 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
582 : * theory be used directly if great care is taken in setting up the
583 : * control data.
584 : *
585 : * <h3>Design Issues</h3>
586 : *
587 : * The intention is that PerformWarp() would analyze the setup in terms
588 : * of the datatype, resampling type, and validity/density mask usage and
589 : * pick one of many specific implementations of the warping algorithm over
590 : * a continuum of optimization vs. generality. At one end there will be a
591 : * reference general purpose implementation of the algorithm that supports
592 : * any data type (working internally in double precision complex), all three
593 : * resampling types, and any or all of the validity/density masks. At the
594 : * other end would be highly optimized algorithms for common cases like
595 : * nearest neighbour resampling on GDT_UInt8 data with no masks.
596 : *
597 : * The full set of optimized versions have not been decided but we should
598 : * expect to have at least:
599 : * - One for each resampling algorithm for 8bit data with no masks.
600 : * - One for each resampling algorithm for float data with no masks.
601 : * - One for each resampling algorithm for float data with any/all masks
602 : * (essentially the generic case for just float data).
603 : * - One for each resampling algorithm for 8bit data with support for
604 : * input validity masks (per band or per pixel). This handles the common
605 : * case of nodata masking.
606 : * - One for each resampling algorithm for float data with support for
607 : * input validity masks (per band or per pixel). This handles the common
608 : * case of nodata masking.
609 : *
610 : * Some of the specializations would operate on all bands in one pass
611 : * (especially the ones without masking would do this), while others might
612 : * process each band individually to reduce code complexity.
613 : *
614 : * <h3>Masking Semantics</h3>
615 : *
616 : * A detailed explanation of the semantics of the validity and density masks,
617 : * and their effects on resampling kernels is needed here.
618 : */
619 :
620 : /************************************************************************/
621 : /* GDALWarpKernel Data Members */
622 : /************************************************************************/
623 :
624 : /**
625 : * \var GDALResampleAlg GDALWarpKernel::eResample;
626 : *
627 : * Resampling algorithm.
628 : *
629 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
630 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
631 : * GRA_Mode or GRA_Sum.
632 : *
633 : * This field is required. GRA_NearestNeighbour may be used as a default
634 : * value.
635 : */
636 :
637 : /**
638 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
639 : *
640 : * Working pixel data type.
641 : *
642 : * The datatype of pixels in the source image (papabySrcImage) and
643 : * destination image (papabyDstImage) buffers. Note that operations on
644 : * some data types (such as GDT_UInt8) may be much better optimized than other
645 : * less common cases.
646 : *
647 : * This field is required. It may not be GDT_Unknown.
648 : */
649 :
650 : /**
651 : * \var int GDALWarpKernel::nBands;
652 : *
653 : * Number of bands.
654 : *
655 : * The number of bands (layers) of imagery being warped. Determines the
656 : * number of entries in the papabySrcImage, papanBandSrcValid,
657 : * and papabyDstImage arrays.
658 : *
659 : * This field is required.
660 : */
661 :
662 : /**
663 : * \var int GDALWarpKernel::nSrcXSize;
664 : *
665 : * Source image width in pixels.
666 : *
667 : * This field is required.
668 : */
669 :
670 : /**
671 : * \var int GDALWarpKernel::nSrcYSize;
672 : *
673 : * Source image height in pixels.
674 : *
675 : * This field is required.
676 : */
677 :
678 : /**
679 : * \var double GDALWarpKernel::dfSrcXExtraSize;
680 : *
681 : * Number of pixels included in nSrcXSize that are present on the edges of
682 : * the area of interest to take into account the width of the kernel.
683 : *
684 : * This field is required.
685 : */
686 :
687 : /**
688 : * \var double GDALWarpKernel::dfSrcYExtraSize;
689 : *
690 : * Number of pixels included in nSrcYSize that are present on the edges of
691 : * the area of interest to take into account the height of the kernel.
692 : *
693 : * This field is required.
694 : */
695 :
696 : /**
697 : * \var GByte **GDALWarpKernel::papabySrcImage;
698 : *
699 : * Array of source image band data.
700 : *
701 : * This is an array (of size GDALWarpKernel::nBands) of pointers
702 : * to image data. Each individual band of image data is organized as a single
703 : * block of image data in left to right, then bottom to top order. The actual
704 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
705 : *
706 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
707 : * the second band with eWorkingDataType set to GDT_Float32 use code like
708 : * this:
709 : *
710 : * \code
711 : * float dfPixelValue;
712 : * int nBand = 2-1; // Band indexes are zero based.
713 : * int nPixel = 3; // Zero based.
714 : * int nLine = 4; // Zero based.
715 : *
716 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
717 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
718 : * assert( nBand >= 0 && nBand < poKern->nBands );
719 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
720 : * [nPixel + nLine * poKern->nSrcXSize];
721 : * \endcode
722 : *
723 : * This field is required.
724 : */
725 :
726 : /**
727 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
728 : *
729 : * Per band validity mask for source pixels.
730 : *
731 : * Array of pixel validity mask layers for each source band. Each of
732 : * the mask layers is the same size (in pixels) as the source image with
733 : * one bit per pixel. Note that it is legal (and common) for this to be
734 : * NULL indicating that none of the pixels are invalidated, or for some
735 : * band validity masks to be NULL in which case all pixels of the band are
736 : * valid. The following code can be used to test the validity of a particular
737 : * pixel.
738 : *
739 : * \code
740 : * int bIsValid = TRUE;
741 : * int nBand = 2-1; // Band indexes are zero based.
742 : * int nPixel = 3; // Zero based.
743 : * int nLine = 4; // Zero based.
744 : *
745 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
746 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
747 : * assert( nBand >= 0 && nBand < poKern->nBands );
748 : *
749 : * if( poKern->papanBandSrcValid != NULL
750 : * && poKern->papanBandSrcValid[nBand] != NULL )
751 : * {
752 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
753 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
754 : *
755 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
756 : * }
757 : * \endcode
758 : */
759 :
760 : /**
761 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
762 : *
763 : * Per pixel validity mask for source pixels.
764 : *
765 : * A single validity mask layer that applies to the pixels of all source
766 : * bands. It is accessed similarly to papanBandSrcValid, but without the
767 : * extra level of band indirection.
768 : *
769 : * This pointer may be NULL indicating that all pixels are valid.
770 : *
771 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
772 : * the pixel isn't considered to be valid unless both arrays indicate it is
773 : * valid.
774 : */
775 :
776 : /**
777 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
778 : *
779 : * Per pixel density mask for source pixels.
780 : *
781 : * A single density mask layer that applies to the pixels of all source
782 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
783 : * which this pixel should be allowed to contribute to the output result.
784 : *
785 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
786 : *
787 : * The density for a pixel may be accessed like this:
788 : *
789 : * \code
790 : * float fDensity = 1.0;
791 : * int nPixel = 3; // Zero based.
792 : * int nLine = 4; // Zero based.
793 : *
794 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
795 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
796 : * if( poKern->pafUnifiedSrcDensity != NULL )
797 : * fDensity = poKern->pafUnifiedSrcDensity
798 : * [nPixel + nLine * poKern->nSrcXSize];
799 : * \endcode
800 : */
801 :
802 : /**
803 : * \var int GDALWarpKernel::nDstXSize;
804 : *
805 : * Width of destination image in pixels.
806 : *
807 : * This field is required.
808 : */
809 :
810 : /**
811 : * \var int GDALWarpKernel::nDstYSize;
812 : *
813 : * Height of destination image in pixels.
814 : *
815 : * This field is required.
816 : */
817 :
818 : /**
819 : * \var GByte **GDALWarpKernel::papabyDstImage;
820 : *
821 : * Array of destination image band data.
822 : *
823 : * This is an array (of size GDALWarpKernel::nBands) of pointers
824 : * to image data. Each individual band of image data is organized as a single
825 : * block of image data in left to right, then bottom to top order. The actual
826 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
827 : *
828 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
829 : * the second band with eWorkingDataType set to GDT_Float32 use code like
830 : * this:
831 : *
832 : * \code
833 : * float dfPixelValue;
834 : * int nBand = 2-1; // Band indexes are zero based.
835 : * int nPixel = 3; // Zero based.
836 : * int nLine = 4; // Zero based.
837 : *
838 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
839 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
840 : * assert( nBand >= 0 && nBand < poKern->nBands );
841 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
842 : * [nPixel + nLine * poKern->nDstXSize];
843 : * \endcode
844 : *
845 : * This field is required.
846 : */
847 :
848 : /**
849 : * \var GUInt32 *GDALWarpKernel::panDstValid;
850 : *
851 : * Per pixel validity mask for destination pixels.
852 : *
853 : * A single validity mask layer that applies to the pixels of all destination
854 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
855 : * on the size of the destination image.
856 : *
857 : * This pointer may be NULL indicating that all pixels are valid.
858 : */
859 :
860 : /**
861 : * \var float *GDALWarpKernel::pafDstDensity;
862 : *
863 : * Per pixel density mask for destination pixels.
864 : *
865 : * A single density mask layer that applies to the pixels of all destination
866 : * bands. It contains values between 0.0 and 1.0.
867 : *
868 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
869 : *
870 : * The density for a pixel may be accessed like this:
871 : *
872 : * \code
873 : * float fDensity = 1.0;
874 : * int nPixel = 3; // Zero based.
875 : * int nLine = 4; // Zero based.
876 : *
877 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
878 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
879 : * if( poKern->pafDstDensity != NULL )
880 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
881 : * \endcode
882 : */
883 :
884 : /**
885 : * \var int GDALWarpKernel::nSrcXOff;
886 : *
887 : * X offset to source pixel coordinates for transformation.
888 : *
889 : * See pfnTransformer.
890 : *
891 : * This field is required.
892 : */
893 :
894 : /**
895 : * \var int GDALWarpKernel::nSrcYOff;
896 : *
897 : * Y offset to source pixel coordinates for transformation.
898 : *
899 : * See pfnTransformer.
900 : *
901 : * This field is required.
902 : */
903 :
904 : /**
905 : * \var int GDALWarpKernel::nDstXOff;
906 : *
907 : * X offset to destination pixel coordinates for transformation.
908 : *
909 : * See pfnTransformer.
910 : *
911 : * This field is required.
912 : */
913 :
914 : /**
915 : * \var int GDALWarpKernel::nDstYOff;
916 : *
917 : * Y offset to destination pixel coordinates for transformation.
918 : *
919 : * See pfnTransformer.
920 : *
921 : * This field is required.
922 : */
923 :
924 : /**
925 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
926 : *
927 : * Source/destination location transformer.
928 : *
929 : * The function to call to transform coordinates between source image
930 : * pixel/line coordinates and destination image pixel/line coordinates.
931 : * See GDALTransformerFunc() for details of the semantics of this function.
932 : *
933 : * The GDALWarpKernel algorithm will only ever use this transformer in
934 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
935 : * partial or complete scanlines of points in the destination image as
936 : * input. This means, among other things, that it is safe to use the
937 : * approximating transform GDALApproxTransform() as the transformation
938 : * function.
939 : *
940 : * Source and destination images may be subsets of a larger overall image.
941 : * The transformation algorithms will expect and return pixel/line coordinates
942 : * in terms of this larger image, so coordinates need to be offset by
943 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
944 : * passing to pfnTransformer, and after return from it.
945 : *
946 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
947 : * data to this function when it is called.
948 : *
949 : * This field is required.
950 : */
951 :
952 : /**
953 : * \var void *GDALWarpKernel::pTransformerArg;
954 : *
955 : * Callback data for pfnTransformer.
956 : *
957 : * This field may be NULL if not required for the pfnTransformer being used.
958 : */
959 :
960 : /**
961 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
962 : *
963 : * The function to call to report progress of the algorithm, and to check
964 : * for a requested termination of the operation. It operates according to
965 : * GDALProgressFunc() semantics.
966 : *
967 : * Generally speaking the progress function will be invoked for each
968 : * scanline of the destination buffer that has been processed.
969 : *
970 : * This field may be NULL (internally set to GDALDummyProgress()).
971 : */
972 :
973 : /**
974 : * \var void *GDALWarpKernel::pProgress;
975 : *
976 : * Callback data for pfnProgress.
977 : *
978 : * This field may be NULL if not required for the pfnProgress being used.
979 : */
980 :
981 : /************************************************************************/
982 : /* GDALWarpKernel() */
983 : /************************************************************************/
984 :
// Default constructor.
// Initialises every pointer member to nullptr, all sizes, offsets, radii and
// filter values to 0, dfXScale / dfYScale / dfProgressScale to 1.0,
// eResample to GRA_NearestNeighbour, eWorkingDataType to GDT_Unknown,
// pfnProgress to GDALDummyProgress and eTieStrategy to GWKTS_First.
// Members documented above as "required" still have to be set by the caller.
985 3848 : GDALWarpKernel::GDALWarpKernel()
986 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
987 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
988 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
989 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
990 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
991 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
992 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
993 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
994 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
995 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
996 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
997 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
998 3848 : eTieStrategy(GWKTS_First)
999 : {
1000 3848 : }
1001 :
1002 : /************************************************************************/
1003 : /* ~GDALWarpKernel() */
1004 : /************************************************************************/
1005 :
// Destructor. The body is empty: none of the raw pointer members is released
// here.
1006 3848 : GDALWarpKernel::~GDALWarpKernel()
1007 : {
1008 3848 : }
1009 :
1010 : /************************************************************************/
1011 : /* getArea() */
1012 : /************************************************************************/
1013 :
// A 2D point; getArea() uses .first as the X coordinate and .second as Y.
1014 : typedef std::pair<double, double> XYPair;
1015 :
// A polygon given as an ordered list of vertices.
1016 : typedef std::vector<XYPair> XYPoly;
1017 :
// Returns the unsigned area of a polygon, computed as
//   0.5 * | sum over i of x[i] * (y[i+1] - y[i-1]) |
// with the indices taken cyclically (shoelace-type formula). The first and
// last vertices are handled outside the loop so that the wrap-around needs no
// modulo.
//
// Precondition (not checked, the assertion below is commented out): poly must
// hold at least 2 points, since poly[1] and poly[nPointCount - 2] are accessed
// unconditionally.
//
// poly may or may not be closed.
1018 : // poly may or may not be closed.
1019 565915 : static double getArea(const XYPoly &poly)
1020 : {
1021 : // CPLAssert(poly.size() >= 2);
1022 565915 : const size_t nPointCount = poly.size();
// Term for vertex 0: its predecessor is the last vertex.
1023 : double dfAreaSum =
1024 565915 : poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
1025 :
// Terms for the interior vertices 1 .. nPointCount - 2.
1026 1787320 : for (size_t i = 1; i < nPointCount - 1; i++)
1027 : {
1028 1221400 : dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
1029 : }
1030 :
// Term for the last vertex: its successor is vertex 0.
1031 565915 : dfAreaSum += poly[nPointCount - 1].first *
1032 565915 : (poly[0].second - poly[nPointCount - 2].second);
1033 :
// The absolute value makes the result independent of the winding order.
1034 565915 : return 0.5 * std::fabs(dfAreaSum);
1035 : }
1036 :
1037 : /************************************************************************/
1038 : /* CanUse4SamplesFormula() */
1039 : /************************************************************************/
1040 :
// Tells whether the 4-sample variant of bilinear / cubic resampling applies.
//
// Returns true only when poWK->eResample is GRA_Bilinear or GRA_Cubic and both
// poWK->dfXScale and poWK->dfYScale are strictly greater than 0.5. For
// bilinear / cubic with a smaller scale, a debug message is emitted through
// CPLDebugOnce() and false is returned. Every other resampling method returns
// false without any message.
1041 4723 : static bool CanUse4SamplesFormula(const GDALWarpKernel *poWK)
1042 : {
1043 4723 : if (poWK->eResample == GRA_Bilinear || poWK->eResample == GRA_Cubic)
1044 : {
1045 : // Use 4-sample formula if we are not downsampling by more than a
1046 : // factor of 1:2
1047 2651 : if (poWK->dfXScale > 0.5 && poWK->dfYScale > 0.5)
1048 2215 : return true;
1049 436 : CPLDebugOnce("WARP",
1050 : "Not using 4-sample bilinear/bicubic formula because "
1051 : "XSCALE(=%f) and/or YSCALE(=%f) <= 0.5",
1052 : poWK->dfXScale, poWK->dfYScale);
1053 : }
1054 2508 : return false;
1055 : }
1056 :
1057 : /************************************************************************/
1058 : /* PerformWarp() */
1059 : /************************************************************************/
1060 :
1061 : /**
1062 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1063 : *
1064 : * This method performs the warp described in the GDALWarpKernel.
1065 : *
 * Steps, in order:
 * - Validate() is called; its error is returned unchanged.
 * - If the source window is empty (nSrcXSize <= 0 or nSrcYSize <= 0) the
 *   progress callback is invoked once and CE_None is returned.
 * - dfXScale / dfYScale are estimated by transforming unit squares sampled
 *   on a grid of the destination window, unless both XSCALE and YSCALE warp
 *   options are given. An explicit XSCALE / YSCALE always overrides the
 *   estimate for its axis.
 * - dfXFilter, dfYFilter, nXRadius, nYRadius, nFiltInitX and nFiltInitY are
 *   derived from anGWKFilterRadius[eResample] and the scales.
 * - The call is dispatched to a kernel chosen from eWorkingDataType,
 *   eResample and which validity / density masks are set.
 *
1066 : * @return CE_None on success or CE_Failure if an error occurs.
1067 : */
1068 :
1069 3844 : CPLErr GDALWarpKernel::PerformWarp()
1070 :
1071 : {
1072 3844 : const CPLErr eErr = Validate();
1073 :
1074 3844 : if (eErr != CE_None)
1075 1 : return eErr;
1076 :
// Nothing to warp when the source window is empty: report completion of this
// chunk to the progress callback and succeed, unless the callback asks to stop.
1077 : // See #2445 and #3079.
1078 3843 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1079 : {
1080 613 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1081 : {
1082 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1083 0 : return CE_Failure;
1084 : }
1085 613 : return CE_None;
1086 : }
1087 :
1088 : /* -------------------------------------------------------------------- */
1089 : /* Pre-calculate resampling scales and window sizes for filtering. */
1090 : /* -------------------------------------------------------------------- */
1091 :
// 0 means "not determined yet"; it is replaced by 1.0 further below if no
// estimate could be computed.
1092 3230 : dfXScale = 0.0;
1093 3230 : dfYScale = 0.0;
1094 :
1095 : // XSCALE and YSCALE per warping chunk is not necessarily ideal, in case of
1096 : // heterogeneous change in shapes.
1097 : // Best would probably be a per-pixel scale computation.
1098 3230 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1099 3230 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
// The estimation is skipped only when both options are provided.
1100 3230 : if (!pszXScale || !pszYScale)
1101 : {
1102 : // Sample points along a grid in the destination space
1103 3229 : constexpr int MAX_POINTS_PER_DIM = 10;
1104 3229 : const int nPointsX = std::min(MAX_POINTS_PER_DIM, nDstXSize);
1105 3229 : const int nPointsY = std::min(MAX_POINTS_PER_DIM, nDstYSize);
1106 3229 : constexpr int CORNER_COUNT_PER_SQUARE = 4;
// nPoints is the total number of corners pushed below (4 per sample point).
1107 3229 : const int nPoints = CORNER_COUNT_PER_SQUARE * nPointsX * nPointsY;
1108 6458 : std::vector<double> adfX;
1109 6458 : std::vector<double> adfY;
// NOTE(review): nPoints already includes the CORNER_COUNT_PER_SQUARE factor,
// so the four buffers below are sized for 4x more entries than the nPoints
// values actually pushed / transformed. Harmless, but looks unintended;
// confirm before changing.
1110 3229 : adfX.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1111 3229 : adfY.reserve(CORNER_COUNT_PER_SQUARE * nPoints);
1112 6458 : std::vector<double> adfZ(CORNER_COUNT_PER_SQUARE * nPoints);
1113 6458 : std::vector<int> abSuccess(CORNER_COUNT_PER_SQUARE * nPoints);
1114 31147 : for (int iY = 0; iY < nPointsY; iY++)
1115 : {
// The last row would start at nDstYSize; pull it back by one pixel so that
// its unit square [dfY, dfY + 1] ends at nDstYSize instead.
1116 27918 : const double dfYShift = (iY > 0 && iY == nPointsY - 1) ? -1.0 : 0.0;
1117 27918 : const double dfY =
1118 27918 : dfYShift + (nPointsY == 1 ? 0.0
1119 27701 : : static_cast<double>(iY) *
1120 27701 : nDstYSize / (nPointsY - 1));
1121 :
1122 296636 : for (int iX = 0; iX < nPointsX; iX++)
1123 : {
// Same adjustment for the last column.
1124 268718 : const double dfXShift =
1125 268718 : (iX > 0 && iX == nPointsX - 1) ? -1.0 : 0.0;
1126 :
1127 268718 : const double dfX =
1128 268718 : dfXShift + (nPointsX == 1 ? 0.0
1129 268503 : : static_cast<double>(iX) *
1130 268503 : nDstXSize / (nPointsX - 1));
1131 :
// Corner order within each group of 4:
//   +0 = (x, y), +1 = (x + 1, y), +2 = (x, y + 1), +3 = (x + 1, y + 1)
1132 : // Reproject a unit square at each sample point
1133 268718 : adfX.push_back(dfX);
1134 268718 : adfY.push_back(dfY);
1135 :
1136 268718 : adfX.push_back(dfX + 1);
1137 268718 : adfY.push_back(dfY);
1138 :
1139 268718 : adfX.push_back(dfX);
1140 268718 : adfY.push_back(dfY + 1);
1141 :
1142 268718 : adfX.push_back(dfX + 1);
1143 268718 : adfY.push_back(dfY + 1);
1144 : }
1145 : }
// Transform all corners in a single call, with the second argument (bDstToSrc
// per the pfnTransformer documentation) set to TRUE. The return value is not
// checked; only the per-point abSuccess flags are consulted below.
1146 3229 : pfnTransformer(pTransformerArg, TRUE, static_cast<int>(adfX.size()),
1147 : adfX.data(), adfY.data(), adfZ.data(), abSuccess.data());
1148 :
// One (X scale, Y scale) estimate per successfully transformed square.
1149 6458 : std::vector<XYPair> adfXYScales;
1150 3229 : adfXYScales.reserve(nPoints);
1151 271947 : for (int i = 0; i < nPoints; i += CORNER_COUNT_PER_SQUARE)
1152 : {
// A square is used only if all four of its corners were transformed.
1153 536242 : if (abSuccess[i + 0] && abSuccess[i + 1] && abSuccess[i + 2] &&
1154 267524 : abSuccess[i + 3])
1155 : {
1156 2140180 : const auto square = [](double x) { return x * x; };
1157 :
// Edge vectors of the transformed square: 0->1 and 2->3 are the images of the
// two horizontal edges, 0->2 and 1->3 the images of the two vertical edges.
1158 267522 : const double vx01 = adfX[i + 1] - adfX[i + 0];
1159 267522 : const double vy01 = adfY[i + 1] - adfY[i + 0];
1160 267522 : const double len01_sq = square(vx01) + square(vy01);
1161 :
1162 267522 : const double vx23 = adfX[i + 3] - adfX[i + 2];
1163 267522 : const double vy23 = adfY[i + 3] - adfY[i + 2];
1164 267522 : const double len23_sq = square(vx23) + square(vy23);
1165 :
1166 267522 : const double vx02 = adfX[i + 2] - adfX[i + 0];
1167 267522 : const double vy02 = adfY[i + 2] - adfY[i + 0];
1168 267522 : const double len02_sq = square(vx02) + square(vy02);
1169 :
1170 267522 : const double vx13 = adfX[i + 3] - adfX[i + 1];
1171 267522 : const double vy13 = adfY[i + 3] - adfY[i + 1];
1172 267522 : const double len13_sq = square(vx13) + square(vy13);
1173 :
1174 : // ~ 20 degree, heuristic
1175 267522 : constexpr double TAN_MODEST_ANGLE = 0.35;
1176 :
1177 : // 10%, heuristic
1178 267522 : constexpr double LENGTH_RELATIVE_TOLERANCE = 0.1;
1179 :
1180 : // Security margin to avoid division by zero (would only
1181 : // happen in case of degenerated coordinate transformation,
1182 : // or insane upsampling)
1183 267522 : constexpr double EPSILON = 1e-10;
1184 :
// The first two tests also require vx01 and vx23 to be positive, since
// std::fabs() is never negative. The last two tests require opposite edges
// to have similar squared lengths.
1185 : // Does the transformed square look like an almost non-rotated
1186 : // quasi-rectangle ?
1187 267522 : if (std::fabs(vy01) < TAN_MODEST_ANGLE * vx01 &&
1188 260283 : std::fabs(vy23) < TAN_MODEST_ANGLE * vx23 &&
1189 260256 : std::fabs(len01_sq - len23_sq) <
1190 260256 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len01_sq) &&
1191 260143 : std::fabs(len02_sq - len13_sq) <
1192 260143 : LENGTH_RELATIVE_TOLERANCE * std::fabs(len02_sq))
1193 : {
1194 : // Using a geometric average here of lenAB_sq and lenCD_sq,
1195 : // hence a sqrt(), and as this is still a squared value,
1196 : // we need another sqrt() to get a distance.
1197 : const double dfXLength =
1198 260128 : std::sqrt(std::sqrt(len01_sq * len23_sq));
1199 : const double dfYLength =
1200 260128 : std::sqrt(std::sqrt(len02_sq * len13_sq));
1201 260128 : if (dfXLength > EPSILON && dfYLength > EPSILON)
1202 : {
// Scale = destination pixel size (1) / corresponding length in source pixels.
1203 260128 : const double dfThisXScale = 1.0 / dfXLength;
1204 260128 : const double dfThisYScale = 1.0 / dfYLength;
1205 260128 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1206 260128 : }
1207 : }
1208 : else
1209 : {
// Corners are listed as 0, 1, 3, 2 so that they follow the outline of the
// quadrilateral instead of crossing it.
1210 : // If not, then consider the area of the transformed unit
1211 : // square to determine the X/Y scales.
1212 7394 : const XYPoly poly{{adfX[i + 0], adfY[i + 0]},
1213 7394 : {adfX[i + 1], adfY[i + 1]},
1214 7394 : {adfX[i + 3], adfY[i + 3]},
1215 29576 : {adfX[i + 2], adfY[i + 2]}};
1216 7394 : const double dfSrcArea = getArea(poly);
1217 7394 : const double dfFactor = std::sqrt(dfSrcArea);
1218 7394 : if (dfFactor > EPSILON)
1219 : {
// The same isotropic scale is used for both axes in this case.
1220 7394 : const double dfThisXScale = 1.0 / dfFactor;
1221 7394 : const double dfThisYScale = dfThisXScale;
1222 7394 : adfXYScales.push_back({dfThisXScale, dfThisYScale});
1223 : }
1224 : }
1225 : }
1226 : }
1227 :
1228 3229 : if (!adfXYScales.empty())
1229 : {
1230 : // Sort by increasing xscale * yscale
1231 3229 : std::sort(adfXYScales.begin(), adfXYScales.end(),
1232 1456370 : [](const XYPair &a, const XYPair &b)
1233 1456370 : { return a.first * a.second < b.first * b.second; });
1234 :
1235 : // Compute the per-axis maximum of scale
1236 3229 : double dfXMax = 0;
1237 3229 : double dfYMax = 0;
1238 270751 : for (const auto &[dfX, dfY] : adfXYScales)
1239 : {
1240 267522 : dfXMax = std::max(dfXMax, dfX);
1241 267522 : dfYMax = std::max(dfYMax, dfY);
1242 : }
1243 :
1244 : // Now eliminate outliers, defined as ones whose value is < 10% of
1245 : // the maximum value, typically found at a polar discontinuity, and
1246 : // compute the average of non-outlier values.
1247 3229 : dfXScale = 0;
1248 3229 : dfYScale = 0;
1249 3229 : int i = 0;
1250 3229 : constexpr double THRESHOLD = 0.1; // 10%, rather arbitrary
1251 270751 : for (const auto &[dfX, dfY] : adfXYScales)
1252 : {
1253 267522 : if (dfX > THRESHOLD * dfXMax && dfY > THRESHOLD * dfYMax)
1254 : {
// Incremental mean: mean += (value - mean) / count.
1255 264634 : ++i;
1256 264634 : const double dfXDelta = dfX - dfXScale;
1257 264634 : const double dfYDelta = dfY - dfYScale;
1258 264634 : const double dfInvI = 1.0 / i;
1259 264634 : dfXScale += dfXDelta * dfInvI;
1260 264634 : dfYScale += dfYDelta * dfInvI;
1261 : }
1262 : }
1263 : }
1264 : }
1265 :
// Snaps a scale below 1 to 1/N when 1/scale lies within 0.05 of the integer N.
// Scales >= 1 are returned unchanged.
1266 : // Round to closest integer reciprocal scale if we are very close to it
1267 : const auto RoundToClosestIntegerReciprocalScaleIfCloseEnough =
1268 6460 : [](double dfScale)
1269 : {
1270 6460 : if (dfScale < 1.0)
1271 : {
1272 2604 : double dfReciprocalScale = 1.0 / dfScale;
1273 2604 : const int nReciprocalScale =
1274 2604 : static_cast<int>(dfReciprocalScale + 0.5);
1275 2604 : if (fabs(dfReciprocalScale - nReciprocalScale) < 0.05)
1276 2151 : dfScale = 1.0 / nReciprocalScale;
1277 : }
1278 6460 : return dfScale;
1279 : };
1280 :
// Fall back to a scale of 1 when no usable estimate was produced.
1281 3230 : if (dfXScale <= 0)
1282 1 : dfXScale = 1.0;
1283 3230 : if (dfYScale <= 0)
1284 1 : dfYScale = 1.0;
1285 :
1286 3230 : dfXScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfXScale);
1287 3230 : dfYScale = RoundToClosestIntegerReciprocalScaleIfCloseEnough(dfYScale);
1288 :
// Explicit warp options win over the estimate and are taken as-is (they are
// assigned after the rounding step above).
1289 3230 : if (pszXScale != nullptr)
1290 1 : dfXScale = CPLAtof(pszXScale);
1291 3230 : if (pszYScale != nullptr)
1292 1 : dfYScale = CPLAtof(pszYScale);
1293 :
1294 3230 : if (!pszXScale || !pszYScale)
1295 3229 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1296 :
1297 3230 : const int bUse4SamplesFormula = CanUse4SamplesFormula(this);
1298 :
// The check applies to cubic spline and Lanczos, and to cubic / bilinear when
// the 4-sample formula is not used: the EXTRA_ELTS warp option must then be
// equal to WARP_EXTRA_ELTS.
1299 : // Safety check for callers that would use GDALWarpKernel without using
1300 : // GDALWarpOperation.
1301 3167 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1302 3102 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1303 6460 : !bUse4SamplesFormula)) &&
1304 346 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1305 : WARP_EXTRA_ELTS)
1306 : {
1307 0 : CPLError(CE_Failure, CPLE_AppDefined,
1308 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1309 : "their end. "
1310 : "See GDALWarpKernel class definition. If this condition is "
1311 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1312 : WARP_EXTRA_ELTS);
1313 0 : return CE_Failure;
1314 : }
1315 :
// eResample was range-checked against anGWKFilterRadius by Validate().
1316 3230 : dfXFilter = anGWKFilterRadius[eResample];
1317 3230 : dfYFilter = anGWKFilterRadius[eResample];
1318 :
// When downsampling (scale < 1) the radius is enlarged by 1 / scale, rounded
// up; otherwise the nominal filter radius is used.
1319 3230 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1320 2636 : : static_cast<int>(dfXFilter);
1321 3230 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1322 2658 : : static_cast<int>(dfYFilter);
1323 :
// ((radius + 1) % 2) is 1 for an even nominal radius and 0 for an odd one.
1324 : // Filter window offset depends on the parity of the kernel radius.
1325 3230 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1326 3230 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1327 :
1328 3230 : bApplyVerticalShift =
1329 3230 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1330 3230 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1331 3230 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1332 :
1333 : /* -------------------------------------------------------------------- */
1334 : /* Set up resampling functions. */
1335 : /* -------------------------------------------------------------------- */
// The USE_GENERAL_CASE warp option bypasses every specialised kernel below.
1336 3230 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1337 12 : return GWKGeneralCase(this);
1338 :
// True when no source validity / density mask and no destination validity
// mask is set. pafDstDensity is not part of the test.
1339 3218 : const bool bNoMasksOrDstDensityOnly =
1340 3208 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1341 6426 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1342 :
// The tests below are evaluated in order and the first match wins, so the
// more specific "no masks" kernels must stay ahead of the masked ones.
1343 3218 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour &&
1344 : bNoMasksOrDstDensityOnly)
1345 954 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1346 :
1347 2264 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Bilinear &&
1348 : bNoMasksOrDstDensityOnly)
1349 132 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1350 :
1351 2132 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_Cubic &&
1352 : bNoMasksOrDstDensityOnly)
1353 852 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1354 :
1355 1280 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_CubicSpline &&
1356 : bNoMasksOrDstDensityOnly)
1357 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1358 :
1359 1268 : if (eWorkingDataType == GDT_UInt8 && eResample == GRA_NearestNeighbour)
1360 363 : return GWKNearestByte(this);
1361 :
1362 905 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1363 155 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1364 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1365 :
1366 891 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1367 : bNoMasksOrDstDensityOnly)
1368 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1369 :
1370 886 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1371 : bNoMasksOrDstDensityOnly)
1372 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1373 :
1374 880 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1375 : bNoMasksOrDstDensityOnly)
1376 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1377 :
1378 875 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1379 : bNoMasksOrDstDensityOnly)
1380 14 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1381 :
1382 861 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1383 : bNoMasksOrDstDensityOnly)
1384 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1385 :
1386 856 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1387 : bNoMasksOrDstDensityOnly)
1388 7 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1389 :
1390 849 : if (eWorkingDataType == GDT_Int8 && eResample == GRA_NearestNeighbour)
1391 9 : return GWKNearestInt8(this);
1392 :
1393 840 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1394 40 : return GWKNearestShort(this);
1395 :
1396 800 : if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1397 10 : return GWKNearestUnsignedShort(this);
1398 :
1399 790 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1400 : bNoMasksOrDstDensityOnly)
1401 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1402 :
1403 779 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1404 51 : return GWKNearestFloat(this);
1405 :
1406 728 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1407 : bNoMasksOrDstDensityOnly)
1408 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1409 :
1410 724 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1411 : bNoMasksOrDstDensityOnly)
1412 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1413 :
1414 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1415 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1416 : bNoMasksOrDstDensityOnly)
1417 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1418 :
1419 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1420 : bNoMasksOrDstDensityOnly)
1421 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1422 : #endif
1423 :
// Average, RMS, mode, max, min, median, Q1 and Q3 all share GWKAverageOrMode().
1424 715 : if (eResample == GRA_Average)
1425 160 : return GWKAverageOrMode(this);
1426 :
1427 555 : if (eResample == GRA_RMS)
1428 9 : return GWKAverageOrMode(this);
1429 :
1430 546 : if (eResample == GRA_Mode)
1431 45 : return GWKAverageOrMode(this);
1432 :
1433 501 : if (eResample == GRA_Max)
1434 6 : return GWKAverageOrMode(this);
1435 :
1436 495 : if (eResample == GRA_Min)
1437 5 : return GWKAverageOrMode(this);
1438 :
1439 490 : if (eResample == GRA_Med)
1440 6 : return GWKAverageOrMode(this);
1441 :
1442 484 : if (eResample == GRA_Q1)
1443 10 : return GWKAverageOrMode(this);
1444 :
1445 474 : if (eResample == GRA_Q3)
1446 5 : return GWKAverageOrMode(this);
1447 :
1448 469 : if (eResample == GRA_Sum)
1449 19 : return GWKSumPreserving(this);
1450 :
// Remaining combinations: real data types use GWKRealCase(), complex ones
// fall through to GWKGeneralCase().
1451 450 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1452 : {
1453 223 : return GWKRealCase(this);
1454 : }
1455 :
1456 227 : return GWKGeneralCase(this);
1457 : }
1458 :
1459 : /************************************************************************/
1460 : /* Validate() */
1461 : /************************************************************************/
1462 :
1463 : /**
1464 : * \fn CPLErr GDALWarpKernel::Validate()
1465 : *
1466 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1467 : * (and return CE_Failure) if the configuration is considered to be
1468 : * invalid for some reason.
1469 : *
1470 : * This method will also do some standard defaulting such as setting
1471 : * pfnProgress to GDALDummyProgress() if it is NULL.
 *
 * NOTE(review): the body below does not touch pfnProgress; in this file
 * GDALDummyProgress is installed by the constructor. Confirm whether the
 * paragraph above is still accurate.
 *
 * Checks actually performed here:
 * - eResample must be a valid index into anGWKFilterRadius.
 * - If the EXCLUDED_VALUES warp option is set, its number of values must be
 *   a multiple of nBands. The values are then appended, one tuple of nBands
 *   values at a time, to m_aadfExcludedValues.
1472 : *
1473 : * @return CE_None on success or CE_Failure if an error is detected.
1474 : */
1475 :
1476 3844 : CPLErr GDALWarpKernel::Validate()
1477 :
1478 : {
// The cast to size_t also makes a negative enum value fail the test.
1479 3844 : if (static_cast<size_t>(eResample) >=
1480 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1481 : {
1482 0 : CPLError(CE_Failure, CPLE_AppDefined,
1483 : "Unsupported resampling method %d.",
1484 0 : static_cast<int>(eResample));
1485 0 : return CE_Failure;
1486 : }
1487 :
1488 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1489 : // be ignored as contributing source pixels during resampling. Only taken into account by
1490 : // Average currently
1491 : const char *pszExcludedValues =
1492 3844 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1493 3844 : if (pszExcludedValues)
1494 : {
// Parentheses and commas are all passed as delimiters, so tuple grouping is
// presumably recovered from the value count alone; confirm against
// CSLTokenizeString2().
1495 : const CPLStringList aosTokens(
1496 18 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
// NOTE(review): nBands is not checked before being used as a modulus here;
// nBands == 0 (the constructor default) would divide by zero. Confirm that
// callers always set nBands >= 1 before using EXCLUDED_VALUES.
1497 18 : if ((aosTokens.size() % nBands) != 0)
1498 : {
1499 1 : CPLError(CE_Failure, CPLE_AppDefined,
1500 : "EXCLUDED_VALUES should contain one or several tuples of "
1501 : "%d values formatted like <R>,<G>,<B> or "
1502 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1503 : "tuples",
1504 : nBands);
1505 1 : return CE_Failure;
1506 : }
// NOTE(review): m_aadfExcludedValues is appended to without being cleared
// first, so calling Validate() again on the same object would add the same
// tuples a second time. Confirm whether that can happen.
1507 34 : std::vector<double> adfTuple;
1508 68 : for (int i = 0; i < aosTokens.size(); ++i)
1509 : {
1510 51 : adfTuple.push_back(CPLAtof(aosTokens[i]));
// A tuple is complete every nBands values.
1511 51 : if (((i + 1) % nBands) == 0)
1512 : {
1513 17 : m_aadfExcludedValues.push_back(adfTuple);
1514 17 : adfTuple.clear();
1515 : }
1516 : }
1517 : }
1518 :
1519 3843 : return CE_None;
1520 : }
1521 :
1522 : /************************************************************************/
1523 : /* GWKOverlayDensity() */
1524 : /* */
1525 : /* Compute the final density for the destination pixel. This */
1526 : /* is a function of the overlay density (passed in) and the */
1527 : /* original density. */
1528 : /************************************************************************/
1529 :
// Updates poWK->pafDstDensity[iDstOffset] in place with
//   new = 1 - (1 - dfDensity) * (1 - old)
// Nothing is done when dfDensity is below 0.0001 or when the kernel has no
// destination density buffer. The computation is carried out in float.
1530 17762100 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1531 : double dfDensity)
1532 : {
1533 17762100 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1534 13309900 : return;
1535 :
1536 4452160 : poWK->pafDstDensity[iDstOffset] =
1537 4452160 : 1.0f -
1538 4452160 : (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1539 : }
1540 :
1541 : /************************************************************************/
1542 : /* GWKRoundValueT() */
1543 : /************************************************************************/
1544 :
// Rounding helper selected on the signedness of the target type T.
// The primary template only declares eval(); the two partial specialisations
// below provide the implementations.
1545 : template <class T, class U, bool is_signed> struct sGWKRoundValueT
1546 : {
1547 : static T eval(U);
1548 : };
1549 :
// Signed target: floor(value + 0.5), so that negative values are rounded
// rather than truncated toward zero by the cast.
1550 : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
1551 : {
1552 791525 : static T eval(U value)
1553 : {
1554 791525 : return static_cast<T>(floor(value + U(0.5)));
1555 : }
1556 : };
1557 :
// Unsigned target: add 0.5 and let the cast truncate.
1558 : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
1559 : {
1560 152219851 : static T eval(U value)
1561 : {
1562 152219851 : return static_cast<T>(value + U(0.5));
1563 : }
1564 : };
1565 :
// Converts value (of type U) to T using the rounding rule matching the
// signedness of T. No range check is done here.
1566 153011376 : template <class T, class U> static T GWKRoundValueT(U value)
1567 : {
1568 153011376 : return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1569 : }
1570 :
// double -> float: plain narrowing conversion, no rounding to integer.
1571 268974 : template <> float GWKRoundValueT<float, double>(double value)
1572 : {
1573 268974 : return static_cast<float>(value);
1574 : }
1575 :
1576 : #ifdef notused
1577 : template <> double GWKRoundValueT<double, double>(double value)
1578 : {
1579 : return value;
1580 : }
1581 : #endif
1582 :
1583 : /************************************************************************/
1584 : /* GWKClampValueT() */
1585 : /************************************************************************/
1586 :
// Converts value (of type U) to T with saturation: values below
// NumericLimits<T>::min() yield min(), values above max() yield max(), and
// everything else is rounded with GWKRoundValueT<T, U>().
1587 145841452 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
1588 : {
1589 145841452 : if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
1590 571307 : return cpl::NumericLimits<T>::min();
1591 145270054 : else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
1592 776829 : return cpl::NumericLimits<T>::max();
1593 : else
1594 144493934 : return GWKRoundValueT<T, U>(value);
1595 : }
1596 :
// double -> float: plain narrowing conversion, no clamping and no rounding.
1597 718915 : template <> float GWKClampValueT<float, double>(double dfValue)
1598 : {
1599 718915 : return static_cast<float>(dfValue);
1600 : }
1601 :
1602 : #ifdef notused
1603 : template <> double GWKClampValueT<double, double>(double dfValue)
1604 : {
1605 : return dfValue;
1606 : }
1607 : #endif
1608 :
1609 : /************************************************************************/
1610 : /* AvoidNoData() */
1611 : /************************************************************************/
1612 :
// Replaces pDst[iDstOffset] by a neighbouring representable value of T.
//
// Integer types: the value is decremented by one, except when it already is
// the lowest representable value, in which case it becomes lowest + 1.
// Other (floating-point) types: the value is moved one step toward max() with
// nextafter(), except when it already is max(), in which case it is moved one
// step toward 0.
//
// The caller decides when the replacement is needed; no nodata comparison is
// done here.
1613 1283 : template <class T> inline void AvoidNoData(T *pDst, GPtrDiff_t iDstOffset)
1614 : {
1615 : if constexpr (cpl::NumericLimits<T>::is_integer)
1616 : {
1617 1027 : if (pDst[iDstOffset] == static_cast<T>(cpl::NumericLimits<T>::lowest()))
1618 : {
// Decrementing the lowest value would overflow, so step up instead.
1619 515 : pDst[iDstOffset] =
1620 515 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1621 : }
1622 : else
1623 512 : pDst[iDstOffset]--;
1624 : }
1625 : else
1626 : {
1627 256 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1628 : {
// The "using" declaration lets a nextafter() overload for T be found either
// in std or by argument-dependent lookup.
1629 : using std::nextafter;
1630 0 : pDst[iDstOffset] = nextafter(pDst[iDstOffset], static_cast<T>(0));
1631 : }
1632 : else
1633 : {
1634 : using std::nextafter;
1635 256 : pDst[iDstOffset] =
1636 256 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1637 : }
1638 : }
1639 1283 : }
1640 :
1641 : /************************************************************************/
1642 : /* AvoidNoData() */
1643 : /************************************************************************/
1644 :
// Single-band variant: if poWK->padfDstNoDataReal is set and the destination
// pixel of band iBand at iDstOffset equals the nodata value of that band, the
// pixel is nudged to a neighbouring value with AvoidNoData(pDst, iDstOffset).
//
// A CE_Warning is emitted the first time this happens for a given kernel; the
// bWarnedAboutDstNoDataReplacement flag is set through a const_cast because
// poWK is received as a pointer to const.
//
// T is the C++ type used to reinterpret the band buffer.
1645 : template <class T>
1646 25539331 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1647 : GPtrDiff_t iDstOffset)
1648 : {
1649 25539331 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1650 25539331 : T *pDst = reinterpret_cast<T *>(pabyDst);
1651 :
1652 25539331 : if (poWK->padfDstNoDataReal != nullptr &&
1653 11380639 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1654 : {
1655 640 : AvoidNoData(pDst, iDstOffset);
1656 :
1657 640 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1658 : {
1659 40 : const_cast<GDALWarpKernel *>(poWK)
1660 : ->bWarnedAboutDstNoDataReplacement = true;
// The first %g is the nodata value (equal to the value before replacement),
// the second one is the pixel value after replacement.
1661 40 : CPLError(CE_Warning, CPLE_AppDefined,
1662 : "Value %g in the source dataset has been changed to %g "
1663 : "in the destination dataset to avoid being treated as "
1664 : "NoData. To avoid this, select a different NoData value "
1665 : "for the destination dataset.",
1666 40 : poWK->padfDstNoDataReal[iBand],
1667 40 : static_cast<double>(pDst[iDstOffset]));
1668 : }
1669 : }
1670 25539331 : }
1671 :
1672 : /************************************************************************/
1673 : /* GWKAvoidNoDataMultiBand() */
1674 : /************************************************************************/
1675 :
// Multi-band variant: the pixel at iDstOffset is altered only when every band
// equals its own destination nodata value. In that case each band is nudged
// with AvoidNoData(), and a CE_Warning listing the comma-separated values
// before and after replacement is emitted the first time this happens for a
// given kernel.
//
// Nothing is done when poWK->padfDstNoDataReal is null. T is the C++ type
// used to reinterpret the band buffers.
1676 : template <class T>
1677 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1678 : GPtrDiff_t iDstOffset)
1679 : {
1680 524573 : T **ppDst = reinterpret_cast<T **>(poWK->papabyDstImage);
1681 524573 : if (poWK->padfDstNoDataReal != nullptr)
1682 : {
// Return as soon as one band differs from its nodata value.
1683 208615 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1684 : {
1685 208294 : if (poWK->padfDstNoDataReal[iBand] !=
1686 208294 : static_cast<double>(ppDst[iBand][iDstOffset]))
1687 205830 : return;
1688 : }
// All bands match their nodata value: nudge every one of them.
1689 964 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1690 : {
1691 643 : AvoidNoData(ppDst[iBand], iDstOffset);
1692 : }
1693 :
1694 321 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1695 : {
// The flag is written through a const_cast because poWK is a pointer to const.
1696 21 : const_cast<GDALWarpKernel *>(poWK)
1697 : ->bWarnedAboutDstNoDataReplacement = true;
// valueSrc collects the nodata values (equal to the values before
// replacement), valueDst the values after replacement.
1698 42 : std::string valueSrc, valueDst;
1699 64 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
1700 : {
1701 43 : if (!valueSrc.empty())
1702 : {
1703 22 : valueSrc += ',';
1704 22 : valueDst += ',';
1705 : }
1706 43 : valueSrc += CPLSPrintf("%g", poWK->padfDstNoDataReal[iBand]);
1707 43 : valueDst += CPLSPrintf(
1708 43 : "%g", static_cast<double>(ppDst[iBand][iDstOffset]));
1709 : }
1710 21 : CPLError(CE_Warning, CPLE_AppDefined,
1711 : "Value %s in the source dataset has been changed to %s "
1712 : "in the destination dataset to avoid being treated as "
1713 : "NoData. To avoid this, select a different NoData value "
1714 : "for the destination dataset.",
1715 : valueSrc.c_str(), valueDst.c_str());
1716 : }
1717 : }
1718 : }
1719 :
1720 : /************************************************************************/
1721 : /* GWKAvoidNoDataMultiBand() */
1722 : /************************************************************************/
1723 :
// Runtime dispatcher for the templated GWKAvoidNoDataMultiBand<T>(): selects
// the instantiation whose element type matches poWK->eWorkingDataType and
// forwards poWK and iDstOffset unchanged.
//
// Complex data types, GDT_Unknown and GDT_TypeCount are listed explicitly and
// do nothing.
1724 524573 : static void GWKAvoidNoDataMultiBand(const GDALWarpKernel *poWK,
1725 : GPtrDiff_t iDstOffset)
1726 : {
1727 524573 : switch (poWK->eWorkingDataType)
1728 : {
1729 523997 : case GDT_UInt8:
1730 523997 : GWKAvoidNoDataMultiBand<std::uint8_t>(poWK, iDstOffset);
1731 523997 : break;
1732 :
1733 64 : case GDT_Int8:
1734 64 : GWKAvoidNoDataMultiBand<std::int8_t>(poWK, iDstOffset);
1735 64 : break;
1736 :
1737 64 : case GDT_Int16:
1738 64 : GWKAvoidNoDataMultiBand<std::int16_t>(poWK, iDstOffset);
1739 64 : break;
1740 :
1741 64 : case GDT_UInt16:
1742 64 : GWKAvoidNoDataMultiBand<std::uint16_t>(poWK, iDstOffset);
1743 64 : break;
1744 :
1745 64 : case GDT_Int32:
1746 64 : GWKAvoidNoDataMultiBand<std::int32_t>(poWK, iDstOffset);
1747 64 : break;
1748 :
1749 64 : case GDT_UInt32:
1750 64 : GWKAvoidNoDataMultiBand<std::uint32_t>(poWK, iDstOffset);
1751 64 : break;
1752 :
1753 64 : case GDT_Int64:
1754 64 : GWKAvoidNoDataMultiBand<std::int64_t>(poWK, iDstOffset);
1755 64 : break;
1756 :
1757 64 : case GDT_UInt64:
1758 64 : GWKAvoidNoDataMultiBand<std::uint64_t>(poWK, iDstOffset);
1759 64 : break;
1760 :
1761 0 : case GDT_Float16:
1762 0 : GWKAvoidNoDataMultiBand<GFloat16>(poWK, iDstOffset);
1763 0 : break;
1764 :
1765 64 : case GDT_Float32:
1766 64 : GWKAvoidNoDataMultiBand<float>(poWK, iDstOffset);
1767 64 : break;
1768 :
1769 64 : case GDT_Float64:
1770 64 : GWKAvoidNoDataMultiBand<double>(poWK, iDstOffset);
1771 64 : break;
1772 :
// No nodata fix-up is performed for complex or invalid working types.
1773 0 : case GDT_CInt16:
1774 : case GDT_CInt32:
1775 : case GDT_CFloat16:
1776 : case GDT_CFloat32:
1777 : case GDT_CFloat64:
1778 : case GDT_Unknown:
1779 : case GDT_TypeCount:
1780 0 : break;
1781 : }
1782 524573 : }
1783 :
1784 : /************************************************************************/
1785 : /* GWKSetPixelValueRealT() */
1786 : /************************************************************************/
1787 :
// Stores a real (non-complex) value of type T into band iBand of the
// destination image at element index iDstOffset.
//
// dfDensity >= 0.9999 : value is stored as-is.
// dfDensity <  0.0001 : nothing is written; the function returns true.
// otherwise           : value is blended with the existing destination value,
//                       weighted by dfDensity and the destination density, and
//                       the result is converted with GWKClampValueT<T>().
//
// When bAvoidNoDataSingleBand is true, AvoidNoData<T>() is applied to the
// stored pixel afterwards. The function returns true on every path.
1788 : template <class T>
1789 14954278 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1790 : GPtrDiff_t iDstOffset, double dfDensity,
1791 : T value, bool bAvoidNoDataSingleBand)
1792 : {
1793 14954278 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1794 :
1795 : /* -------------------------------------------------------------------- */
1796 : /* If the source density is less than 100% we need to fetch the */
1797 : /* existing destination value, and mix it with the source to */
1798 : /* get the new "to apply" value. Also compute composite */
1799 : /* density. */
1800 : /* */
1801 : /* We avoid mixing if density is very near one or risk mixing */
1802 : /* in very extreme nodata values and causing odd results (#1610) */
1803 : /* -------------------------------------------------------------------- */
1804 14954278 : if (dfDensity < 0.9999)
1805 : {
1806 945508 : if (dfDensity < 0.0001)
1807 0 : return true;
1808 :
// Destination density: taken from pafDstDensity when present, else 0 if the
// validity mask marks the pixel invalid, else 1.
1809 945508 : double dfDstDensity = 1.0;
1810 :
1811 945508 : if (poWK->pafDstDensity != nullptr)
1812 944036 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1813 1472 : else if (poWK->panDstValid != nullptr &&
1814 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1815 0 : dfDstDensity = 0.0;
1816 :
1817 : // NOTE: panDstValid[] is only consulted above when pafDstDensity is null.
1818 :
1819 945508 : const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1820 :
1821 : // The destination density is really only relative to the portion
1822 : // not occluded by the overlay.
1823 945508 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1824 :
// Weighted average of source and destination. The divisor cannot be zero
// here because dfDensity >= 0.0001 on this path.
1825 945508 : const double dfReal =
1826 945508 : (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1827 945508 : (dfDensity + dfDstInfluence);
1828 :
1829 : /* --------------------------------------------------------------------
1830 : */
1831 : /* Actually apply the destination value. */
1832 : /* */
1833 : /* Avoid using the destination nodata value for integer datatypes
1834 : */
1835 : /* if by chance it is equal to the computed pixel value. */
1836 : /* --------------------------------------------------------------------
1837 : */
1838 945508 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1839 : }
1840 : else
1841 : {
1842 14008837 : pDst[iDstOffset] = value;
1843 : }
1844 :
1845 14954278 : if (bAvoidNoDataSingleBand)
1846 13681622 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1847 :
1848 14954278 : return true;
1849 : }
1850 :
1851 : /************************************************************************/
1852 : /* ClampRoundAndAvoidNoData() */
1853 : /************************************************************************/
1854 :
// Converts dfReal to T and stores it in band iBand of the destination image
// at element index iDstOffset.
//
// Integer T : values below lowest() / above max() are clamped to those
//             limits; otherwise signed types use floor(dfReal + 0.5) and
//             unsigned types use a truncating cast of (dfReal + 0.5).
// Other T   : plain static_cast<T>(dfReal), no clamping.
//
// When bAvoidNoDataSingleBand is true, AvoidNoData<T>() is applied to the
// stored pixel afterwards.
//
// NOTE(review): for integer T a NaN dfReal fails both range comparisons and
// reaches the static_cast - confirm callers never pass NaN here.
1855 : template <class T>
1856 12158105 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1857 : GPtrDiff_t iDstOffset, double dfReal,
1858 : bool bAvoidNoDataSingleBand)
1859 : {
1860 12158105 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1861 12158105 : T *pDst = reinterpret_cast<T *>(pabyDst);
1862 :
1863 : if constexpr (cpl::NumericLimits<T>::is_integer)
1864 : {
1865 : using std::floor;
1866 11660975 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1867 6430 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1868 11654575 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1869 23967 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1870 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1871 10410 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1872 : else
// Unsigned: dfReal is >= 0 on this branch, so the truncating cast of
// dfReal + 0.5 rounds half up without needing floor().
1873 11620165 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1874 : }
1875 : else
1876 : {
1877 497130 : pDst[iDstOffset] = static_cast<T>(dfReal);
1878 : }
1879 :
1880 12158105 : if (bAvoidNoDataSingleBand)
1881 11857709 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1882 12158105 : }
1883 :
1884 : /************************************************************************/
1885 : /* GWKSetPixelValue() */
1886 : /************************************************************************/
1887 :
// Stores a (possibly complex) pixel value into band iBand of the destination
// image at element index iDstOffset, interpreting the buffer according to
// poWK->eWorkingDataType.
//
// dfDensity >= 0.9999 : dfReal/dfImag are written without blending.
// dfDensity <  0.0001 : nothing is written; returns true.
// otherwise           : the existing destination value is read and blended
//                       with dfReal/dfImag, weighted by dfDensity and the
//                       destination density.
//
// Real types are written through ClampRoundAndAvoidNoData<T>(), which receives
// bAvoidNoDataSingleBand. Complex types are stored as interleaved
// (real, imaginary) pairs at indices 2*iDstOffset and 2*iDstOffset + 1; the
// complex branches below do not use bAvoidNoDataSingleBand.
//
// Returns true on success, false for GDT_Unknown / GDT_TypeCount.
1888 11045400 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1889 : GPtrDiff_t iDstOffset, double dfDensity,
1890 : double dfReal, double dfImag,
1891 : bool bAvoidNoDataSingleBand)
1892 :
1893 : {
1894 11045400 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1895 :
1896 : /* -------------------------------------------------------------------- */
1897 : /* If the source density is less than 100% we need to fetch the */
1898 : /* existing destination value, and mix it with the source to */
1899 : /* get the new "to apply" value. Also compute composite */
1900 : /* density. */
1901 : /* */
1902 : /* We avoid mixing if density is very near one or risk mixing */
1903 : /* in very extreme nodata values and causing odd results (#1610) */
1904 : /* -------------------------------------------------------------------- */
1905 11045400 : if (dfDensity < 0.9999)
1906 : {
1907 800 : if (dfDensity < 0.0001)
1908 0 : return true;
1909 :
// Destination density: taken from pafDstDensity when present, else 0 if the
// validity mask marks the pixel invalid, else 1.
1910 800 : double dfDstDensity = 1.0;
1911 800 : if (poWK->pafDstDensity != nullptr)
1912 800 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1913 0 : else if (poWK->panDstValid != nullptr &&
1914 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1915 0 : dfDstDensity = 0.0;
1916 :
1917 800 : double dfDstReal = 0.0;
1918 800 : double dfDstImag = 0.0;
1919 : // NOTE: panDstValid[] is only consulted above when pafDstDensity is null.
1920 :
1921 : // TODO(schwehr): Factor out this repeated type of set.
// Read the current destination value as double(s) for blending.
1922 800 : switch (poWK->eWorkingDataType)
1923 : {
1924 0 : case GDT_UInt8:
1925 0 : dfDstReal = pabyDst[iDstOffset];
1926 0 : dfDstImag = 0.0;
1927 0 : break;
1928 :
1929 0 : case GDT_Int8:
1930 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1931 0 : dfDstImag = 0.0;
1932 0 : break;
1933 :
1934 400 : case GDT_Int16:
1935 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1936 400 : dfDstImag = 0.0;
1937 400 : break;
1938 :
1939 400 : case GDT_UInt16:
1940 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1941 400 : dfDstImag = 0.0;
1942 400 : break;
1943 :
1944 0 : case GDT_Int32:
1945 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1946 0 : dfDstImag = 0.0;
1947 0 : break;
1948 :
1949 0 : case GDT_UInt32:
1950 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1951 0 : dfDstImag = 0.0;
1952 0 : break;
1953 :
1954 0 : case GDT_Int64:
1955 0 : dfDstReal = static_cast<double>(
1956 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1957 0 : dfDstImag = 0.0;
1958 0 : break;
1959 :
1960 0 : case GDT_UInt64:
1961 0 : dfDstReal = static_cast<double>(
1962 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1963 0 : dfDstImag = 0.0;
1964 0 : break;
1965 :
1966 0 : case GDT_Float16:
1967 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1968 0 : dfDstImag = 0.0;
1969 0 : break;
1970 :
1971 0 : case GDT_Float32:
1972 0 : dfDstReal =
1973 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1974 0 : dfDstImag = 0.0;
1975 0 : break;
1976 :
1977 0 : case GDT_Float64:
1978 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1979 0 : dfDstImag = 0.0;
1980 0 : break;
1981 :
1982 0 : case GDT_CInt16:
1983 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1984 0 : dfDstImag =
1985 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1986 0 : break;
1987 :
1988 0 : case GDT_CInt32:
1989 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1990 0 : dfDstImag =
1991 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1992 0 : break;
1993 :
1994 0 : case GDT_CFloat16:
1995 : dfDstReal =
1996 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1997 : dfDstImag =
1998 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1999 0 : break;
2000 :
2001 0 : case GDT_CFloat32:
2002 0 : dfDstReal =
2003 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
2004 0 : dfDstImag = double(
2005 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
2006 0 : break;
2007 :
2008 0 : case GDT_CFloat64:
2009 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
2010 0 : dfDstImag =
2011 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
2012 0 : break;
2013 :
2014 0 : case GDT_Unknown:
2015 : case GDT_TypeCount:
2016 0 : CPLAssert(false);
2017 : return false;
2018 : }
2019 :
2020 : // The destination density is really only relative to the portion
2021 : // not occluded by the overlay.
2022 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2023 :
// Weighted average of source and destination, applied to both components.
// The divisor cannot be zero here because dfDensity >= 0.0001 on this path.
2024 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2025 800 : (dfDensity + dfDstInfluence);
2026 :
2027 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
2028 800 : (dfDensity + dfDstInfluence);
2029 : }
2030 :
2031 : /* -------------------------------------------------------------------- */
2032 : /* Actually apply the destination value. */
2033 : /* */
2034 : /* Avoid using the destination nodata value for integer datatypes */
2035 : /* if by chance it is equal to the computed pixel value. */
2036 : /* -------------------------------------------------------------------- */
2037 :
2038 11045400 : switch (poWK->eWorkingDataType)
2039 : {
2040 10323000 : case GDT_UInt8:
2041 10323000 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2042 : bAvoidNoDataSingleBand);
2043 10323000 : break;
2044 :
2045 1 : case GDT_Int8:
2046 1 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2047 : bAvoidNoDataSingleBand);
2048 1 : break;
2049 :
2050 7471 : case GDT_Int16:
2051 7471 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2052 : bAvoidNoDataSingleBand);
2053 7471 : break;
2054 :
2055 464 : case GDT_UInt16:
2056 464 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2057 : bAvoidNoDataSingleBand);
2058 464 : break;
2059 :
2060 63 : case GDT_UInt32:
2061 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2062 : bAvoidNoDataSingleBand);
2063 63 : break;
2064 :
2065 63 : case GDT_Int32:
2066 63 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2067 : bAvoidNoDataSingleBand);
2068 63 : break;
2069 :
2070 0 : case GDT_UInt64:
2071 0 : ClampRoundAndAvoidNoData<std::uint64_t>(
2072 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2073 0 : break;
2074 :
2075 0 : case GDT_Int64:
2076 0 : ClampRoundAndAvoidNoData<std::int64_t>(
2077 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2078 0 : break;
2079 :
2080 0 : case GDT_Float16:
2081 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2082 : bAvoidNoDataSingleBand);
2083 0 : break;
2084 :
2085 478957 : case GDT_Float32:
2086 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2087 : bAvoidNoDataSingleBand);
2088 478957 : break;
2089 :
2090 149 : case GDT_Float64:
2091 149 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2092 : bAvoidNoDataSingleBand);
2093 149 : break;
2094 :
// Complex integer types: each component is clamped to the range of T and
// rounded with floor(x + 0.5) independently.
2095 234079 : case GDT_CInt16:
2096 : {
2097 : typedef GInt16 T;
2098 234079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2099 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2100 0 : cpl::NumericLimits<T>::min();
2101 234079 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2102 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2103 0 : cpl::NumericLimits<T>::max();
2104 : else
2105 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2106 234079 : static_cast<T>(floor(dfReal + 0.5));
2107 234079 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2108 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2109 0 : cpl::NumericLimits<T>::min();
2110 234079 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2111 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2112 0 : cpl::NumericLimits<T>::max();
2113 : else
2114 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2115 234079 : static_cast<T>(floor(dfImag + 0.5));
2116 234079 : break;
2117 : }
2118 :
2119 379 : case GDT_CInt32:
2120 : {
2121 : typedef GInt32 T;
2122 379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
2123 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2124 0 : cpl::NumericLimits<T>::min();
2125 379 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
2126 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2127 0 : cpl::NumericLimits<T>::max();
2128 : else
2129 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
2130 379 : static_cast<T>(floor(dfReal + 0.5));
2131 379 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
2132 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2133 0 : cpl::NumericLimits<T>::min();
2134 379 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
2135 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2136 0 : cpl::NumericLimits<T>::max();
2137 : else
2138 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
2139 379 : static_cast<T>(floor(dfImag + 0.5));
2140 379 : break;
2141 : }
2142 :
// Complex floating-point types: components are cast and stored directly.
2143 0 : case GDT_CFloat16:
2144 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
2145 0 : static_cast<GFloat16>(dfReal);
2146 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
2147 0 : static_cast<GFloat16>(dfImag);
2148 0 : break;
2149 :
2150 394 : case GDT_CFloat32:
2151 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
2152 394 : static_cast<float>(dfReal);
2153 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
2154 394 : static_cast<float>(dfImag);
2155 394 : break;
2156 :
2157 380 : case GDT_CFloat64:
2158 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
2159 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
2160 380 : break;
2161 :
2162 0 : case GDT_Unknown:
2163 : case GDT_TypeCount:
2164 0 : return false;
2165 : }
2166 :
2167 11045400 : return true;
2168 : }
2169 :
2170 : /************************************************************************/
2171 : /* GWKSetPixelValueReal() */
2172 : /************************************************************************/
2173 :
// Real-only counterpart of GWKSetPixelValue(): stores dfReal into band iBand
// of the destination image at element index iDstOffset, interpreting the
// buffer according to poWK->eWorkingDataType.
//
// dfDensity >= 0.9999 : dfReal is written without blending.
// dfDensity <  0.0001 : nothing is written; returns true.
// otherwise           : the existing destination value is read and blended
//                       with dfReal, weighted by dfDensity and the
//                       destination density.
//
// The value is written through ClampRoundAndAvoidNoData<T>(), which receives
// bAvoidNoDataSingleBand.
//
// Returns true on success; returns false for complex types, GDT_Unknown and
// GDT_TypeCount (the blending switch and the final GDT_Unknown/GDT_TypeCount
// case also trigger CPLAssert(false)).
2174 1347980 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2175 : GPtrDiff_t iDstOffset, double dfDensity,
2176 : double dfReal, bool bAvoidNoDataSingleBand)
2177 :
2178 : {
2179 1347980 : GByte *pabyDst = poWK->papabyDstImage[iBand];
2180 :
2181 : /* -------------------------------------------------------------------- */
2182 : /* If the source density is less than 100% we need to fetch the */
2183 : /* existing destination value, and mix it with the source to */
2184 : /* get the new "to apply" value. Also compute composite */
2185 : /* density. */
2186 : /* */
2187 : /* We avoid mixing if density is very near one or risk mixing */
2188 : /* in very extreme nodata values and causing odd results (#1610) */
2189 : /* -------------------------------------------------------------------- */
2190 1347980 : if (dfDensity < 0.9999)
2191 : {
2192 600 : if (dfDensity < 0.0001)
2193 0 : return true;
2194 :
2195 600 : double dfDstReal = 0.0;
2196 600 : double dfDstDensity = 1.0;
2197 :
// Destination density: taken from pafDstDensity when present, else 0 if the
// validity mask marks the pixel invalid, else 1.
2198 600 : if (poWK->pafDstDensity != nullptr)
2199 600 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
2200 0 : else if (poWK->panDstValid != nullptr &&
2201 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
2202 0 : dfDstDensity = 0.0;
2203 :
2204 : // NOTE: panDstValid[] is only consulted above when pafDstDensity is null.
2205 :
// Read the current destination value as a double for blending.
2206 600 : switch (poWK->eWorkingDataType)
2207 : {
2208 0 : case GDT_UInt8:
2209 0 : dfDstReal = pabyDst[iDstOffset];
2210 0 : break;
2211 :
2212 0 : case GDT_Int8:
2213 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
2214 0 : break;
2215 :
2216 300 : case GDT_Int16:
2217 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
2218 300 : break;
2219 :
2220 300 : case GDT_UInt16:
2221 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
2222 300 : break;
2223 :
2224 0 : case GDT_Int32:
2225 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
2226 0 : break;
2227 :
2228 0 : case GDT_UInt32:
2229 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
2230 0 : break;
2231 :
2232 0 : case GDT_Int64:
2233 0 : dfDstReal = static_cast<double>(
2234 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
2235 0 : break;
2236 :
2237 0 : case GDT_UInt64:
2238 0 : dfDstReal = static_cast<double>(
2239 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
2240 0 : break;
2241 :
2242 0 : case GDT_Float16:
2243 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
2244 0 : break;
2245 :
2246 0 : case GDT_Float32:
2247 0 : dfDstReal =
2248 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
2249 0 : break;
2250 :
2251 0 : case GDT_Float64:
2252 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
2253 0 : break;
2254 :
2255 0 : case GDT_CInt16:
2256 : case GDT_CInt32:
2257 : case GDT_CFloat16:
2258 : case GDT_CFloat32:
2259 : case GDT_CFloat64:
2260 : case GDT_Unknown:
2261 : case GDT_TypeCount:
2262 0 : CPLAssert(false);
2263 : return false;
2264 : }
2265 :
2266 : // The destination density is really only relative to the portion
2267 : // not occluded by the overlay.
2268 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2269 :
// Weighted average of source and destination. The divisor cannot be zero
// here because dfDensity >= 0.0001 on this path.
2270 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2271 600 : (dfDensity + dfDstInfluence);
2272 : }
2273 :
2274 : /* -------------------------------------------------------------------- */
2275 : /* Actually apply the destination value. */
2276 : /* */
2277 : /* Avoid using the destination nodata value for integer datatypes */
2278 : /* if by chance it is equal to the computed pixel value. */
2279 : /* -------------------------------------------------------------------- */
2280 :
2281 1347980 : switch (poWK->eWorkingDataType)
2282 : {
2283 1325840 : case GDT_UInt8:
2284 1325840 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal,
2285 : bAvoidNoDataSingleBand);
2286 1325840 : break;
2287 :
2288 112 : case GDT_Int8:
2289 112 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal,
2290 : bAvoidNoDataSingleBand);
2291 112 : break;
2292 :
2293 1197 : case GDT_Int16:
2294 1197 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal,
2295 : bAvoidNoDataSingleBand);
2296 1197 : break;
2297 :
2298 475 : case GDT_UInt16:
2299 475 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal,
2300 : bAvoidNoDataSingleBand);
2301 475 : break;
2302 :
2303 539 : case GDT_UInt32:
2304 539 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal,
2305 : bAvoidNoDataSingleBand);
2306 539 : break;
2307 :
2308 1342 : case GDT_Int32:
2309 1342 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal,
2310 : bAvoidNoDataSingleBand);
2311 1342 : break;
2312 :
2313 224 : case GDT_UInt64:
2314 224 : ClampRoundAndAvoidNoData<std::uint64_t>(
2315 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2316 224 : break;
2317 :
2318 224 : case GDT_Int64:
2319 224 : ClampRoundAndAvoidNoData<std::int64_t>(
2320 : poWK, iBand, iDstOffset, dfReal, bAvoidNoDataSingleBand);
2321 224 : break;
2322 :
2323 0 : case GDT_Float16:
2324 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal,
2325 : bAvoidNoDataSingleBand);
2326 0 : break;
2327 :
2328 3538 : case GDT_Float32:
2329 3538 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal,
2330 : bAvoidNoDataSingleBand);
2331 3538 : break;
2332 :
2333 14486 : case GDT_Float64:
2334 14486 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal,
2335 : bAvoidNoDataSingleBand);
2336 14486 : break;
2337 :
// Complex types are rejected here without asserting.
2338 0 : case GDT_CInt16:
2339 : case GDT_CInt32:
2340 : case GDT_CFloat16:
2341 : case GDT_CFloat32:
2342 : case GDT_CFloat64:
2343 0 : return false;
2344 :
2345 0 : case GDT_Unknown:
2346 : case GDT_TypeCount:
2347 0 : CPLAssert(false);
2348 : return false;
2349 : }
2350 :
2351 1347980 : return true;
2352 : }
2353 :
2354 : /************************************************************************/
2355 : /* GWKGetPixelValue() */
2356 : /************************************************************************/
2357 :
2358 : /* It is assumed that panUnifiedSrcValid has been checked before */
2359 :
// Reads one source pixel of band iBand at element index iSrcOffset and
// returns it as a (real, imaginary) pair of doubles plus a density.
//
// pdfDensity  out: 0.0 when the per-band validity mask rejects the pixel or
//             the data type is unsupported; otherwise
//             pafUnifiedSrcDensity[iSrcOffset] when that array exists, else
//             1.0.
// pdfReal     out: real component converted to double.
// pdfImag     out: imaginary component; 0.0 for non-complex types. Complex
//             types are read as interleaved pairs at indices 2*iSrcOffset and
//             2*iSrcOffset + 1.
//
// Returns true when the resulting density is non-zero. Note that *pdfReal and
// *pdfImag are not written when the per-band mask rejects the pixel.
2360 40173600 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2361 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2362 : double *pdfReal, double *pdfImag)
2363 :
2364 : {
2365 40173600 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2366 :
// Per-band validity mask, if any: a cleared bit means "no data here".
2367 80347200 : if (poWK->papanBandSrcValid != nullptr &&
2368 40173600 : poWK->papanBandSrcValid[iBand] != nullptr &&
2369 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2370 : {
2371 0 : *pdfDensity = 0.0;
2372 0 : return false;
2373 : }
2374 :
2375 40173600 : *pdfReal = 0.0;
2376 40173600 : *pdfImag = 0.0;
2377 :
2378 : // TODO(schwehr): Fix casting.
2379 40173600 : switch (poWK->eWorkingDataType)
2380 : {
2381 39096600 : case GDT_UInt8:
2382 39096600 : *pdfReal = pabySrc[iSrcOffset];
2383 39096600 : *pdfImag = 0.0;
2384 39096600 : break;
2385 :
2386 3 : case GDT_Int8:
2387 3 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2388 3 : *pdfImag = 0.0;
2389 3 : break;
2390 :
2391 28229 : case GDT_Int16:
2392 28229 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2393 28229 : *pdfImag = 0.0;
2394 28229 : break;
2395 :
2396 166 : case GDT_UInt16:
2397 166 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2398 166 : *pdfImag = 0.0;
2399 166 : break;
2400 :
2401 63 : case GDT_Int32:
2402 63 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2403 63 : *pdfImag = 0.0;
2404 63 : break;
2405 :
2406 63 : case GDT_UInt32:
2407 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2408 63 : *pdfImag = 0.0;
2409 63 : break;
2410 :
2411 0 : case GDT_Int64:
2412 0 : *pdfReal = static_cast<double>(
2413 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2414 0 : *pdfImag = 0.0;
2415 0 : break;
2416 :
2417 0 : case GDT_UInt64:
2418 0 : *pdfReal = static_cast<double>(
2419 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2420 0 : *pdfImag = 0.0;
2421 0 : break;
2422 :
2423 0 : case GDT_Float16:
2424 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2425 0 : *pdfImag = 0.0;
2426 0 : break;
2427 :
2428 1047220 : case GDT_Float32:
2429 1047220 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2430 1047220 : *pdfImag = 0.0;
2431 1047220 : break;
2432 :
2433 587 : case GDT_Float64:
2434 587 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2435 587 : *pdfImag = 0.0;
2436 587 : break;
2437 :
2438 133 : case GDT_CInt16:
2439 133 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2440 133 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2441 133 : break;
2442 :
2443 133 : case GDT_CInt32:
2444 133 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2445 133 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2446 133 : break;
2447 :
2448 0 : case GDT_CFloat16:
2449 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2450 0 : *pdfImag =
2451 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2452 0 : break;
2453 :
2454 194 : case GDT_CFloat32:
2455 194 : *pdfReal =
2456 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2457 194 : *pdfImag =
2458 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2459 194 : break;
2460 :
2461 138 : case GDT_CFloat64:
2462 138 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2463 138 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2464 138 : break;
2465 :
2466 0 : case GDT_Unknown:
2467 : case GDT_TypeCount:
2468 0 : CPLAssert(false);
2469 : *pdfDensity = 0.0;
2470 : return false;
2471 : }
2472 :
2473 40173600 : if (poWK->pafUnifiedSrcDensity != nullptr)
2474 12745700 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2475 : else
2476 27427800 : *pdfDensity = 1.0;
2477 :
2478 40173600 : return *pdfDensity != 0.0;
2479 : }
2480 :
2481 : /************************************************************************/
2482 : /* GWKGetPixelValueReal() */
2483 : /************************************************************************/
2484 :
// Real-only counterpart of GWKGetPixelValue(): reads one source pixel of band
// iBand at element index iSrcOffset and returns it as a double plus a density.
//
// pdfDensity  out: 0.0 when the per-band validity mask rejects the pixel;
//             otherwise pafUnifiedSrcDensity[iSrcOffset] when that array
//             exists, else 1.0.
// pdfReal     out: pixel value converted to double (not written when the
//             per-band mask rejects the pixel or the type is unsupported).
//
// Returns true when the resulting density is non-zero; returns false (after
// CPLAssert(false)) for complex types, GDT_Unknown and GDT_TypeCount.
//
// NOTE(review): unlike GWKGetPixelValue(), the unsupported-type path returns
// false without writing *pdfDensity - confirm no caller reads it afterwards.
2485 15516 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2486 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2487 : double *pdfReal)
2488 :
2489 : {
2490 15516 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2491 :
// Per-band validity mask, if any: a cleared bit means "no data here".
2492 31034 : if (poWK->papanBandSrcValid != nullptr &&
2493 15518 : poWK->papanBandSrcValid[iBand] != nullptr &&
2494 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2495 : {
2496 0 : *pdfDensity = 0.0;
2497 0 : return false;
2498 : }
2499 :
2500 15516 : switch (poWK->eWorkingDataType)
2501 : {
2502 1 : case GDT_UInt8:
2503 1 : *pdfReal = pabySrc[iSrcOffset];
2504 1 : break;
2505 :
2506 0 : case GDT_Int8:
2507 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2508 0 : break;
2509 :
2510 1 : case GDT_Int16:
2511 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2512 1 : break;
2513 :
2514 1 : case GDT_UInt16:
2515 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2516 1 : break;
2517 :
2518 982 : case GDT_Int32:
2519 982 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2520 982 : break;
2521 :
2522 179 : case GDT_UInt32:
2523 179 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2524 179 : break;
2525 :
2526 112 : case GDT_Int64:
2527 112 : *pdfReal = static_cast<double>(
2528 112 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2529 112 : break;
2530 :
2531 112 : case GDT_UInt64:
2532 112 : *pdfReal = static_cast<double>(
2533 112 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2534 112 : break;
2535 :
2536 0 : case GDT_Float16:
2537 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2538 0 : break;
2539 :
2540 2 : case GDT_Float32:
2541 2 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2542 2 : break;
2543 :
2544 14126 : case GDT_Float64:
2545 14126 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2546 14126 : break;
2547 :
2548 0 : case GDT_CInt16:
2549 : case GDT_CInt32:
2550 : case GDT_CFloat16:
2551 : case GDT_CFloat32:
2552 : case GDT_CFloat64:
2553 : case GDT_Unknown:
2554 : case GDT_TypeCount:
2555 0 : CPLAssert(false);
2556 : return false;
2557 : }
2558 :
2559 15516 : if (poWK->pafUnifiedSrcDensity != nullptr)
2560 0 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2561 : else
2562 15516 : *pdfDensity = 1.0;
2563 :
2564 15516 : return *pdfDensity != 0.0;
2565 : }
2566 :
2567 : /************************************************************************/
2568 : /* GWKGetPixelRow() */
2569 : /************************************************************************/
2570 :
2571 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2572 : /* data-types. */
2573 :
2574 2369710 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2575 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2576 : double *padfDensity, double adfReal[],
2577 : double *padfImag)
2578 : {
2579 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2580 2369710 : const int nSrcLen = nHalfSrcLen * 2;
2581 2369710 : bool bHasValid = false;
2582 :
2583 2369710 : if (padfDensity != nullptr)
2584 : {
2585 : // Init the density.
2586 3384030 : for (int i = 0; i < nSrcLen; i += 2)
2587 : {
2588 2211910 : padfDensity[i] = 1.0;
2589 2211910 : padfDensity[i + 1] = 1.0;
2590 : }
2591 :
2592 1172120 : if (poWK->panUnifiedSrcValid != nullptr)
2593 : {
2594 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2595 : {
2596 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2597 2067740 : bHasValid = true;
2598 : else
2599 74323 : padfDensity[i] = 0.0;
2600 :
2601 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2602 2068400 : bHasValid = true;
2603 : else
2604 73668 : padfDensity[i + 1] = 0.0;
2605 : }
2606 :
2607 : // Reset or fail as needed.
2608 1139400 : if (bHasValid)
2609 1116590 : bHasValid = false;
2610 : else
2611 22806 : return false;
2612 : }
2613 :
2614 1149320 : if (poWK->papanBandSrcValid != nullptr &&
2615 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2616 : {
2617 0 : for (int i = 0; i < nSrcLen; i += 2)
2618 : {
2619 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2620 0 : bHasValid = true;
2621 : else
2622 0 : padfDensity[i] = 0.0;
2623 :
2624 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2625 0 : iSrcOffset + i + 1))
2626 0 : bHasValid = true;
2627 : else
2628 0 : padfDensity[i + 1] = 0.0;
2629 : }
2630 :
2631 : // Reset or fail as needed.
2632 0 : if (bHasValid)
2633 0 : bHasValid = false;
2634 : else
2635 0 : return false;
2636 : }
2637 : }
2638 :
2639 : // TODO(schwehr): Fix casting.
2640 : // Fetch data.
2641 2346910 : switch (poWK->eWorkingDataType)
2642 : {
2643 1136680 : case GDT_UInt8:
2644 : {
2645 1136680 : GByte *pSrc =
2646 1136680 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2647 1136680 : pSrc += iSrcOffset;
2648 3281570 : for (int i = 0; i < nSrcLen; i += 2)
2649 : {
2650 2144890 : adfReal[i] = pSrc[i];
2651 2144890 : adfReal[i + 1] = pSrc[i + 1];
2652 : }
2653 1136680 : break;
2654 : }
2655 :
2656 196 : case GDT_Int8:
2657 : {
2658 196 : GInt8 *pSrc =
2659 196 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2660 196 : pSrc += iSrcOffset;
2661 392 : for (int i = 0; i < nSrcLen; i += 2)
2662 : {
2663 196 : adfReal[i] = pSrc[i];
2664 196 : adfReal[i + 1] = pSrc[i + 1];
2665 : }
2666 196 : break;
2667 : }
2668 :
2669 5754 : case GDT_Int16:
2670 : {
2671 5754 : GInt16 *pSrc =
2672 5754 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2673 5754 : pSrc += iSrcOffset;
2674 21772 : for (int i = 0; i < nSrcLen; i += 2)
2675 : {
2676 16018 : adfReal[i] = pSrc[i];
2677 16018 : adfReal[i + 1] = pSrc[i + 1];
2678 : }
2679 5754 : break;
2680 : }
2681 :
2682 4310 : case GDT_UInt16:
2683 : {
2684 4310 : GUInt16 *pSrc =
2685 4310 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2686 4310 : pSrc += iSrcOffset;
2687 18884 : for (int i = 0; i < nSrcLen; i += 2)
2688 : {
2689 14574 : adfReal[i] = pSrc[i];
2690 14574 : adfReal[i + 1] = pSrc[i + 1];
2691 : }
2692 4310 : break;
2693 : }
2694 :
2695 946 : case GDT_Int32:
2696 : {
2697 946 : GInt32 *pSrc =
2698 946 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2699 946 : pSrc += iSrcOffset;
2700 2624 : for (int i = 0; i < nSrcLen; i += 2)
2701 : {
2702 1678 : adfReal[i] = pSrc[i];
2703 1678 : adfReal[i + 1] = pSrc[i + 1];
2704 : }
2705 946 : break;
2706 : }
2707 :
2708 946 : case GDT_UInt32:
2709 : {
2710 946 : GUInt32 *pSrc =
2711 946 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2712 946 : pSrc += iSrcOffset;
2713 2624 : for (int i = 0; i < nSrcLen; i += 2)
2714 : {
2715 1678 : adfReal[i] = pSrc[i];
2716 1678 : adfReal[i + 1] = pSrc[i + 1];
2717 : }
2718 946 : break;
2719 : }
2720 :
2721 196 : case GDT_Int64:
2722 : {
2723 196 : auto pSrc =
2724 196 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2725 196 : pSrc += iSrcOffset;
2726 392 : for (int i = 0; i < nSrcLen; i += 2)
2727 : {
2728 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2729 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2730 : }
2731 196 : break;
2732 : }
2733 :
2734 196 : case GDT_UInt64:
2735 : {
2736 196 : auto pSrc =
2737 196 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2738 196 : pSrc += iSrcOffset;
2739 392 : for (int i = 0; i < nSrcLen; i += 2)
2740 : {
2741 196 : adfReal[i] = static_cast<double>(pSrc[i]);
2742 196 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2743 : }
2744 196 : break;
2745 : }
2746 :
2747 0 : case GDT_Float16:
2748 : {
2749 0 : GFloat16 *pSrc =
2750 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2751 0 : pSrc += iSrcOffset;
2752 0 : for (int i = 0; i < nSrcLen; i += 2)
2753 : {
2754 0 : adfReal[i] = pSrc[i];
2755 0 : adfReal[i + 1] = pSrc[i + 1];
2756 : }
2757 0 : break;
2758 : }
2759 :
2760 25270 : case GDT_Float32:
2761 : {
2762 25270 : float *pSrc =
2763 25270 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2764 25270 : pSrc += iSrcOffset;
2765 121739 : for (int i = 0; i < nSrcLen; i += 2)
2766 : {
2767 96469 : adfReal[i] = double(pSrc[i]);
2768 96469 : adfReal[i + 1] = double(pSrc[i + 1]);
2769 : }
2770 25270 : break;
2771 : }
2772 :
2773 946 : case GDT_Float64:
2774 : {
2775 946 : double *pSrc =
2776 946 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2777 946 : pSrc += iSrcOffset;
2778 2624 : for (int i = 0; i < nSrcLen; i += 2)
2779 : {
2780 1678 : adfReal[i] = pSrc[i];
2781 1678 : adfReal[i + 1] = pSrc[i + 1];
2782 : }
2783 946 : break;
2784 : }
2785 :
2786 1169220 : case GDT_CInt16:
2787 : {
2788 1169220 : GInt16 *pSrc =
2789 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2790 1169220 : pSrc += 2 * iSrcOffset;
2791 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2792 : {
2793 3506800 : adfReal[i] = pSrc[2 * i];
2794 3506800 : padfImag[i] = pSrc[2 * i + 1];
2795 :
2796 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2797 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2798 : }
2799 1169220 : break;
2800 : }
2801 :
2802 750 : case GDT_CInt32:
2803 : {
2804 750 : GInt32 *pSrc =
2805 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2806 750 : pSrc += 2 * iSrcOffset;
2807 2232 : for (int i = 0; i < nSrcLen; i += 2)
2808 : {
2809 1482 : adfReal[i] = pSrc[2 * i];
2810 1482 : padfImag[i] = pSrc[2 * i + 1];
2811 :
2812 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2813 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2814 : }
2815 750 : break;
2816 : }
2817 :
2818 0 : case GDT_CFloat16:
2819 : {
2820 0 : GFloat16 *pSrc =
2821 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2822 0 : pSrc += 2 * iSrcOffset;
2823 0 : for (int i = 0; i < nSrcLen; i += 2)
2824 : {
2825 0 : adfReal[i] = pSrc[2 * i];
2826 0 : padfImag[i] = pSrc[2 * i + 1];
2827 :
2828 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2829 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2830 : }
2831 0 : break;
2832 : }
2833 :
2834 750 : case GDT_CFloat32:
2835 : {
2836 750 : float *pSrc =
2837 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2838 750 : pSrc += 2 * iSrcOffset;
2839 2232 : for (int i = 0; i < nSrcLen; i += 2)
2840 : {
2841 1482 : adfReal[i] = double(pSrc[2 * i]);
2842 1482 : padfImag[i] = double(pSrc[2 * i + 1]);
2843 :
2844 1482 : adfReal[i + 1] = double(pSrc[2 * i + 2]);
2845 1482 : padfImag[i + 1] = double(pSrc[2 * i + 3]);
2846 : }
2847 750 : break;
2848 : }
2849 :
2850 750 : case GDT_CFloat64:
2851 : {
2852 750 : double *pSrc =
2853 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2854 750 : pSrc += 2 * iSrcOffset;
2855 2232 : for (int i = 0; i < nSrcLen; i += 2)
2856 : {
2857 1482 : adfReal[i] = pSrc[2 * i];
2858 1482 : padfImag[i] = pSrc[2 * i + 1];
2859 :
2860 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2861 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2862 : }
2863 750 : break;
2864 : }
2865 :
2866 0 : case GDT_Unknown:
2867 : case GDT_TypeCount:
2868 0 : CPLAssert(false);
2869 : if (padfDensity)
2870 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2871 : return false;
2872 : }
2873 :
2874 2346910 : if (padfDensity == nullptr)
2875 1197590 : return true;
2876 :
2877 1149320 : if (poWK->pafUnifiedSrcDensity == nullptr)
2878 : {
2879 3256740 : for (int i = 0; i < nSrcLen; i += 2)
2880 : {
2881 : // Take into account earlier calcs.
2882 2127390 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2883 : {
2884 2087480 : padfDensity[i] = 1.0;
2885 2087480 : bHasValid = true;
2886 : }
2887 :
2888 2127390 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2889 : {
2890 2088140 : padfDensity[i + 1] = 1.0;
2891 2088140 : bHasValid = true;
2892 : }
2893 : }
2894 : }
2895 : else
2896 : {
2897 70068 : for (int i = 0; i < nSrcLen; i += 2)
2898 : {
2899 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2900 50103 : padfDensity[i] =
2901 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2902 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2903 49252 : bHasValid = true;
2904 :
2905 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2906 50103 : padfDensity[i + 1] =
2907 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2908 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2909 49170 : bHasValid = true;
2910 : }
2911 : }
2912 :
2913 1149320 : return bHasValid;
2914 : }
2915 :
2916 : /************************************************************************/
2917 : /* GWKGetPixelT() */
2918 : /************************************************************************/
2919 :
// Fetch a single source pixel of band iBand, stored as type T, together with
// its density.
//
// poWK       - warp kernel whose source buffers and masks are read.
// iBand      - index into poWK->papabySrcImage (and papanBandSrcValid).
// iSrcOffset - element offset of the pixel inside the band buffer. No bounds
//              check is performed here; the caller must pass a valid offset.
// pdfDensity - output, always written: 0.0 when the pixel is masked out, 1.0
//              when there is no unified density array, otherwise
//              poWK->pafUnifiedSrcDensity[iSrcOffset].
// pValue     - output, written only when the pixel is not masked out.
//
// Returns false when the pixel is masked out or its density is exactly 0.0,
// true otherwise.
2920 : template <class T>
2921 14964660 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2922 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2923 :
2924 : {
2925 14964660 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2926 :
// Both validity masks are optional (skipped when null). The pixel is rejected
// as soon as one of them reports it as not set.
// NOTE(review): presumably CPLMaskGet() returns the validity bit stored at
// iSrcOffset - confirm against cpl_mask.h.
2927 33172045 : if ((poWK->panUnifiedSrcValid != nullptr &&
2928 29929220 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2929 14964660 : (poWK->papanBandSrcValid != nullptr &&
2930 589863 : poWK->papanBandSrcValid[iBand] != nullptr &&
2931 589863 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2932 : {
2933 9 : *pdfDensity = 0.0;
2934 9 : return false;
2935 : }
2936 :
2937 14964560 : *pValue = pSrc[iSrcOffset];
2938 :
// Without a density array every unmasked pixel counts as fully dense.
2939 14964560 : if (poWK->pafUnifiedSrcDensity == nullptr)
2940 13842267 : *pdfDensity = 1.0;
2941 : else
2942 1122362 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2943 :
// Note: unlike the row-based helpers, the comparison here is against exactly
// 0.0, not against a density threshold.
2944 14964560 : return *pdfDensity != 0.0;
2945 : }
2946 :
2947 : /************************************************************************/
2948 : /* GWKBilinearResample() */
2949 : /* Set of bilinear interpolators */
2950 : /************************************************************************/
2951 :
// Bilinear interpolation of one band at source position (dfSrcX, dfSrcY),
// honouring validity masks and density through GWKGetPixelRow().
//
// Up to four neighbours (2 columns x 2 rows) contribute. Neighbours that fall
// outside the image, or whose density is not above
// SRC_DENSITY_THRESHOLD_DOUBLE, are dropped and the remaining weights are
// renormalised.
//
// poWK               - warp kernel providing the source image and masks.
// iBand              - source band index.
// dfSrcX, dfSrcY     - source position in pixel coordinates (pixel centres
//                      are at integer + 0.5, as implied by the -0.5 shift).
// pdfDensity, pdfReal, pdfImag
//                    - outputs, always written (all 0.0 when no neighbour
//                      contributed).
//
// Returns true when at least one neighbour contributed (weight sum of at
// least 0.00001), false otherwise.
2952 77448 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2953 : double dfSrcX, double dfSrcY,
2954 : double *pdfDensity, double *pdfReal,
2955 : double *pdfImag)
2956 :
2957 : {
2958 : // Save as local variables to avoid following pointers.
2959 77448 : const int nSrcXSize = poWK->nSrcXSize;
2960 77448 : const int nSrcYSize = poWK->nSrcYSize;
2961 :
// (iSrcX, iSrcY) is the upper-left pixel of the 2x2 neighbourhood.
// dfRatioX / dfRatioY are the weights of the left column / upper row; the
// right column / lower row get (1 - ratio).
2962 77448 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2963 77448 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2964 77448 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2965 77448 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2966 77448 : bool bShifted = false;
2967 :
// Position lies in the first half pixel of the image: clamp onto the first
// column / row and give it the whole weight along that axis.
2968 77448 : if (iSrcX == -1)
2969 : {
2970 1534 : iSrcX = 0;
2971 1534 : dfRatioX = 1;
2972 : }
2973 77448 : if (iSrcY == -1)
2974 : {
2975 7734 : iSrcY = 0;
2976 7734 : dfRatioY = 1;
2977 : }
2978 77448 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2979 :
2980 : // Shift so we don't overrun the array.
// Each row fetch below reads two consecutive pixels. If the upper-left or the
// lower-left pixel is the very last pixel of the buffer, start one pixel
// earlier and later move element [1] into slot [0].
2981 77448 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2982 77330 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2983 77330 : iSrcOffset + nSrcXSize + 1)
2984 : {
2985 230 : bShifted = true;
2986 230 : --iSrcOffset;
2987 : }
2988 :
2989 77448 : double adfDensity[2] = {0.0, 0.0};
2990 77448 : double adfReal[2] = {0.0, 0.0};
2991 77448 : double adfImag[2] = {0.0, 0.0};
2992 77448 : double dfAccumulatorReal = 0.0;
2993 77448 : double dfAccumulatorImag = 0.0;
2994 77448 : double dfAccumulatorDensity = 0.0;
2995 77448 : double dfAccumulatorDivisor = 0.0;
2996 :
2997 77448 : const GPtrDiff_t nSrcPixels =
2998 77448 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2999 : // Get pixel row.
// Upper row of the neighbourhood; skipped entirely when it is outside the
// image or when GWKGetPixelRow() reports no usable pixel.
3000 77448 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
3001 154896 : iSrcOffset < nSrcPixels &&
3002 77448 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
3003 : adfImag))
3004 : {
3005 71504 : double dfMult1 = dfRatioX * dfRatioY;
3006 71504 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
3007 :
3008 : // Shifting corrected.
3009 71504 : if (bShifted)
3010 : {
3011 230 : adfReal[0] = adfReal[1];
3012 230 : adfImag[0] = adfImag[1];
3013 230 : adfDensity[0] = adfDensity[1];
3014 : }
3015 :
3016 : // Upper Left Pixel.
3017 71504 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3018 71504 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3019 : {
3020 66050 : dfAccumulatorDivisor += dfMult1;
3021 :
3022 66050 : dfAccumulatorReal += adfReal[0] * dfMult1;
3023 66050 : dfAccumulatorImag += adfImag[0] * dfMult1;
3024 66050 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3025 : }
3026 :
3027 : // Upper Right Pixel.
3028 71504 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3029 70609 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3030 : {
3031 65335 : dfAccumulatorDivisor += dfMult2;
3032 :
3033 65335 : dfAccumulatorReal += adfReal[1] * dfMult2;
3034 65335 : dfAccumulatorImag += adfImag[1] * dfMult2;
3035 65335 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3036 : }
3037 : }
3038 :
3039 : // Get pixel row.
// Lower row of the neighbourhood, same treatment as the upper row.
3040 77448 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
3041 228032 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
3042 73136 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
3043 : adfReal, adfImag))
3044 : {
3045 67577 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
3046 67577 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3047 :
3048 : // Shifting corrected
3049 67577 : if (bShifted)
3050 : {
3051 112 : adfReal[0] = adfReal[1];
3052 112 : adfImag[0] = adfImag[1];
3053 112 : adfDensity[0] = adfDensity[1];
3054 : }
3055 :
3056 : // Lower Left Pixel
3057 67577 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
3058 67577 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
3059 : {
3060 62298 : dfAccumulatorDivisor += dfMult1;
3061 :
3062 62298 : dfAccumulatorReal += adfReal[0] * dfMult1;
3063 62298 : dfAccumulatorImag += adfImag[0] * dfMult1;
3064 62298 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
3065 : }
3066 :
3067 : // Lower Right Pixel.
3068 67577 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
3069 66800 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
3070 : {
3071 61823 : dfAccumulatorDivisor += dfMult2;
3072 :
3073 61823 : dfAccumulatorReal += adfReal[1] * dfMult2;
3074 61823 : dfAccumulatorImag += adfImag[1] * dfMult2;
3075 61823 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
3076 : }
3077 : }
3078 :
3079 : /* -------------------------------------------------------------------- */
3080 : /* Return result. */
3081 : /* -------------------------------------------------------------------- */
// The whole weight was collected (sum is exactly 1.0), so no renormalisation
// is needed. This is a successful interpolation and must report true, in line
// with the renormalised branch below and with
// GWKBilinearResampleNoMasks4SampleT().
3082 77448 : if (dfAccumulatorDivisor == 1.0)
3083 : {
3084 45929 : *pdfReal = dfAccumulatorReal;
3085 45929 : *pdfImag = dfAccumulatorImag;
3086 45929 : *pdfDensity = dfAccumulatorDensity;
3087 45929 : return true;
3088 : }
// Practically nothing contributed: report failure with zeroed outputs.
3089 31519 : else if (dfAccumulatorDivisor < 0.00001)
3090 : {
3091 0 : *pdfReal = 0.0;
3092 0 : *pdfImag = 0.0;
3093 0 : *pdfDensity = 0.0;
3094 0 : return false;
3095 : }
// Some neighbours were dropped: renormalise by the weight actually used.
3096 : else
3097 : {
3098 31519 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
3099 31519 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
3100 31519 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
3101 31519 : return true;
3102 : }
3103 : }
3104 :
// Bilinear interpolation of one band at (dfSrcX, dfSrcY) for sources of pixel
// type T, without consulting any validity mask or density array.
//
// poWK           - warp kernel providing papabySrcImage and the source size.
// iBand          - source band index.
// dfSrcX, dfSrcY - source position in pixel coordinates.
// pValue         - output; receives the interpolated value converted through
//                  GWKRoundValueT<T>(), or 0 when no neighbour is in range.
//
// Returns false only when the total weight of in-range neighbours is below
// 0.00001, true otherwise.
3105 : template <class T>
3106 8786376 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3107 : int iBand, double dfSrcX,
3108 : double dfSrcY, T *pValue)
3109 :
3110 : {
3111 :
// (iSrcX, iSrcY) is the upper-left pixel of the 2x2 neighbourhood; the offset
// is computed unconditionally but only dereferenced under the range checks
// below.
3112 8786376 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3113 8786376 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3114 8786376 : GPtrDiff_t iSrcOffset =
3115 8786376 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
// Weights of the left column / upper row; the opposite side gets (1 - ratio).
3116 8786376 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
3117 8786376 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
3118 :
3119 8786376 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
3120 :
// Fast path: all four neighbours are inside the image, so the weights sum to
// one and no normalisation is needed.
3121 8786376 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3122 6227729 : iSrcY + 1 < poWK->nSrcYSize)
3123 : {
3124 6036147 : const double dfAccumulator =
3125 6036147 : (double(pSrc[iSrcOffset]) * dfRatioX +
3126 6036147 : double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
3127 : dfRatioY +
3128 6036147 : (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
3129 6036147 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
3130 6036147 : (1.0 - dfRatioX)) *
3131 6036147 : (1.0 - dfRatioY);
3132 :
3133 6036147 : *pValue = GWKRoundValueT<T>(dfAccumulator);
3134 :
3135 6036147 : return true;
3136 : }
3137 :
// Border path: accumulate only the neighbours that are inside the image and
// remember the weight actually used so the result can be renormalised.
3138 2750229 : double dfAccumulatorDivisor = 0.0;
3139 2750229 : double dfAccumulator = 0.0;
3140 :
3141 : // Upper Left Pixel.
3142 2750229 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
3143 565015 : iSrcY < poWK->nSrcYSize)
3144 : {
3145 565015 : const double dfMult = dfRatioX * dfRatioY;
3146 :
3147 565015 : dfAccumulatorDivisor += dfMult;
3148 :
3149 565015 : dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
3150 : }
3151 :
3152 : // Upper Right Pixel.
3153 2750229 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
3154 2065499 : iSrcY < poWK->nSrcYSize)
3155 : {
3156 2065499 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
3157 :
3158 2065499 : dfAccumulatorDivisor += dfMult;
3159 :
3160 2065499 : dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
3161 : }
3162 :
3163 : // Lower Right Pixel.
3164 2750229 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3165 2316563 : iSrcY + 1 < poWK->nSrcYSize)
3166 : {
3167 2064786 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
3168 :
3169 2064786 : dfAccumulatorDivisor += dfMult;
3170 :
3171 2064786 : dfAccumulator +=
3172 2064786 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
3173 : }
3174 :
3175 : // Lower Left Pixel.
3176 2750229 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
3177 815797 : iSrcY + 1 < poWK->nSrcYSize)
3178 : {
3179 564024 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
3180 :
3181 564024 : dfAccumulatorDivisor += dfMult;
3182 :
3183 564024 : dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
3184 : }
3185 :
3186 : /* -------------------------------------------------------------------- */
3187 : /* Return result. */
3188 : /* -------------------------------------------------------------------- */
3189 2750229 : double dfValue = 0.0;
3190 :
// No usable neighbour: fail with a zero value.
3191 2750229 : if (dfAccumulatorDivisor < 0.00001)
3192 : {
3193 0 : *pValue = 0;
3194 0 : return false;
3195 : }
// Weights already sum to one: the division can be skipped.
3196 2750229 : else if (dfAccumulatorDivisor == 1.0)
3197 : {
3198 22176 : dfValue = dfAccumulator;
3199 : }
3200 : else
3201 : {
3202 2728047 : dfValue = dfAccumulator / dfAccumulatorDivisor;
3203 : }
3204 :
// NOTE(review): GWKRoundValueT<T>() is defined elsewhere; presumably it rounds
// and/or clamps the double to T - confirm at its definition.
3205 2750229 : *pValue = GWKRoundValueT<T>(dfValue);
3206 :
3207 2750229 : return true;
3208 : }
3209 :
3210 : /************************************************************************/
3211 : /* GWKCubicResample() */
3212 : /* Set of bicubic interpolators using cubic convolution. */
3213 : /************************************************************************/
3214 :
3215 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
3216 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
3217 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
3218 :
// Evaluate the cubic convolution polynomial
//   f1 + 0.5 * (distance1 * (f2 - f0)
//               + distance2 * (2*f0 - 5*f1 + 4*f2 - f3)
//               + distance3 * (3*(f1 - f2) + f3 - f0))
// for four consecutive samples f0..f3.
// NOTE(review): distance1/2/3 are presumably t, t^2 and t^3 for the fractional
// position t between f1 and f2 (see the references above) - confirm against
// the callers, which are not visible here.
3219 : template <typename T>
3220 1810810 : static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
3221 : T f1, T f2, T f3)
3222 : {
3223 1810810 : return (f1 + T(0.5) * (distance1 * (f2 - f0) +
3224 1810810 : distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3) +
3225 1810810 : distance3 * (3 * (f1 - f2) + f3 - f0)));
3226 : }
3227 :
3228 : /************************************************************************/
3229 : /* GWKCubicComputeWeights() */
3230 : /************************************************************************/
3231 :
3232 : // The four weights sum to 1: adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
3233 :
// Compute the four cubic convolution weights for a fractional offset x.
//
// x      - fractional position; callers in this file pass the distance from
//          the second of the four samples.
// coeffs - output array of 4 weights, to be applied to the samples at
//          relative positions -1, 0, +1 and +2.
//
// Expanded, the weights are:
//   coeffs[0] = -0.5*x + x^2 - 0.5*x^3
//   coeffs[1] = 1 - 2.5*x^2 + 1.5*x^3
//   coeffs[2] = 0.5*x + 2*x^2 - 1.5*x^3
//   coeffs[3] = -0.5*x^2 + 0.5*x^3
// They sum to 1 for any x; x == 0 yields {0, 1, 0, 0} and x == 1 yields
// {0, 0, 1, 0}.
3234 : template <typename T>
3235 98041250 : static inline void GWKCubicComputeWeights(T x, T coeffs[4])
3236 : {
// Shared sub-expressions of the four polynomials.
3237 98041250 : const T halfX = T(0.5) * x;
3238 98041250 : const T threeX = T(3.0) * x;
3239 98041250 : const T halfX2 = halfX * x;
3240 :
3241 98041250 : coeffs[0] = halfX * (-1 + x * (2 - x));
3242 98041250 : coeffs[1] = 1 + halfX2 * (-5 + threeX);
3243 98041250 : coeffs[2] = halfX * (1 + x * (4 - threeX));
3244 98041250 : coeffs[3] = halfX2 * (-1 + x);
3245 98041250 : }
3246 :
// Dot product of two 4-element arrays: v1 holds double weights, v2 holds
// samples of type T that are converted to double before multiplying.
3247 14682906 : template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
3248 : {
3249 14682906 : return v1[0] * double(v2[0]) + v1[1] * double(v2[1]) +
3250 14682906 : v1[2] * double(v2[2]) + v1[3] * double(v2[3]);
3251 : }
3252 :
// Everything between "#if 0" and "#endif" below is never compiled. It keeps an
// alternative formulation of the cubic weights in which the common 0.5 * x
// factor is pulled out of the weights (dfHalfX) and applied once per
// convolution in CONVOL4_Optim2MAX.
3253 : #if 0
3254 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
3255 : // instead of 17.
3256 : // TODO(schwehr): Use an inline function.
3257 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
3258 : { \
3259 : const double dfX = dfX_; \
3260 : dfHalfX = 0.5 * dfX; \
3261 : const double dfThreeX = 3.0 * dfX; \
3262 : const double dfXMinus1 = dfX - 1; \
3263 : \
3264 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
3265 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
3266 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
3267 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
3268 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
3269 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
3270 : }
3271 :
3272 : // TODO(schwehr): Use an inline function.
3273 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
3274 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
3275 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
3276 : #endif
3277 :
// Bicubic (4x4 cubic convolution) interpolation of one band at
// (dfSrcX, dfSrcY), honouring masks and density through GWKGetPixelRow().
//
// Falls back to GWKBilinearResample4Sample() when the 4x4 window does not fit
// strictly inside the image, when a row cannot be fetched, or when any of the
// 16 pixels has a density below SRC_DENSITY_THRESHOLD_DOUBLE.
//
// poWK               - warp kernel providing the source image and masks.
// iBand              - source band index.
// dfSrcX, dfSrcY     - source position in pixel coordinates.
// pdfDensity, pdfReal, pdfImag
//                    - outputs: interpolated density, real and imaginary
//                      parts.
//
// Returns true on the cubic path; on fallback the return value of
// GWKBilinearResample4Sample() is forwarded unchanged.
3278 302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3279 : double dfSrcX, double dfSrcY,
3280 : double *pdfDensity, double *pdfReal,
3281 : double *pdfImag)
3282 :
3283 : {
// Note: the cast truncates towards zero (no floor() here, unlike the bilinear
// helpers); positions that truncate to index 0 are caught by the border test
// below.
3284 302045 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3285 302045 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3286 302045 : GPtrDiff_t iSrcOffset =
3287 302045 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
// Fractional position inside the central cell, fed to the weight polynomials.
3288 302045 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3289 302045 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3290 302045 : double adfDensity[4] = {};
3291 302045 : double adfReal[4] = {};
3292 302045 : double adfImag[4] = {};
3293 :
3294 : // Get the bilinear interpolation at the image borders.
3295 302045 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3296 286140 : iSrcY + 2 >= poWK->nSrcYSize)
3297 24670 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3298 24670 : pdfDensity, pdfReal, pdfImag);
3299 :
// Per-row results of the horizontal convolution (rows iSrcY-1 .. iSrcY+2).
3300 277375 : double adfValueDens[4] = {};
3301 277375 : double adfValueReal[4] = {};
3302 277375 : double adfValueImag[4] = {};
3303 :
3304 277375 : double adfCoeffsX[4] = {};
3305 277375 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3306 :
// Each iteration fetches one row of the window starting at column iSrcX - 1.
// NOTE(review): the 4th argument (2) is presumably nHalfSrcLen, i.e. 4 pixels
// are fetched - consistent with elements [0..3] being read below.
3307 1240570 : for (GPtrDiff_t i = -1; i < 3; i++)
3308 : {
3309 1009640 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3310 998035 : 2, adfDensity, adfReal, adfImag) ||
3311 998035 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3312 980395 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3313 2979770 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3314 972094 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3315 : {
3316 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3317 46449 : pdfDensity, pdfReal, pdfImag);
3318 : }
3319 :
// Horizontal pass: collapse the 4 pixels of this row into one value.
3320 963196 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3321 963196 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3322 963196 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3323 : }
3324 :
3325 : /* -------------------------------------------------------------------- */
3326 : /* For now, if we have any pixels missing in the kernel area, */
3327 : /* we fallback on using bilinear interpolation. Ideally we */
3328 : /* should do "weight adjustment" of our results similarly to */
3329 : /* what is done for the cubic spline and lanc. interpolators. */
3330 : /* -------------------------------------------------------------------- */
3331 :
// Vertical pass: combine the four row results with the Y weights.
3332 230926 : double adfCoeffsY[4] = {};
3333 230926 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3334 :
3335 230926 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3336 230926 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3337 230926 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3338 :
3339 230926 : return true;
3340 : }
3341 :
3342 : #ifdef USE_SSE2
3343 :
3344 : /************************************************************************/
3345 : /* XMMLoad4Values() */
3346 : /* */
3347 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3348 : /* m128 register. */
3349 : /************************************************************************/
3350 :
// Overload for 8-bit samples: reads 4 bytes starting at ptr and returns them
// as 4 packed floats.
// The bytes are first copied into an integer with memcpy(), so no particular
// alignment of ptr is needed for that read.
3351 568577000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3352 : {
3353 : unsigned int i;
3354 568577000 : memcpy(&i, ptr, 4);
3355 1137150000 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3356 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3357 : // 32-bit integers.
3358 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3359 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3360 : #else
// Plain SSE2 fallback: widen 8 -> 16 -> 32 bits by interleaving with zeros.
3361 1137150000 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3362 1137150000 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3363 : #endif
3364 1137150000 : return _mm_cvtepi32_ps(xmm_i);
3365 : }
3366 :
// Overload for 16-bit unsigned samples: reads 8 bytes (4 values) starting at
// ptr and returns them as 4 packed floats.
// The bytes are first copied into a 64-bit integer with memcpy(), so no
// particular alignment of ptr is needed for that read.
3367 1108340 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3368 : {
3369 : GUInt64 i;
3370 1108340 : memcpy(&i, ptr, 8);
3371 2216690 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3372 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3373 : // 32-bit integers.
3374 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3375 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3376 : #else
// Plain SSE2 fallback: widen 16 -> 32 bits by interleaving with zeros.
3377 2216690 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3378 : #endif
3379 2216690 : return _mm_cvtepi32_ps(xmm_i);
3380 : }
3381 :
3382 : /************************************************************************/
3383 : /* XMMHorizontalAdd() */
3384 : /* */
3385 : /* Return the sum of the 4 floating points of the register. */
3386 : /************************************************************************/
3387 :
// Two interchangeable implementations of the horizontal sum v0 + v1 + v2 + v3,
// selected at compile time. The trailing comments list register lanes from
// the highest (lane 3) to the lowest (lane 0); only lane 0 of the final
// result is returned.
3388 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
// Variant using _mm_movehdup_ps (needs SSE3 or the NEON mapping).
3389 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3390 : {
3391 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3392 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3393 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3394 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3395 : return _mm_cvtss_f32(sums);
3396 : }
3397 : #else
// Variant restricted to the intrinsics used below (_mm_movehl_ps,
// _mm_shuffle_ps, _mm_add_ps / _mm_add_ss).
3398 142421000 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3399 : {
3400 142421000 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3401 142421000 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3402 142421000 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3403 142421000 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3404 142421000 : return _mm_cvtss_f32(sums);
3405 : }
3406 : #endif
3407 :
3408 : #endif // define USE_SSE2
3409 :
3410 : /************************************************************************/
3411 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3412 : /************************************************************************/
3413 :
3414 : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3415 : // because there are a few assumptions about those types.
3416 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3417 : // perf benefit.
3418 :
// Bicubic interpolation of a real-valued band of pixel type T when validity is
// described by poWK->pafUnifiedSrcDensity, which is read directly.
// That array is dereferenced without a null check, so it must be non-null.
//
// Falls back to GWKBilinearResample4Sample() when the 4x4 window does not fit
// strictly inside the image or when any density in the window is below
// SRC_DENSITY_THRESHOLD_FLOAT.
//
// poWK           - warp kernel providing the source image and density array.
// iBand          - source band index.
// dfSrcX, dfSrcY - source position in pixel coordinates.
// pdfDensity     - output: interpolated density.
// pdfReal        - output: interpolated value (as double, not rounded to T).
//
// Returns true on the cubic path; on fallback the return value of
// GWKBilinearResample4Sample() is forwarded unchanged.
3419 : template <class T>
3420 389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3421 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3422 : double *pdfDensity, double *pdfReal)
3423 : {
// The casts truncate towards zero; positions truncating to index 0 are caught
// by the border test below.
3424 389755 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3425 389755 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3426 389755 : const GPtrDiff_t iSrcOffset =
3427 389755 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3428 :
3429 : // Get the bilinear interpolation at the image borders.
3430 389755 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3431 387271 : iSrcY + 2 >= poWK->nSrcYSize)
3432 : {
// The bilinear helper always writes an imaginary part; give it a scratch
// location.
3433 2484 : double adfImagIgnored[4] = {};
3434 2484 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3435 2484 : pdfDensity, pdfReal, adfImagIgnored);
3436 : }
3437 :
// Optional float32 SSE implementation. It is only compiled when both
// USE_SSE_CUBIC_IMPL and USE_SSE2 are defined; the comment ahead of this
// function states that USE_SSE_CUBIC_IMPL is not defined.
3438 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3439 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3440 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3441 :
3442 : // TODO(schwehr): Explain the magic numbers.
// NOTE(review): the offset added below is (address & 0xf) counted in floats,
// i.e. 4 * (address & 0xf) bytes. For an address that is 4 bytes past a
// 16-byte boundary this advances by 16 bytes, which neither produces the
// 16-byte alignment _mm_load_ps() requires nor keeps pafAligned + 8 + 3 inside
// the 13-element afTemp array. Verify before enabling USE_SSE_CUBIC_IMPL.
3443 : float afTemp[4 + 4 + 4 + 1];
3444 : float *pafAligned =
3445 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3446 : float *pafCoeffs = pafAligned;
3447 : float *pafDensity = pafAligned + 4;
3448 : float *pafValue = pafAligned + 8;
3449 :
// Same weight polynomials as GWKCubicComputeWeights(), evaluated in float.
3450 : const float fHalfDeltaX = 0.5f * fDeltaX;
3451 : const float fThreeDeltaX = 3.0f * fDeltaX;
3452 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3453 :
3454 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3455 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3456 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3457 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3458 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3459 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3460 :
// Horizontal pass over the four rows; xmmMaskLowDensity collects, lane by
// lane, whether any density in the window was below the threshold.
3461 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3462 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3463 : i++, iOffset += poWK->nSrcXSize)
3464 : {
3465 : const __m128 xmmDensity =
3466 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3467 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3468 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3469 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3470 :
3471 : const __m128 xmmValues =
3472 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3473 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3474 : }
// At least one low-density pixel in the window: use bilinear instead.
3475 : if (_mm_movemask_ps(xmmMaskLowDensity))
3476 : {
3477 : double adfImagIgnored[4] = {};
3478 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3479 : pdfDensity, pdfReal, adfImagIgnored);
3480 : }
3481 :
// Vertical pass: reuse pafCoeffs for the Y weights.
3482 : const float fHalfDeltaY = 0.5f * fDeltaY;
3483 : const float fThreeDeltaY = 3.0f * fDeltaY;
3484 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3485 :
3486 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3487 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3488 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3489 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3490 :
3491 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3492 :
3493 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3494 : const __m128 xmmValue = _mm_load_ps(pafValue);
3495 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3496 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3497 :
3498 : // We did all above computations on float32 whereas the general case is
3499 : // float64. Not sure if one is fundamentally more correct than the other
3500 : // one, but we want our optimization to give the same result as the
3501 : // general case as much as possible, so if the resulting value is
3502 : // close to some_int_value + 0.5, redo the computation with the general
3503 : // case.
3504 : // Note: If other types than Byte or UInt16, will need changes.
// Keep the float32 result unless it lies within .007 of a x.5 rounding
// boundary; otherwise fall through to the float64 code below.
3505 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3506 : return true;
3507 :
3508 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3509 :
// General float64 path.
3510 387271 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3511 387271 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3512 :
// Per-row results of the horizontal convolution (rows iSrcY-1 .. iSrcY+2).
3513 387271 : double adfValueDens[4] = {};
3514 387271 : double adfValueReal[4] = {};
3515 :
3516 387271 : double adfCoeffsX[4] = {};
3517 387271 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3518 :
3519 387271 : double adfCoeffsY[4] = {};
3520 387271 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3521 :
3522 1930200 : for (GPtrDiff_t i = -1; i < 3; i++)
3523 : {
// First pixel of this window row (column iSrcX - 1).
3524 1544480 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
// The density test is only needed here when the SSE path above is not
// compiled; that path performs the same test before falling through.
3525 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3526 1544480 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3527 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3528 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 1] <
3529 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3530 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 2] <
3531 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3532 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 3] <
3533 : SRC_DENSITY_THRESHOLD_FLOAT)
3534 : {
3535 1551 : double adfImagIgnored[4] = {};
3536 1551 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3537 : pdfDensity, pdfReal,
3538 1551 : adfImagIgnored);
3539 : }
3540 : #endif
3541 :
// Horizontal pass, reading density and pixel values straight from the source
// buffers.
3542 3085860 : adfValueDens[i + 1] =
3543 1542930 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3544 :
3545 1542930 : adfValueReal[i + 1] = CONVOL4(
3546 : adfCoeffsX,
3547 1542930 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3548 : }
3549 :
// Vertical pass.
3550 385720 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3551 385720 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3552 :
3553 385720 : return true;
3554 : }
3555 :
3556 : /************************************************************************/
3557 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3558 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3559 : /************************************************************************/
3560 :
3561 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3562 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3563 : double *pdfDensity, double *pdfReal)
3564 :
3565 : {
3566 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3567 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3568 0 : const GPtrDiff_t iSrcOffset =
3569 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3570 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3571 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3572 :
3573 : // Get the bilinear interpolation at the image borders.
3574 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3575 0 : iSrcY + 2 >= poWK->nSrcYSize)
3576 : {
3577 0 : double adfImagIgnored[4] = {};
3578 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3579 0 : pdfDensity, pdfReal, adfImagIgnored);
3580 : }
3581 :
3582 0 : double adfCoeffsX[4] = {};
3583 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3584 :
3585 0 : double adfCoeffsY[4] = {};
3586 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3587 :
3588 0 : double adfValueDens[4] = {};
3589 0 : double adfValueReal[4] = {};
3590 0 : double adfDensity[4] = {};
3591 0 : double adfReal[4] = {};
3592 0 : double adfImagIgnored[4] = {};
3593 :
3594 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3595 : {
3596 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3597 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3598 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3599 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3600 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3601 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3602 : {
3603 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3604 : pdfDensity, pdfReal,
3605 0 : adfImagIgnored);
3606 : }
3607 :
3608 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3609 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3610 : }
3611 :
3612 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3613 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3614 :
3615 0 : return true;
3616 : }
3617 :
3618 : template <class T>
3619 2301250 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3620 : int iBand, double dfSrcX,
3621 : double dfSrcY, T *pValue)
3622 :
3623 : {
3624 2301250 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3625 2301250 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3626 2301250 : const GPtrDiff_t iSrcOffset =
3627 2301250 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3628 2301250 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3629 2301250 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3630 2301250 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3631 2301250 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3632 :
3633 : // Get the bilinear interpolation at the image borders.
3634 2301250 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3635 1883214 : iSrcY + 2 >= poWK->nSrcYSize)
3636 490439 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3637 490439 : pValue);
3638 :
3639 1810811 : double adfCoeffs[4] = {};
3640 1810811 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3641 :
3642 1810811 : double adfValue[4] = {};
3643 :
3644 9054050 : for (GPtrDiff_t i = -1; i < 3; i++)
3645 : {
3646 7243246 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3647 :
3648 7243246 : adfValue[i + 1] = CONVOL4(
3649 : adfCoeffs,
3650 7243246 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3651 : }
3652 :
3653 : const double dfValue =
3654 1810811 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3655 : adfValue[1], adfValue[2], adfValue[3]);
3656 :
3657 1810811 : *pValue = GWKClampValueT<T>(dfValue);
3658 :
3659 1810811 : return true;
3660 : }
3661 :
/************************************************************************/
/*                           GWKLanczosSinc()                           */
/************************************************************************/

/*
 * Lanczos windowed sinc interpolation kernel with radius r = 3:
 *
 *          / 1,                    if x = 0
 *   L(x) = | sinc(x) * sinc(x/r),  if 0 < |x| < r
 *          \ 0,                    otherwise
 *
 * where sinc(x) = sin(PI * x) / (PI * x).
 *
 * Note: this function evaluates the product form for every non-zero x; it
 * does not apply the |x| < r cut-off itself.
 */

static double GWKLanczosSinc(double dfX)
{
    if (dfX == 0.0)
    {
        return 1.0;
    }

    const double dfPiX = M_PI * dfX;
    const double dfPiXOver3 = dfPiX / 3;
    // Denominator of sinc(x) * sinc(x / 3): (PI x) * (PI x / 3).
    const double dfDenominator = dfPiX * dfPiXOver3;

    // Only one sin() call is needed: with s = sin(PI x / 3), the identity
    // sin(3a) = 3 sin(a) - 4 sin^3(a) gives
    // sin(PI x) * sin(PI x / 3) = (3 - 4 s^2) * s^2.
    const double dfS = sin(dfPiXOver3);
    const double dfS2 = dfS * dfS;
    const double dfNumerator = (3 - 4 * dfS2) * dfS2;

    return dfNumerator / dfDenominator;
}
3693 :
// Replaces each of the four abscissas in padfValues, in place, by its
// Lanczos (radius 3) weight, and returns the sum of the four weights.
// A zero abscissa maps to a weight of exactly 1.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (int k = 0; k < 4; ++k)
    {
        const double dfX = padfValues[k];
        if (dfX == 0.0)
        {
            padfValues[k] = 1.0;
            continue;
        }

        const double dfPiX = M_PI * dfX;
        const double dfPiXOver3 = dfPiX / 3;
        const double dfDenominator = dfPiX * dfPiXOver3;

        // With s = sin(PI x / 3) and sin(3a) = 3 sin(a) - 4 sin^3(a):
        // sin(PI x) * sin(PI x / 3) = (3 - 4 s^2) * s^2.
        const double dfS = sin(dfPiXOver3);
        const double dfS2 = dfS * dfS;
        const double dfNumerator = (3 - 4 * dfS2) * dfS2;

        padfValues[k] = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3718 :
/************************************************************************/
/*                            GWKBilinear()                             */
/************************************************************************/

// Triangle (bilinear) kernel: 1 - |x| for |x| <= 1, and 0 elsewhere.
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3731 :
// Replaces each of the four abscissas in padfValues, in place, by its
// triangle-kernel weight (1 - |x| when |x| <= 1, otherwise 0) and returns
// the sum of the four weights.
static double GWKBilinear4Values(double *padfValues)
{
    for (int k = 0; k < 4; ++k)
    {
        const double dfDistance = fabs(padfValues[k]);
        padfValues[k] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3756 :
3757 : /************************************************************************/
3758 : /* GWKCubic() */
3759 : /************************************************************************/
3760 :
3761 83022 : static double GWKCubic(double dfX)
3762 : {
3763 83022 : return CubicKernel(dfX);
3764 : }
3765 :
// Replaces each of the four abscissas in padfValues, in place, by its cubic
// convolution weight and returns the sum of the four weights.
//
// For each abscissa x, with a = |x|:
//   a <= 1      : x^2 * (1.5 a - 2.5) + 1
//   1 < a <= 2  : x^2 * (-0.5 a + 2.5) - 4 a + 2
//   otherwise   : 0
static double GWKCubic4Values(double *padfValues)
{
    double adfWeight[4] = {0.0, 0.0, 0.0, 0.0};

    for (int k = 0; k < 4; ++k)
    {
        const double dfX = padfValues[k];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        if (dfAbsX <= 1.0)
        {
            adfWeight[k] = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            adfWeight[k] = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }
    }

    for (int k = 0; k < 4; ++k)
    {
        padfValues[k] = adfWeight[k];
    }

    return adfWeight[0] + adfWeight[1] + adfWeight[2] + adfWeight[3];
}
3807 :
/************************************************************************/
/*                            GWKBSpline()                              */
/************************************************************************/

// https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
// Equation 8 with (B,C)=(1,0)
// 1/6 * ( 3 * |x|^3 -  6 * |x|^2 + 4)        |x| < 1
// 1/6 * (    -|x|^3 +  6 |x|^2  - 12|x| + 8) |x| >= 1 and |x| < 2
//
// The value returned here is NOT multiplied by the 1/6 factor; it is built
// as a sum of truncated cubes:
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes when its base is strictly positive, and
// the whole result is 0 when x + 2 is not strictly positive.

static double GWKBSpline(double x)
{
    const double xp2 = x + 2.0;
    const double xp1 = x + 1.0;
    const double xm1 = x - 1.0;

    // Computed unconditionally, ahead of the tests, as in most cases it
    // will be needed.
    const double xp2c = xp2 * xp2 * xp2;

    if (!(xp2 > 0.0))
    {
        return 0.0;
    }

    // Accumulates the terms in (x-1), x and (x+1), innermost first.
    double dfInner = 0.0;
    if (xp1 > 0.0)
    {
        double dfFromX = 0.0;
        if (x > 0.0)
        {
            double dfFromXm1 = 0.0;
            if (xm1 > 0.0)
            {
                dfFromXm1 = -4.0 * xm1 * xm1 * xm1;
            }
            dfFromX = dfFromXm1 + 6.0 * x * x * x;
        }
        dfInner = dfFromX + -4.0 * xp1 * xp1 * xp1;
    }

    return dfInner + xp2c;  // * 0.166666666666666666666
}
3840 :
// Replaces each of the four abscissas in padfValues, in place, by its
// B-spline weight and returns the sum of the four weights. The weights are
// not multiplied by the 1/6 factor of the B-spline formula.
//
// Each weight is the sum of truncated cubes
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where a term only contributes when its base is strictly positive, and the
// weight is 0 when x + 2 is not strictly positive.
static double GWKBSpline4Values(double *padfValues)
{
    for (int k = 0; k < 4; ++k)
    {
        const double x = padfValues[k];
        const double xp2 = x + 2.0;
        const double xp1 = x + 1.0;
        const double xm1 = x - 1.0;

        // Computed unconditionally, ahead of the tests, as in most cases it
        // will be needed.
        const double xp2c = xp2 * xp2 * xp2;

        double dfWeight = 0.0;
        if (xp2 > 0.0)
        {
            // Accumulates the terms in (x-1), x and (x+1), innermost first.
            double dfInner = 0.0;
            if (xp1 > 0.0)
            {
                double dfFromX = 0.0;
                if (x > 0.0)
                {
                    double dfFromXm1 = 0.0;
                    if (xm1 > 0.0)
                    {
                        dfFromXm1 = -4.0 * xm1 * xm1 * xm1;
                    }
                    dfFromX = dfFromXm1 + 6.0 * x * x * x;
                }
                dfInner = dfFromX + -4.0 * xp1 * xp1 * xp1;
            }
            dfWeight = dfInner + xp2c;  // * 0.166666666666666666666
        }

        padfValues[k] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3870 : /************************************************************************/
3871 : /* GWKResampleWrkStruct */
3872 : /************************************************************************/
3873 :
3874 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3875 :
3876 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3877 : double dfSrcX, double dfSrcY,
3878 : double *pdfDensity, double *pdfReal,
3879 : double *pdfImag,
3880 : GWKResampleWrkStruct *psWrkStruct);
3881 :
3882 : struct _GWKResampleWrkStruct
3883 : {
3884 : pfnGWKResampleType pfnGWKResample;
3885 :
3886 : // Space for saved X weights.
3887 : double *padfWeightsX;
3888 : bool *pabCalcX;
3889 :
3890 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3891 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3892 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3893 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3894 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3895 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3896 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3897 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3898 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3899 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3900 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3901 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3902 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3903 :
3904 : // Space for saving a row of pixels.
3905 : double *padfRowDensity;
3906 : double *padfRowReal;
3907 : double *padfRowImag;
3908 : };
3909 :
3910 : /************************************************************************/
3911 : /* GWKResampleCreateWrkStruct() */
3912 : /************************************************************************/
3913 :
3914 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3915 : double dfSrcY, double *pdfDensity, double *pdfReal,
3916 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3917 :
3918 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3919 : double dfSrcX, double dfSrcY,
3920 : double *pdfDensity, double *pdfReal,
3921 : double *pdfImag,
3922 : GWKResampleWrkStruct *psWrkStruct);
3923 :
3924 401 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3925 : {
3926 401 : const int nXDist = (poWK->nXRadius + 1) * 2;
3927 401 : const int nYDist = (poWK->nYRadius + 1) * 2;
3928 :
3929 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3930 401 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3931 :
3932 : // Alloc space for saved X weights.
3933 401 : psWrkStruct->padfWeightsX =
3934 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3935 401 : psWrkStruct->pabCalcX =
3936 401 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3937 :
3938 401 : psWrkStruct->padfWeightsY =
3939 401 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3940 401 : psWrkStruct->iLastSrcX = -10;
3941 401 : psWrkStruct->iLastSrcY = -10;
3942 401 : psWrkStruct->dfLastDeltaX = -10;
3943 401 : psWrkStruct->dfLastDeltaY = -10;
3944 :
3945 : // Alloc space for saving a row of pixels.
3946 401 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3947 365 : poWK->panUnifiedSrcValid == nullptr &&
3948 342 : poWK->papanBandSrcValid == nullptr)
3949 : {
3950 342 : psWrkStruct->padfRowDensity = nullptr;
3951 : }
3952 : else
3953 : {
3954 59 : psWrkStruct->padfRowDensity =
3955 59 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3956 : }
3957 401 : psWrkStruct->padfRowReal =
3958 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3959 401 : psWrkStruct->padfRowImag =
3960 401 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3961 :
3962 401 : if (poWK->eResample == GRA_Lanczos)
3963 : {
3964 65 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3965 :
3966 65 : if (poWK->dfXScale < 1)
3967 : {
3968 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3969 4 : psWrkStruct->dfSinPiXScaleOver3 =
3970 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3971 4 : psWrkStruct->dfCosPiXScaleOver3);
3972 : // "Naive":
3973 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3974 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3975 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3976 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3977 4 : psWrkStruct->dfCosPiXScaleOver3 -
3978 4 : 3) *
3979 4 : psWrkStruct->dfCosPiXScaleOver3;
3980 4 : psWrkStruct->dfSinPiXScale = sqrt(
3981 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3982 : }
3983 :
3984 65 : if (poWK->dfYScale < 1)
3985 : {
3986 12 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3987 12 : psWrkStruct->dfSinPiYScaleOver3 =
3988 12 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3989 12 : psWrkStruct->dfCosPiYScaleOver3);
3990 : // "Naive":
3991 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3992 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3993 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3994 12 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3995 12 : psWrkStruct->dfCosPiYScaleOver3 -
3996 12 : 3) *
3997 12 : psWrkStruct->dfCosPiYScaleOver3;
3998 12 : psWrkStruct->dfSinPiYScale = sqrt(
3999 12 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
4000 : }
4001 : }
4002 : else
4003 336 : psWrkStruct->pfnGWKResample = GWKResample;
4004 :
4005 401 : return psWrkStruct;
4006 : }
4007 :
4008 : /************************************************************************/
4009 : /* GWKResampleDeleteWrkStruct() */
4010 : /************************************************************************/
4011 :
4012 401 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
4013 : {
4014 401 : CPLFree(psWrkStruct->padfWeightsX);
4015 401 : CPLFree(psWrkStruct->padfWeightsY);
4016 401 : CPLFree(psWrkStruct->pabCalcX);
4017 401 : CPLFree(psWrkStruct->padfRowDensity);
4018 401 : CPLFree(psWrkStruct->padfRowReal);
4019 401 : CPLFree(psWrkStruct->padfRowImag);
4020 401 : CPLFree(psWrkStruct);
4021 401 : }
4022 :
4023 : /************************************************************************/
4024 : /* GWKResample() */
4025 : /************************************************************************/
4026 :
4027 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4028 : double dfSrcY, double *pdfDensity, double *pdfReal,
4029 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
4030 :
4031 : {
4032 : // Save as local variables to avoid following pointers in loops.
4033 239383 : const int nSrcXSize = poWK->nSrcXSize;
4034 239383 : const int nSrcYSize = poWK->nSrcYSize;
4035 :
4036 239383 : double dfAccumulatorReal = 0.0;
4037 239383 : double dfAccumulatorImag = 0.0;
4038 239383 : double dfAccumulatorDensity = 0.0;
4039 239383 : double dfAccumulatorWeight = 0.0;
4040 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4041 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4042 239383 : const GPtrDiff_t iSrcOffset =
4043 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4044 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4045 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4046 :
4047 239383 : const double dfXScale = poWK->dfXScale;
4048 239383 : const double dfYScale = poWK->dfYScale;
4049 :
4050 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
4051 :
4052 : // Space for saved X weights.
4053 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
4054 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
4055 :
4056 : // Space for saving a row of pixels.
4057 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
4058 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
4059 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
4060 :
4061 : // Mark as needing calculation (don't calculate the weights yet,
4062 : // because a mask may render it unnecessary).
4063 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
4064 :
4065 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
4066 239383 : CPLAssert(pfnGetWeight);
4067 :
4068 : // Skip sampling over edge of image.
4069 239383 : int j = poWK->nFiltInitY;
4070 239383 : int jMax = poWK->nYRadius;
4071 239383 : if (iSrcY + j < 0)
4072 566 : j = -iSrcY;
4073 239383 : if (iSrcY + jMax >= nSrcYSize)
4074 662 : jMax = nSrcYSize - iSrcY - 1;
4075 :
4076 239383 : int iMin = poWK->nFiltInitX;
4077 239383 : int iMax = poWK->nXRadius;
4078 239383 : if (iSrcX + iMin < 0)
4079 566 : iMin = -iSrcX;
4080 239383 : if (iSrcX + iMax >= nSrcXSize)
4081 659 : iMax = nSrcXSize - iSrcX - 1;
4082 :
4083 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
4084 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
4085 :
4086 239383 : GPtrDiff_t iRowOffset =
4087 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
4088 :
4089 : // Loop over pixel rows in the kernel.
4090 1445930 : for (; j <= jMax; ++j)
4091 : {
4092 1206540 : iRowOffset += nSrcXSize;
4093 :
4094 : // Get pixel values.
4095 : // We can potentially read extra elements after the "normal" end of the
4096 : // source arrays, but the contract of papabySrcImage[iBand],
4097 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4098 : // is to have WARP_EXTRA_ELTS reserved at their end.
4099 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4100 : padfRowDensity, padfRowReal, padfRowImag))
4101 72 : continue;
4102 :
4103 : // Calculate the Y weight.
4104 : double dfWeight1 = (bYScaleBelow1)
4105 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
4106 1600 : : pfnGetWeight(j - dfDeltaY);
4107 :
4108 : // Iterate over pixels in row.
4109 1206470 : double dfAccumulatorRealLocal = 0.0;
4110 1206470 : double dfAccumulatorImagLocal = 0.0;
4111 1206470 : double dfAccumulatorDensityLocal = 0.0;
4112 1206470 : double dfAccumulatorWeightLocal = 0.0;
4113 :
4114 7317420 : for (int i = iMin; i <= iMax; ++i)
4115 : {
4116 : // Skip sampling if pixel has zero density.
4117 6110940 : if (padfRowDensity != nullptr &&
4118 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4119 546 : continue;
4120 :
4121 6110400 : double dfWeight2 = 0.0;
4122 :
4123 : // Make or use a cached set of weights for this row.
4124 6110400 : if (pabCalcX[i - iMin])
4125 : {
4126 : // Use saved weight value instead of recomputing it.
4127 4903920 : dfWeight2 = padfWeightsX[i - iMin];
4128 : }
4129 : else
4130 : {
4131 : // Calculate & save the X weight.
4132 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
4133 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
4134 1600 : : pfnGetWeight(i - dfDeltaX);
4135 :
4136 1206480 : pabCalcX[i - iMin] = true;
4137 : }
4138 :
4139 : // Accumulate!
4140 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
4141 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
4142 6110400 : if (padfRowDensity != nullptr)
4143 76731 : dfAccumulatorDensityLocal +=
4144 76731 : padfRowDensity[i - iMin] * dfWeight2;
4145 6110400 : dfAccumulatorWeightLocal += dfWeight2;
4146 : }
4147 :
4148 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
4149 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
4150 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
4151 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
4152 : }
4153 :
4154 239383 : if (dfAccumulatorWeight < 0.000001 ||
4155 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
4156 : {
4157 0 : *pdfDensity = 0.0;
4158 0 : return false;
4159 : }
4160 :
4161 : // Calculate the output taking into account weighting.
4162 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4163 : {
4164 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
4165 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
4166 239380 : if (padfRowDensity != nullptr)
4167 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
4168 : else
4169 237496 : *pdfDensity = 1.0;
4170 : }
4171 : else
4172 : {
4173 3 : *pdfReal = dfAccumulatorReal;
4174 3 : *pdfImag = dfAccumulatorImag;
4175 3 : if (padfRowDensity != nullptr)
4176 3 : *pdfDensity = dfAccumulatorDensity;
4177 : else
4178 0 : *pdfDensity = 1.0;
4179 : }
4180 :
4181 239383 : return true;
4182 : }
4183 :
4184 : /************************************************************************/
4185 : /* GWKResampleOptimizedLanczos() */
4186 : /************************************************************************/
4187 :
4188 634574 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
4189 : double dfSrcX, double dfSrcY,
4190 : double *pdfDensity, double *pdfReal,
4191 : double *pdfImag,
4192 : GWKResampleWrkStruct *psWrkStruct)
4193 :
4194 : {
4195 : // Save as local variables to avoid following pointers in loops.
4196 634574 : const int nSrcXSize = poWK->nSrcXSize;
4197 634574 : const int nSrcYSize = poWK->nSrcYSize;
4198 :
4199 634574 : double dfAccumulatorReal = 0.0;
4200 634574 : double dfAccumulatorImag = 0.0;
4201 634574 : double dfAccumulatorDensity = 0.0;
4202 634574 : double dfAccumulatorWeight = 0.0;
4203 634574 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4204 634574 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4205 634574 : const GPtrDiff_t iSrcOffset =
4206 634574 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4207 634574 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4208 634574 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4209 :
4210 634574 : const double dfXScale = poWK->dfXScale;
4211 634574 : const double dfYScale = poWK->dfYScale;
4212 :
4213 : // Space for saved X weights.
4214 634574 : double *const padfWeightsXShifted =
4215 634574 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
4216 634574 : double *const padfWeightsYShifted =
4217 634574 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
4218 :
4219 : // Space for saving a row of pixels.
4220 634574 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
4221 634574 : double *const padfRowReal = psWrkStruct->padfRowReal;
4222 634574 : double *const padfRowImag = psWrkStruct->padfRowImag;
4223 :
4224 : // Skip sampling over edge of image.
4225 634574 : int jMin = poWK->nFiltInitY;
4226 634574 : int jMax = poWK->nYRadius;
4227 634574 : if (iSrcY + jMin < 0)
4228 17334 : jMin = -iSrcY;
4229 634574 : if (iSrcY + jMax >= nSrcYSize)
4230 5638 : jMax = nSrcYSize - iSrcY - 1;
4231 :
4232 634574 : int iMin = poWK->nFiltInitX;
4233 634574 : int iMax = poWK->nXRadius;
4234 634574 : if (iSrcX + iMin < 0)
4235 19595 : iMin = -iSrcX;
4236 634574 : if (iSrcX + iMax >= nSrcXSize)
4237 6817 : iMax = nSrcXSize - iSrcX - 1;
4238 :
4239 634574 : if (dfXScale < 1.0)
4240 : {
4241 462945 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
4242 260083 : iMin++;
4243 263534 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
4244 60672 : iMax--;
4245 :
4246 : // clang-format off
4247 : /*
4248 : Naive version:
4249 : for (int i = iMin; i <= iMax; ++i)
4250 : {
4251 : psWrkStruct->padfWeightsXShifted[i] =
4252 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
4253 : }
4254 :
4255 : but given that:
4256 :
4257 : GWKLanczosSinc(x):
4258 : if (dfX == 0.0)
4259 : return 1.0;
4260 :
4261 : const double dfPIX = M_PI * dfX;
4262 : const double dfPIXoverR = dfPIX / 3;
4263 : const double dfPIX2overR = dfPIX * dfPIXoverR;
4264 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
4265 :
4266 : and
4267 : sin (a + b) = sin a cos b + cos a sin b.
4268 : cos (a + b) = cos a cos b - sin a sin b.
4269 :
4270 : we can skip any sin() computation within the loop
4271 : */
4272 : // clang-format on
4273 :
4274 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
4275 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4276 : {
4277 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4278 :
4279 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4280 71790 : double dfCosOver3 = cos(dfPIXover3);
4281 71790 : double dfSinOver3 = sin(dfPIXover3);
4282 :
4283 : // "Naive":
4284 : // double dfSin = sin( M_PI * dfX );
4285 : // double dfCos = cos( M_PI * dfX );
4286 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4287 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4288 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4289 :
4290 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4291 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4292 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4293 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4294 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4295 71790 : padfWeightsXShifted[iMin] =
4296 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4297 683646 : for (int i = iMin + 1; i <= iMax; ++i)
4298 : {
4299 611856 : dfX += dfXScale;
4300 611856 : const double dfNewSin =
4301 611856 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4302 611856 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4303 611856 : dfCosOver3 * dfSinPiXScaleOver3;
4304 611856 : padfWeightsXShifted[i] =
4305 : dfX == 0
4306 611856 : ? 1.0
4307 611856 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4308 611856 : const double dfNewCos =
4309 611856 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4310 611856 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4311 611856 : dfSinOver3 * dfSinPiXScaleOver3;
4312 611856 : dfSin = dfNewSin;
4313 611856 : dfCos = dfNewCos;
4314 611856 : dfSinOver3 = dfNewSinOver3;
4315 611856 : dfCosOver3 = dfNewCosOver3;
4316 : }
4317 :
4318 71790 : psWrkStruct->iLastSrcX = iSrcX;
4319 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4320 : }
4321 : }
4322 : else
4323 : {
4324 789372 : while (iMin - dfDeltaX < -3.0)
4325 357660 : iMin++;
4326 431712 : while (iMax - dfDeltaX > 3.0)
4327 0 : iMax--;
4328 :
4329 431712 : if (iSrcX != psWrkStruct->iLastSrcX ||
4330 225330 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4331 : {
4332 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4333 : // following trigonometric formulas.
4334 :
4335 : // TODO(schwehr): Move this somewhere where it can be rendered at
4336 : // LaTeX.
4337 : // clang-format off
4338 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4339 : // cos(M_PI * dfBase) * sin(M_PI * k)
4340 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4341 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4342 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4343 :
4344 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4345 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4346 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4347 : // clang-format on
4348 :
4349 420092 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4350 420092 : const double dfSin2PIDeltaXOver3 =
4351 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4352 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4353 420092 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4354 420092 : const double dfSinPIDeltaX =
4355 420092 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4356 420092 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4357 420092 : const double dfInvPI2Over3xSinPIDeltaX =
4358 : dfInvPI2Over3 * dfSinPIDeltaX;
4359 420092 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4360 420092 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4361 420092 : const double dfSinPIOver3 = 0.8660254037844386;
4362 420092 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4363 420092 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4364 : const double padfCst[] = {
4365 420092 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4366 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4367 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4368 420092 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4369 420092 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4370 :
4371 2974940 : for (int i = iMin; i <= iMax; ++i)
4372 : {
4373 2554850 : const double dfX = i - dfDeltaX;
4374 2554850 : if (dfX == 0.0)
4375 58282 : padfWeightsXShifted[i] = 1.0;
4376 : else
4377 2496570 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4378 : #if DEBUG_VERBOSE
4379 : // TODO(schwehr): AlmostEqual.
4380 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4381 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4382 : #endif
4383 : }
4384 :
4385 420092 : psWrkStruct->iLastSrcX = iSrcX;
4386 420092 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4387 : }
4388 : }
4389 :
4390 634574 : if (dfYScale < 1.0)
4391 : {
4392 15754 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4393 9500 : jMin++;
4394 9854 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4395 3600 : jMax--;
4396 :
4397 : // clang-format off
4398 : /*
4399 : Naive version:
4400 : for (int j = jMin; j <= jMax; ++j)
4401 : {
4402 : padfWeightsYShifted[j] =
4403 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4404 : }
4405 : */
4406 : // clang-format on
4407 :
4408 6254 : if (iSrcY != psWrkStruct->iLastSrcY ||
4409 6127 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4410 : {
4411 127 : double dfY = (jMin - dfDeltaY) * dfYScale;
4412 :
4413 127 : double dfPIYover3 = M_PI / 3 * dfY;
4414 127 : double dfCosOver3 = cos(dfPIYover3);
4415 127 : double dfSinOver3 = sin(dfPIYover3);
4416 :
4417 : // "Naive":
4418 : // double dfSin = sin( M_PI * dfY );
4419 : // double dfCos = cos( M_PI * dfY );
4420 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4421 127 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4422 127 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4423 :
4424 127 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4425 127 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4426 127 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4427 127 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4428 127 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4429 127 : padfWeightsYShifted[jMin] =
4430 127 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4431 1210 : for (int j = jMin + 1; j <= jMax; ++j)
4432 : {
4433 1083 : dfY += dfYScale;
4434 1083 : const double dfNewSin =
4435 1083 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4436 1083 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4437 1083 : dfCosOver3 * dfSinPiYScaleOver3;
4438 1083 : padfWeightsYShifted[j] =
4439 : dfY == 0
4440 1083 : ? 1.0
4441 1083 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4442 1083 : const double dfNewCos =
4443 1083 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4444 1083 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4445 1083 : dfSinOver3 * dfSinPiYScaleOver3;
4446 1083 : dfSin = dfNewSin;
4447 1083 : dfCos = dfNewCos;
4448 1083 : dfSinOver3 = dfNewSinOver3;
4449 1083 : dfCosOver3 = dfNewCosOver3;
4450 : }
4451 :
4452 127 : psWrkStruct->iLastSrcY = iSrcY;
4453 127 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4454 : }
4455 : }
4456 : else
4457 : {
4458 1106550 : while (jMin - dfDeltaY < -3.0)
4459 478232 : jMin++;
4460 628320 : while (jMax - dfDeltaY > 3.0)
4461 0 : jMax--;
4462 :
4463 628320 : if (iSrcY != psWrkStruct->iLastSrcY ||
4464 627488 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4465 : {
4466 7198 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4467 7198 : const double dfSin2PIDeltaYOver3 =
4468 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4469 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4470 7198 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4471 7198 : const double dfSinPIDeltaY =
4472 7198 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4473 7198 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4474 7198 : const double dfInvPI2Over3xSinPIDeltaY =
4475 : dfInvPI2Over3 * dfSinPIDeltaY;
4476 7198 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4477 7198 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4478 7198 : const double dfSinPIOver3 = 0.8660254037844386;
4479 7198 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4480 7198 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4481 : const double padfCst[] = {
4482 7198 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4483 7198 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4484 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4485 7198 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4486 7198 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4487 :
4488 47777 : for (int j = jMin; j <= jMax; ++j)
4489 : {
4490 40579 : const double dfY = j - dfDeltaY;
4491 40579 : if (dfY == 0.0)
4492 468 : padfWeightsYShifted[j] = 1.0;
4493 : else
4494 40111 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4495 : #if DEBUG_VERBOSE
4496 : // TODO(schwehr): AlmostEqual.
4497 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4498 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4499 : #endif
4500 : }
4501 :
4502 7198 : psWrkStruct->iLastSrcY = iSrcY;
4503 7198 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4504 : }
4505 : }
4506 :
4507 : // If we have no density information, we can simply compute the
4508 : // accumulated weight.
4509 634574 : if (padfRowDensity == nullptr)
4510 : {
4511 634574 : double dfRowAccWeight = 0.0;
4512 5159250 : for (int i = iMin; i <= iMax; ++i)
4513 : {
4514 4524680 : dfRowAccWeight += padfWeightsXShifted[i];
4515 : }
4516 634574 : double dfColAccWeight = 0.0;
4517 4564130 : for (int j = jMin; j <= jMax; ++j)
4518 : {
4519 3929550 : dfColAccWeight += padfWeightsYShifted[j];
4520 : }
4521 634574 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4522 : }
4523 :
4524 : // Loop over pixel rows in the kernel.
4525 :
4526 634574 : if (poWK->eWorkingDataType == GDT_UInt8 && !poWK->panUnifiedSrcValid &&
4527 633954 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4528 : !padfRowDensity)
4529 : {
4530 : // Optimization for Byte case without any masking/alpha
4531 :
4532 633954 : if (dfAccumulatorWeight < 0.000001)
4533 : {
4534 0 : *pdfDensity = 0.0;
4535 0 : return false;
4536 : }
4537 :
4538 633954 : const GByte *pSrc =
4539 633954 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4540 633954 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4541 :
4542 : #if defined(USE_SSE2)
4543 633954 : if (iMax - iMin + 1 == 6)
4544 : {
4545 : // This is just an optimized version of the general case in
4546 : // the else clause.
4547 :
4548 359916 : pSrc += iMin;
4549 359916 : int j = jMin;
4550 : const auto fourXWeights =
4551 359916 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4552 :
4553 : // Process 2 lines at the same time.
4554 1424180 : for (; j < jMax; j += 2)
4555 : {
4556 : const XMMReg4Double v_acc =
4557 1064270 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4558 : const XMMReg4Double v_acc2 =
4559 1064270 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4560 1064270 : const double dfRowAcc = v_acc.GetHorizSum();
4561 1064270 : const double dfRowAccEnd =
4562 1064270 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4563 1064270 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4564 1064270 : dfAccumulatorReal +=
4565 1064270 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4566 1064270 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4567 1064270 : const double dfRowAcc2End =
4568 1064270 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4569 1064270 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4570 1064270 : dfAccumulatorReal +=
4571 1064270 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4572 1064270 : pSrc += 2 * nSrcXSize;
4573 : }
4574 359916 : if (j == jMax)
4575 : {
4576 : // Process last line if there's an odd number of them.
4577 :
4578 : const XMMReg4Double v_acc =
4579 90039 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4580 90039 : const double dfRowAcc = v_acc.GetHorizSum();
4581 90039 : const double dfRowAccEnd =
4582 90039 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4583 90039 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4584 90039 : dfAccumulatorReal +=
4585 90039 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4586 : }
4587 : }
4588 : else
4589 : #endif
4590 : {
4591 1982080 : for (int j = jMin; j <= jMax; ++j)
4592 : {
4593 1708040 : int i = iMin;
4594 1708040 : double dfRowAcc1 = 0.0;
4595 1708040 : double dfRowAcc2 = 0.0;
4596 : // A bit of loop unrolling
4597 8474620 : for (; i < iMax; i += 2)
4598 : {
4599 6766580 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4600 6766580 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4601 : }
4602 1708040 : if (i == iMax)
4603 : {
4604 : // Process last column if there's an odd number of them.
4605 1188570 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4606 : }
4607 :
4608 1708040 : dfAccumulatorReal +=
4609 1708040 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4610 1708040 : pSrc += nSrcXSize;
4611 : }
4612 : }
4613 :
4614 : // Calculate the output taking into account weighting.
4615 633954 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4616 : {
4617 579748 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4618 579748 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4619 579748 : *pdfDensity = 1.0;
4620 : }
4621 : else
4622 : {
4623 54206 : *pdfReal = dfAccumulatorReal;
4624 54206 : *pdfDensity = 1.0;
4625 : }
4626 :
4627 633954 : return true;
4628 : }
4629 :
4630 620 : GPtrDiff_t iRowOffset =
4631 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4632 :
4633 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4634 :
4635 3560 : for (int j = jMin; j <= jMax; ++j)
4636 : {
4637 2940 : iRowOffset += nSrcXSize;
4638 :
4639 : // Get pixel values.
4640 : // We can potentially read extra elements after the "normal" end of the
4641 : // source arrays, but the contract of papabySrcImage[iBand],
4642 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4643 : // is to have WARP_EXTRA_ELTS reserved at their end.
4644 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4645 : padfRowDensity, padfRowReal, padfRowImag))
4646 0 : continue;
4647 :
4648 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4649 :
4650 : // Iterate over pixels in row.
4651 2940 : if (padfRowDensity != nullptr)
4652 : {
4653 0 : for (int i = iMin; i <= iMax; ++i)
4654 : {
4655 : // Skip sampling if pixel has zero density.
4656 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4657 0 : continue;
4658 :
4659 : // Use a cached set of weights for this row.
4660 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4661 :
4662 : // Accumulate!
4663 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4664 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4665 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4666 0 : dfAccumulatorWeight += dfWeight2;
4667 : }
4668 : }
4669 2940 : else if (bIsNonComplex)
4670 : {
4671 1764 : double dfRowAccReal = 0.0;
4672 10560 : for (int i = iMin; i <= iMax; ++i)
4673 : {
4674 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4675 :
4676 : // Accumulate!
4677 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4678 : }
4679 :
4680 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4681 : }
4682 : else
4683 : {
4684 1176 : double dfRowAccReal = 0.0;
4685 1176 : double dfRowAccImag = 0.0;
4686 7040 : for (int i = iMin; i <= iMax; ++i)
4687 : {
4688 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4689 :
4690 : // Accumulate!
4691 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4692 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4693 : }
4694 :
4695 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4696 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4697 : }
4698 : }
4699 :
4700 620 : if (dfAccumulatorWeight < 0.000001)
4701 : {
4702 0 : *pdfDensity = 0.0;
4703 0 : return false;
4704 : }
4705 620 : else if (padfRowDensity)
4706 : {
4707 0 : if (dfAccumulatorDensity < 0.000001)
4708 : {
4709 0 : *pdfDensity = 0.0;
4710 0 : return false;
4711 : }
4712 :
4713 : // TODO: previously we returned *pdfDensity when
4714 : // nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)
4715 : // that was initially introduced in
4716 : // https://github.com/OSGeo/gdal/commit/b68d31418f4826402dc44b52152c0493b682fea8
4717 : // but in scenarios like https://github.com/OSGeo/gdal/issues/14560
4718 : // this led to almost full removal of text printed on transparent
4719 : // background. It is not clear what we should do.
4720 : //
4721 : // Wisdom from https://mastodon.social/@martinfleis@fosstodon.org/116568957538009577
4722 : // LJW: "It is a fundamental change of support problem with no closed
4723 : // solution. We looked into bootstrapping to solve it, but never
4724 : // published. Basic idea was to bootstrap a constant sample size, set
4725 : // the weight of candidates as a kernel function on the distance from
4726 : // the target, and set the bandwidth needed at each pixel as that
4727 : // which maximizes the entropy of a histogram of sample weights.
4728 : // Best you can do is define some loss and optimize the resampling
4729 : // against it."
4730 : }
4731 :
4732 : // Calculate the output taking into account weighting.
4733 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4734 : {
4735 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4736 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4737 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4738 0 : if (padfRowDensity != nullptr)
4739 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4740 : else
4741 0 : *pdfDensity = 1.0;
4742 : }
4743 : else
4744 : {
4745 620 : *pdfReal = dfAccumulatorReal;
4746 620 : *pdfImag = dfAccumulatorImag;
4747 620 : if (padfRowDensity != nullptr)
4748 0 : *pdfDensity = dfAccumulatorDensity;
4749 : else
4750 620 : *pdfDensity = 1.0;
4751 : }
4752 :
4753 620 : return true;
4754 : }
4755 :
4756 : /************************************************************************/
4757 : /* GWKComputeWeights() */
4758 : /************************************************************************/
4759 :
4760 1091070 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4761 : double dfDeltaX, double dfXScale, int jMin,
4762 : int jMax, double dfDeltaY, double dfYScale,
4763 : double *padfWeightsHorizontal,
4764 : double *padfWeightsVertical, double &dfInvWeights)
4765 : {
4766 :
4767 1091070 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4768 1091070 : CPLAssert(pfnGetWeight);
4769 1091070 : const FilterFunc4ValuesType pfnGetWeight4Values =
4770 1091070 : apfGWKFilter4Values[eResample];
4771 1091070 : CPLAssert(pfnGetWeight4Values);
4772 :
4773 1091070 : int i = iMin; // Used after for.
4774 1091070 : int iC = 0; // Used after for.
4775 : // Not zero, but as close as possible to it, to avoid potential division by
4776 : // zero at end of function
4777 1091070 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
4778 2403700 : for (; i + 2 < iMax; i += 4, iC += 4)
4779 : {
4780 1312620 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4781 1312620 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4782 1312620 : padfWeightsHorizontal[iC + 2] =
4783 1312620 : padfWeightsHorizontal[iC + 1] + dfXScale;
4784 1312620 : padfWeightsHorizontal[iC + 3] =
4785 1312620 : padfWeightsHorizontal[iC + 2] + dfXScale;
4786 1312620 : dfAccumulatorWeightHorizontal +=
4787 1312620 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4788 : }
4789 1145700 : for (; i <= iMax; ++i, ++iC)
4790 : {
4791 54623 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4792 54623 : padfWeightsHorizontal[iC] = dfWeight;
4793 54623 : dfAccumulatorWeightHorizontal += dfWeight;
4794 : }
4795 :
4796 1091070 : int j = jMin; // Used after for.
4797 1091070 : int jC = 0; // Used after for.
4798 : // Not zero, but as close as possible to it, to avoid potential division by
4799 : // zero at end of function
4800 1091070 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4801 2332840 : for (; j + 2 < jMax; j += 4, jC += 4)
4802 : {
4803 1241770 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4804 1241770 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4805 1241770 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4806 1241770 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4807 1241770 : dfAccumulatorWeightVertical +=
4808 1241770 : pfnGetWeight4Values(padfWeightsVertical + jC);
4809 : }
4810 1152230 : for (; j <= jMax; ++j, ++jC)
4811 : {
4812 61154 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4813 61154 : padfWeightsVertical[jC] = dfWeight;
4814 61154 : dfAccumulatorWeightVertical += dfWeight;
4815 : }
4816 :
4817 1091070 : dfInvWeights =
4818 1091070 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4819 1091070 : }
4820 :
4821 : /************************************************************************/
4822 : /* GWKResampleNoMasksT() */
4823 : /************************************************************************/
4824 :
4825 : template <class T>
4826 : static bool
4827 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4828 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4829 : double *padfWeightsVertical, double &dfInvWeights)
4830 :
4831 : {
4832 : // Commonly used; save locally.
4833 : const int nSrcXSize = poWK->nSrcXSize;
4834 : const int nSrcYSize = poWK->nSrcYSize;
4835 :
4836 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4837 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4838 : const GPtrDiff_t iSrcOffset =
4839 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4840 :
4841 : const int nXRadius = poWK->nXRadius;
4842 : const int nYRadius = poWK->nYRadius;
4843 :
4844 : // Politely refuse to process invalid coordinates or obscenely small image.
4845 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4846 : nYRadius > nSrcYSize)
4847 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4848 : pValue);
4849 :
4850 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4851 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4852 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4853 :
4854 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4855 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4856 :
4857 : int iMin = 1 - nXRadius;
4858 : if (iSrcX + iMin < 0)
4859 : iMin = -iSrcX;
4860 : int iMax = nXRadius;
4861 : if (iSrcX + iMax >= nSrcXSize - 1)
4862 : iMax = nSrcXSize - 1 - iSrcX;
4863 :
4864 : int jMin = 1 - nYRadius;
4865 : if (iSrcY + jMin < 0)
4866 : jMin = -iSrcY;
4867 : int jMax = nYRadius;
4868 : if (iSrcY + jMax >= nSrcYSize - 1)
4869 : jMax = nSrcYSize - 1 - iSrcY;
4870 :
4871 : if (iBand == 0)
4872 : {
4873 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4874 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4875 : padfWeightsVertical, dfInvWeights);
4876 : }
4877 :
4878 : // Loop over all rows in the kernel.
4879 : double dfAccumulator = 0.0;
4880 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4881 : {
4882 : const GPtrDiff_t iSampJ =
4883 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4884 :
4885 : // Loop over all pixels in the row.
4886 : double dfAccumulatorLocal = 0.0;
4887 : double dfAccumulatorLocal2 = 0.0;
4888 : int iC = 0;
4889 : int i = iMin;
4890 : // Process by chunk of 4 cols.
4891 : for (; i + 2 < iMax; i += 4, iC += 4)
4892 : {
4893 : // Retrieve the pixel & accumulate.
4894 : dfAccumulatorLocal +=
4895 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4896 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4897 : padfWeightsHorizontal[iC + 1];
4898 : dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4899 : padfWeightsHorizontal[iC + 2];
4900 : dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4901 : padfWeightsHorizontal[iC + 3];
4902 : }
4903 : dfAccumulatorLocal += dfAccumulatorLocal2;
4904 : if (i < iMax)
4905 : {
4906 : dfAccumulatorLocal +=
4907 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4908 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4909 : padfWeightsHorizontal[iC + 1];
4910 : i += 2;
4911 : iC += 2;
4912 : }
4913 : if (i == iMax)
4914 : {
4915 : dfAccumulatorLocal +=
4916 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4917 : }
4918 :
4919 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4920 : }
4921 :
4922 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4923 :
4924 : return true;
4925 : }
4926 :
4927 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4928 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4929 : #if defined(USE_SSE2)
4930 :
4931 : /************************************************************************/
4932 : /* GWKResampleNoMasks_SSE2_T() */
4933 : /************************************************************************/
4934 :
4935 : template <class T>
4936 1382149 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4937 : double dfSrcX, double dfSrcY, T *pValue,
4938 : double *padfWeightsHorizontal,
4939 : double *padfWeightsVertical,
4940 : double &dfInvWeights)
4941 : {
4942 : // Commonly used; save locally.
4943 1382149 : const int nSrcXSize = poWK->nSrcXSize;
4944 1382149 : const int nSrcYSize = poWK->nSrcYSize;
4945 :
4946 1382149 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4947 1382149 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4948 1382149 : const GPtrDiff_t iSrcOffset =
4949 1382149 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4950 1382149 : const int nXRadius = poWK->nXRadius;
4951 1382149 : const int nYRadius = poWK->nYRadius;
4952 :
4953 : // Politely refuse to process invalid coordinates or obscenely small image.
4954 1382149 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4955 : nYRadius > nSrcYSize)
4956 3 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4957 3 : pValue);
4958 :
4959 1382146 : const T *pSrcBand =
4960 1382146 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4961 :
4962 1382146 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4963 1382146 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4964 1382146 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4965 1382146 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4966 :
4967 1382146 : int iMin = 1 - nXRadius;
4968 1382146 : if (iSrcX + iMin < 0)
4969 20312 : iMin = -iSrcX;
4970 1382146 : int iMax = nXRadius;
4971 1382146 : if (iSrcX + iMax >= nSrcXSize - 1)
4972 7970 : iMax = nSrcXSize - 1 - iSrcX;
4973 :
4974 1382146 : int jMin = 1 - nYRadius;
4975 1382146 : if (iSrcY + jMin < 0)
4976 22209 : jMin = -iSrcY;
4977 1382146 : int jMax = nYRadius;
4978 1382146 : if (iSrcY + jMax >= nSrcYSize - 1)
4979 9295 : jMax = nSrcYSize - 1 - iSrcY;
4980 :
4981 1382146 : if (iBand == 0)
4982 : {
4983 1091074 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4984 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4985 : padfWeightsVertical, dfInvWeights);
4986 : }
4987 :
4988 1382146 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4989 : // Process by chunk of 4 rows.
4990 1382146 : int jC = 0;
4991 1382146 : int j = jMin;
4992 1382146 : double dfAccumulator = 0.0;
4993 3068580 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4994 : {
4995 : // Loop over all pixels in the row.
4996 1686436 : int iC = 0;
4997 1686436 : int i = iMin;
4998 : // Process by chunk of 4 cols.
4999 1686436 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
5000 1686436 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
5001 1686436 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
5002 1686436 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
5003 4251632 : for (; i + 2 < iMax; i += 4, iC += 4)
5004 : {
5005 : // Retrieve the pixel & accumulate.
5006 2565196 : XMMReg4Double v_pixels_1 =
5007 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
5008 2565196 : XMMReg4Double v_pixels_2 =
5009 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
5010 2565196 : XMMReg4Double v_pixels_3 =
5011 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
5012 2565196 : XMMReg4Double v_pixels_4 =
5013 2565196 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
5014 :
5015 2565196 : XMMReg4Double v_padfWeight =
5016 2565196 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
5017 :
5018 2565196 : v_acc_1 += v_pixels_1 * v_padfWeight;
5019 2565196 : v_acc_2 += v_pixels_2 * v_padfWeight;
5020 2565196 : v_acc_3 += v_pixels_3 * v_padfWeight;
5021 2565196 : v_acc_4 += v_pixels_4 * v_padfWeight;
5022 : }
5023 :
5024 1686436 : if (i < iMax)
5025 : {
5026 25512 : XMMReg2Double v_pixels_1 =
5027 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
5028 25512 : XMMReg2Double v_pixels_2 =
5029 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
5030 25512 : XMMReg2Double v_pixels_3 =
5031 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
5032 25512 : XMMReg2Double v_pixels_4 =
5033 25512 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
5034 :
5035 25512 : XMMReg2Double v_padfWeight =
5036 25512 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
5037 :
5038 25512 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
5039 25512 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
5040 25512 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
5041 25512 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
5042 :
5043 25512 : i += 2;
5044 25512 : iC += 2;
5045 : }
5046 :
5047 1686436 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
5048 1686436 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
5049 1686436 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
5050 1686436 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
5051 :
5052 1686436 : if (i == iMax)
5053 : {
5054 27557 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
5055 27557 : padfWeightsHorizontal[iC];
5056 27557 : dfAccumulatorLocal_2 +=
5057 27557 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
5058 27557 : padfWeightsHorizontal[iC];
5059 27557 : dfAccumulatorLocal_3 +=
5060 27557 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
5061 27557 : padfWeightsHorizontal[iC];
5062 27557 : dfAccumulatorLocal_4 +=
5063 27557 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
5064 27557 : padfWeightsHorizontal[iC];
5065 : }
5066 :
5067 1686436 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
5068 1686436 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
5069 1686436 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
5070 1686436 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
5071 : }
5072 1456100 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
5073 : {
5074 : // Loop over all pixels in the row.
5075 73954 : int iC = 0;
5076 73954 : int i = iMin;
5077 : // Process by chunk of 4 cols.
5078 73954 : XMMReg4Double v_acc = XMMReg4Double::Zero();
5079 172926 : for (; i + 2 < iMax; i += 4, iC += 4)
5080 : {
5081 : // Retrieve the pixel & accumulate.
5082 98972 : XMMReg4Double v_pixels =
5083 98972 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
5084 98972 : XMMReg4Double v_padfWeight =
5085 98972 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
5086 :
5087 98972 : v_acc += v_pixels * v_padfWeight;
5088 : }
5089 :
5090 73954 : double dfAccumulatorLocal = v_acc.GetHorizSum();
5091 :
5092 73954 : if (i < iMax)
5093 : {
5094 1862 : dfAccumulatorLocal +=
5095 1862 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
5096 1862 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
5097 1862 : padfWeightsHorizontal[iC + 1];
5098 1862 : i += 2;
5099 1862 : iC += 2;
5100 : }
5101 73954 : if (i == iMax)
5102 : {
5103 1803 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
5104 1803 : padfWeightsHorizontal[iC];
5105 : }
5106 :
5107 73954 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
5108 : }
5109 :
5110 1382146 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
5111 :
5112 1382146 : return true;
5113 : }
5114 :
5115 : /************************************************************************/
5116 : /* GWKResampleNoMasksT<GByte>() */
5117 : /************************************************************************/
5118 :
5119 : template <>
5120 877023 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
5121 : double dfSrcX, double dfSrcY, GByte *pValue,
5122 : double *padfWeightsHorizontal,
5123 : double *padfWeightsVertical,
5124 : double &dfInvWeights)
5125 : {
5126 877023 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5127 : padfWeightsHorizontal, padfWeightsVertical,
5128 877023 : dfInvWeights);
5129 : }
5130 :
5131 : /************************************************************************/
5132 : /* GWKResampleNoMasksT<GInt16>() */
5133 : /************************************************************************/
5134 :
5135 : template <>
5136 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
5137 : double dfSrcX, double dfSrcY, GInt16 *pValue,
5138 : double *padfWeightsHorizontal,
5139 : double *padfWeightsVertical,
5140 : double &dfInvWeights)
5141 : {
5142 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5143 : padfWeightsHorizontal, padfWeightsVertical,
5144 252563 : dfInvWeights);
5145 : }
5146 :
5147 : /************************************************************************/
5148 : /* GWKResampleNoMasksT<GUInt16>() */
5149 : /************************************************************************/
5150 :
5151 : template <>
5152 250063 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
5153 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
5154 : double *padfWeightsHorizontal,
5155 : double *padfWeightsVertical,
5156 : double &dfInvWeights)
5157 : {
5158 250063 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5159 : padfWeightsHorizontal, padfWeightsVertical,
5160 250063 : dfInvWeights);
5161 : }
5162 :
5163 : /************************************************************************/
5164 : /* GWKResampleNoMasksT<float>() */
5165 : /************************************************************************/
5166 :
5167 : template <>
5168 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
5169 : double dfSrcX, double dfSrcY, float *pValue,
5170 : double *padfWeightsHorizontal,
5171 : double *padfWeightsVertical,
5172 : double &dfInvWeights)
5173 : {
5174 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5175 : padfWeightsHorizontal, padfWeightsVertical,
5176 2500 : dfInvWeights);
5177 : }
5178 :
5179 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
5180 :
5181 : /************************************************************************/
5182 : /* GWKResampleNoMasksT<double>() */
5183 : /************************************************************************/
5184 :
5185 : template <>
5186 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
5187 : double dfSrcX, double dfSrcY, double *pValue,
5188 : double *padfWeightsHorizontal,
5189 : double *padfWeightsVertical,
5190 : double &dfInvWeights)
5191 : {
5192 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
5193 : padfWeightsHorizontal, padfWeightsVertical,
5194 : dfInvWeights);
5195 : }
5196 :
5197 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
5198 :
5199 : #endif /* defined(USE_SSE2) */
5200 :
5201 : /************************************************************************/
5202 : /* GWKRoundSourceCoordinates() */
5203 : /************************************************************************/
5204 :
5205 1000 : static void GWKRoundSourceCoordinates(
5206 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
5207 : double dfSrcCoordPrecision, double dfErrorThreshold,
5208 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
5209 : double dfDstY)
5210 : {
5211 1000 : double dfPct = 0.8;
5212 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
5213 : {
5214 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
5215 : }
5216 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
5217 :
5218 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5219 : {
5220 500000 : const double dfXBefore = padfX[iDstX];
5221 500000 : const double dfYBefore = padfY[iDstX];
5222 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5223 : dfSrcCoordPrecision;
5224 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5225 : dfSrcCoordPrecision;
5226 :
5227 : // If we are in an uncertainty zone, go to non-approximated
5228 : // transformation.
5229 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
5230 : // be at least 10 times greater than the approximation error.
5231 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
5232 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
5233 : {
5234 180090 : padfX[iDstX] = iDstX + dfDstXOff;
5235 180090 : padfY[iDstX] = dfDstY;
5236 180090 : padfZ[iDstX] = 0.0;
5237 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
5238 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
5239 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
5240 : dfSrcCoordPrecision;
5241 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
5242 : dfSrcCoordPrecision;
5243 : }
5244 : }
5245 1000 : }
5246 :
5247 : /************************************************************************/
5248 : /* GWKCheckAndComputeSrcOffsets() */
5249 : /************************************************************************/
5250 : static CPL_INLINE bool
5251 190187000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5252 : int _iDstY, double *_padfX, double *_padfY,
5253 : int _nSrcXSize, int _nSrcYSize,
5254 : GPtrDiff_t &iSrcOffset)
5255 : {
5256 190187000 : const GDALWarpKernel *_poWK = psJob->poWK;
5257 196802000 : for (int iTry = 0; iTry < 2; ++iTry)
5258 : {
5259 196802000 : if (iTry == 1)
5260 : {
5261 : // If the source coordinate is slightly outside of the source raster
5262 : // retry to transform it alone, so that the exact coordinate
5263 : // transformer is used.
5264 :
5265 6614120 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5266 6614120 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5267 6614120 : double dfZ = 0;
5268 6614120 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5269 6614120 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5270 6614120 : _pabSuccess + _iDstX);
5271 : }
5272 196802000 : if (!_pabSuccess[_iDstX])
5273 3619620 : return false;
5274 :
5275 : // If this happens this is likely the symptom of a bug somewhere.
5276 193182000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5277 : {
5278 : static bool bNanCoordFound = false;
5279 0 : if (!bNanCoordFound)
5280 : {
5281 0 : CPLDebug("WARP",
5282 : "GWKCheckAndComputeSrcOffsets(): "
5283 : "NaN coordinate found on point %d.",
5284 : _iDstX);
5285 0 : bNanCoordFound = true;
5286 : }
5287 0 : return false;
5288 : }
5289 :
5290 : /* --------------------------------------------------------------------
5291 : */
5292 : /* Figure out what pixel we want in our source raster, and skip */
5293 : /* further processing if it is well off the source image. */
5294 : /* --------------------------------------------------------------------
5295 : */
5296 : /* We test against the value before casting to avoid the */
5297 : /* problem of asymmetric truncation effects around zero. That is */
5298 : /* -0.5 will be 0 when cast to an int. */
5299 193182000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5300 : {
5301 : // If the source coordinate is slightly outside of the source raster
5302 : // retry to transform it alone, so that the exact coordinate
5303 : // transformer is used.
5304 17441500 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5305 2892850 : continue;
5306 14548600 : return false;
5307 : }
5308 :
5309 175740000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5310 : {
5311 : // If the source coordinate is slightly outside of the source raster
5312 : // retry to transform it alone, so that the exact coordinate
5313 : // transformer is used.
5314 8491820 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5315 638882 : continue;
5316 7852940 : return false;
5317 : }
5318 :
5319 : // Check for potential overflow when casting from float to int, (if
5320 : // operating outside natural projection area, padfX/Y can be a very huge
5321 : // positive number before doing the actual conversion), as such cast is
5322 : // undefined behavior that can trigger exception with some compilers
5323 : // (see #6753)
5324 167249000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5325 : {
5326 : // If the source coordinate is slightly outside of the source raster
5327 : // retry to transform it alone, so that the exact coordinate
5328 : // transformer is used.
5329 13456100 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5330 2714910 : continue;
5331 10741100 : return false;
5332 : }
5333 153793000 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5334 : {
5335 : // If the source coordinate is slightly outside of the source raster
5336 : // retry to transform it alone, so that the exact coordinate
5337 : // transformer is used.
5338 5815260 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5339 367484 : continue;
5340 5447770 : return false;
5341 : }
5342 :
5343 147977000 : break;
5344 : }
5345 :
5346 147977000 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5347 147977000 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5348 147977000 : if (iSrcX == _nSrcXSize)
5349 0 : iSrcX--;
5350 147977000 : if (iSrcY == _nSrcYSize)
5351 0 : iSrcY--;
5352 :
5353 : // Those checks should normally be OK given the previous ones.
5354 147977000 : CPLAssert(iSrcX >= 0);
5355 147977000 : CPLAssert(iSrcY >= 0);
5356 147977000 : CPLAssert(iSrcX < _nSrcXSize);
5357 147977000 : CPLAssert(iSrcY < _nSrcYSize);
5358 :
5359 147977000 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5360 :
5361 147977000 : return true;
5362 : }
5363 :
5364 : /************************************************************************/
5365 : /* GWKOneSourceCornerFailsToReproject() */
5366 : /************************************************************************/
5367 :
5368 939 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5369 : {
5370 939 : GDALWarpKernel *poWK = psJob->poWK;
5371 2805 : for (int iY = 0; iY <= 1; ++iY)
5372 : {
5373 5605 : for (int iX = 0; iX <= 1; ++iX)
5374 : {
5375 3739 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5376 3739 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5377 3739 : double dfZTmp = 0;
5378 3739 : int nSuccess = FALSE;
5379 3739 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5380 : &dfYTmp, &dfZTmp, &nSuccess);
5381 3739 : if (!nSuccess)
5382 7 : return true;
5383 : }
5384 : }
5385 932 : return false;
5386 : }
5387 :
5388 : /************************************************************************/
5389 : /* GWKAdjustSrcOffsetOnEdge() */
5390 : /************************************************************************/
5391 :
5392 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5393 : GPtrDiff_t &iSrcOffset)
5394 : {
5395 9714 : GDALWarpKernel *poWK = psJob->poWK;
5396 9714 : const int nSrcXSize = poWK->nSrcXSize;
5397 9714 : const int nSrcYSize = poWK->nSrcYSize;
5398 :
5399 : // Check if the computed source position slightly altered
5400 : // fails to reproject. If so, then we are at the edge of
5401 : // the validity area, and it is worth checking neighbour
5402 : // source pixels for validity.
5403 9714 : int nSuccess = FALSE;
5404 : {
5405 9714 : double dfXTmp =
5406 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5407 9714 : double dfYTmp =
5408 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5409 9714 : double dfZTmp = 0;
5410 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5411 : &dfZTmp, &nSuccess);
5412 : }
5413 9714 : if (nSuccess)
5414 : {
5415 6996 : double dfXTmp =
5416 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5417 6996 : double dfYTmp =
5418 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5419 6996 : double dfZTmp = 0;
5420 6996 : nSuccess = FALSE;
5421 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5422 : &dfZTmp, &nSuccess);
5423 : }
5424 9714 : if (nSuccess)
5425 : {
5426 5624 : double dfXTmp =
5427 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5428 5624 : double dfYTmp =
5429 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5430 5624 : double dfZTmp = 0;
5431 5624 : nSuccess = FALSE;
5432 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5433 : &dfZTmp, &nSuccess);
5434 : }
5435 :
5436 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5437 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5438 : {
5439 1860 : iSrcOffset++;
5440 1860 : return true;
5441 : }
5442 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5443 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5444 : {
5445 1334 : iSrcOffset += nSrcXSize;
5446 1334 : return true;
5447 : }
5448 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5449 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5450 : {
5451 956 : iSrcOffset--;
5452 956 : return true;
5453 : }
5454 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5455 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5456 : {
5457 340 : iSrcOffset -= nSrcXSize;
5458 340 : return true;
5459 : }
5460 :
5461 5224 : return false;
5462 : }
5463 :
5464 : /************************************************************************/
5465 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5466 : /************************************************************************/
5467 :
5468 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5469 : GPtrDiff_t &iSrcOffset)
5470 : {
5471 0 : GDALWarpKernel *poWK = psJob->poWK;
5472 0 : const int nSrcXSize = poWK->nSrcXSize;
5473 0 : const int nSrcYSize = poWK->nSrcYSize;
5474 :
5475 : // Check if the computed source position slightly altered
5476 : // fails to reproject. If so, then we are at the edge of
5477 : // the validity area, and it is worth checking neighbour
5478 : // source pixels for validity.
5479 0 : int nSuccess = FALSE;
5480 : {
5481 0 : double dfXTmp =
5482 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5483 0 : double dfYTmp =
5484 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5485 0 : double dfZTmp = 0;
5486 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5487 : &dfZTmp, &nSuccess);
5488 : }
5489 0 : if (nSuccess)
5490 : {
5491 0 : double dfXTmp =
5492 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5493 0 : double dfYTmp =
5494 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5495 0 : double dfZTmp = 0;
5496 0 : nSuccess = FALSE;
5497 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5498 : &dfZTmp, &nSuccess);
5499 : }
5500 0 : if (nSuccess)
5501 : {
5502 0 : double dfXTmp =
5503 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5504 0 : double dfYTmp =
5505 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5506 0 : double dfZTmp = 0;
5507 0 : nSuccess = FALSE;
5508 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5509 : &dfZTmp, &nSuccess);
5510 : }
5511 :
5512 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5513 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5514 : SRC_DENSITY_THRESHOLD_FLOAT)
5515 : {
5516 0 : iSrcOffset++;
5517 0 : return true;
5518 : }
5519 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5520 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5521 : SRC_DENSITY_THRESHOLD_FLOAT)
5522 : {
5523 0 : iSrcOffset += nSrcXSize;
5524 0 : return true;
5525 : }
5526 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5527 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5528 : SRC_DENSITY_THRESHOLD_FLOAT)
5529 : {
5530 0 : iSrcOffset--;
5531 0 : return true;
5532 : }
5533 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5534 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5535 : SRC_DENSITY_THRESHOLD_FLOAT)
5536 : {
5537 0 : iSrcOffset -= nSrcXSize;
5538 0 : return true;
5539 : }
5540 :
5541 0 : return false;
5542 : }
5543 :
5544 : /************************************************************************/
5545 : /* GWKGeneralCase() */
5546 : /* */
5547 : /* This is the most general case. It attempts to handle all */
5548 : /* possible features with relatively little concern for */
5549 : /* efficiency. */
5550 : /************************************************************************/
5551 :
5552 239 : static void GWKGeneralCaseThread(void *pData)
5553 : {
5554 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5555 239 : GDALWarpKernel *poWK = psJob->poWK;
5556 239 : const int iYMin = psJob->iYMin;
5557 239 : const int iYMax = psJob->iYMax;
5558 : const double dfMultFactorVerticalShiftPipeline =
5559 239 : poWK->bApplyVerticalShift
5560 239 : ? CPLAtof(CSLFetchNameValueDef(
5561 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5562 : "1.0"))
5563 239 : : 0.0;
5564 : const bool bAvoidNoDataSingleBand =
5565 239 : poWK->nBands == 1 ||
5566 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5567 239 : "UNIFIED_SRC_NODATA", "FALSE"));
5568 :
5569 239 : int nDstXSize = poWK->nDstXSize;
5570 239 : int nSrcXSize = poWK->nSrcXSize;
5571 239 : int nSrcYSize = poWK->nSrcYSize;
5572 :
5573 : /* -------------------------------------------------------------------- */
5574 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5575 : /* scanlines worth of positions. */
5576 : /* -------------------------------------------------------------------- */
5577 : // For x, 2 *, because we cache the precomputed values at the end.
5578 : double *padfX =
5579 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5580 : double *padfY =
5581 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5582 : double *padfZ =
5583 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5584 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5585 :
5586 239 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5587 :
5588 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5589 239 : if (poWK->eResample != GRA_NearestNeighbour)
5590 : {
5591 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5592 : }
5593 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5594 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5595 239 : const double dfErrorThreshold = CPLAtof(
5596 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5597 :
5598 : const bool bOneSourceCornerFailsToReproject =
5599 239 : GWKOneSourceCornerFailsToReproject(psJob);
5600 :
5601 : // Precompute values.
5602 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5603 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5604 :
5605 : /* ==================================================================== */
5606 : /* Loop over output lines. */
5607 : /* ==================================================================== */
5608 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5609 : {
5610 : /* --------------------------------------------------------------------
5611 : */
5612 : /* Setup points to transform to source image space. */
5613 : /* --------------------------------------------------------------------
5614 : */
5615 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5616 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5617 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5618 236160 : padfY[iDstX] = dfY;
5619 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5620 :
5621 : /* --------------------------------------------------------------------
5622 : */
5623 : /* Transform the points from destination pixel/line coordinates */
5624 : /* to source pixel/line coordinates. */
5625 : /* --------------------------------------------------------------------
5626 : */
5627 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5628 : padfY, padfZ, pabSuccess);
5629 6230 : if (dfSrcCoordPrecision > 0.0)
5630 : {
5631 0 : GWKRoundSourceCoordinates(
5632 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5633 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5634 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5635 : }
5636 :
5637 : /* ====================================================================
5638 : */
5639 : /* Loop over pixels in output scanline. */
5640 : /* ====================================================================
5641 : */
5642 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5643 : {
5644 236160 : GPtrDiff_t iSrcOffset = 0;
5645 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5646 : padfX, padfY, nSrcXSize,
5647 : nSrcYSize, iSrcOffset))
5648 0 : continue;
5649 :
5650 : /* --------------------------------------------------------------------
5651 : */
5652 : /* Do not try to apply transparent/invalid source pixels to the
5653 : */
5654 : /* destination. This currently ignores the multi-pixel input
5655 : */
5656 : /* of bilinear and cubic resamples. */
5657 : /* --------------------------------------------------------------------
5658 : */
5659 236160 : double dfDensity = 1.0;
5660 :
5661 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5662 : {
5663 1200 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5664 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5665 : {
5666 0 : if (!bOneSourceCornerFailsToReproject)
5667 : {
5668 0 : continue;
5669 : }
5670 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5671 : psJob, iSrcOffset))
5672 : {
5673 0 : dfDensity =
5674 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5675 : }
5676 : else
5677 : {
5678 0 : continue;
5679 : }
5680 : }
5681 : }
5682 :
5683 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5684 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5685 : {
5686 0 : if (!bOneSourceCornerFailsToReproject)
5687 : {
5688 0 : continue;
5689 : }
5690 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5691 : {
5692 0 : continue;
5693 : }
5694 : }
5695 :
5696 : /* ====================================================================
5697 : */
5698 : /* Loop processing each band. */
5699 : /* ====================================================================
5700 : */
5701 236160 : bool bHasFoundDensity = false;
5702 :
5703 236160 : const GPtrDiff_t iDstOffset =
5704 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5705 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5706 : {
5707 236160 : double dfBandDensity = 0.0;
5708 236160 : double dfValueReal = 0.0;
5709 236160 : double dfValueImag = 0.0;
5710 :
5711 : /* --------------------------------------------------------------------
5712 : */
5713 : /* Collect the source value. */
5714 : /* --------------------------------------------------------------------
5715 : */
5716 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5717 : nSrcYSize == 1)
5718 : {
5719 : // FALSE is returned if dfBandDensity == 0, which is
5720 : // checked below.
5721 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5722 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5723 : &dfValueImag));
5724 : }
5725 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5726 : {
5727 248 : GWKBilinearResample4Sample(
5728 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5729 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5730 : &dfValueReal, &dfValueImag);
5731 : }
5732 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5733 : {
5734 248 : GWKCubicResample4Sample(
5735 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5736 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5737 : &dfValueReal, &dfValueImag);
5738 : }
5739 : else
5740 : #ifdef DEBUG
5741 : // Only useful for clang static analyzer.
5742 235096 : if (psWrkStruct != nullptr)
5743 : #endif
5744 : {
5745 235096 : psWrkStruct->pfnGWKResample(
5746 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5747 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5748 : &dfValueReal, &dfValueImag, psWrkStruct);
5749 : }
5750 :
5751 : // If we didn't find any valid inputs skip to next band.
5752 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5753 0 : continue;
5754 :
5755 236160 : if (poWK->bApplyVerticalShift)
5756 : {
5757 0 : if (!std::isfinite(padfZ[iDstX]))
5758 0 : continue;
5759 : // Subtract padfZ[] since the coordinate transformation is
5760 : // from target to source
5761 0 : dfValueReal =
5762 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5763 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5764 : }
5765 :
5766 236160 : bHasFoundDensity = true;
5767 :
5768 : /* --------------------------------------------------------------------
5769 : */
5770 : /* We have a computed value from the source. Now apply it
5771 : * to */
5772 : /* the destination pixel. */
5773 : /* --------------------------------------------------------------------
5774 : */
5775 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5776 : dfValueReal, dfValueImag,
5777 : bAvoidNoDataSingleBand);
5778 : }
5779 :
5780 236160 : if (!bHasFoundDensity)
5781 0 : continue;
5782 :
5783 236160 : if (!bAvoidNoDataSingleBand)
5784 : {
5785 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
5786 : }
5787 :
5788 : /* --------------------------------------------------------------------
5789 : */
5790 : /* Update destination density/validity masks. */
5791 : /* --------------------------------------------------------------------
5792 : */
5793 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5794 :
5795 236160 : if (poWK->panDstValid != nullptr)
5796 : {
5797 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5798 : }
5799 : } /* Next iDstX */
5800 :
5801 : /* --------------------------------------------------------------------
5802 : */
5803 : /* Report progress to the user, and optionally cancel out. */
5804 : /* --------------------------------------------------------------------
5805 : */
5806 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5807 0 : break;
5808 : }
5809 :
5810 : /* -------------------------------------------------------------------- */
5811 : /* Cleanup and return. */
5812 : /* -------------------------------------------------------------------- */
5813 239 : CPLFree(padfX);
5814 239 : CPLFree(padfY);
5815 239 : CPLFree(padfZ);
5816 239 : CPLFree(pabSuccess);
5817 239 : if (psWrkStruct)
5818 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5819 239 : }
5820 :
5821 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5822 : {
5823 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5824 : }
5825 :
5826 : /************************************************************************/
5827 : /* GWKRealCase() */
5828 : /* */
5829 : /* General case for non-complex data types. */
5830 : /************************************************************************/
5831 :
5832 223 : static void GWKRealCaseThread(void *pData)
5833 :
5834 : {
5835 223 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5836 223 : GDALWarpKernel *poWK = psJob->poWK;
5837 223 : const int iYMin = psJob->iYMin;
5838 223 : const int iYMax = psJob->iYMax;
5839 :
5840 223 : const int nDstXSize = poWK->nDstXSize;
5841 223 : const int nSrcXSize = poWK->nSrcXSize;
5842 223 : const int nSrcYSize = poWK->nSrcYSize;
5843 : const double dfMultFactorVerticalShiftPipeline =
5844 223 : poWK->bApplyVerticalShift
5845 223 : ? CPLAtof(CSLFetchNameValueDef(
5846 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5847 : "1.0"))
5848 223 : : 0.0;
5849 : const bool bAvoidNoDataSingleBand =
5850 305 : poWK->nBands == 1 ||
5851 82 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
5852 223 : "UNIFIED_SRC_NODATA", "FALSE"));
5853 :
5854 : /* -------------------------------------------------------------------- */
5855 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5856 : /* scanlines worth of positions. */
5857 : /* -------------------------------------------------------------------- */
5858 :
5859 : // For x, 2 *, because we cache the precomputed values at the end.
5860 : double *padfX =
5861 223 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5862 : double *padfY =
5863 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5864 : double *padfZ =
5865 223 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5866 223 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5867 :
5868 223 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
5869 :
5870 223 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5871 223 : if (poWK->eResample != GRA_NearestNeighbour)
5872 : {
5873 181 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5874 : }
5875 223 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5876 223 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5877 223 : const double dfErrorThreshold = CPLAtof(
5878 223 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5879 :
5880 638 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5881 415 : poWK->papanBandSrcValid == nullptr &&
5882 192 : poWK->pafUnifiedSrcDensity != nullptr;
5883 :
5884 : const bool bOneSourceCornerFailsToReproject =
5885 223 : GWKOneSourceCornerFailsToReproject(psJob);
5886 :
5887 : // Precompute values.
5888 24657 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5889 24434 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5890 :
5891 : /* ==================================================================== */
5892 : /* Loop over output lines. */
5893 : /* ==================================================================== */
5894 25909 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5895 : {
5896 : /* --------------------------------------------------------------------
5897 : */
5898 : /* Setup points to transform to source image space. */
5899 : /* --------------------------------------------------------------------
5900 : */
5901 25686 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5902 25686 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5903 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5904 44568500 : padfY[iDstX] = dfY;
5905 25686 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5906 :
5907 : /* --------------------------------------------------------------------
5908 : */
5909 : /* Transform the points from destination pixel/line coordinates */
5910 : /* to source pixel/line coordinates. */
5911 : /* --------------------------------------------------------------------
5912 : */
5913 25686 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5914 : padfY, padfZ, pabSuccess);
5915 25686 : if (dfSrcCoordPrecision > 0.0)
5916 : {
5917 0 : GWKRoundSourceCoordinates(
5918 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5919 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5920 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5921 : }
5922 :
5923 : /* ====================================================================
5924 : */
5925 : /* Loop over pixels in output scanline. */
5926 : /* ====================================================================
5927 : */
5928 44594200 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5929 : {
5930 44568500 : GPtrDiff_t iSrcOffset = 0;
5931 44568500 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5932 : padfX, padfY, nSrcXSize,
5933 : nSrcYSize, iSrcOffset))
5934 43823900 : continue;
5935 :
5936 : /* --------------------------------------------------------------------
5937 : */
5938 : /* Do not try to apply transparent/invalid source pixels to the
5939 : */
5940 : /* destination. This currently ignores the multi-pixel input
5941 : */
5942 : /* of bilinear and cubic resamples. */
5943 : /* --------------------------------------------------------------------
5944 : */
5945 31812400 : double dfDensity = 1.0;
5946 :
5947 31812400 : if (poWK->pafUnifiedSrcDensity != nullptr)
5948 : {
5949 1669560 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5950 1669560 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5951 : {
5952 1538480 : if (!bOneSourceCornerFailsToReproject)
5953 : {
5954 1538480 : continue;
5955 : }
5956 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5957 : psJob, iSrcOffset))
5958 : {
5959 0 : dfDensity =
5960 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5961 : }
5962 : else
5963 : {
5964 0 : continue;
5965 : }
5966 : }
5967 : }
5968 :
5969 59903100 : if (poWK->panUnifiedSrcValid != nullptr &&
5970 29629200 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5971 : {
5972 29531600 : if (!bOneSourceCornerFailsToReproject)
5973 : {
5974 29529300 : continue;
5975 : }
5976 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5977 : {
5978 0 : continue;
5979 : }
5980 : }
5981 :
5982 : /* ====================================================================
5983 : */
5984 : /* Loop processing each band. */
5985 : /* ====================================================================
5986 : */
5987 744578 : bool bHasFoundDensity = false;
5988 :
5989 744578 : const GPtrDiff_t iDstOffset =
5990 744578 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5991 2092550 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5992 : {
5993 1347980 : double dfBandDensity = 0.0;
5994 1347980 : double dfValueReal = 0.0;
5995 :
5996 : /* --------------------------------------------------------------------
5997 : */
5998 : /* Collect the source value. */
5999 : /* --------------------------------------------------------------------
6000 : */
6001 1347980 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
6002 : nSrcYSize == 1)
6003 : {
6004 : // FALSE is returned if dfBandDensity == 0, which is
6005 : // checked below.
6006 15516 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
6007 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
6008 : }
6009 1332460 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
6010 : {
6011 2046 : double dfValueImagIgnored = 0.0;
6012 2046 : GWKBilinearResample4Sample(
6013 2046 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6014 2046 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6015 2046 : &dfValueReal, &dfValueImagIgnored);
6016 : }
6017 1330410 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
6018 : {
6019 691552 : if (bSrcMaskIsDensity)
6020 : {
6021 389755 : if (poWK->eWorkingDataType == GDT_UInt8)
6022 : {
6023 389755 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
6024 389755 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6025 389755 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6026 : &dfValueReal);
6027 : }
6028 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6029 : {
6030 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
6031 0 : GUInt16>(poWK, iBand,
6032 0 : padfX[iDstX] - poWK->nSrcXOff,
6033 0 : padfY[iDstX] - poWK->nSrcYOff,
6034 : &dfBandDensity, &dfValueReal);
6035 : }
6036 : else
6037 : {
6038 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
6039 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6040 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6041 : &dfValueReal);
6042 : }
6043 : }
6044 : else
6045 : {
6046 301797 : double dfValueImagIgnored = 0.0;
6047 301797 : GWKCubicResample4Sample(
6048 301797 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6049 301797 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6050 : &dfValueReal, &dfValueImagIgnored);
6051 691552 : }
6052 : }
6053 : else
6054 : #ifdef DEBUG
6055 : // Only useful for clang static analyzer.
6056 638861 : if (psWrkStruct != nullptr)
6057 : #endif
6058 : {
6059 638861 : double dfValueImagIgnored = 0.0;
6060 638861 : psWrkStruct->pfnGWKResample(
6061 638861 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6062 638861 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6063 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6064 : }
6065 :
6066 : // If we didn't find any valid inputs skip to next band.
6067 1347980 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6068 0 : continue;
6069 :
6070 1347980 : if (poWK->bApplyVerticalShift)
6071 : {
6072 0 : if (!std::isfinite(padfZ[iDstX]))
6073 0 : continue;
6074 : // Subtract padfZ[] since the coordinate transformation is
6075 : // from target to source
6076 0 : dfValueReal =
6077 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6078 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6079 : }
6080 :
6081 1347980 : bHasFoundDensity = true;
6082 :
6083 : /* --------------------------------------------------------------------
6084 : */
6085 : /* We have a computed value from the source. Now apply it
6086 : * to */
6087 : /* the destination pixel. */
6088 : /* --------------------------------------------------------------------
6089 : */
6090 1347980 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6091 : dfValueReal, bAvoidNoDataSingleBand);
6092 : }
6093 :
6094 744578 : if (!bHasFoundDensity)
6095 0 : continue;
6096 :
6097 744578 : if (!bAvoidNoDataSingleBand)
6098 : {
6099 100295 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6100 : }
6101 :
6102 : /* --------------------------------------------------------------------
6103 : */
6104 : /* Update destination density/validity masks. */
6105 : /* --------------------------------------------------------------------
6106 : */
6107 744578 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6108 :
6109 744578 : if (poWK->panDstValid != nullptr)
6110 : {
6111 104586 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6112 : }
6113 : } // Next iDstX.
6114 :
6115 : /* --------------------------------------------------------------------
6116 : */
6117 : /* Report progress to the user, and optionally cancel out. */
6118 : /* --------------------------------------------------------------------
6119 : */
6120 25686 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6121 0 : break;
6122 : }
6123 :
6124 : /* -------------------------------------------------------------------- */
6125 : /* Cleanup and return. */
6126 : /* -------------------------------------------------------------------- */
6127 223 : CPLFree(padfX);
6128 223 : CPLFree(padfY);
6129 223 : CPLFree(padfZ);
6130 223 : CPLFree(pabSuccess);
6131 223 : if (psWrkStruct)
6132 181 : GWKResampleDeleteWrkStruct(psWrkStruct);
6133 223 : }
6134 :
6135 223 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6136 : {
6137 223 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6138 : }
6139 :
6140 : /************************************************************************/
6141 : /* GWKCubicResampleNoMasks4MultiBandT() */
6142 : /************************************************************************/
6143 :
6144 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
6145 : /* and enough SSE registers */
6146 : #if defined(USE_SSE2)
6147 :
6148 142421000 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6149 : const __m128 row2, const __m128 row3,
6150 : const __m128 weightsXY0,
6151 : const __m128 weightsXY1,
6152 : const __m128 weightsXY2,
6153 : const __m128 weightsXY3)
6154 : {
6155 996949000 : return XMMHorizontalAdd(_mm_add_ps(
6156 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6157 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6158 142421000 : _mm_mul_ps(row3, weightsXY3))));
6159 : }
6160 :
6161 : template <class T>
6162 48891642 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6163 : double dfSrcX, double dfSrcY,
6164 : const GPtrDiff_t iDstOffset)
6165 : {
6166 48891642 : const double dfSrcXShifted = dfSrcX - 0.5;
6167 48891642 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6168 48891642 : const double dfSrcYShifted = dfSrcY - 0.5;
6169 48891642 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6170 48891642 : const GPtrDiff_t iSrcOffset =
6171 48891642 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6172 :
6173 : // Get the bilinear interpolation at the image borders.
6174 48891642 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6175 47609162 : iSrcY + 2 >= poWK->nSrcYSize)
6176 : {
6177 5671540 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6178 : {
6179 : T value;
6180 4253650 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6181 : &value);
6182 4253650 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6183 : value;
6184 1417880 : }
6185 : }
6186 : else
6187 : {
6188 47473762 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6189 47473762 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6190 :
6191 : float afCoeffsX[4];
6192 : float afCoeffsY[4];
6193 47473762 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6194 47473762 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
6195 47473762 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6196 : const auto weightsXY0 =
6197 94947524 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6198 : const auto weightsXY1 =
6199 94947524 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6200 : const auto weightsXY2 =
6201 94947524 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6202 : const auto weightsXY3 =
6203 47473762 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6204 :
6205 47473762 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6206 :
6207 47473762 : int iBand = 0;
6208 : // Process 2 bands at a time
6209 94947524 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6210 : {
6211 47473762 : const T *CPL_RESTRICT pBand0 =
6212 47473762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6213 47473762 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6214 : const auto row1_0 =
6215 47473762 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6216 : const auto row2_0 =
6217 47473762 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6218 : const auto row3_0 =
6219 47473762 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6220 :
6221 47473762 : const T *CPL_RESTRICT pBand1 =
6222 47473762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6223 47473762 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6224 : const auto row1_1 =
6225 47473762 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6226 : const auto row2_1 =
6227 47473762 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6228 : const auto row3_1 =
6229 47473762 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6230 :
6231 : const float fValue_0 =
6232 47473762 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6233 : weightsXY1, weightsXY2, weightsXY3);
6234 :
6235 : const float fValue_1 =
6236 47473762 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6237 : weightsXY1, weightsXY2, weightsXY3);
6238 :
6239 47473762 : T *CPL_RESTRICT pDstBand0 =
6240 47473762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6241 47473762 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6242 :
6243 47473762 : T *CPL_RESTRICT pDstBand1 =
6244 47473762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6245 47473762 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6246 : }
6247 47473762 : if (iBand < poWK->nBands)
6248 : {
6249 47473762 : const T *CPL_RESTRICT pBand0 =
6250 47473762 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6251 47473762 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6252 : const auto row1 =
6253 47473762 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6254 : const auto row2 =
6255 47473762 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6256 : const auto row3 =
6257 47473762 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6258 :
6259 : const float fValue =
6260 47473762 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6261 : weightsXY2, weightsXY3);
6262 :
6263 47473762 : T *CPL_RESTRICT pDstBand =
6264 47473762 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6265 47473762 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6266 : }
6267 : }
6268 :
6269 48891642 : if (poWK->pafDstDensity)
6270 46737601 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6271 48891642 : }
6272 :
6273 : #endif // defined(USE_SSE2)
6274 :
6275 : /************************************************************************/
6276 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6277 : /************************************************************************/
6278 :
6279 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6280 2036 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6281 :
6282 : {
6283 2036 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6284 2036 : GDALWarpKernel *poWK = psJob->poWK;
6285 2036 : const int iYMin = psJob->iYMin;
6286 2036 : const int iYMax = psJob->iYMax;
6287 2018 : const double dfMultFactorVerticalShiftPipeline =
6288 2036 : poWK->bApplyVerticalShift
6289 18 : ? CPLAtof(CSLFetchNameValueDef(
6290 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6291 : "1.0"))
6292 : : 0.0;
6293 :
6294 2036 : const int nDstXSize = poWK->nDstXSize;
6295 2036 : const int nSrcXSize = poWK->nSrcXSize;
6296 2036 : const int nSrcYSize = poWK->nSrcYSize;
6297 :
6298 : /* -------------------------------------------------------------------- */
6299 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6300 : /* scanlines worth of positions. */
6301 : /* -------------------------------------------------------------------- */
6302 :
6303 : // For x, 2 *, because we cache the precomputed values at the end.
6304 : double *padfX =
6305 2036 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6306 : double *padfY =
6307 2036 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6308 : double *padfZ =
6309 2036 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6310 2036 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6311 :
6312 2036 : const int nXRadius = poWK->nXRadius;
6313 : double *padfWeightsX =
6314 2036 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6315 : double *padfWeightsY = static_cast<double *>(
6316 2036 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6317 2036 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6318 2036 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6319 2036 : const double dfErrorThreshold = CPLAtof(
6320 2036 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6321 :
6322 : // Precompute values.
6323 504244 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6324 502208 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6325 :
6326 : /* ==================================================================== */
6327 : /* Loop over output lines. */
6328 : /* ==================================================================== */
6329 324090 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6330 : {
6331 : /* --------------------------------------------------------------------
6332 : */
6333 : /* Setup points to transform to source image space. */
6334 : /* --------------------------------------------------------------------
6335 : */
6336 322055 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6337 322055 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6338 111932457 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6339 111610449 : padfY[iDstX] = dfY;
6340 322055 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6341 :
6342 : /* --------------------------------------------------------------------
6343 : */
6344 : /* Transform the points from destination pixel/line coordinates */
6345 : /* to source pixel/line coordinates. */
6346 : /* --------------------------------------------------------------------
6347 : */
6348 322055 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6349 : padfY, padfZ, pabSuccess);
6350 322055 : if (dfSrcCoordPrecision > 0.0)
6351 : {
6352 1000 : GWKRoundSourceCoordinates(
6353 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6354 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6355 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6356 : }
6357 :
6358 : /* ====================================================================
6359 : */
6360 : /* Loop over pixels in output scanline. */
6361 : /* ====================================================================
6362 : */
6363 111932457 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6364 : {
6365 111610449 : GPtrDiff_t iSrcOffset = 0;
6366 111610449 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6367 : padfX, padfY, nSrcXSize,
6368 : nSrcYSize, iSrcOffset))
6369 63085888 : continue;
6370 :
6371 : /* ====================================================================
6372 : */
6373 : /* Loop processing each band. */
6374 : /* ====================================================================
6375 : */
6376 97416161 : const GPtrDiff_t iDstOffset =
6377 97416161 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6378 :
6379 : #if defined(USE_SSE2)
6380 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6381 : (std::is_same<T, GByte>::value ||
6382 : std::is_same<T, GUInt16>::value))
6383 : {
6384 49957541 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6385 : {
6386 48891642 : GWKCubicResampleNoMasks4MultiBandT<T>(
6387 48891642 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6388 48891642 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6389 :
6390 48891642 : continue;
6391 : }
6392 : }
6393 : #endif // defined(USE_SSE2)
6394 :
6395 48524518 : [[maybe_unused]] double dfInvWeights = 0;
6396 133360926 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6397 : {
6398 84836804 : T value = 0;
6399 : if constexpr (eResample == GRA_NearestNeighbour)
6400 : {
6401 77111130 : value = reinterpret_cast<T *>(
6402 77111130 : poWK->papabySrcImage[iBand])[iSrcOffset];
6403 : }
6404 : else if constexpr (bUse4SamplesFormula)
6405 : {
6406 : if constexpr (eResample == GRA_Bilinear)
6407 4042275 : GWKBilinearResampleNoMasks4SampleT(
6408 4042275 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6409 4042275 : padfY[iDstX] - poWK->nSrcYOff, &value);
6410 : else
6411 2301250 : GWKCubicResampleNoMasks4SampleT(
6412 2301250 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6413 2301250 : padfY[iDstX] - poWK->nSrcYOff, &value);
6414 : }
6415 : else
6416 : {
6417 1382149 : GWKResampleNoMasksT(
6418 1382149 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6419 1382149 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6420 : padfWeightsY, dfInvWeights);
6421 : }
6422 :
6423 84836804 : if (poWK->bApplyVerticalShift)
6424 : {
6425 818 : if (!std::isfinite(padfZ[iDstX]))
6426 0 : continue;
6427 : // Subtract padfZ[] since the coordinate transformation is
6428 : // from target to source
6429 818 : value = GWKClampValueT<T>(
6430 818 : double(value) * poWK->dfMultFactorVerticalShift -
6431 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6432 : }
6433 :
6434 84836804 : if (poWK->pafDstDensity)
6435 10261231 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6436 :
6437 84836804 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6438 : value;
6439 : }
6440 : }
6441 :
6442 : /* --------------------------------------------------------------------
6443 : */
6444 : /* Report progress to the user, and optionally cancel out. */
6445 : /* --------------------------------------------------------------------
6446 : */
6447 322055 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6448 1 : break;
6449 : }
6450 :
6451 : /* -------------------------------------------------------------------- */
6452 : /* Cleanup and return. */
6453 : /* -------------------------------------------------------------------- */
6454 2036 : CPLFree(padfX);
6455 2036 : CPLFree(padfY);
6456 2036 : CPLFree(padfZ);
6457 2036 : CPLFree(pabSuccess);
6458 2036 : CPLFree(padfWeightsX);
6459 2036 : CPLFree(padfWeightsY);
6460 2036 : }
6461 :
6462 : template <class T, GDALResampleAlg eResample>
6463 1005 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6464 : {
6465 1005 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6466 : pData);
6467 1005 : }
6468 :
6469 : template <class T, GDALResampleAlg eResample>
6470 1031 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6471 :
6472 : {
6473 1031 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6474 1031 : GDALWarpKernel *poWK = psJob->poWK;
6475 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6476 1031 : const bool bUse4SamplesFormula = CanUse4SamplesFormula(poWK);
6477 1031 : if (bUse4SamplesFormula)
6478 976 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6479 : pData);
6480 : else
6481 55 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6482 : pData);
6483 1031 : }
6484 :
6485 954 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6486 : {
6487 954 : return GWKRun(
6488 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6489 954 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6490 : }
6491 :
6492 132 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6493 : {
6494 132 : return GWKRun(
6495 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6496 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6497 132 : GRA_Bilinear>);
6498 : }
6499 :
6500 852 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6501 : {
6502 852 : return GWKRun(
6503 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6504 852 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6505 : }
6506 :
6507 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6508 : {
6509 9 : return GWKRun(
6510 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6511 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6512 : }
6513 :
6514 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6515 :
6516 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6517 : {
6518 : return GWKRun(
6519 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6520 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6521 : }
6522 : #endif
6523 :
6524 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6525 : {
6526 12 : return GWKRun(
6527 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6528 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6529 : }
6530 :
6531 : /************************************************************************/
6532 : /* GWKNearestByte() */
6533 : /* */
6534 : /* Case for 8bit input data with nearest neighbour resampling */
6535 : /* using valid flags. Should be as fast as possible for this */
6536 : /* particular transformation type. */
6537 : /************************************************************************/
6538 :
6539 477 : template <class T> static void GWKNearestThread(void *pData)
6540 :
6541 : {
6542 477 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6543 477 : GDALWarpKernel *poWK = psJob->poWK;
6544 477 : const int iYMin = psJob->iYMin;
6545 477 : const int iYMax = psJob->iYMax;
6546 476 : const double dfMultFactorVerticalShiftPipeline =
6547 477 : poWK->bApplyVerticalShift
6548 1 : ? CPLAtof(CSLFetchNameValueDef(
6549 1 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6550 : "1.0"))
6551 : : 0.0;
6552 477 : const bool bAvoidNoDataSingleBand =
6553 546 : poWK->nBands == 1 ||
6554 69 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
6555 : "UNIFIED_SRC_NODATA", "FALSE"));
6556 :
6557 477 : const int nDstXSize = poWK->nDstXSize;
6558 477 : const int nSrcXSize = poWK->nSrcXSize;
6559 477 : const int nSrcYSize = poWK->nSrcYSize;
6560 :
6561 : /* -------------------------------------------------------------------- */
6562 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6563 : /* scanlines worth of positions. */
6564 : /* -------------------------------------------------------------------- */
6565 :
6566 : // For x, 2 *, because we cache the precomputed values at the end.
6567 : double *padfX =
6568 477 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6569 : double *padfY =
6570 477 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6571 : double *padfZ =
6572 477 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6573 477 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6574 :
6575 477 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6576 477 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6577 477 : const double dfErrorThreshold = CPLAtof(
6578 477 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6579 :
6580 : const bool bOneSourceCornerFailsToReproject =
6581 477 : GWKOneSourceCornerFailsToReproject(psJob);
6582 :
6583 : // Precompute values.
6584 80557 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6585 80080 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6586 :
6587 : /* ==================================================================== */
6588 : /* Loop over output lines. */
6589 : /* ==================================================================== */
6590 64713 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6591 : {
6592 :
6593 : /* --------------------------------------------------------------------
6594 : */
6595 : /* Setup points to transform to source image space. */
6596 : /* --------------------------------------------------------------------
6597 : */
6598 64236 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6599 64236 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6600 33836599 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6601 33772442 : padfY[iDstX] = dfY;
6602 64236 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6603 :
6604 : /* --------------------------------------------------------------------
6605 : */
6606 : /* Transform the points from destination pixel/line coordinates */
6607 : /* to source pixel/line coordinates. */
6608 : /* --------------------------------------------------------------------
6609 : */
6610 64236 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6611 : padfY, padfZ, pabSuccess);
6612 64236 : if (dfSrcCoordPrecision > 0.0)
6613 : {
6614 0 : GWKRoundSourceCoordinates(
6615 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6616 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6617 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6618 : }
6619 : /* ====================================================================
6620 : */
6621 : /* Loop over pixels in output scanline. */
6622 : /* ====================================================================
6623 : */
6624 33836599 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6625 : {
6626 33772442 : GPtrDiff_t iSrcOffset = 0;
6627 33772442 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6628 : padfX, padfY, nSrcXSize,
6629 : nSrcYSize, iSrcOffset))
6630 21383643 : continue;
6631 :
6632 : /* --------------------------------------------------------------------
6633 : */
6634 : /* Do not try to apply invalid source pixels to the dest. */
6635 : /* --------------------------------------------------------------------
6636 : */
6637 25227006 : if (poWK->panUnifiedSrcValid != nullptr &&
6638 6714445 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6639 : {
6640 5120982 : if (!bOneSourceCornerFailsToReproject)
6641 : {
6642 5113496 : continue;
6643 : }
6644 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6645 : {
6646 5224 : continue;
6647 : }
6648 : }
6649 :
6650 : /* --------------------------------------------------------------------
6651 : */
6652 : /* Do not try to apply transparent source pixels to the
6653 : * destination.*/
6654 : /* --------------------------------------------------------------------
6655 : */
6656 13393881 : double dfDensity = 1.0;
6657 :
6658 13393881 : if (poWK->pafUnifiedSrcDensity != nullptr)
6659 : {
6660 1557335 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6661 1557335 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6662 1005075 : continue;
6663 : }
6664 :
6665 : /* ====================================================================
6666 : */
6667 : /* Loop processing each band. */
6668 : /* ====================================================================
6669 : */
6670 :
6671 12388799 : const GPtrDiff_t iDstOffset =
6672 12388799 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6673 :
6674 27339660 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6675 : {
6676 14950961 : T value = 0;
6677 14950961 : double dfBandDensity = 0.0;
6678 :
6679 : /* --------------------------------------------------------------------
6680 : */
6681 : /* Collect the source value. */
6682 : /* --------------------------------------------------------------------
6683 : */
6684 14950961 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6685 : &value))
6686 : {
6687 :
6688 14950861 : if (poWK->bApplyVerticalShift)
6689 : {
6690 1 : if (!std::isfinite(padfZ[iDstX]))
6691 0 : continue;
6692 : // Subtract padfZ[] since the coordinate transformation
6693 : // is from target to source
6694 1 : value = GWKClampValueT<T>(
6695 1 : double(value) * poWK->dfMultFactorVerticalShift -
6696 1 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6697 : }
6698 :
6699 14950861 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6700 : dfBandDensity, value,
6701 : bAvoidNoDataSingleBand);
6702 : }
6703 : }
6704 :
6705 : /* --------------------------------------------------------------------
6706 : */
6707 : /* Mark this pixel valid/opaque in the output. */
6708 : /* --------------------------------------------------------------------
6709 : */
6710 :
6711 12388799 : if (!bAvoidNoDataSingleBand)
6712 : {
6713 424278 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
6714 : }
6715 :
6716 12388799 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6717 :
6718 12388799 : if (poWK->panDstValid != nullptr)
6719 : {
6720 11118346 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6721 : }
6722 : } /* Next iDstX */
6723 :
6724 : /* --------------------------------------------------------------------
6725 : */
6726 : /* Report progress to the user, and optionally cancel out. */
6727 : /* --------------------------------------------------------------------
6728 : */
6729 64236 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6730 0 : break;
6731 : }
6732 :
6733 : /* -------------------------------------------------------------------- */
6734 : /* Cleanup and return. */
6735 : /* -------------------------------------------------------------------- */
6736 477 : CPLFree(padfX);
6737 477 : CPLFree(padfY);
6738 477 : CPLFree(padfZ);
6739 477 : CPLFree(pabSuccess);
6740 477 : }
6741 :
6742 363 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6743 : {
6744 363 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6745 : }
6746 :
6747 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6748 : {
6749 14 : return GWKRun(
6750 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6751 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6752 : }
6753 :
6754 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6755 : {
6756 5 : return GWKRun(
6757 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6758 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6759 5 : GRA_Bilinear>);
6760 : }
6761 :
6762 7 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6763 : {
6764 7 : return GWKRun(
6765 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6766 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6767 7 : GRA_Bilinear>);
6768 : }
6769 :
6770 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6771 : {
6772 4 : return GWKRun(
6773 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6774 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6775 4 : GRA_Bilinear>);
6776 : }
6777 :
6778 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6779 :
6780 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6781 : {
6782 : return GWKRun(
6783 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6784 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6785 : GRA_Bilinear>);
6786 : }
6787 : #endif
6788 :
6789 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6790 : {
6791 5 : return GWKRun(
6792 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6793 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6794 : }
6795 :
6796 14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6797 : {
6798 14 : return GWKRun(
6799 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6800 14 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6801 : }
6802 :
6803 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6804 : {
6805 6 : return GWKRun(
6806 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6807 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6808 : }
6809 :
6810 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6811 : {
6812 5 : return GWKRun(
6813 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6814 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6815 : }
6816 :
6817 9 : static CPLErr GWKNearestInt8(GDALWarpKernel *poWK)
6818 : {
6819 9 : return GWKRun(poWK, "GWKNearestInt8", GWKNearestThread<int8_t>);
6820 : }
6821 :
6822 40 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6823 : {
6824 40 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6825 : }
6826 :
6827 10 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6828 : {
6829 10 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6830 : }
6831 :
6832 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6833 : {
6834 11 : return GWKRun(
6835 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6836 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6837 : }
6838 :
6839 51 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6840 : {
6841 51 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6842 : }
6843 :
6844 : /************************************************************************/
6845 : /* GWKAverageOrMode() */
6846 : /* */
6847 : /************************************************************************/
6848 :
// Weight of source row iSrcY inside the window [dfYMin, dfYMax], whose
// integer bounds are [iSrcYMin, iSrcYMax). The first and last rows of the
// window get the fraction of the row that the window covers; a window made of
// a single row, and every interior row, gets 1.0.
// Relies on iSrcYMin, iSrcYMax, dfYMin and dfYMax being in scope at the point
// of use. The argument is parenthesized so that any expression can be passed.
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    (((iSrcY) == iSrcYMin)                                                     \
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
     : ((iSrcY) + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                     \
                                 : 1.0)

// Weight of source pixel (iSrcX, row) given the row weight dfWeightY: the row
// weight scaled by the fraction of column iSrcX covered by [dfXMin, dfXMax].
// Relies on iSrcXMin, iSrcXMax, dfXMin and dfXMax being in scope at the point
// of use. Both arguments are parenthesized: without that, passing a compound
// expression such as "a + b" as dfWeightY would bind the multiplication to "b"
// only.
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    (((iSrcX) == iSrcXMin)                                                     \
         ? ((iSrcXMin + 1 == iSrcXMax)                                         \
                ? (dfWeightY)                                                  \
                : (dfWeightY) * (1 - (dfXMin - iSrcXMin)))                     \
     : ((iSrcX) + 1 == iSrcXMax) ? (dfWeightY) * (1 - (iSrcXMax - dfXMax))     \
                                 : (dfWeightY))
6861 :
6862 : static void GWKAverageOrModeThread(void *pData);
6863 :
6864 246 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6865 : {
6866 246 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6867 : }
6868 :
6869 : /************************************************************************/
6870 : /* GWKAverageOrModeComputeLineCoords() */
6871 : /************************************************************************/
6872 :
6873 28663 : static void GWKAverageOrModeComputeLineCoords(
6874 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6875 : double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6876 : int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6877 : double dfErrorThreshold)
6878 : {
6879 28663 : const GDALWarpKernel *poWK = psJob->poWK;
6880 28663 : const int nDstXSize = poWK->nDstXSize;
6881 :
6882 : // Setup points to transform to source image space.
6883 7360890 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6884 : {
6885 7332220 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6886 7332220 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6887 7332220 : padfZ[iDstX] = 0.0;
6888 7332220 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6889 7332220 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6890 7332220 : padfZ2[iDstX] = 0.0;
6891 : }
6892 :
6893 : /* ----------------------------------------------------------------- */
6894 : /* Transform the points from destination pixel/line coordinates */
6895 : /* to source pixel/line coordinates. */
6896 : /* ----------------------------------------------------------------- */
6897 28663 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6898 : padfZ, pabSuccess);
6899 28663 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6900 : padfY2, padfZ2, pabSuccess2);
6901 :
6902 28663 : if (dfSrcCoordPrecision > 0.0)
6903 : {
6904 0 : GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6905 : dfSrcCoordPrecision, dfErrorThreshold,
6906 0 : poWK->pfnTransformer, psJob->pTransformerArg,
6907 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6908 0 : GWKRoundSourceCoordinates(
6909 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6910 0 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6911 0 : 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6912 : }
6913 28663 : }
6914 :
6915 : /************************************************************************/
6916 : /* GWKAverageOrModeComputeSourceCoords() */
6917 : /************************************************************************/
6918 :
// Derives, for destination pixel (iDstX, iDstY), the window of source pixels
// it covers, from the two transformed corners stored in padfX/padfY and
// padfX2/padfY2 (as filled by GWKAverageOrModeComputeLineCoords()).
//
// Inputs:
//   padfX, padfX2, padfY, padfY2  source-space corner coordinates, indexed by
//                                 iDstX. NOTE: entry iDstX of these arrays may
//                                 be modified in place (corners swapped so
//                                 that the first is <= the second, and padfX2
//                                 shifted by nSrcXSize when a wrap-around is
//                                 detected).
//   nXMargin, nYMargin            tolerance, in source pixels, by which the
//                                 corners may fall outside the source window
//                                 before the pixel is rejected.
// Outputs (only meaningful when true is returned; some may already have been
// written when false is returned):
//   bWrapOverX                    true when the pixel was detected as
//                                 straddling the left/right edges of the
//                                 source raster; in that case iSrcXMax may
//                                 exceed nSrcXSize and callers must wrap the
//                                 column index.
//   dfXMin/dfXMax, dfYMin/dfYMax  window bounds relative to the source
//                                 chunk origin (nSrcXOff, nSrcYOff).
//   iSrcXMin/iSrcXMax,
//   iSrcYMin/iSrcYMax             integer bounds of the window, upper bound
//                                 exclusive.
//
// Returns false when the corners are too far from the source window or the
// window does not intersect it; true otherwise.
//
// NOTE(review): the pabSuccess flags produced by the transformer are not
// consulted here; rejection of failed points presumably relies on the margin
// test below — confirm.
static bool GWKAverageOrModeComputeSourceCoords(
    const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
    double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
    // Output:
    bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
    double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
{
    const GDALWarpKernel *poWK = psJob->poWK;
    const int nSrcXSize = poWK->nSrcXSize;
    const int nSrcYSize = poWK->nSrcYSize;

    // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
    // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
    // The test is written as !(all within range) so that a NaN coordinate,
    // for which every comparison is false, is rejected as well.
    if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
          padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
          padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
          padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
          padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
          padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
          padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
          padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
    {
        return false;
    }

    // Compute corners in source crs.

    // The transformation might not have preserved ordering of
    // coordinates so do the necessary swapping (#5433).
    // NOTE: this is really an approximative fix. To do something
    // more precise we would for example need to compute the
    // transformation of coordinates in the
    // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
    // coordinates, and take the bounding box of the got source
    // coordinates.

    if (padfX[iDstX] > padfX2[iDstX])
        std::swap(padfX[iDstX], padfX2[iDstX]);

    // Detect situations where the target pixel is close to the
    // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
    // close to the left-most and right-most columns of the source
    // raster. The 2 value below was experimentally determined to
    // avoid false-positives and false-negatives.
    // Addresses https://github.com/OSGeo/gdal/issues/6478
    bWrapOverX = false;
    const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
    // The iDstX + 1 < nDstXSize test guards the read of padfX[iDstX + 1].
    if (poWK->nSrcXOff == 0 && iDstX + 1 < poWK->nDstXSize &&
        2 * padfX[iDstX] - padfX[iDstX + 1] < nThresholdWrapOverX &&
        nSrcXSize - padfX2[iDstX] < nThresholdWrapOverX)
    {
        // Check there is a discontinuity by checking at mid-pixel.
        // NOTE: all this remains fragile. To confidently
        // detect antimeridian warping we should probably try to access
        // georeferenced coordinates, and not rely only on tests on
        // image space coordinates. But accessing georeferenced
        // coordinates from here is not trivial, and we would for example
        // have to handle both geographic, Mercator, etc.
        // Let's hope this heuristics is good enough for now.
        double x = iDstX + 0.5 + poWK->nDstXOff;
        double y = iDstY + poWK->nDstYOff;
        double z = 0;
        int bSuccess = FALSE;
        poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
                             &bSuccess);
        if (bSuccess && x < padfX[iDstX])
        {
            // Treat the window as running from the right-most columns past
            // the right edge: the larger coordinate becomes the start, and
            // the smaller one, shifted by one raster width, becomes the end.
            bWrapOverX = true;
            std::swap(padfX[iDstX], padfX2[iDstX]);
            padfX2[iDstX] += nSrcXSize;
        }
    }

    dfXMin = padfX[iDstX] - poWK->nSrcXOff;
    dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
    constexpr double EPSILON = 1e-10;
    // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
    if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
        return false;
    iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
    // Clamp in the double domain before converting to int.
    iSrcXMax = static_cast<int>(
        std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
    // In the wrap-around case the upper bound is deliberately left
    // unclamped: it may exceed nSrcXSize.
    if (!bWrapOverX)
        iSrcXMax = std::min(iSrcXMax, nSrcXSize);
    // Make sure the window is at least one column wide when possible.
    if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
        iSrcXMax++;

    if (padfY[iDstX] > padfY2[iDstX])
        std::swap(padfY[iDstX], padfY2[iDstX]);
    dfYMin = padfY[iDstX] - poWK->nSrcYOff;
    dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
    // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
    if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
        return false;
    iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
    iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
    // Make sure the window is at least one row high when possible.
    if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
        iSrcYMax++;

    return true;
}
7020 :
7021 : /************************************************************************/
7022 : /* GWKModeRealType() */
7023 : /************************************************************************/
7024 :
7025 17780 : template <class T> static inline bool IsSame(T a, T b)
7026 : {
7027 17780 : return a == b;
7028 : }
7029 :
7030 0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
7031 : {
7032 0 : return a == b || (CPLIsNan(a) && CPLIsNan(b));
7033 : }
7034 :
7035 18 : template <> bool IsSame<float>(float a, float b)
7036 : {
7037 18 : return a == b || (std::isnan(a) && std::isnan(b));
7038 : }
7039 :
7040 56 : template <> bool IsSame<double>(double a, double b)
7041 : {
7042 56 : return a == b || (std::isnan(a) && std::isnan(b));
7043 : }
7044 :
7045 19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
7046 : {
7047 19 : const GDALWarpKernel *poWK = psJob->poWK;
7048 19 : const int iYMin = psJob->iYMin;
7049 19 : const int iYMax = psJob->iYMax;
7050 19 : const int nDstXSize = poWK->nDstXSize;
7051 19 : const int nSrcXSize = poWK->nSrcXSize;
7052 19 : const int nSrcYSize = poWK->nSrcYSize;
7053 19 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7054 :
7055 19 : T *pVals = nullptr;
7056 19 : float *pafCounts = nullptr;
7057 :
7058 19 : if (nSrcXSize > 0 && nSrcYSize > 0)
7059 : {
7060 : pVals = static_cast<T *>(
7061 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
7062 : pafCounts = static_cast<float *>(
7063 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7064 19 : if (pVals == nullptr || pafCounts == nullptr)
7065 : {
7066 0 : VSIFree(pVals);
7067 0 : VSIFree(pafCounts);
7068 0 : return;
7069 : }
7070 : }
7071 :
7072 : /* -------------------------------------------------------------------- */
7073 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7074 : /* scanlines worth of positions. */
7075 : /* -------------------------------------------------------------------- */
7076 :
7077 : double *padfX =
7078 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7079 : double *padfY =
7080 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7081 : double *padfZ =
7082 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7083 : double *padfX2 =
7084 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7085 : double *padfY2 =
7086 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7087 : double *padfZ2 =
7088 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7089 19 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7090 19 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7091 :
7092 19 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7093 19 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7094 19 : const double dfErrorThreshold = CPLAtof(
7095 19 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7096 19 : const bool bAvoidNoDataSingleBand =
7097 19 : poWK->nBands == 1 ||
7098 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7099 : "UNIFIED_SRC_NODATA", "FALSE"));
7100 :
7101 19 : const int nXMargin =
7102 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7103 19 : const int nYMargin =
7104 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7105 :
7106 : /* ==================================================================== */
7107 : /* Loop over output lines. */
7108 : /* ==================================================================== */
7109 116 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7110 : {
7111 97 : GWKAverageOrModeComputeLineCoords(
7112 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7113 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7114 :
7115 : // Loop over pixels in output scanline.
7116 3514 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7117 : {
7118 3417 : GPtrDiff_t iSrcOffset = 0;
7119 3417 : double dfDensity = 1.0;
7120 3417 : bool bHasFoundDensity = false;
7121 :
7122 3417 : bool bWrapOverX = false;
7123 3417 : double dfXMin = 0;
7124 3417 : double dfYMin = 0;
7125 3417 : double dfXMax = 0;
7126 3417 : double dfYMax = 0;
7127 3417 : int iSrcXMin = 0;
7128 3417 : int iSrcYMin = 0;
7129 3417 : int iSrcXMax = 0;
7130 3417 : int iSrcYMax = 0;
7131 3417 : if (!GWKAverageOrModeComputeSourceCoords(
7132 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7133 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7134 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7135 : {
7136 0 : continue;
7137 : }
7138 :
7139 3417 : const GPtrDiff_t iDstOffset =
7140 3417 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7141 :
7142 : // Loop processing each band.
7143 6834 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7144 : {
7145 3417 : double dfBandDensity = 0.0;
7146 :
7147 3417 : int nBins = 0;
7148 3417 : int iModeIndex = -1;
7149 3417 : T nVal{};
7150 :
7151 10248 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7152 : {
7153 6831 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7154 6831 : iSrcOffset =
7155 6831 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7156 20530 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7157 : iSrcX++, iSrcOffset++)
7158 : {
7159 13699 : if (bWrapOverX)
7160 0 : iSrcOffset =
7161 0 : (iSrcX % nSrcXSize) +
7162 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7163 :
7164 13699 : if (poWK->panUnifiedSrcValid != nullptr &&
7165 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7166 0 : continue;
7167 :
7168 13699 : if (GWKGetPixelT(poWK, iBand, iSrcOffset,
7169 27398 : &dfBandDensity, &nVal) &&
7170 13699 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7171 : {
7172 13699 : const double dfWeight =
7173 13699 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7174 :
7175 : // Check array for existing entry.
7176 13699 : int i = 0;
7177 29194 : for (i = 0; i < nBins; ++i)
7178 : {
7179 17807 : if (IsSame(pVals[i], nVal))
7180 : {
7181 :
7182 2312 : pafCounts[i] +=
7183 2312 : static_cast<float>(dfWeight);
7184 2312 : bool bValIsMaxCount =
7185 2312 : (pafCounts[i] > pafCounts[iModeIndex]);
7186 :
7187 2312 : if (!bValIsMaxCount &&
7188 1498 : pafCounts[i] == pafCounts[iModeIndex])
7189 : {
7190 1490 : switch (eTieStrategy)
7191 : {
7192 1477 : case GWKTS_First:
7193 1477 : break;
7194 6 : case GWKTS_Min:
7195 6 : bValIsMaxCount =
7196 6 : nVal < pVals[iModeIndex];
7197 6 : break;
7198 7 : case GWKTS_Max:
7199 7 : bValIsMaxCount =
7200 7 : nVal > pVals[iModeIndex];
7201 7 : break;
7202 : }
7203 : }
7204 :
7205 2312 : if (bValIsMaxCount)
7206 : {
7207 817 : iModeIndex = i;
7208 : }
7209 :
7210 2312 : break;
7211 : }
7212 : }
7213 :
7214 : // Add to arr if entry not already there.
7215 13699 : if (i == nBins)
7216 : {
7217 11387 : pVals[i] = nVal;
7218 11387 : pafCounts[i] = static_cast<float>(dfWeight);
7219 :
7220 11387 : if (iModeIndex < 0)
7221 3417 : iModeIndex = i;
7222 :
7223 11387 : ++nBins;
7224 : }
7225 : }
7226 : }
7227 : }
7228 :
7229 3417 : if (iModeIndex != -1)
7230 : {
7231 3417 : nVal = pVals[iModeIndex];
7232 3417 : dfBandDensity = 1;
7233 3417 : bHasFoundDensity = true;
7234 : }
7235 :
7236 : // We have a computed value from the source. Now apply it
7237 : // to the destination pixel
7238 3417 : if (bHasFoundDensity)
7239 : {
7240 3417 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
7241 : dfBandDensity, nVal,
7242 : bAvoidNoDataSingleBand);
7243 : }
7244 : }
7245 :
7246 3417 : if (!bHasFoundDensity)
7247 0 : continue;
7248 :
7249 3417 : if (!bAvoidNoDataSingleBand)
7250 : {
7251 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7252 : }
7253 :
7254 : /* --------------------------------------------------------------------
7255 : */
7256 : /* Update destination density/validity masks. */
7257 : /* --------------------------------------------------------------------
7258 : */
7259 3417 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7260 :
7261 3417 : if (poWK->panDstValid != nullptr)
7262 : {
7263 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7264 : }
7265 : } /* Next iDstX */
7266 :
7267 : /* --------------------------------------------------------------------
7268 : */
7269 : /* Report progress to the user, and optionally cancel out. */
7270 : /* --------------------------------------------------------------------
7271 : */
7272 97 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7273 0 : break;
7274 : }
7275 :
7276 : /* -------------------------------------------------------------------- */
7277 : /* Cleanup and return. */
7278 : /* -------------------------------------------------------------------- */
7279 19 : CPLFree(padfX);
7280 19 : CPLFree(padfY);
7281 19 : CPLFree(padfZ);
7282 19 : CPLFree(padfX2);
7283 19 : CPLFree(padfY2);
7284 19 : CPLFree(padfZ2);
7285 19 : CPLFree(pabSuccess);
7286 19 : CPLFree(pabSuccess2);
7287 19 : VSIFree(pVals);
7288 19 : VSIFree(pafCounts);
7289 : }
7290 :
7291 : /************************************************************************/
7292 : /* GWKModeComplexType() */
7293 : /************************************************************************/
7294 :
7295 8 : static void GWKModeComplexType(GWKJobStruct *psJob)
7296 : {
7297 8 : const GDALWarpKernel *poWK = psJob->poWK;
7298 8 : const int iYMin = psJob->iYMin;
7299 8 : const int iYMax = psJob->iYMax;
7300 8 : const int nDstXSize = poWK->nDstXSize;
7301 8 : const int nSrcXSize = poWK->nSrcXSize;
7302 8 : const int nSrcYSize = poWK->nSrcYSize;
7303 8 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7304 : const double dfMultFactorVerticalShiftPipeline =
7305 8 : poWK->bApplyVerticalShift
7306 8 : ? CPLAtof(CSLFetchNameValueDef(
7307 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7308 : "1.0"))
7309 8 : : 0.0;
7310 : const bool bAvoidNoDataSingleBand =
7311 8 : poWK->nBands == 1 ||
7312 0 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7313 8 : "UNIFIED_SRC_NODATA", "FALSE"));
7314 :
7315 8 : double *padfRealVals = nullptr;
7316 8 : double *padfImagVals = nullptr;
7317 8 : float *pafCounts = nullptr;
7318 :
7319 8 : if (nSrcXSize > 0 && nSrcYSize > 0)
7320 : {
7321 : padfRealVals = static_cast<double *>(
7322 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7323 : padfImagVals = static_cast<double *>(
7324 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
7325 : pafCounts = static_cast<float *>(
7326 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7327 8 : if (padfRealVals == nullptr || padfImagVals == nullptr ||
7328 : pafCounts == nullptr)
7329 : {
7330 0 : VSIFree(padfRealVals);
7331 0 : VSIFree(padfImagVals);
7332 0 : VSIFree(pafCounts);
7333 0 : return;
7334 : }
7335 : }
7336 :
7337 : /* -------------------------------------------------------------------- */
7338 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7339 : /* scanlines worth of positions. */
7340 : /* -------------------------------------------------------------------- */
7341 :
7342 : double *padfX =
7343 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7344 : double *padfY =
7345 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7346 : double *padfZ =
7347 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7348 : double *padfX2 =
7349 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7350 : double *padfY2 =
7351 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7352 : double *padfZ2 =
7353 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7354 8 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7355 8 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7356 :
7357 8 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7358 8 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7359 8 : const double dfErrorThreshold = CPLAtof(
7360 8 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7361 :
7362 : const int nXMargin =
7363 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7364 : const int nYMargin =
7365 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7366 :
7367 : /* ==================================================================== */
7368 : /* Loop over output lines. */
7369 : /* ==================================================================== */
7370 16 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7371 : {
7372 8 : GWKAverageOrModeComputeLineCoords(
7373 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7374 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7375 :
7376 : // Loop over pixels in output scanline.
7377 16 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7378 : {
7379 8 : GPtrDiff_t iSrcOffset = 0;
7380 8 : double dfDensity = 1.0;
7381 8 : bool bHasFoundDensity = false;
7382 :
7383 8 : bool bWrapOverX = false;
7384 8 : double dfXMin = 0;
7385 8 : double dfYMin = 0;
7386 8 : double dfXMax = 0;
7387 8 : double dfYMax = 0;
7388 8 : int iSrcXMin = 0;
7389 8 : int iSrcYMin = 0;
7390 8 : int iSrcXMax = 0;
7391 8 : int iSrcYMax = 0;
7392 8 : if (!GWKAverageOrModeComputeSourceCoords(
7393 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7394 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7395 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7396 : {
7397 0 : continue;
7398 : }
7399 :
7400 8 : const GPtrDiff_t iDstOffset =
7401 8 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7402 :
7403 : // Loop processing each band.
7404 16 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7405 : {
7406 8 : double dfBandDensity = 0.0;
7407 :
7408 8 : int nBins = 0;
7409 8 : int iModeIndex = -1;
7410 8 : double dfValueReal = 0;
7411 8 : double dfValueImag = 0;
7412 :
7413 16 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7414 : {
7415 8 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7416 8 : iSrcOffset =
7417 8 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7418 38 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7419 : iSrcX++, iSrcOffset++)
7420 : {
7421 30 : if (bWrapOverX)
7422 0 : iSrcOffset =
7423 0 : (iSrcX % nSrcXSize) +
7424 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7425 :
7426 30 : if (poWK->panUnifiedSrcValid != nullptr &&
7427 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7428 0 : continue;
7429 :
7430 30 : if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
7431 : &dfBandDensity, &dfValueReal,
7432 60 : &dfValueImag) &&
7433 30 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7434 : {
7435 30 : const double dfWeight =
7436 30 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7437 :
7438 : // Check array for existing entry.
7439 30 : int i = 0;
7440 49 : for (i = 0; i < nBins; ++i)
7441 : {
7442 47 : if (IsSame(padfRealVals[i], dfValueReal) &&
7443 14 : IsSame(padfImagVals[i], dfValueImag))
7444 : {
7445 :
7446 14 : pafCounts[i] +=
7447 14 : static_cast<float>(dfWeight);
7448 14 : bool bValIsMaxCount =
7449 14 : (pafCounts[i] > pafCounts[iModeIndex]);
7450 :
7451 14 : if (!bValIsMaxCount &&
7452 6 : pafCounts[i] == pafCounts[iModeIndex])
7453 : {
7454 3 : switch (eTieStrategy)
7455 : {
7456 3 : case GWKTS_First:
7457 3 : break;
7458 0 : case GWKTS_Min:
7459 0 : bValIsMaxCount =
7460 0 : dfValueReal <
7461 0 : padfRealVals[iModeIndex];
7462 0 : break;
7463 0 : case GWKTS_Max:
7464 0 : bValIsMaxCount =
7465 0 : dfValueReal >
7466 0 : padfRealVals[iModeIndex];
7467 0 : break;
7468 : }
7469 : }
7470 :
7471 14 : if (bValIsMaxCount)
7472 : {
7473 8 : iModeIndex = i;
7474 : }
7475 :
7476 14 : break;
7477 : }
7478 : }
7479 :
7480 : // Add to arr if entry not already there.
7481 30 : if (i == nBins)
7482 : {
7483 16 : padfRealVals[i] = dfValueReal;
7484 16 : padfImagVals[i] = dfValueImag;
7485 16 : pafCounts[i] = static_cast<float>(dfWeight);
7486 :
7487 16 : if (iModeIndex < 0)
7488 8 : iModeIndex = i;
7489 :
7490 16 : ++nBins;
7491 : }
7492 : }
7493 : }
7494 : }
7495 :
7496 8 : if (iModeIndex != -1)
7497 : {
7498 8 : dfValueReal = padfRealVals[iModeIndex];
7499 8 : dfValueImag = padfImagVals[iModeIndex];
7500 8 : dfBandDensity = 1;
7501 :
7502 8 : if (poWK->bApplyVerticalShift)
7503 : {
7504 0 : if (!std::isfinite(padfZ[iDstX]))
7505 0 : continue;
7506 : // Subtract padfZ[] since the coordinate
7507 : // transformation is from target to source
7508 0 : dfValueReal =
7509 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7510 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
7511 : }
7512 :
7513 8 : bHasFoundDensity = true;
7514 : }
7515 :
7516 : // We have a computed value from the source. Now apply it
7517 : // to the destination pixel
7518 8 : if (bHasFoundDensity)
7519 : {
7520 8 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7521 : dfValueReal, dfValueImag,
7522 : bAvoidNoDataSingleBand);
7523 : }
7524 : }
7525 :
7526 8 : if (!bHasFoundDensity)
7527 0 : continue;
7528 :
7529 8 : if (!bAvoidNoDataSingleBand)
7530 : {
7531 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7532 : }
7533 :
7534 : /* --------------------------------------------------------------------
7535 : */
7536 : /* Update destination density/validity masks. */
7537 : /* --------------------------------------------------------------------
7538 : */
7539 8 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7540 :
7541 8 : if (poWK->panDstValid != nullptr)
7542 : {
7543 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7544 : }
7545 : } /* Next iDstX */
7546 :
7547 : /* --------------------------------------------------------------------
7548 : */
7549 : /* Report progress to the user, and optionally cancel out. */
7550 : /* --------------------------------------------------------------------
7551 : */
7552 8 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7553 0 : break;
7554 : }
7555 :
7556 : /* -------------------------------------------------------------------- */
7557 : /* Cleanup and return. */
7558 : /* -------------------------------------------------------------------- */
7559 8 : CPLFree(padfX);
7560 8 : CPLFree(padfY);
7561 8 : CPLFree(padfZ);
7562 8 : CPLFree(padfX2);
7563 8 : CPLFree(padfY2);
7564 8 : CPLFree(padfZ2);
7565 8 : CPLFree(pabSuccess);
7566 8 : CPLFree(pabSuccess2);
7567 8 : VSIFree(padfRealVals);
7568 8 : VSIFree(padfImagVals);
7569 8 : VSIFree(pafCounts);
7570 : }
7571 :
7572 : /************************************************************************/
7573 : /* GWKAverageOrModeThread() */
7574 : /************************************************************************/
7575 :
7576 : // Overall logic based on GWKGeneralCaseThread().
7577 246 : static void GWKAverageOrModeThread(void *pData)
7578 : {
7579 246 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7580 246 : const GDALWarpKernel *poWK = psJob->poWK;
7581 246 : const int iYMin = psJob->iYMin;
7582 246 : const int iYMax = psJob->iYMax;
7583 : const double dfMultFactorVerticalShiftPipeline =
7584 246 : poWK->bApplyVerticalShift
7585 246 : ? CPLAtof(CSLFetchNameValueDef(
7586 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7587 : "1.0"))
7588 246 : : 0.0;
7589 : const bool bAvoidNoDataSingleBand =
7590 342 : poWK->nBands == 1 ||
7591 96 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
7592 246 : "UNIFIED_SRC_NODATA", "FALSE"));
7593 :
7594 246 : const int nDstXSize = poWK->nDstXSize;
7595 246 : const int nSrcXSize = poWK->nSrcXSize;
7596 :
7597 : /* -------------------------------------------------------------------- */
7598 : /* Find out which algorithm to use (small optim.) */
7599 : /* -------------------------------------------------------------------- */
7600 :
7601 : // Only used for GRA_Mode
7602 246 : float *pafCounts = nullptr;
7603 246 : int nBins = 0;
7604 246 : int nBinsOffset = 0;
7605 246 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7606 :
7607 : // Only used with Q1, Med and Q3
7608 246 : float quant = 0.0f;
7609 :
7610 : // To control array allocation only when data type is complex
7611 246 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7612 :
7613 246 : if (poWK->eResample == GRA_Mode)
7614 : {
7615 45 : if (poWK->bApplyVerticalShift)
7616 : {
7617 0 : return GWKModeComplexType(psJob);
7618 : }
7619 :
7620 45 : switch (poWK->eWorkingDataType)
7621 : {
7622 7 : case GDT_UInt8:
7623 7 : nBins = 256;
7624 7 : break;
7625 :
7626 1 : case GDT_Int8:
7627 1 : nBins = 256;
7628 1 : nBinsOffset = nBins / 2;
7629 1 : break;
7630 :
7631 1 : case GDT_UInt16:
7632 1 : nBins = 65536;
7633 1 : break;
7634 :
7635 9 : case GDT_Int16:
7636 9 : nBins = 65536;
7637 9 : nBinsOffset = nBins / 2;
7638 9 : break;
7639 :
7640 10 : case GDT_Int32:
7641 10 : return GWKModeRealType<int32_t>(psJob);
7642 :
7643 1 : case GDT_UInt32:
7644 1 : return GWKModeRealType<uint32_t>(psJob);
7645 :
7646 1 : case GDT_Int64:
7647 1 : return GWKModeRealType<int64_t>(psJob);
7648 :
7649 1 : case GDT_UInt64:
7650 1 : return GWKModeRealType<uint64_t>(psJob);
7651 :
7652 0 : case GDT_Float16:
7653 0 : return GWKModeRealType<GFloat16>(psJob);
7654 :
7655 4 : case GDT_Float32:
7656 4 : return GWKModeRealType<float>(psJob);
7657 :
7658 2 : case GDT_Float64:
7659 2 : return GWKModeRealType<double>(psJob);
7660 :
7661 8 : case GDT_CInt16:
7662 : case GDT_CInt32:
7663 : case GDT_CFloat16:
7664 : case GDT_CFloat32:
7665 : case GDT_CFloat64:
7666 8 : return GWKModeComplexType(psJob);
7667 :
7668 0 : case GDT_Unknown:
7669 : case GDT_TypeCount:
7670 0 : CPLAssert(false);
7671 : return;
7672 : }
7673 :
7674 18 : if (nBins)
7675 : {
7676 : pafCounts =
7677 18 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7678 18 : if (pafCounts == nullptr)
7679 0 : return;
7680 : }
7681 : }
7682 201 : else if (poWK->eResample == GRA_Med)
7683 : {
7684 6 : quant = 0.5f;
7685 : }
7686 195 : else if (poWK->eResample == GRA_Q1)
7687 : {
7688 10 : quant = 0.25f;
7689 : }
7690 185 : else if (poWK->eResample == GRA_Q3)
7691 : {
7692 5 : quant = 0.75f;
7693 : }
7694 180 : else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7695 11 : poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7696 : {
7697 : // Other resample algorithms not permitted here.
7698 0 : CPLError(CE_Fatal, CPLE_AppDefined,
7699 : "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7700 : "illegal resample");
7701 : }
7702 :
7703 219 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7704 :
7705 : /* -------------------------------------------------------------------- */
7706 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7707 : /* scanlines worth of positions. */
7708 : /* -------------------------------------------------------------------- */
7709 :
7710 : double *padfX =
7711 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7712 : double *padfY =
7713 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7714 : double *padfZ =
7715 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7716 : double *padfX2 =
7717 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7718 : double *padfY2 =
7719 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7720 : double *padfZ2 =
7721 219 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7722 219 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7723 219 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7724 :
7725 219 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7726 219 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7727 219 : const double dfErrorThreshold = CPLAtof(
7728 219 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7729 :
7730 : const double dfExcludedValuesThreshold =
7731 219 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7732 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7733 219 : 100.0;
7734 : const double dfNodataValuesThreshold =
7735 219 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7736 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7737 219 : 100.0;
7738 :
7739 : const int nXMargin =
7740 219 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7741 : const int nYMargin =
7742 219 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7743 :
7744 : /* ==================================================================== */
7745 : /* Loop over output lines. */
7746 : /* ==================================================================== */
7747 28777 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7748 : {
7749 28558 : GWKAverageOrModeComputeLineCoords(
7750 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7751 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7752 :
7753 : /* ====================================================================
7754 : */
7755 : /* Loop over pixels in output scanline. */
7756 : /* ====================================================================
7757 : */
7758 7357360 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7759 : {
7760 7328800 : GPtrDiff_t iSrcOffset = 0;
7761 7328800 : double dfDensity = 1.0;
7762 7328800 : bool bHasFoundDensity = false;
7763 :
7764 7328800 : bool bWrapOverX = false;
7765 7328800 : double dfXMin = 0;
7766 7328800 : double dfYMin = 0;
7767 7328800 : double dfXMax = 0;
7768 7328800 : double dfYMax = 0;
7769 7328800 : int iSrcXMin = 0;
7770 7328800 : int iSrcYMin = 0;
7771 7328800 : int iSrcXMax = 0;
7772 7328800 : int iSrcYMax = 0;
7773 7328800 : if (!GWKAverageOrModeComputeSourceCoords(
7774 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7775 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7776 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7777 : {
7778 3158560 : continue;
7779 : }
7780 :
7781 5314750 : const GPtrDiff_t iDstOffset =
7782 5314750 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7783 :
7784 5314750 : bool bDone = false;
7785 :
7786 : // Special Average mode where we process all bands together,
7787 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7788 5314750 : constexpr double EPSILON = 1e-10;
7789 14838200 : if (poWK->eResample == GRA_Average &&
7790 4208720 : (!poWK->m_aadfExcludedValues.empty() ||
7791 393224 : dfNodataValuesThreshold < 1 - EPSILON) &&
7792 9523480 : !poWK->bApplyVerticalShift && !bIsComplex)
7793 : {
7794 393224 : double dfTotalWeightInvalid = 0.0;
7795 393224 : double dfTotalWeightExcluded = 0.0;
7796 393224 : double dfTotalWeightRegular = 0.0;
7797 786448 : std::vector<double> adfValueReal(poWK->nBands, 0);
7798 786448 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7799 : std::vector<int> anCountExcludedValues(
7800 393224 : poWK->m_aadfExcludedValues.size(), 0);
7801 :
7802 1179670 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7803 : {
7804 786448 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7805 786448 : iSrcOffset =
7806 786448 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7807 2359340 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7808 : iSrcX++, iSrcOffset++)
7809 : {
7810 1572900 : if (bWrapOverX)
7811 0 : iSrcOffset =
7812 0 : (iSrcX % nSrcXSize) +
7813 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7814 :
7815 1572900 : const double dfWeight =
7816 1572900 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7817 1572900 : if (dfWeight <= 0)
7818 0 : continue;
7819 :
7820 1572910 : if (poWK->panUnifiedSrcValid != nullptr &&
7821 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7822 : {
7823 3 : dfTotalWeightInvalid += dfWeight;
7824 3 : continue;
7825 : }
7826 :
7827 1572890 : bool bAllValid = true;
7828 2359410 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7829 : {
7830 2097230 : double dfBandDensity = 0;
7831 2097230 : double dfValueImagTmp = 0;
7832 2883740 : if (!(GWKGetPixelValue(
7833 : poWK, iBand, iSrcOffset, &dfBandDensity,
7834 2097230 : &adfValueReal[iBand], &dfValueImagTmp) &&
7835 786513 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7836 : {
7837 1310720 : bAllValid = false;
7838 1310720 : break;
7839 : }
7840 : }
7841 :
7842 1572890 : if (!bAllValid)
7843 : {
7844 1310720 : dfTotalWeightInvalid += dfWeight;
7845 1310720 : continue;
7846 : }
7847 :
7848 262177 : bool bExcludedValueFound = false;
7849 393263 : for (size_t i = 0;
7850 393263 : i < poWK->m_aadfExcludedValues.size(); ++i)
7851 : {
7852 131092 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7853 : {
7854 6 : bExcludedValueFound = true;
7855 6 : ++anCountExcludedValues[i];
7856 6 : dfTotalWeightExcluded += dfWeight;
7857 6 : break;
7858 : }
7859 : }
7860 262177 : if (!bExcludedValueFound)
7861 : {
7862 : // Weighted incremental algorithm mean
7863 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7864 262171 : dfTotalWeightRegular += dfWeight;
7865 1048670 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7866 : {
7867 786495 : adfValueAveraged[iBand] +=
7868 1572990 : (dfWeight / dfTotalWeightRegular) *
7869 1572990 : (adfValueReal[iBand] -
7870 786495 : adfValueAveraged[iBand]);
7871 : }
7872 : }
7873 : }
7874 : }
7875 :
7876 393224 : const double dfTotalWeight = dfTotalWeightInvalid +
7877 : dfTotalWeightExcluded +
7878 : dfTotalWeightRegular;
7879 393224 : if (dfTotalWeightInvalid > 0 &&
7880 : dfTotalWeightInvalid >=
7881 327685 : dfNodataValuesThreshold * dfTotalWeight)
7882 : {
7883 : // Do nothing. Let bHasFoundDensity to false.
7884 : }
7885 65543 : else if (dfTotalWeightExcluded > 0 &&
7886 : dfTotalWeightExcluded >=
7887 6 : dfExcludedValuesThreshold * dfTotalWeight)
7888 : {
7889 : // Find the most represented excluded value tuple
7890 2 : size_t iExcludedValue = 0;
7891 2 : int nExcludedValueCount = 0;
7892 4 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7893 : ++i)
7894 : {
7895 2 : if (anCountExcludedValues[i] > nExcludedValueCount)
7896 : {
7897 2 : iExcludedValue = i;
7898 2 : nExcludedValueCount = anCountExcludedValues[i];
7899 : }
7900 : }
7901 :
7902 2 : bHasFoundDensity = true;
7903 :
7904 8 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7905 : {
7906 6 : GWKSetPixelValue(
7907 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7908 6 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7909 : 0, bAvoidNoDataSingleBand);
7910 : }
7911 :
7912 2 : if (!bAvoidNoDataSingleBand)
7913 : {
7914 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7915 2 : }
7916 : }
7917 65541 : else if (dfTotalWeightRegular > 0)
7918 : {
7919 65541 : bHasFoundDensity = true;
7920 :
7921 262160 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7922 : {
7923 196619 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7924 : /* dfBandDensity = */ 1.0,
7925 196619 : adfValueAveraged[iBand], 0,
7926 : bAvoidNoDataSingleBand);
7927 : }
7928 :
7929 65541 : if (!bAvoidNoDataSingleBand)
7930 : {
7931 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
7932 : }
7933 : }
7934 :
7935 : // Skip below loop on bands
7936 393224 : bDone = true;
7937 : }
7938 :
7939 : /* ====================================================================
7940 : */
7941 : /* Loop processing each band. */
7942 : /* ====================================================================
7943 : */
7944 :
7945 17670500 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7946 : {
7947 12355700 : double dfBandDensity = 0.0;
7948 12355700 : double dfValueReal = 0.0;
7949 12355700 : double dfValueImag = 0.0;
7950 12355700 : double dfValueRealTmp = 0.0;
7951 12355700 : double dfValueImagTmp = 0.0;
7952 :
7953 : /* --------------------------------------------------------------------
7954 : */
7955 : /* Collect the source value. */
7956 : /* --------------------------------------------------------------------
7957 : */
7958 :
7959 : // Loop over source lines and pixels - 3 possible algorithms.
7960 :
7961 12355700 : if (poWK->eResample == GRA_Average)
7962 : {
7963 9833240 : double dfTotalWeight = 0.0;
7964 :
7965 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7966 : // in gcore/overview.cpp.
7967 25243600 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7968 : {
7969 15410300 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7970 15410300 : iSrcOffset = iSrcXMin +
7971 15410300 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7972 44761400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7973 : iSrcX++, iSrcOffset++)
7974 : {
7975 29351100 : if (bWrapOverX)
7976 2571 : iSrcOffset =
7977 2571 : (iSrcX % nSrcXSize) +
7978 2571 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7979 :
7980 29351100 : if (poWK->panUnifiedSrcValid != nullptr &&
7981 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7982 : iSrcOffset))
7983 : {
7984 1 : continue;
7985 : }
7986 :
7987 29351100 : if (GWKGetPixelValue(
7988 : poWK, iBand, iSrcOffset, &dfBandDensity,
7989 48239400 : &dfValueRealTmp, &dfValueImagTmp) &&
7990 18888400 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7991 : {
7992 18888400 : const double dfWeight =
7993 18888400 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7994 18888400 : if (dfWeight > 0)
7995 : {
7996 : // Weighted incremental algorithm mean
7997 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7998 18888400 : dfTotalWeight += dfWeight;
7999 18888400 : dfValueReal +=
8000 18888400 : (dfWeight / dfTotalWeight) *
8001 18888400 : (dfValueRealTmp - dfValueReal);
8002 18888400 : if (bIsComplex)
8003 : {
8004 252 : dfValueImag +=
8005 252 : (dfWeight / dfTotalWeight) *
8006 252 : (dfValueImagTmp - dfValueImag);
8007 : }
8008 : }
8009 : }
8010 : }
8011 : }
8012 :
8013 9833240 : if (dfTotalWeight > 0)
8014 : {
8015 7530420 : if (poWK->bApplyVerticalShift)
8016 : {
8017 0 : if (!std::isfinite(padfZ[iDstX]))
8018 0 : continue;
8019 : // Subtract padfZ[] since the coordinate
8020 : // transformation is from target to source
8021 0 : dfValueReal =
8022 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8023 0 : padfZ[iDstX] *
8024 : dfMultFactorVerticalShiftPipeline;
8025 : }
8026 :
8027 7530420 : dfBandDensity = 1;
8028 7530420 : bHasFoundDensity = true;
8029 : }
8030 : } // GRA_Average.
8031 :
8032 2522460 : else if (poWK->eResample == GRA_RMS)
8033 : {
8034 300416 : double dfTotalReal = 0.0;
8035 300416 : double dfTotalImag = 0.0;
8036 300416 : double dfTotalWeight = 0.0;
8037 : // This code adapted from GDALDownsampleChunk32R_AverageT()
8038 : // in gcore/overview.cpp.
8039 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8040 : {
8041 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8042 330162 : iSrcOffset = iSrcXMin +
8043 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8044 772930 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8045 : iSrcX++, iSrcOffset++)
8046 : {
8047 442768 : if (bWrapOverX)
8048 1371 : iSrcOffset =
8049 1371 : (iSrcX % nSrcXSize) +
8050 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8051 :
8052 442768 : if (poWK->panUnifiedSrcValid != nullptr &&
8053 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8054 : iSrcOffset))
8055 : {
8056 0 : continue;
8057 : }
8058 :
8059 442768 : if (GWKGetPixelValue(
8060 : poWK, iBand, iSrcOffset, &dfBandDensity,
8061 885536 : &dfValueRealTmp, &dfValueImagTmp) &&
8062 442768 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8063 : {
8064 442768 : const double dfWeight =
8065 442768 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8066 442768 : dfTotalWeight += dfWeight;
8067 442768 : dfTotalReal +=
8068 442768 : dfValueRealTmp * dfValueRealTmp * dfWeight;
8069 442768 : if (bIsComplex)
8070 48 : dfTotalImag += dfValueImagTmp *
8071 48 : dfValueImagTmp * dfWeight;
8072 : }
8073 : }
8074 : }
8075 :
8076 300416 : if (dfTotalWeight > 0)
8077 : {
8078 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
8079 :
8080 300416 : if (poWK->bApplyVerticalShift)
8081 : {
8082 0 : if (!std::isfinite(padfZ[iDstX]))
8083 0 : continue;
8084 : // Subtract padfZ[] since the coordinate
8085 : // transformation is from target to source
8086 0 : dfValueReal =
8087 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8088 0 : padfZ[iDstX] *
8089 : dfMultFactorVerticalShiftPipeline;
8090 : }
8091 :
8092 300416 : if (bIsComplex)
8093 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
8094 :
8095 300416 : dfBandDensity = 1;
8096 300416 : bHasFoundDensity = true;
8097 : }
8098 : } // GRA_RMS.
8099 :
8100 2222040 : else if (poWK->eResample == GRA_Mode)
8101 : {
8102 496623 : float fMaxCount = 0.0f;
8103 496623 : int nMode = -1;
8104 496623 : bool bHasSourceValues = false;
8105 :
8106 496623 : memset(pafCounts, 0, nBins * sizeof(float));
8107 :
8108 1167120 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8109 : {
8110 670495 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
8111 670495 : iSrcOffset = iSrcXMin +
8112 670495 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8113 1964680 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8114 : iSrcX++, iSrcOffset++)
8115 : {
8116 1294190 : if (bWrapOverX)
8117 1371 : iSrcOffset =
8118 1371 : (iSrcX % nSrcXSize) +
8119 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8120 :
8121 1294190 : if (poWK->panUnifiedSrcValid != nullptr &&
8122 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8123 : iSrcOffset))
8124 0 : continue;
8125 :
8126 1294190 : if (GWKGetPixelValue(
8127 : poWK, iBand, iSrcOffset, &dfBandDensity,
8128 2588370 : &dfValueRealTmp, &dfValueImagTmp) &&
8129 1294190 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8130 : {
8131 1294190 : bHasSourceValues = true;
8132 1294190 : const int nVal =
8133 1294190 : static_cast<int>(dfValueRealTmp);
8134 1294190 : const int iBin = nVal + nBinsOffset;
8135 1294190 : const double dfWeight =
8136 1294190 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
8137 :
8138 : // Sum the density.
8139 1294190 : pafCounts[iBin] += static_cast<float>(dfWeight);
8140 : // Is it the most common value so far?
8141 1294190 : bool bUpdateMode = pafCounts[iBin] > fMaxCount;
8142 1294190 : if (!bUpdateMode &&
8143 227545 : pafCounts[iBin] == fMaxCount)
8144 : {
8145 15866 : switch (eTieStrategy)
8146 : {
8147 15858 : case GWKTS_First:
8148 15858 : break;
8149 4 : case GWKTS_Min:
8150 4 : bUpdateMode = nVal < nMode;
8151 4 : break;
8152 4 : case GWKTS_Max:
8153 4 : bUpdateMode = nVal > nMode;
8154 4 : break;
8155 : }
8156 : }
8157 1294190 : if (bUpdateMode)
8158 : {
8159 1066640 : nMode = nVal;
8160 1066640 : fMaxCount = pafCounts[iBin];
8161 : }
8162 : }
8163 : }
8164 : }
8165 :
8166 496623 : if (bHasSourceValues)
8167 : {
8168 496623 : dfValueReal = nMode;
8169 496623 : dfBandDensity = 1;
8170 496623 : bHasFoundDensity = true;
8171 : }
8172 : } // GRA_Mode.
8173 :
8174 1725420 : else if (poWK->eResample == GRA_Max)
8175 : {
8176 335037 : bool bFoundValid = false;
8177 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
8178 : // This code adapted from nAlgo 1 method, GRA_Average.
8179 842572 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8180 : {
8181 507535 : iSrcOffset = iSrcXMin +
8182 507535 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8183 1638060 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8184 : iSrcX++, iSrcOffset++)
8185 : {
8186 1130520 : if (bWrapOverX)
8187 1371 : iSrcOffset =
8188 1371 : (iSrcX % nSrcXSize) +
8189 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8190 :
8191 1133330 : if (poWK->panUnifiedSrcValid != nullptr &&
8192 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8193 : iSrcOffset))
8194 : {
8195 2446 : continue;
8196 : }
8197 :
8198 : // Returns pixel value if it is not no data.
8199 1128070 : if (GWKGetPixelValue(
8200 : poWK, iBand, iSrcOffset, &dfBandDensity,
8201 2256150 : &dfValueRealTmp, &dfValueImagTmp) &&
8202 1128070 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8203 : {
8204 1128070 : bFoundValid = true;
8205 1128070 : if (dfTotalReal < dfValueRealTmp)
8206 : {
8207 463372 : dfTotalReal = dfValueRealTmp;
8208 : }
8209 : }
8210 : }
8211 : }
8212 :
8213 335037 : if (bFoundValid)
8214 : {
8215 335037 : dfValueReal = dfTotalReal;
8216 :
8217 335037 : if (poWK->bApplyVerticalShift)
8218 : {
8219 0 : if (!std::isfinite(padfZ[iDstX]))
8220 0 : continue;
8221 : // Subtract padfZ[] since the coordinate
8222 : // transformation is from target to source
8223 0 : dfValueReal =
8224 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8225 0 : padfZ[iDstX] *
8226 : dfMultFactorVerticalShiftPipeline;
8227 : }
8228 :
8229 335037 : dfBandDensity = 1;
8230 335037 : bHasFoundDensity = true;
8231 : }
8232 : }
8233 :
8234 1390380 : else if (poWK->eResample == GRA_Min)
8235 : {
8236 335012 : bool bFoundValid = false;
8237 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
8238 : // This code adapted from nAlgo 1 method, GRA_Average.
8239 842282 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8240 : {
8241 507270 : iSrcOffset = iSrcXMin +
8242 507270 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8243 1634980 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8244 : iSrcX++, iSrcOffset++)
8245 : {
8246 1127710 : if (bWrapOverX)
8247 1371 : iSrcOffset =
8248 1371 : (iSrcX % nSrcXSize) +
8249 1371 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8250 :
8251 1127710 : if (poWK->panUnifiedSrcValid != nullptr &&
8252 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8253 : iSrcOffset))
8254 : {
8255 0 : continue;
8256 : }
8257 :
8258 : // Returns pixel value if it is not no data.
8259 1127710 : if (GWKGetPixelValue(
8260 : poWK, iBand, iSrcOffset, &dfBandDensity,
8261 2255420 : &dfValueRealTmp, &dfValueImagTmp) &&
8262 1127710 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8263 : {
8264 1127710 : bFoundValid = true;
8265 1127710 : if (dfTotalReal > dfValueRealTmp)
8266 : {
8267 464157 : dfTotalReal = dfValueRealTmp;
8268 : }
8269 : }
8270 : }
8271 : }
8272 :
8273 335012 : if (bFoundValid)
8274 : {
8275 335012 : dfValueReal = dfTotalReal;
8276 :
8277 335012 : if (poWK->bApplyVerticalShift)
8278 : {
8279 0 : if (!std::isfinite(padfZ[iDstX]))
8280 0 : continue;
8281 : // Subtract padfZ[] since the coordinate
8282 : // transformation is from target to source
8283 0 : dfValueReal =
8284 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8285 0 : padfZ[iDstX] *
8286 : dfMultFactorVerticalShiftPipeline;
8287 : }
8288 :
8289 335012 : dfBandDensity = 1;
8290 335012 : bHasFoundDensity = true;
8291 : }
8292 : } // GRA_Min.
8293 :
8294 : else
8295 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
8296 : {
8297 1055370 : CPLAssert(quant > 0.0f);
8298 :
8299 1055370 : bool bFoundValid = false;
8300 1055370 : std::vector<double> dfRealValuesTmp;
8301 :
8302 : // This code adapted from nAlgo 1 method, GRA_Average.
8303 2677810 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8304 : {
8305 1622440 : iSrcOffset = iSrcXMin +
8306 1622440 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8307 5205220 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8308 : iSrcX++, iSrcOffset++)
8309 : {
8310 3582770 : if (bWrapOverX)
8311 4113 : iSrcOffset =
8312 4113 : (iSrcX % nSrcXSize) +
8313 4113 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8314 :
8315 3779380 : if (poWK->panUnifiedSrcValid != nullptr &&
8316 196608 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8317 : iSrcOffset))
8318 : {
8319 195449 : continue;
8320 : }
8321 :
8322 : // Returns pixel value if it is not no data.
8323 3387320 : if (GWKGetPixelValue(
8324 : poWK, iBand, iSrcOffset, &dfBandDensity,
8325 6774650 : &dfValueRealTmp, &dfValueImagTmp) &&
8326 3387320 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8327 : {
8328 3387320 : bFoundValid = true;
8329 3387320 : dfRealValuesTmp.push_back(dfValueRealTmp);
8330 : }
8331 : }
8332 : }
8333 :
8334 1055370 : if (bFoundValid)
8335 : {
8336 1006150 : std::sort(dfRealValuesTmp.begin(),
8337 : dfRealValuesTmp.end());
8338 : int quantIdx = static_cast<int>(
8339 1006150 : std::ceil(quant * dfRealValuesTmp.size() - 1));
8340 1006150 : dfValueReal = dfRealValuesTmp[quantIdx];
8341 :
8342 1006150 : if (poWK->bApplyVerticalShift)
8343 : {
8344 0 : if (!std::isfinite(padfZ[iDstX]))
8345 0 : continue;
8346 : // Subtract padfZ[] since the coordinate
8347 : // transformation is from target to source
8348 0 : dfValueReal =
8349 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8350 0 : padfZ[iDstX] *
8351 : dfMultFactorVerticalShiftPipeline;
8352 : }
8353 :
8354 1006150 : dfBandDensity = 1;
8355 1006150 : bHasFoundDensity = true;
8356 1006150 : dfRealValuesTmp.clear();
8357 : }
8358 : } // Quantile.
8359 :
8360 : /* --------------------------------------------------------------------
8361 : */
8362 : /* We have a computed value from the source. Now apply it
8363 : * to */
8364 : /* the destination pixel. */
8365 : /* --------------------------------------------------------------------
8366 : */
8367 12355700 : if (bHasFoundDensity)
8368 : {
8369 : // TODO: Should we compute dfBandDensity in fct of
8370 : // nCount/nCount2, or use as a threshold to set the dest
8371 : // value?
8372 : // dfBandDensity = (float) nCount / nCount2;
8373 : // if( (float) nCount / nCount2 > 0.1 )
8374 : // or fix gdalwarp crop_to_cutline to crop partially
8375 : // overlapping pixels.
8376 10003600 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8377 : dfValueReal, dfValueImag,
8378 : bAvoidNoDataSingleBand);
8379 : }
8380 : }
8381 :
8382 5314750 : if (!bHasFoundDensity)
8383 1144510 : continue;
8384 :
8385 4170240 : if (!bAvoidNoDataSingleBand)
8386 : {
8387 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
8388 : }
8389 :
8390 : /* --------------------------------------------------------------------
8391 : */
8392 : /* Update destination density/validity masks. */
8393 : /* --------------------------------------------------------------------
8394 : */
8395 4170240 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8396 :
8397 4170240 : if (poWK->panDstValid != nullptr)
8398 : {
8399 1184 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8400 : }
8401 : } /* Next iDstX */
8402 :
8403 : /* --------------------------------------------------------------------
8404 : */
8405 : /* Report progress to the user, and optionally cancel out. */
8406 : /* --------------------------------------------------------------------
8407 : */
8408 28558 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8409 0 : break;
8410 : }
8411 :
8412 : /* -------------------------------------------------------------------- */
8413 : /* Cleanup and return. */
8414 : /* -------------------------------------------------------------------- */
8415 219 : CPLFree(padfX);
8416 219 : CPLFree(padfY);
8417 219 : CPLFree(padfZ);
8418 219 : CPLFree(padfX2);
8419 219 : CPLFree(padfY2);
8420 219 : CPLFree(padfZ2);
8421 219 : CPLFree(pabSuccess);
8422 219 : CPLFree(pabSuccess2);
8423 219 : VSIFree(pafCounts);
8424 : }
8425 :
8426 : /************************************************************************/
8427 : /* getOrientation() */
8428 : /************************************************************************/
8429 :
8430 : // Returns 1 whether (p1,p2,p3) is clockwise oriented,
8431 : // -1 if it is counter-clockwise oriented,
8432 : // or 0 if it is colinear.
8433 2355910 : static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
8434 : {
8435 2355910 : const double p1x = p1.first;
8436 2355910 : const double p1y = p1.second;
8437 2355910 : const double p2x = p2.first;
8438 2355910 : const double p2y = p2.second;
8439 2355910 : const double p3x = p3.first;
8440 2355910 : const double p3y = p3.second;
8441 2355910 : const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
8442 2355910 : if (std::abs(val) < 1e-20)
8443 2690 : return 0;
8444 2353220 : else if (val > 0)
8445 0 : return 1;
8446 : else
8447 2353220 : return -1;
8448 : }
8449 :
8450 : /************************************************************************/
8451 : /* isConvex() */
8452 : /************************************************************************/
8453 :
8454 : // poly must be closed
8455 785302 : static bool isConvex(const XYPoly &poly)
8456 : {
8457 785302 : const size_t n = poly.size();
8458 785302 : size_t i = 0;
8459 785302 : int last_orientation = getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8460 785302 : ++i;
8461 2355910 : for (; i < n - 2; ++i)
8462 : {
8463 : const int orientation =
8464 1570600 : getOrientation(poly[i], poly[i + 1], poly[i + 2]);
8465 1570600 : if (orientation != 0)
8466 : {
8467 1567910 : if (last_orientation == 0)
8468 0 : last_orientation = orientation;
8469 1567910 : else if (orientation != last_orientation)
8470 0 : return false;
8471 : }
8472 : }
8473 785302 : return true;
8474 : }
8475 :
8476 : /************************************************************************/
8477 : /* pointIntersectsConvexPoly() */
8478 : /************************************************************************/
8479 :
8480 : // Returns whether xy intersects poly, that must be closed and convex.
8481 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8482 : {
8483 6049100 : const size_t n = poly.size();
8484 6049100 : double dx1 = xy.first - poly[0].first;
8485 6049100 : double dy1 = xy.second - poly[0].second;
8486 6049100 : double dx2 = poly[1].first - poly[0].first;
8487 6049100 : double dy2 = poly[1].second - poly[0].second;
8488 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8489 :
8490 : // Check if the point remains on the same side (left/right) of all edges
8491 14556400 : for (size_t i = 2; i < n; i++)
8492 : {
8493 12793100 : dx1 = xy.first - poly[i - 1].first;
8494 12793100 : dy1 = xy.second - poly[i - 1].second;
8495 :
8496 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8497 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8498 :
8499 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8500 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8501 725558 : prevCrossProduct = crossProduct;
8502 12067500 : else if (prevCrossProduct * crossProduct < 0)
8503 4285760 : return false;
8504 : }
8505 :
8506 1763340 : return true;
8507 : }
8508 :
8509 : /************************************************************************/
8510 : /* getIntersection() */
8511 : /************************************************************************/
8512 :
8513 : /* Returns intersection of [p1,p2] with [p3,p4], if
8514 : * it is a single point, and the 2 segments are not colinear.
8515 : */
8516 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8517 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8518 : {
8519 11811000 : const double x1 = p1.first;
8520 11811000 : const double y1 = p1.second;
8521 11811000 : const double x2 = p2.first;
8522 11811000 : const double y2 = p2.second;
8523 11811000 : const double x3 = p3.first;
8524 11811000 : const double y3 = p3.second;
8525 11811000 : const double x4 = p4.first;
8526 11811000 : const double y4 = p4.second;
8527 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8528 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8529 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8530 9260780 : return false;
8531 :
8532 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8533 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8534 973924 : return false;
8535 :
8536 1576340 : const double t = t_num / denom;
8537 1576340 : xy.first = x1 + t * (x2 - x1);
8538 1576340 : xy.second = y1 + t * (y2 - y1);
8539 1576340 : return true;
8540 : }
8541 :
8542 : /************************************************************************/
8543 : /* getConvexPolyIntersection() */
8544 : /************************************************************************/
8545 :
8546 : // poly1 and poly2 must be closed and convex.
8547 : // The returned intersection will not necessarily be closed.
8548 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8549 : XYPoly &intersection)
8550 : {
8551 785302 : intersection.clear();
8552 :
8553 : // Add all points of poly1 inside poly2
8554 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8555 : {
8556 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8557 1187430 : intersection.push_back(poly1[i]);
8558 : }
8559 785302 : if (intersection.size() == poly1.size() - 1)
8560 : {
8561 : // poly1 is inside poly2
8562 119100 : return;
8563 : }
8564 :
8565 : // Add all points of poly2 inside poly1
8566 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8567 : {
8568 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8569 575904 : intersection.push_back(poly2[i]);
8570 : }
8571 :
8572 : // Compute the intersection of all edges of both polygons
8573 726972 : XYPair xy;
8574 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8575 : {
8576 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8577 : {
8578 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8579 11631600 : poly2[i2 + 1], xy))
8580 : {
8581 1576230 : intersection.push_back(xy);
8582 : }
8583 : }
8584 : }
8585 :
8586 726972 : if (intersection.empty())
8587 60770 : return;
8588 :
8589 : // Find lowest-left point in intersection set
8590 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8591 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8592 3772450 : for (const auto &pair : intersection)
8593 : {
8594 3106240 : const double x = pair.first;
8595 3106240 : const double y = pair.second;
8596 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8597 : {
8598 1096040 : lowest_x = x;
8599 1096040 : lowest_y = y;
8600 : }
8601 : }
8602 :
8603 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8604 : {
8605 5737980 : const double p1x_diff = p1.first - lowest_x;
8606 5737980 : const double p1y_diff = p1.second - lowest_y;
8607 5737980 : const double p2x_diff = p2.first - lowest_x;
8608 5737980 : const double p2y_diff = p2.second - lowest_y;
8609 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8610 : {
8611 2655420 : if (p1x_diff >= 0)
8612 : {
8613 2655420 : if (p2x_diff >= 0)
8614 2655420 : return p1.first < p2.first;
8615 0 : return true;
8616 : }
8617 : else
8618 : {
8619 0 : if (p2x_diff >= 0)
8620 0 : return false;
8621 0 : return p1.first < p2.first;
8622 : }
8623 : }
8624 :
8625 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8626 1046960 : return p1.second < p2.second;
8627 :
8628 : double tan_p1;
8629 2035600 : if (p1x_diff == 0.0)
8630 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8631 : else
8632 1570980 : tan_p1 = p1y_diff / p1x_diff;
8633 :
8634 : double tan_p2;
8635 2035600 : if (p2x_diff == 0.0)
8636 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8637 : else
8638 1196080 : tan_p2 = p2y_diff / p2x_diff;
8639 :
8640 2035600 : if (tan_p1 >= 0)
8641 : {
8642 1904790 : if (tan_p2 >= 0)
8643 1881590 : return tan_p1 < tan_p2;
8644 : else
8645 23199 : return true;
8646 : }
8647 : else
8648 : {
8649 130806 : if (tan_p2 >= 0)
8650 103900 : return false;
8651 : else
8652 26906 : return tan_p1 < tan_p2;
8653 : }
8654 666202 : };
8655 :
8656 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8657 : // hull
8658 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8659 :
8660 : // Remove duplicated points
8661 666202 : size_t j = 1;
8662 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8663 : {
8664 2440040 : if (intersection[i] != intersection[i - 1])
8665 : {
8666 1452560 : if (j < i)
8667 545275 : intersection[j] = intersection[i];
8668 1452560 : ++j;
8669 : }
8670 : }
8671 666202 : intersection.resize(j);
8672 : }
8673 :
8674 : /************************************************************************/
8675 : /* GWKSumPreserving() */
8676 : /************************************************************************/
8677 :
8678 : static void GWKSumPreservingThread(void *pData);
8679 :
8680 19 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8681 : {
8682 19 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8683 : }
8684 :
8685 19 : static void GWKSumPreservingThread(void *pData)
8686 : {
8687 19 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8688 19 : GDALWarpKernel *poWK = psJob->poWK;
8689 19 : const int iYMin = psJob->iYMin;
8690 19 : const int iYMax = psJob->iYMax;
8691 : const bool bIsAffineNoRotation =
8692 19 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8693 28 : poWK->pTransformerArg) &&
8694 : // for debug/testing purposes
8695 9 : CPLTestBool(
8696 19 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8697 : const bool bAvoidNoDataSingleBand =
8698 21 : poWK->nBands == 1 ||
8699 2 : !CPLTestBool(CSLFetchNameValueDef(poWK->papszWarpOptions,
8700 19 : "UNIFIED_SRC_NODATA", "FALSE"));
8701 :
8702 19 : const int nDstXSize = poWK->nDstXSize;
8703 19 : const int nSrcXSize = poWK->nSrcXSize;
8704 19 : const int nSrcYSize = poWK->nSrcYSize;
8705 :
8706 38 : std::vector<double> adfX0(nSrcXSize + 1);
8707 38 : std::vector<double> adfY0(nSrcXSize + 1);
8708 38 : std::vector<double> adfZ0(nSrcXSize + 1);
8709 38 : std::vector<double> adfX1(nSrcXSize + 1);
8710 38 : std::vector<double> adfY1(nSrcXSize + 1);
8711 38 : std::vector<double> adfZ1(nSrcXSize + 1);
8712 38 : std::vector<int> abSuccess0(nSrcXSize + 1);
8713 38 : std::vector<int> abSuccess1(nSrcXSize + 1);
8714 :
8715 : CPLRectObj sGlobalBounds;
8716 19 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8717 19 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8718 19 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8719 19 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8720 19 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8721 :
8722 : struct SourcePixel
8723 : {
8724 : int iSrcX;
8725 : int iSrcY;
8726 :
8727 : // Coordinates of source pixel in target pixel coordinates
8728 : double dfDstX0;
8729 : double dfDstY0;
8730 : double dfDstX1;
8731 : double dfDstY1;
8732 : double dfDstX2;
8733 : double dfDstY2;
8734 : double dfDstX3;
8735 : double dfDstY3;
8736 :
8737 : // Source pixel total area (might be larger than the one described
8738 : // by above coordinates, if the pixel was crossing the antimeridian
8739 : // and split)
8740 : double dfArea;
8741 : };
8742 :
8743 38 : std::vector<SourcePixel> sourcePixels;
8744 :
8745 38 : XYPoly discontinuityLeft(5);
8746 38 : XYPoly discontinuityRight(5);
8747 :
8748 : /* ==================================================================== */
8749 : /* First pass: transform the 4 corners of each potential */
8750 : /* contributing source pixel to target pixel coordinates. */
8751 : /* ==================================================================== */
8752 :
8753 : // Special case for top line
8754 : {
8755 19 : int iY = 0;
8756 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8757 : {
8758 3345 : adfX1[iX] = iX + poWK->nSrcXOff;
8759 3345 : adfY1[iX] = iY + poWK->nSrcYOff;
8760 3345 : adfZ1[iX] = 0;
8761 : }
8762 :
8763 19 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8764 : adfX1.data(), adfY1.data(), adfZ1.data(),
8765 : abSuccess1.data());
8766 :
8767 3364 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8768 : {
8769 3345 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8770 0 : abSuccess1[iX] = FALSE;
8771 : else
8772 : {
8773 3345 : adfX1[iX] -= poWK->nDstXOff;
8774 3345 : adfY1[iX] -= poWK->nDstYOff;
8775 : }
8776 : }
8777 : }
8778 :
8779 2032 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8780 : {
8781 2032 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8782 872 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8783 2032 : ? 1
8784 1160 : : -1;
8785 19 : };
8786 :
8787 : const auto FindDiscontinuity =
8788 80 : [poWK, psJob, getInsideXSign](
8789 : double dfXLeft, double dfXRight, double dfY,
8790 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8791 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8792 : {
8793 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8794 : {
8795 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8796 800 : double dfXMidReprojected = dfXMid;
8797 800 : dfYMidReprojected = dfY;
8798 800 : double dfZ = 0;
8799 800 : int nSuccess = 0;
8800 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8801 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8802 : &nSuccess);
8803 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8804 : {
8805 456 : dfXRight = dfXMid;
8806 456 : dfXMidReprojectedRight = dfXMidReprojected;
8807 : }
8808 : else
8809 : {
8810 344 : dfXLeft = dfXMid;
8811 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8812 : }
8813 : }
8814 80 : };
8815 :
8816 2685 : for (int iY = 0; iY < nSrcYSize; ++iY)
8817 : {
8818 2666 : std::swap(adfX0, adfX1);
8819 2666 : std::swap(adfY0, adfY1);
8820 2666 : std::swap(adfZ0, adfZ1);
8821 2666 : std::swap(abSuccess0, abSuccess1);
8822 :
8823 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8824 : {
8825 4833460 : adfX1[iX] = iX + poWK->nSrcXOff;
8826 4833460 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8827 4833460 : adfZ1[iX] = 0;
8828 : }
8829 :
8830 2666 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8831 : adfX1.data(), adfY1.data(), adfZ1.data(),
8832 : abSuccess1.data());
8833 :
8834 4836120 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8835 : {
8836 4833460 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8837 0 : abSuccess1[iX] = FALSE;
8838 : else
8839 : {
8840 4833460 : adfX1[iX] -= poWK->nDstXOff;
8841 4833460 : adfY1[iX] -= poWK->nDstYOff;
8842 : }
8843 : }
8844 :
8845 4833460 : for (int iX = 0; iX < nSrcXSize; ++iX)
8846 : {
8847 9661580 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8848 4830790 : abSuccess1[iX + 1])
8849 : {
8850 : /* --------------------------------------------------------------------
8851 : */
8852 : /* Do not try to apply transparent source pixels to the
8853 : * destination.*/
8854 : /* --------------------------------------------------------------------
8855 : */
8856 4830790 : const auto iSrcOffset =
8857 4830790 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8858 9560570 : if (poWK->panUnifiedSrcValid != nullptr &&
8859 4729780 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8860 : {
8861 4738340 : continue;
8862 : }
8863 :
8864 103415 : if (poWK->pafUnifiedSrcDensity != nullptr)
8865 : {
8866 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8867 : SRC_DENSITY_THRESHOLD_FLOAT)
8868 0 : continue;
8869 : }
8870 :
8871 : SourcePixel sp;
8872 103415 : sp.dfArea = 0;
8873 103415 : sp.dfDstX0 = adfX0[iX];
8874 103415 : sp.dfDstY0 = adfY0[iX];
8875 103415 : sp.dfDstX1 = adfX0[iX + 1];
8876 103415 : sp.dfDstY1 = adfY0[iX + 1];
8877 103415 : sp.dfDstX2 = adfX1[iX + 1];
8878 103415 : sp.dfDstY2 = adfY1[iX + 1];
8879 103415 : sp.dfDstX3 = adfX1[iX];
8880 103415 : sp.dfDstY3 = adfY1[iX];
8881 :
8882 : // Detect pixel that likely cross the anti-meridian and
8883 : // introduce a discontinuity when reprojected.
8884 :
8885 103415 : if (std::fabs(adfX0[iX] - adfX0[iX + 1]) > 2 * poWK->dfXScale &&
8886 80 : std::fabs(adfX1[iX] - adfX1[iX + 1]) > 2 * poWK->dfXScale &&
8887 40 : getInsideXSign(adfX0[iX]) !=
8888 80 : getInsideXSign(adfX0[iX + 1]) &&
8889 80 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8890 40 : getInsideXSign(adfX0[iX + 1]) ==
8891 103495 : getInsideXSign(adfX1[iX + 1]) &&
8892 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8893 : 0)
8894 : {
8895 : #ifdef DEBUG_VERBOSE
8896 : CPLDebug(
8897 : "WARP",
8898 : "Discontinuity for iSrcX=%d, iSrcY=%d, dest corners:"
8899 : "X0[iX]=%f X0[iX+1]=%f X1[iX]=%f X1[iX+1]=%f,"
8900 : "Y0[iX]=%f Y0[iX+1]=%f Y1[iX]=%f Y1[iX+1]=%f",
8901 : iX + poWK->nSrcXOff, iY + poWK->nSrcYOff, adfX0[iX],
8902 : adfX0[iX + 1], adfX1[iX], adfX1[iX + 1], adfY0[iX],
8903 : adfY0[iX + 1], adfY1[iX], adfY1[iX + 1]);
8904 : #endif
8905 40 : double dfXMidReprojectedLeftTop = 0;
8906 40 : double dfXMidReprojectedRightTop = 0;
8907 40 : double dfYMidReprojectedTop = 0;
8908 40 : FindDiscontinuity(
8909 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8910 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8911 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8912 : dfYMidReprojectedTop);
8913 40 : double dfXMidReprojectedLeftBottom = 0;
8914 40 : double dfXMidReprojectedRightBottom = 0;
8915 40 : double dfYMidReprojectedBottom = 0;
8916 40 : FindDiscontinuity(
8917 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8918 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8919 : dfXMidReprojectedLeftBottom,
8920 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8921 :
8922 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8923 40 : discontinuityLeft[1] =
8924 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8925 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8926 40 : dfYMidReprojectedBottom);
8927 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8928 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8929 :
8930 40 : discontinuityRight[0] =
8931 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8932 40 : discontinuityRight[1] =
8933 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8934 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8935 40 : dfYMidReprojectedBottom);
8936 40 : discontinuityRight[3] =
8937 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8938 40 : discontinuityRight[4] =
8939 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8940 :
8941 40 : sp.dfArea = getArea(discontinuityLeft) +
8942 40 : getArea(discontinuityRight);
8943 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8944 : {
8945 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8946 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8947 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8948 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8949 : }
8950 : else
8951 : {
8952 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8953 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8954 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8955 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8956 : }
8957 : }
8958 :
8959 : // Bounding box of source pixel (expressed in target pixel
8960 : // coordinates)
8961 : CPLRectObj sRect;
8962 103415 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8963 103415 : std::min(sp.dfDstX2, sp.dfDstX3));
8964 103415 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8965 103415 : std::min(sp.dfDstY2, sp.dfDstY3));
8966 103415 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8967 103415 : std::max(sp.dfDstX2, sp.dfDstX3));
8968 103415 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8969 103415 : std::max(sp.dfDstY2, sp.dfDstY3));
8970 103415 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8971 101355 : sRect.miny < iYMax && sRect.maxy > iYMin))
8972 : {
8973 10852 : continue;
8974 : }
8975 :
8976 92563 : sp.iSrcX = iX;
8977 92563 : sp.iSrcY = iY;
8978 :
8979 92563 : if (!bIsAffineNoRotation)
8980 : {
8981 : // Check polygon validity (no self-crossing)
8982 89745 : XYPair xy;
8983 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8984 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8985 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8986 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8987 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8988 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8989 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8990 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8991 : {
8992 113 : continue;
8993 : }
8994 : }
8995 :
8996 92450 : CPLQuadTreeInsertWithBounds(
8997 : hQuadTree,
8998 : reinterpret_cast<void *>(
8999 92450 : static_cast<uintptr_t>(sourcePixels.size())),
9000 : &sRect);
9001 :
9002 92450 : sourcePixels.push_back(sp);
9003 : }
9004 : }
9005 : }
9006 :
9007 38 : std::vector<double> adfRealValue(poWK->nBands);
9008 38 : std::vector<double> adfImagValue(poWK->nBands);
9009 38 : std::vector<double> adfBandDensity(poWK->nBands);
9010 38 : std::vector<double> adfWeight(poWK->nBands);
9011 :
9012 : #ifdef CHECK_SUM_WITH_GEOS
9013 : auto hGEOSContext = OGRGeometry::createGEOSContext();
9014 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
9015 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
9016 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
9017 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
9018 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
9019 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
9020 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
9021 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
9022 :
9023 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
9024 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
9025 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
9026 : #endif
9027 :
9028 : const XYPoly xy1{
9029 38 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
9030 38 : XYPoly xy2(5);
9031 38 : XYPoly xy2_triangle(4);
9032 38 : XYPoly intersection;
9033 :
9034 : /* ==================================================================== */
9035 : /* Loop over output lines. */
9036 : /* ==================================================================== */
9037 1951 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
9038 : {
9039 : CPLRectObj sRect;
9040 1932 : sRect.miny = iDstY;
9041 1932 : sRect.maxy = iDstY + 1;
9042 :
9043 : /* ====================================================================
9044 : */
9045 : /* Loop over pixels in output scanline. */
9046 : /* ====================================================================
9047 : */
9048 1403940 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
9049 : {
9050 1402010 : sRect.minx = iDstX;
9051 1402010 : sRect.maxx = iDstX + 1;
9052 1402010 : int nSourcePixels = 0;
9053 : void **pahSourcePixel =
9054 1402010 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
9055 1402010 : if (nSourcePixels == 0)
9056 : {
9057 1183090 : CPLFree(pahSourcePixel);
9058 1183100 : continue;
9059 : }
9060 :
9061 218919 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
9062 218919 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
9063 218919 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
9064 218919 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
9065 218919 : double dfDensity = 0;
9066 : // Just above zero to please Coveriy Scan
9067 218919 : double dfTotalWeight = std::numeric_limits<double>::min();
9068 :
9069 : /* ====================================================================
9070 : */
9071 : /* Iterate over each contributing source pixel to add its
9072 : */
9073 : /* value weighed by the ratio of the area of its
9074 : * intersection */
9075 : /* with the target pixel divided by the area of the source
9076 : */
9077 : /* pixel. */
9078 : /* ====================================================================
9079 : */
9080 1020550 : for (int i = 0; i < nSourcePixels; ++i)
9081 : {
9082 801628 : const int iSourcePixel = static_cast<int>(
9083 801628 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
9084 801628 : auto &sp = sourcePixels[iSourcePixel];
9085 :
9086 801628 : double dfWeight = 0.0;
9087 801628 : if (bIsAffineNoRotation)
9088 : {
9089 : // Optimization since the source pixel is a rectangle in
9090 : // target pixel coordinates
9091 16326 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
9092 16326 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
9093 16326 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
9094 16326 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
9095 16326 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
9096 16326 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
9097 16326 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
9098 16326 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
9099 16326 : dfWeight =
9100 16326 : ((dfIntersMaxX - dfIntersMinX) *
9101 16326 : (dfIntersMaxY - dfIntersMinY)) /
9102 16326 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
9103 : }
9104 : else
9105 : {
9106 : // Compute the polygon of the source pixel in target pixel
9107 : // coordinates, and shifted to the target pixel (unit square
9108 : // coordinates)
9109 :
9110 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9111 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
9112 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
9113 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
9114 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
9115 :
9116 785302 : if (isConvex(xy2))
9117 : {
9118 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
9119 785302 : if (intersection.size() >= 3)
9120 : {
9121 468849 : dfWeight = getArea(intersection);
9122 : }
9123 : }
9124 : else
9125 : {
9126 : // Split xy2 into 2 triangles.
9127 0 : xy2_triangle[0] = xy2[0];
9128 0 : xy2_triangle[1] = xy2[1];
9129 0 : xy2_triangle[2] = xy2[2];
9130 0 : xy2_triangle[3] = xy2[0];
9131 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9132 : intersection);
9133 0 : if (intersection.size() >= 3)
9134 : {
9135 0 : dfWeight = getArea(intersection);
9136 : }
9137 :
9138 0 : xy2_triangle[1] = xy2[2];
9139 0 : xy2_triangle[2] = xy2[3];
9140 0 : getConvexPolyIntersection(xy1, xy2_triangle,
9141 : intersection);
9142 0 : if (intersection.size() >= 3)
9143 : {
9144 0 : dfWeight += getArea(intersection);
9145 : }
9146 : }
9147 785302 : if (dfWeight > 0.0)
9148 : {
9149 468828 : if (sp.dfArea == 0)
9150 89592 : sp.dfArea = getArea(xy2);
9151 468828 : dfWeight /= sp.dfArea;
9152 : }
9153 :
9154 : #ifdef CHECK_SUM_WITH_GEOS
9155 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
9156 : sp.dfDstX0 - iDstX,
9157 : sp.dfDstY0 - iDstY);
9158 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
9159 : sp.dfDstX1 - iDstX,
9160 : sp.dfDstY1 - iDstY);
9161 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
9162 : sp.dfDstX2 - iDstX,
9163 : sp.dfDstY2 - iDstY);
9164 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
9165 : sp.dfDstX3 - iDstX,
9166 : sp.dfDstY3 - iDstY);
9167 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
9168 : sp.dfDstX0 - iDstX,
9169 : sp.dfDstY0 - iDstY);
9170 :
9171 : double dfWeightGEOS = 0.0;
9172 : auto hIntersection =
9173 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
9174 : if (hIntersection)
9175 : {
9176 : double dfIntersArea = 0.0;
9177 : if (GEOSArea_r(hGEOSContext, hIntersection,
9178 : &dfIntersArea) &&
9179 : dfIntersArea > 0)
9180 : {
9181 : double dfSourceArea = 0.0;
9182 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
9183 : {
9184 : dfWeightGEOS = dfIntersArea / dfSourceArea;
9185 : }
9186 : }
9187 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
9188 : }
9189 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
9190 : {
9191 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
9192 : dfWeight, dfWeightGEOS);
9193 : printf("xy2: "); // ok
9194 : for (const auto &xy : xy2)
9195 : printf("[%f, %f], ", xy.first, xy.second); // ok
9196 : printf("\n"); // ok
9197 : printf("intersection: "); // ok
9198 : for (const auto &xy : intersection)
9199 : printf("[%f, %f], ", xy.first, xy.second); // ok
9200 : printf("\n"); // ok
9201 : }
9202 : #endif
9203 : }
9204 801628 : if (dfWeight > 0.0)
9205 : {
9206 : #ifdef DEBUG_VERBOSE
9207 : #if defined(DST_X) && defined(DST_Y)
9208 : if (iDstX + poWK->nDstXOff == DST_X &&
9209 : iDstY + poWK->nDstYOff == DST_Y)
9210 : {
9211 : CPLDebug("WARP",
9212 : "iSrcX = %d, iSrcY = %d, weight =%.17g",
9213 : sp.iSrcX + poWK->nSrcXOff,
9214 : sp.iSrcY + poWK->nSrcYOff, dfWeight);
9215 : }
9216 : #endif
9217 : #endif
9218 :
9219 474104 : const GPtrDiff_t iSrcOffset =
9220 474104 : sp.iSrcX +
9221 474104 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
9222 474104 : dfTotalWeight += dfWeight;
9223 :
9224 474104 : if (poWK->pafUnifiedSrcDensity != nullptr)
9225 : {
9226 0 : dfDensity +=
9227 0 : dfWeight *
9228 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
9229 : }
9230 : else
9231 : {
9232 474104 : dfDensity += dfWeight;
9233 : }
9234 :
9235 1818730 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9236 : {
9237 : // Returns pixel value if it is not no data.
9238 : double dfBandDensity;
9239 : double dfRealValue;
9240 : double dfImagValue;
9241 2689250 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
9242 : &dfBandDensity, &dfRealValue,
9243 : &dfImagValue) &&
9244 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
9245 : {
9246 0 : continue;
9247 : }
9248 : #ifdef DEBUG_VERBOSE
9249 : #if defined(DST_X) && defined(DST_Y)
9250 : if (iDstX + poWK->nDstXOff == DST_X &&
9251 : iDstY + poWK->nDstYOff == DST_Y)
9252 : {
9253 : CPLDebug("WARP", "value * weight = %.17g",
9254 : dfRealValue * dfWeight);
9255 : }
9256 : #endif
9257 : #endif
9258 :
9259 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
9260 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
9261 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
9262 1344620 : adfWeight[iBand] += dfWeight;
9263 : }
9264 : }
9265 : }
9266 :
9267 218919 : CPLFree(pahSourcePixel);
9268 :
9269 : /* --------------------------------------------------------------------
9270 : */
9271 : /* Update destination pixel value. */
9272 : /* --------------------------------------------------------------------
9273 : */
9274 218919 : bool bHasFoundDensity = false;
9275 218919 : const GPtrDiff_t iDstOffset =
9276 218919 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
9277 827838 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
9278 : {
9279 608919 : if (adfWeight[iBand] > 0)
9280 : {
9281 : const double dfBandDensity =
9282 608909 : adfBandDensity[iBand] / adfWeight[iBand];
9283 608909 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
9284 : {
9285 608909 : bHasFoundDensity = true;
9286 608909 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
9287 608909 : adfRealValue[iBand],
9288 608909 : adfImagValue[iBand],
9289 : bAvoidNoDataSingleBand);
9290 : }
9291 : }
9292 : }
9293 :
9294 218919 : if (!bHasFoundDensity)
9295 10 : continue;
9296 :
9297 218909 : if (!bAvoidNoDataSingleBand)
9298 : {
9299 0 : GWKAvoidNoDataMultiBand(poWK, iDstOffset);
9300 : }
9301 :
9302 : /* --------------------------------------------------------------------
9303 : */
9304 : /* Update destination density/validity masks. */
9305 : /* --------------------------------------------------------------------
9306 : */
9307 218909 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
9308 :
9309 218909 : if (poWK->panDstValid != nullptr)
9310 : {
9311 11752 : CPLMaskSet(poWK->panDstValid, iDstOffset);
9312 : }
9313 : }
9314 :
9315 : /* --------------------------------------------------------------------
9316 : */
9317 : /* Report progress to the user, and optionally cancel out. */
9318 : /* --------------------------------------------------------------------
9319 : */
9320 1932 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
9321 0 : break;
9322 : }
9323 :
9324 : #ifdef CHECK_SUM_WITH_GEOS
9325 : GEOSGeom_destroy_r(hGEOSContext, hP1);
9326 : GEOSGeom_destroy_r(hGEOSContext, hP2);
9327 : OGRGeometry::freeGEOSContext(hGEOSContext);
9328 : #endif
9329 19 : CPLQuadTreeDestroy(hQuadTree);
9330 19 : }
|