Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 : #include "gdalwarpkernel_opencl.h"
49 :
50 : // #define CHECK_SUM_WITH_GEOS
51 : #ifdef CHECK_SUM_WITH_GEOS
52 : #include "ogr_geometry.h"
53 : #include "ogr_geos.h"
54 : #endif
55 :
56 : #ifdef USE_NEON_OPTIMIZATIONS
57 : #include "include_sse2neon.h"
58 : #define USE_SSE2
59 :
60 : #include "gdalsse_priv.h"
61 :
62 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
63 : // Could possibly be used too on 32bit, but we would need to check at runtime.
64 : #elif defined(__x86_64) || defined(_M_X64)
65 : #define USE_SSE2
66 :
67 : #include "gdalsse_priv.h"
68 :
69 : #if __SSE4_1__
70 : #include <smmintrin.h>
71 : #endif
72 :
73 : #if __SSE3__
74 : #include <pmmintrin.h>
75 : #endif
76 :
77 : #endif
78 :
// Density cut-off constants.
// NOTE(review): the code that uses them is outside this excerpt; judging by
// the names they are the smallest band / source density still treated as a
// contribution -- confirm against the resampling kernels.
constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
constexpr float SRC_DENSITY_THRESHOLD = 0.000000001f;
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
// Filter radius for each resampling algorithm, indexed by the numeric value
// of GDALResampleAlg and exposed through GWKGetFilterRadius(). That accessor
// static_asserts that the table has exactly GRA_LAST_VALUE + 1 entries, so a
// new algorithm needs a new row here. Index 7 is a placeholder kept only to
// preserve the indexing.
static const int anGWKFilterRadius[] = {
    0,  // Nearest neighbour
    1,  // Bilinear
    2,  // Cubic Convolution (Catmull-Rom)
    2,  // Cubic B-Spline
    3,  // Lanczos windowed sinc
    0,  // Average
    0,  // Mode
    0,  // Reserved GRA_Gauss=7
    0,  // Max
    0,  // Min
    0,  // Med
    0,  // Q1
    0,  // Q3
    0,  // Sum
    0,  // RMS
};
101 :
// Single-value filter functions; declared here and defined elsewhere in the
// file.
static double GWKBilinear(double dfX);
static double GWKCubic(double dfX);
static double GWKBSpline(double dfX);
static double GWKLanczosSinc(double dfX);

// Filter function for each resampling algorithm, indexed by the numeric
// value of GDALResampleAlg and exposed through GWKGetFilterFunc(). Entries
// are nullptr for the algorithms that have no such function in this table.
// Must stay the same length as the other per-algorithm tables
// (GRA_LAST_VALUE + 1 entries, checked by a static_assert in the accessor).
static const FilterFuncType apfGWKFilter[] = {
    nullptr,         // Nearest neighbour
    GWKBilinear,     // Bilinear
    GWKCubic,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline,      // Cubic B-Spline
    GWKLanczosSinc,  // Lanczos windowed sinc
    nullptr,         // Average
    nullptr,         // Mode
    nullptr,         // Reserved GRA_Gauss=7
    nullptr,         // Max
    nullptr,         // Min
    nullptr,         // Med
    nullptr,         // Q1
    nullptr,         // Q3
    nullptr,         // Sum
    nullptr,         // RMS
};
124 :
// Array-taking variants of the filter functions; declared here and defined
// elsewhere in the file.
// NOTE(review): the "4Values" suffix suggests each call processes four
// values from padfVals -- confirm against the definitions.
// TODO(schwehr): Can we make these functions have a const * const arg?
static double GWKBilinear4Values(double *padfVals);
static double GWKCubic4Values(double *padfVals);
static double GWKBSpline4Values(double *padfVals);
static double GWKLanczosSinc4Values(double *padfVals);

// Array-taking filter function for each resampling algorithm, indexed by the
// numeric value of GDALResampleAlg and exposed through
// GWKGetFilterFunc4Values(). Row order and length mirror apfGWKFilter
// (GRA_LAST_VALUE + 1 entries, checked by a static_assert in the accessor).
static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
    nullptr,                // Nearest neighbour
    GWKBilinear4Values,     // Bilinear
    GWKCubic4Values,        // Cubic Convolution (Catmull-Rom)
    GWKBSpline4Values,      // Cubic B-Spline
    GWKLanczosSinc4Values,  // Lanczos windowed sinc
    nullptr,                // Average
    nullptr,                // Mode
    nullptr,                // Reserved GRA_Gauss=7
    nullptr,                // Max
    nullptr,                // Min
    nullptr,                // Med
    nullptr,                // Q1
    nullptr,                // Q3
    nullptr,                // Sum
    nullptr,                // RMS
};
148 :
149 9631 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
152 : "Bad size of anGWKFilterRadius");
153 9631 : return anGWKFilterRadius[eResampleAlg];
154 : }
155 :
156 3700 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
159 : "Bad size of apfGWKFilter");
160 3700 : return apfGWKFilter[eResampleAlg];
161 : }
162 :
163 3699 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
166 : "Bad size of apfGWKFilter4Values");
167 3699 : return apfGWKFilter4Values[eResampleAlg];
168 : }
169 :
// Forward declarations of the warping entry points implemented elsewhere in
// this file. Each takes the kernel to run and returns a CPLErr.

// Only declared when GDAL is built with OpenCL support.
#ifdef HAVE_OPENCL
static CPLErr GWKOpenCLCase(GDALWarpKernel *);
#endif

static CPLErr GWKGeneralCase(GDALWarpKernel *);
static CPLErr GWKRealCase(GDALWarpKernel *poWK);
static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
// The Float64 variants exist only when INSTANTIATE_FLOAT64_SSE2_IMPL is
// defined (its #define near the top of the file is commented out).
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
#endif
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
#ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
#endif
static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
static CPLErr GWKAverageOrMode(GDALWarpKernel *);
static CPLErr GWKSumPreserving(GDALWarpKernel *);
static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
202 :
203 : /************************************************************************/
204 : /* GWKJobStruct */
205 : /************************************************************************/
206 :
207 : struct GWKJobStruct
208 : {
209 : std::mutex &mutex;
210 : std::condition_variable &cv;
211 : int &counter;
212 : bool &stopFlag;
213 : GDALWarpKernel *poWK;
214 : int iYMin;
215 : int iYMax;
216 : int (*pfnProgress)(GWKJobStruct *psJob);
217 : void *pTransformerArg;
218 : void (*pfnFunc)(
219 : void *); // used by GWKRun() to assign the proper pTransformerArg
220 :
221 2052 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
222 : int &counter_, bool &stopFlag_)
223 2052 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_),
224 : poWK(nullptr), iYMin(0), iYMax(0), pfnProgress(nullptr),
225 2052 : pTransformerArg(nullptr), pfnFunc(nullptr)
226 : {
227 2052 : }
228 : };
229 :
230 : struct GWKThreadData
231 : {
232 : std::unique_ptr<CPLJobQueue> poJobQueue{};
233 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
234 : int nMaxThreads{0};
235 : int counter{0};
236 : bool stopFlag{false};
237 : std::mutex mutex{};
238 : std::condition_variable cv{};
239 : bool bTransformerArgInputAssignedToThread{false};
240 : void *pTransformerArgInput{
241 : nullptr}; // owned by calling layer. Not to be destroyed
242 : std::map<GIntBig, void *> mapThreadToTransformerArg{};
243 : int nTotalThreadCountForThisRun = 0;
244 : int nCurThreadCountForThisRun = 0;
245 : };
246 :
247 : /************************************************************************/
248 : /* GWKProgressThread() */
249 : /************************************************************************/
250 :
251 : // Return TRUE if the computation must be interrupted.
252 18 : static int GWKProgressThread(GWKJobStruct *psJob)
253 : {
254 18 : bool stop = false;
255 : {
256 18 : std::lock_guard<std::mutex> lock(psJob->mutex);
257 18 : psJob->counter++;
258 18 : stop = psJob->stopFlag;
259 : }
260 18 : psJob->cv.notify_one();
261 :
262 18 : return stop;
263 : }
264 :
265 : /************************************************************************/
266 : /* GWKProgressMonoThread() */
267 : /************************************************************************/
268 :
269 : // Return TRUE if the computation must be interrupted.
270 198871 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
271 : {
272 198871 : GDALWarpKernel *poWK = psJob->poWK;
273 : // coverity[missing_lock]
274 198871 : if (!poWK->pfnProgress(
275 198871 : poWK->dfProgressBase +
276 198871 : poWK->dfProgressScale *
277 198871 : (++psJob->counter / static_cast<double>(psJob->iYMax)),
278 : "", poWK->pProgress))
279 : {
280 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
281 1 : psJob->stopFlag = true;
282 1 : return TRUE;
283 : }
284 198870 : return FALSE;
285 : }
286 :
287 : /************************************************************************/
288 : /* GWKGenericMonoThread() */
289 : /************************************************************************/
290 :
291 2047 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
292 : void (*pfnFunc)(void *pUserData))
293 : {
294 2047 : GWKThreadData td;
295 :
296 : // NOTE: the mutex is not used.
297 2047 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
298 2047 : job.poWK = poWK;
299 2047 : job.iYMin = 0;
300 2047 : job.iYMax = poWK->nDstYSize;
301 2047 : job.pfnProgress = GWKProgressMonoThread;
302 2047 : job.pTransformerArg = poWK->pTransformerArg;
303 2047 : pfnFunc(&job);
304 :
305 4094 : return td.stopFlag ? CE_Failure : CE_None;
306 : }
307 :
308 : /************************************************************************/
309 : /* GWKThreadsCreate() */
310 : /************************************************************************/
311 :
312 1421 : void *GWKThreadsCreate(char **papszWarpOptions,
313 : GDALTransformerFunc /* pfnTransformer */,
314 : void *pTransformerArg)
315 : {
316 : const char *pszWarpThreads =
317 1421 : CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
318 1421 : if (pszWarpThreads == nullptr)
319 1421 : pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
320 :
321 1421 : int nThreads = 0;
322 1421 : if (EQUAL(pszWarpThreads, "ALL_CPUS"))
323 3 : nThreads = CPLGetNumCPUs();
324 : else
325 1418 : nThreads = atoi(pszWarpThreads);
326 1421 : if (nThreads <= 1)
327 1416 : nThreads = 0;
328 1421 : if (nThreads > 128)
329 0 : nThreads = 128;
330 :
331 1421 : GWKThreadData *psThreadData = new GWKThreadData();
332 : auto poThreadPool =
333 1421 : nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
334 1421 : if (nThreads && poThreadPool)
335 : {
336 5 : psThreadData->nMaxThreads = nThreads;
337 5 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
338 : nThreads,
339 5 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
340 10 : psThreadData->counter, psThreadData->stopFlag)));
341 :
342 5 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
343 5 : psThreadData->pTransformerArgInput = pTransformerArg;
344 : }
345 :
346 1421 : return psThreadData;
347 : }
348 :
349 : /************************************************************************/
350 : /* GWKThreadsEnd() */
351 : /************************************************************************/
352 :
353 1421 : void GWKThreadsEnd(void *psThreadDataIn)
354 : {
355 1421 : if (psThreadDataIn == nullptr)
356 0 : return;
357 :
358 1421 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
359 1421 : if (psThreadData->poJobQueue)
360 : {
361 : // cppcheck-suppress constVariableReference
362 15 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
363 : {
364 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput);
365 10 : GDALDestroyTransformer(pair.second);
366 : }
367 5 : psThreadData->poJobQueue.reset();
368 : }
369 1421 : delete psThreadData;
370 : }
371 :
372 : /************************************************************************/
373 : /* ThreadFuncAdapter() */
374 : /************************************************************************/
375 :
376 15 : static void ThreadFuncAdapter(void *pData)
377 : {
378 15 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
379 15 : GWKThreadData *psThreadData =
380 15 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
381 :
382 : // Look if we have already a per-thread transformer
383 15 : void *pTransformerArg = nullptr;
384 15 : const GIntBig nThreadId = CPLGetPID();
385 :
386 : {
387 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
388 15 : ++psThreadData->nCurThreadCountForThisRun;
389 :
390 15 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
391 15 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
392 : {
393 0 : pTransformerArg = oIter->second;
394 : }
395 15 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
396 15 : psThreadData->nCurThreadCountForThisRun ==
397 15 : psThreadData->nTotalThreadCountForThisRun)
398 : {
399 : // If we are the last thread to be started, temporarily borrow the
400 : // original transformer
401 5 : psThreadData->bTransformerArgInputAssignedToThread = true;
402 5 : pTransformerArg = psThreadData->pTransformerArgInput;
403 5 : psThreadData->mapThreadToTransformerArg[nThreadId] =
404 : pTransformerArg;
405 : }
406 :
407 15 : if (pTransformerArg == nullptr)
408 : {
409 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
410 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
411 : }
412 : }
413 :
414 : // If no transformer assigned to current thread, instantiate one
415 15 : if (pTransformerArg == nullptr)
416 : {
417 : // This somehow assumes that GDALCloneTransformer() is thread-safe
418 : // which should normally be the case.
419 : pTransformerArg =
420 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput);
421 :
422 : // Lock for the stop flag and the transformer map.
423 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
424 10 : if (!pTransformerArg)
425 : {
426 0 : psJob->stopFlag = true;
427 0 : return;
428 : }
429 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
430 : }
431 :
432 15 : psJob->pTransformerArg = pTransformerArg;
433 15 : psJob->pfnFunc(pData);
434 :
435 : // Give back original transformer, if borrowed.
436 : {
437 30 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
438 15 : if (psThreadData->bTransformerArgInputAssignedToThread &&
439 5 : pTransformerArg == psThreadData->pTransformerArgInput)
440 : {
441 : psThreadData->mapThreadToTransformerArg.erase(
442 5 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
443 5 : psThreadData->bTransformerArgInputAssignedToThread = false;
444 : }
445 : }
446 : }
447 :
448 : /************************************************************************/
449 : /* GWKRun() */
450 : /************************************************************************/
451 :
452 2052 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
453 : void (*pfnFunc)(void *pUserData))
454 :
455 : {
456 2052 : const int nDstYSize = poWK->nDstYSize;
457 :
458 2052 : CPLDebug("GDAL",
459 : "GDALWarpKernel()::%s() "
460 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
461 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
462 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
463 : poWK->nDstYSize);
464 :
465 2052 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
466 : {
467 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
468 0 : return CE_Failure;
469 : }
470 :
471 2052 : GWKThreadData *psThreadData =
472 : static_cast<GWKThreadData *>(poWK->psThreadData);
473 2052 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
474 : {
475 2047 : return GWKGenericMonoThread(poWK, pfnFunc);
476 : }
477 :
478 5 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
479 : // Config option mostly useful for tests to be able to test multithreading
480 : // with small rasters
481 : const int nWarpChunkSize =
482 5 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
483 5 : if (nWarpChunkSize > 0)
484 : {
485 3 : GIntBig nChunks =
486 3 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
487 3 : if (nThreads > nChunks)
488 1 : nThreads = static_cast<int>(nChunks);
489 : }
490 5 : if (nThreads <= 0)
491 1 : nThreads = 1;
492 :
493 5 : CPLDebug("WARP", "Using %d threads", nThreads);
494 :
495 5 : auto &jobs = *psThreadData->threadJobs;
496 5 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
497 : // Fill-in job structures.
498 20 : for (int i = 0; i < nThreads; ++i)
499 : {
500 15 : auto &job = jobs[i];
501 15 : job.poWK = poWK;
502 15 : job.iYMin =
503 15 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
504 15 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
505 15 : nThreads);
506 15 : if (poWK->pfnProgress != GDALDummyProgress)
507 1 : job.pfnProgress = GWKProgressThread;
508 15 : job.pfnFunc = pfnFunc;
509 : }
510 :
511 : bool bStopFlag;
512 : {
513 5 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
514 :
515 5 : psThreadData->nTotalThreadCountForThisRun = nThreads;
516 : // coverity[missing_lock]
517 5 : psThreadData->nCurThreadCountForThisRun = 0;
518 :
519 : // Start jobs.
520 20 : for (int i = 0; i < nThreads; ++i)
521 : {
522 15 : auto &job = jobs[i];
523 15 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
524 : static_cast<void *>(&job));
525 : }
526 :
527 : /* --------------------------------------------------------------------
528 : */
529 : /* Report progress. */
530 : /* --------------------------------------------------------------------
531 : */
532 5 : if (poWK->pfnProgress != GDALDummyProgress)
533 : {
534 1 : while (psThreadData->counter < nDstYSize)
535 : {
536 1 : psThreadData->cv.wait(lock);
537 1 : if (!poWK->pfnProgress(poWK->dfProgressBase +
538 1 : poWK->dfProgressScale *
539 1 : (psThreadData->counter /
540 1 : static_cast<double>(nDstYSize)),
541 : "", poWK->pProgress))
542 : {
543 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
544 1 : psThreadData->stopFlag = true;
545 1 : break;
546 : }
547 : }
548 : }
549 :
550 5 : bStopFlag = psThreadData->stopFlag;
551 : }
552 :
553 : /* -------------------------------------------------------------------- */
554 : /* Wait for all jobs to complete. */
555 : /* -------------------------------------------------------------------- */
556 5 : psThreadData->poJobQueue->WaitCompletion();
557 :
558 5 : return bStopFlag ? CE_Failure : CE_None;
559 : }
560 :
561 : /************************************************************************/
562 : /* ==================================================================== */
563 : /* GDALWarpKernel */
564 : /* ==================================================================== */
565 : /************************************************************************/
566 :
567 : /**
568 : * \class GDALWarpKernel "gdalwarper.h"
569 : *
570 : * Low level image warping class.
571 : *
572 : * This class is responsible for low level image warping for one
573 : * "chunk" of imagery. The class is essentially a structure with all
574 : * data members public - primarily so that new special-case functions
575 : * can be added without changing the class declaration.
576 : *
 * Applications are normally intended to interact with warping facilities
 * through the GDALWarpOperation class, though the GDALWarpKernel can in
 * theory be used directly if great care is taken in setting up the
 * control data.
581 : *
582 : * <h3>Design Issues</h3>
583 : *
584 : * The intention is that PerformWarp() would analyze the setup in terms
585 : * of the datatype, resampling type, and validity/density mask usage and
586 : * pick one of many specific implementations of the warping algorithm over
587 : * a continuum of optimization vs. generality. At one end there will be a
588 : * reference general purpose implementation of the algorithm that supports
589 : * any data type (working internally in double precision complex), all three
590 : * resampling types, and any or all of the validity/density masks. At the
591 : * other end would be highly optimized algorithms for common cases like
592 : * nearest neighbour resampling on GDT_Byte data with no masks.
593 : *
594 : * The full set of optimized versions have not been decided but we should
595 : * expect to have at least:
596 : * - One for each resampling algorithm for 8bit data with no masks.
597 : * - One for each resampling algorithm for float data with no masks.
598 : * - One for each resampling algorithm for float data with any/all masks
599 : * (essentially the generic case for just float data).
600 : * - One for each resampling algorithm for 8bit data with support for
601 : * input validity masks (per band or per pixel). This handles the common
602 : * case of nodata masking.
603 : * - One for each resampling algorithm for float data with support for
604 : * input validity masks (per band or per pixel). This handles the common
605 : * case of nodata masking.
606 : *
607 : * Some of the specializations would operate on all bands in one pass
608 : * (especially the ones without masking would do this), while others might
609 : * process each band individually to reduce code complexity.
610 : *
611 : * <h3>Masking Semantics</h3>
612 : *
613 : * A detailed explanation of the semantics of the validity and density masks,
614 : * and their effects on resampling kernels is needed here.
615 : */
616 :
617 : /************************************************************************/
618 : /* GDALWarpKernel Data Members */
619 : /************************************************************************/
620 :
621 : /**
622 : * \var GDALResampleAlg GDALWarpKernel::eResample;
623 : *
624 : * Resampling algorithm.
625 : *
626 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
627 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
628 : * GRA_Mode or GRA_Sum.
629 : *
 * This field is required. GRA_NearestNeighbour may be used as a default
 * value.
632 : */
633 :
634 : /**
635 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
636 : *
637 : * Working pixel data type.
638 : *
 * The datatype of pixels in the source image (papabySrcImage) and
 * destination image (papabyDstImage) buffers. Note that operations on
 * some data types (such as GDT_Byte) may be much better optimized than other
 * less common cases.
643 : *
644 : * This field is required. It may not be GDT_Unknown.
645 : */
646 :
647 : /**
648 : * \var int GDALWarpKernel::nBands;
649 : *
650 : * Number of bands.
651 : *
652 : * The number of bands (layers) of imagery being warped. Determines the
653 : * number of entries in the papabySrcImage, papanBandSrcValid,
654 : * and papabyDstImage arrays.
655 : *
656 : * This field is required.
657 : */
658 :
659 : /**
660 : * \var int GDALWarpKernel::nSrcXSize;
661 : *
662 : * Source image width in pixels.
663 : *
664 : * This field is required.
665 : */
666 :
667 : /**
668 : * \var int GDALWarpKernel::nSrcYSize;
669 : *
670 : * Source image height in pixels.
671 : *
672 : * This field is required.
673 : */
674 :
675 : /**
676 : * \var double GDALWarpKernel::dfSrcXExtraSize;
677 : *
678 : * Number of pixels included in nSrcXSize that are present on the edges of
679 : * the area of interest to take into account the width of the kernel.
680 : *
681 : * This field is required.
682 : */
683 :
684 : /**
685 : * \var double GDALWarpKernel::dfSrcYExtraSize;
686 : *
 * Number of pixels included in nSrcYSize that are present on the edges of
 * the area of interest to take into account the height of the kernel.
689 : *
690 : * This field is required.
691 : */
692 :
693 : /**
 * \var GByte **GDALWarpKernel::papabySrcImage;
695 : *
696 : * Array of source image band data.
697 : *
 * This is an array (of size GDALWarpKernel::nBands) of pointers
 * to image data. Each individual band of image data is organized as a single
 * block of image data in left to right, then bottom to top order. The actual
 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
702 : *
703 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
704 : * the second band with eWorkingDataType set to GDT_Float32 use code like
705 : * this:
706 : *
707 : * \code
708 : * float dfPixelValue;
709 : * int nBand = 2-1; // Band indexes are zero based.
710 : * int nPixel = 3; // Zero based.
711 : * int nLine = 4; // Zero based.
712 : *
713 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
714 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
715 : * assert( nBand >= 0 && nBand < poKern->nBands );
716 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
717 : * [nPixel + nLine * poKern->nSrcXSize];
718 : * \endcode
719 : *
720 : * This field is required.
721 : */
722 :
723 : /**
724 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
725 : *
726 : * Per band validity mask for source pixels.
727 : *
728 : * Array of pixel validity mask layers for each source band. Each of
729 : * the mask layers is the same size (in pixels) as the source image with
730 : * one bit per pixel. Note that it is legal (and common) for this to be
731 : * NULL indicating that none of the pixels are invalidated, or for some
732 : * band validity masks to be NULL in which case all pixels of the band are
733 : * valid. The following code can be used to test the validity of a particular
734 : * pixel.
735 : *
736 : * \code
737 : * int bIsValid = TRUE;
738 : * int nBand = 2-1; // Band indexes are zero based.
739 : * int nPixel = 3; // Zero based.
740 : * int nLine = 4; // Zero based.
741 : *
742 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
743 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
744 : * assert( nBand >= 0 && nBand < poKern->nBands );
745 : *
746 : * if( poKern->papanBandSrcValid != NULL
747 : * && poKern->papanBandSrcValid[nBand] != NULL )
748 : * {
749 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
750 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
751 : *
 *       bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
753 : * }
754 : * \endcode
755 : */
756 :
757 : /**
758 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
759 : *
760 : * Per pixel validity mask for source pixels.
761 : *
762 : * A single validity mask layer that applies to the pixels of all source
763 : * bands. It is accessed similarly to papanBandSrcValid, but without the
764 : * extra level of band indirection.
765 : *
766 : * This pointer may be NULL indicating that all pixels are valid.
767 : *
768 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
769 : * the pixel isn't considered to be valid unless both arrays indicate it is
770 : * valid.
771 : */
772 :
773 : /**
774 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
775 : *
776 : * Per pixel density mask for source pixels.
777 : *
778 : * A single density mask layer that applies to the pixels of all source
779 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
780 : * which this pixel should be allowed to contribute to the output result.
781 : *
782 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
783 : *
784 : * The density for a pixel may be accessed like this:
785 : *
786 : * \code
787 : * float fDensity = 1.0;
788 : * int nPixel = 3; // Zero based.
789 : * int nLine = 4; // Zero based.
790 : *
791 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
792 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
793 : * if( poKern->pafUnifiedSrcDensity != NULL )
794 : * fDensity = poKern->pafUnifiedSrcDensity
795 : * [nPixel + nLine * poKern->nSrcXSize];
796 : * \endcode
797 : */
798 :
799 : /**
800 : * \var int GDALWarpKernel::nDstXSize;
801 : *
802 : * Width of destination image in pixels.
803 : *
804 : * This field is required.
805 : */
806 :
807 : /**
808 : * \var int GDALWarpKernel::nDstYSize;
809 : *
810 : * Height of destination image in pixels.
811 : *
812 : * This field is required.
813 : */
814 :
815 : /**
816 : * \var GByte **GDALWarpKernel::papabyDstImage;
817 : *
818 : * Array of destination image band data.
819 : *
 * This is an array (of size GDALWarpKernel::nBands) of pointers
 * to image data. Each individual band of image data is organized as a single
 * block of image data in left to right, then bottom to top order. The actual
 * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
824 : *
825 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
826 : * the second band with eWorkingDataType set to GDT_Float32 use code like
827 : * this:
828 : *
829 : * \code
830 : * float dfPixelValue;
831 : * int nBand = 2-1; // Band indexes are zero based.
832 : * int nPixel = 3; // Zero based.
833 : * int nLine = 4; // Zero based.
834 : *
835 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
836 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
837 : * assert( nBand >= 0 && nBand < poKern->nBands );
 *   dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
 *                                  [nPixel + nLine * poKern->nDstXSize];
840 : * \endcode
841 : *
842 : * This field is required.
843 : */
844 :
845 : /**
846 : * \var GUInt32 *GDALWarpKernel::panDstValid;
847 : *
848 : * Per pixel validity mask for destination pixels.
849 : *
 * A single validity mask layer that applies to the pixels of all destination
 * bands. It is accessed similarly to panUnifiedSrcValid, but based
 * on the size of the destination image.
853 : *
854 : * This pointer may be NULL indicating that all pixels are valid.
855 : */
856 :
857 : /**
858 : * \var float *GDALWarpKernel::pafDstDensity;
859 : *
860 : * Per pixel density mask for destination pixels.
861 : *
862 : * A single density mask layer that applies to the pixels of all destination
863 : * bands. It contains values between 0.0 and 1.0.
864 : *
865 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
866 : *
867 : * The density for a pixel may be accessed like this:
868 : *
869 : * \code
870 : * float fDensity = 1.0;
871 : * int nPixel = 3; // Zero based.
872 : * int nLine = 4; // Zero based.
873 : *
874 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
875 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
876 : * if( poKern->pafDstDensity != NULL )
877 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
878 : * \endcode
879 : */
880 :
881 : /**
882 : * \var int GDALWarpKernel::nSrcXOff;
883 : *
884 : * X offset to source pixel coordinates for transformation.
885 : *
886 : * See pfnTransformer.
887 : *
888 : * This field is required.
889 : */
890 :
891 : /**
892 : * \var int GDALWarpKernel::nSrcYOff;
893 : *
894 : * Y offset to source pixel coordinates for transformation.
895 : *
896 : * See pfnTransformer.
897 : *
898 : * This field is required.
899 : */
900 :
901 : /**
902 : * \var int GDALWarpKernel::nDstXOff;
903 : *
904 : * X offset to destination pixel coordinates for transformation.
905 : *
906 : * See pfnTransformer.
907 : *
908 : * This field is required.
909 : */
910 :
911 : /**
912 : * \var int GDALWarpKernel::nDstYOff;
913 : *
914 : * Y offset to destination pixel coordinates for transformation.
915 : *
916 : * See pfnTransformer.
917 : *
918 : * This field is required.
919 : */
920 :
921 : /**
922 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
923 : *
924 : * Source/destination location transformer.
925 : *
926 : * The function to call to transform coordinates between source image
927 : * pixel/line coordinates and destination image pixel/line coordinates.
928 : * See GDALTransformerFunc() for details of the semantics of this function.
929 : *
930 : * The GDALWarpKernel algorithm will only ever use this transformer in
931 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
932 : * partial or complete scanlines of points in the destination image as
933 : * input. This means, among other things, that it is safe to use the
934 : * approximating transform GDALApproxTransform() as the transformation
935 : * function.
936 : *
937 : * Source and destination images may be subsets of a larger overall image.
938 : * The transformation algorithms will expect and return pixel/line coordinates
939 : * in terms of this larger image, so coordinates need to be offset by
940 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
941 : * passing to pfnTransformer, and after return from it.
942 : *
943 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
944 : * data to this function when it is called.
945 : *
946 : * This field is required.
947 : */
948 :
949 : /**
950 : * \var void *GDALWarpKernel::pTransformerArg;
951 : *
952 : * Callback data for pfnTransformer.
953 : *
954 : * This field may be NULL if not required for the pfnTransformer being used.
955 : */
956 :
957 : /**
958 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
959 : *
960 : * The function to call to report progress of the algorithm, and to check
961 : * for a requested termination of the operation. It operates according to
962 : * GDALProgressFunc() semantics.
963 : *
964 : * Generally speaking the progress function will be invoked for each
965 : * scanline of the destination buffer that has been processed.
966 : *
967 : * This field may be NULL (internally set to GDALDummyProgress()).
968 : */
969 :
970 : /**
971 : * \var void *GDALWarpKernel::pProgress;
972 : *
973 : * Callback data for pfnProgress.
974 : *
975 : * This field may be NULL if not required for the pfnProgress being used.
976 : */
977 :
978 : /************************************************************************/
979 : /* GDALWarpKernel() */
980 : /************************************************************************/
981 :
982 2358 : GDALWarpKernel::GDALWarpKernel()
983 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
984 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
985 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
986 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
987 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
988 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
989 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
990 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
991 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
992 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
993 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
994 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
995 2358 : eTieStrategy(GWKTS_First)
996 : {
997 2358 : }
998 :
999 : /************************************************************************/
1000 : /* ~GDALWarpKernel() */
1001 : /************************************************************************/
1002 :
1003 2358 : GDALWarpKernel::~GDALWarpKernel()
1004 : {
1005 2358 : }
1006 :
1007 : /************************************************************************/
1008 : /* PerformWarp() */
1009 : /************************************************************************/
1010 :
1011 : /**
1012 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1013 : *
1014 : * This method performs the warp described in the GDALWarpKernel.
1015 : *
1016 : * @return CE_None on success or CE_Failure if an error occurs.
1017 : */
1018 :
1019 2356 : CPLErr GDALWarpKernel::PerformWarp()
1020 :
1021 : {
1022 2356 : const CPLErr eErr = Validate();
1023 :
1024 2356 : if (eErr != CE_None)
1025 1 : return eErr;
1026 :
1027 : // See #2445 and #3079.
1028 2355 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1029 : {
1030 303 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1031 : {
1032 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1033 0 : return CE_Failure;
1034 : }
1035 303 : return CE_None;
1036 : }
1037 :
1038 : /* -------------------------------------------------------------------- */
1039 : /* Pre-calculate resampling scales and window sizes for filtering. */
1040 : /* -------------------------------------------------------------------- */
1041 :
1042 2052 : dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1043 2052 : dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1044 2052 : if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1045 1283 : dfXScale = 1.0;
1046 2052 : if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1047 1036 : dfYScale = 1.0;
1048 2052 : if (dfXScale < 1.0)
1049 : {
1050 548 : double dfXReciprocalScale = 1.0 / dfXScale;
1051 548 : const int nXReciprocalScale =
1052 548 : static_cast<int>(dfXReciprocalScale + 0.5);
1053 548 : if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1054 431 : dfXScale = 1.0 / nXReciprocalScale;
1055 : }
1056 2052 : if (dfYScale < 1.0)
1057 : {
1058 517 : double dfYReciprocalScale = 1.0 / dfYScale;
1059 517 : const int nYReciprocalScale =
1060 517 : static_cast<int>(dfYReciprocalScale + 0.5);
1061 517 : if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1062 368 : dfYScale = 1.0 / nYReciprocalScale;
1063 : }
1064 :
1065 : // XSCALE and YSCALE undocumented for now. Can help in some cases.
1066 : // Best would probably be a per-pixel scale computation.
1067 2052 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1068 2052 : if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1069 1 : dfXScale = CPLAtof(pszXScale);
1070 2052 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1071 2052 : if (pszYScale != nullptr)
1072 1 : dfYScale = CPLAtof(pszYScale);
1073 :
1074 : // If the xscale is significantly lower than the yscale, this is highly
1075 : // suspicious of a situation of wrapping a very large virtual file in
1076 : // geographic coordinates with left and right parts being close to the
1077 : // antimeridian. In that situation, the xscale computed by the above method
1078 : // is completely wrong. Prefer doing an average of a few sample points
1079 : // instead
1080 2052 : if ((dfYScale / dfXScale > 100 ||
1081 1 : (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1082 : {
1083 : // Sample points along a grid
1084 4 : const int nPointsX = std::min(10, nDstXSize);
1085 4 : const int nPointsY = std::min(10, nDstYSize);
1086 4 : const int nPoints = 3 * nPointsX * nPointsY;
1087 8 : std::vector<double> padfX;
1088 8 : std::vector<double> padfY;
1089 8 : std::vector<double> padfZ(nPoints);
1090 8 : std::vector<int> pabSuccess(nPoints);
1091 44 : for (int iY = 0; iY < nPointsY; iY++)
1092 : {
1093 440 : for (int iX = 0; iX < nPointsX; iX++)
1094 : {
1095 400 : const double dfX =
1096 : nPointsX == 1
1097 400 : ? 0.0
1098 400 : : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1099 400 : const double dfY =
1100 : nPointsY == 1
1101 400 : ? 0.0
1102 400 : : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1103 :
1104 : // Reproject each destination sample point and its neighbours
1105 : // at (x+1,y) and (x,y+1), so as to get the local scale.
1106 400 : padfX.push_back(dfX);
1107 400 : padfY.push_back(dfY);
1108 :
1109 400 : padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1110 400 : padfY.push_back(dfY);
1111 :
1112 400 : padfX.push_back(dfX);
1113 400 : padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1114 : }
1115 : }
1116 4 : pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1117 4 : &padfZ[0], &pabSuccess[0]);
1118 :
1119 : // Compute the xscale at each sampling point
1120 8 : std::vector<double> adfXScales;
1121 404 : for (int i = 0; i < nPoints; i += 3)
1122 : {
1123 400 : if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1124 : {
1125 : const double dfPointXScale =
1126 400 : 1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1127 800 : std::abs(padfX[i + 2] - padfX[i]));
1128 400 : adfXScales.push_back(dfPointXScale);
1129 : }
1130 : }
1131 :
1132 : // Sort by increasing xcale
1133 4 : std::sort(adfXScales.begin(), adfXScales.end());
1134 :
1135 4 : if (!adfXScales.empty())
1136 : {
1137 : // Compute the average of scales, but eliminate outliers small
1138 : // scales, if some samples are just along the discontinuity.
1139 4 : const double dfMaxPointXScale = adfXScales.back();
1140 4 : double dfSumPointXScale = 0;
1141 4 : int nCountPointScale = 0;
1142 404 : for (double dfPointXScale : adfXScales)
1143 : {
1144 400 : if (dfPointXScale > dfMaxPointXScale / 10)
1145 : {
1146 398 : dfSumPointXScale += dfPointXScale;
1147 398 : nCountPointScale++;
1148 : }
1149 : }
1150 4 : if (nCountPointScale > 0) // should always be true
1151 : {
1152 4 : const double dfXScaleFromSampling =
1153 4 : dfSumPointXScale / nCountPointScale;
1154 : #if DEBUG_VERBOSE
1155 : CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1156 : dfXScaleFromSampling);
1157 : #endif
1158 4 : dfXScale = dfXScaleFromSampling;
1159 : }
1160 : }
1161 : }
1162 :
1163 : #if DEBUG_VERBOSE
1164 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1165 : #endif
1166 :
1167 2052 : const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1168 :
1169 : // Safety check for callers that would use GDALWarpKernel without using
1170 : // GDALWarpOperation.
1171 1989 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1172 1926 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1173 4104 : !bUse4SamplesFormula)) &&
1174 388 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1175 : WARP_EXTRA_ELTS)
1176 : {
1177 0 : CPLError(CE_Failure, CPLE_AppDefined,
1178 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1179 : "their end. "
1180 : "See GDALWarpKernel class definition. If this condition is "
1181 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1182 : WARP_EXTRA_ELTS);
1183 0 : return CE_Failure;
1184 : }
1185 :
1186 2052 : dfXFilter = anGWKFilterRadius[eResample];
1187 2052 : dfYFilter = anGWKFilterRadius[eResample];
1188 :
1189 2052 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1190 1586 : : static_cast<int>(dfXFilter);
1191 2052 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1192 1563 : : static_cast<int>(dfYFilter);
1193 :
1194 : // Filter window offset depends on the parity of the kernel radius.
1195 2052 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1196 2052 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1197 :
1198 2052 : bApplyVerticalShift =
1199 2052 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1200 2052 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1201 2052 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1202 :
1203 : /* -------------------------------------------------------------------- */
1204 : /* Set up resampling functions. */
1205 : /* -------------------------------------------------------------------- */
1206 2052 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1207 12 : return GWKGeneralCase(this);
1208 :
1209 : #if defined(HAVE_OPENCL)
1210 589 : if ((eWorkingDataType == GDT_Byte || eWorkingDataType == GDT_CInt16 ||
1211 417 : eWorkingDataType == GDT_UInt16 || eWorkingDataType == GDT_Int16 ||
1212 277 : eWorkingDataType == GDT_CFloat32 || eWorkingDataType == GDT_Float32) &&
1213 1885 : (eResample == GRA_Bilinear || eResample == GRA_Cubic ||
1214 1440 : eResample == GRA_CubicSpline || eResample == GRA_Lanczos) &&
1215 4608 : !bApplyVerticalShift &&
1216 : // OpenCL warping gives different results than the ones expected by autotest,
1217 : // so disable it by default even if found.
1218 1056 : CPLTestBool(
1219 528 : CSLFetchNameValueDef(papszWarpOptions, "USE_OPENCL",
1220 : CPLGetConfigOption("GDAL_USE_OPENCL", "NO"))))
1221 : {
1222 0 : if (pafUnifiedSrcDensity != nullptr)
1223 : {
1224 : // If pafUnifiedSrcDensity is only set to 1.0, then we can
1225 : // discard it.
1226 0 : bool bFoundNotOne = false;
1227 0 : for (GPtrDiff_t j = 0;
1228 0 : j < static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize; j++)
1229 : {
1230 0 : if (pafUnifiedSrcDensity[j] != 1.0)
1231 : {
1232 0 : bFoundNotOne = true;
1233 0 : break;
1234 : }
1235 : }
1236 0 : if (!bFoundNotOne)
1237 : {
1238 0 : CPLFree(pafUnifiedSrcDensity);
1239 0 : pafUnifiedSrcDensity = nullptr;
1240 : }
1241 : }
1242 :
1243 0 : if (pafUnifiedSrcDensity != nullptr)
1244 : {
1245 : // Typically if there's a cutline or an alpha band
1246 0 : CPLDebugOnce("WARP", "pafUnifiedSrcDensity is not null, "
1247 : "hence OpenCL warper cannot be used");
1248 : }
1249 : else
1250 : {
1251 0 : const CPLErr eResult = GWKOpenCLCase(this);
1252 :
1253 : // CE_Warning tells us a suitable OpenCL environment was not available
1254 : // so we fall through to other CPU based methods.
1255 0 : if (eResult != CE_Warning)
1256 0 : return eResult;
1257 : }
1258 : }
1259 : #endif // defined HAVE_OPENCL
1260 :
1261 2040 : const bool bNoMasksOrDstDensityOnly =
1262 2036 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1263 4076 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1264 :
1265 2040 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
1266 : bNoMasksOrDstDensityOnly)
1267 866 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1268 :
1269 1174 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
1270 : bNoMasksOrDstDensityOnly)
1271 126 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1272 :
1273 1048 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
1274 : bNoMasksOrDstDensityOnly)
1275 72 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1276 :
1277 976 : if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
1278 : bNoMasksOrDstDensityOnly)
1279 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1280 :
1281 964 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
1282 274 : return GWKNearestByte(this);
1283 :
1284 690 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1285 140 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1286 18 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1287 :
1288 672 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1289 : bNoMasksOrDstDensityOnly)
1290 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1291 :
1292 667 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1293 : bNoMasksOrDstDensityOnly)
1294 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1295 :
1296 661 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1297 : bNoMasksOrDstDensityOnly)
1298 18 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1299 :
1300 643 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1301 : bNoMasksOrDstDensityOnly)
1302 12 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1303 :
1304 631 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1305 : bNoMasksOrDstDensityOnly)
1306 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1307 :
1308 626 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1309 : bNoMasksOrDstDensityOnly)
1310 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1311 :
1312 620 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1313 24 : return GWKNearestShort(this);
1314 :
1315 596 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1316 0 : return GWKNearestUnsignedShort(this);
1317 :
1318 596 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1319 : bNoMasksOrDstDensityOnly)
1320 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1321 :
1322 585 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1323 37 : return GWKNearestFloat(this);
1324 :
1325 548 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1326 : bNoMasksOrDstDensityOnly)
1327 5 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1328 :
1329 543 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1330 : bNoMasksOrDstDensityOnly)
1331 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1332 :
1333 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1334 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1335 : bNoMasksOrDstDensityOnly)
1336 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1337 :
1338 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1339 : bNoMasksOrDstDensityOnly)
1340 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1341 : #endif
1342 :
1343 534 : if (eResample == GRA_Average)
1344 71 : return GWKAverageOrMode(this);
1345 :
1346 463 : if (eResample == GRA_RMS)
1347 9 : return GWKAverageOrMode(this);
1348 :
1349 454 : if (eResample == GRA_Mode)
1350 23 : return GWKAverageOrMode(this);
1351 :
1352 431 : if (eResample == GRA_Max)
1353 6 : return GWKAverageOrMode(this);
1354 :
1355 425 : if (eResample == GRA_Min)
1356 5 : return GWKAverageOrMode(this);
1357 :
1358 420 : if (eResample == GRA_Med)
1359 6 : return GWKAverageOrMode(this);
1360 :
1361 414 : if (eResample == GRA_Q1)
1362 5 : return GWKAverageOrMode(this);
1363 :
1364 409 : if (eResample == GRA_Q3)
1365 5 : return GWKAverageOrMode(this);
1366 :
1367 404 : if (eResample == GRA_Sum)
1368 18 : return GWKSumPreserving(this);
1369 :
1370 386 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1371 : {
1372 155 : return GWKRealCase(this);
1373 : }
1374 :
1375 231 : return GWKGeneralCase(this);
1376 : }
1377 :
1378 : /************************************************************************/
1379 : /* Validate() */
1380 : /************************************************************************/
1381 :
1382 : /**
1383 : * \fn CPLErr GDALWarpKernel::Validate()
1384 : *
1385 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1386 : * (and return CE_Failure) if the configuration is considered to be
1387 : * invalid for some reason.
1388 : *
1389 : * This method will also do some standard defaulting such as setting
1390 : * pfnProgress to GDALDummyProgress() if it is NULL.
1391 : *
1392 : * @return CE_None on success or CE_Failure if an error is detected.
1393 : */
1394 :
1395 2356 : CPLErr GDALWarpKernel::Validate()
1396 :
1397 : {
1398 2356 : if (static_cast<size_t>(eResample) >=
1399 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1400 : {
1401 0 : CPLError(CE_Failure, CPLE_AppDefined,
1402 : "Unsupported resampling method %d.",
1403 0 : static_cast<int>(eResample));
1404 0 : return CE_Failure;
1405 : }
1406 :
1407 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1408 : // be ignored as contributing source pixels during resampling. Only taken into account by
1409 : // Average currently
1410 : const char *pszExcludedValues =
1411 2356 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1412 2356 : if (pszExcludedValues)
1413 : {
1414 : const CPLStringList aosTokens(
1415 8 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1416 8 : if ((aosTokens.size() % nBands) != 0)
1417 : {
1418 1 : CPLError(CE_Failure, CPLE_AppDefined,
1419 : "EXCLUDED_VALUES should contain one or several tuples of "
1420 : "%d values formatted like <R>,<G>,<B> or "
1421 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1422 : "tuples",
1423 : nBands);
1424 1 : return CE_Failure;
1425 : }
1426 14 : std::vector<double> adfTuple;
1427 28 : for (int i = 0; i < aosTokens.size(); ++i)
1428 : {
1429 21 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1430 21 : if (((i + 1) % nBands) == 0)
1431 : {
1432 7 : m_aadfExcludedValues.push_back(adfTuple);
1433 7 : adfTuple.clear();
1434 : }
1435 : }
1436 : }
1437 :
1438 2355 : return CE_None;
1439 : }
1440 :
1441 : /************************************************************************/
1442 : /* GWKOverlayDensity() */
1443 : /* */
1444 : /* Compute the final density for the destination pixel. This */
1445 : /* is a function of the overlay density (passed in) and the */
1446 : /* original density. */
1447 : /************************************************************************/
1448 :
1449 7934110 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1450 : double dfDensity)
1451 : {
1452 7934110 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1453 6743240 : return;
1454 :
1455 1190880 : poWK->pafDstDensity[iDstOffset] = static_cast<float>(
1456 1190880 : 1.0 - (1.0 - dfDensity) * (1.0 - poWK->pafDstDensity[iDstOffset]));
1457 : }
1458 :
1459 : /************************************************************************/
1460 : /* GWKRoundValueT() */
1461 : /************************************************************************/
1462 :
/**
 * Compile-time selected rounding of a double to type T.
 *
 * The primary template only declares eval(); the two partial
 * specializations below provide the signed and unsigned definitions.
 */
template <class T, bool is_signed> struct sGWKRoundValueT
{
    static T eval(double);
};

/** Signed variant: adds 0.5 and takes the floor before converting. */
template <class T> struct sGWKRoundValueT<T, true>
{
    static T eval(double dfValue)
    {
        const double dfRounded = std::floor(dfValue + 0.5);
        return static_cast<T>(dfRounded);
    }
};

/** Unsigned variant: adds 0.5 and lets the conversion truncate. */
template <class T> struct sGWKRoundValueT<T, false>
{
    static T eval(double dfValue)
    {
        const double dfShifted = dfValue + 0.5;
        return static_cast<T>(dfShifted);
    }
};
1483 :
1484 15154811 : template <class T> static T GWKRoundValueT(double dfValue)
1485 : {
1486 15154811 : return sGWKRoundValueT<T, cpl::NumericLimits<T>::is_signed>::eval(dfValue);
1487 : }
1488 :
1489 269074 : template <> float GWKRoundValueT<float>(double dfValue)
1490 : {
1491 269074 : return static_cast<float>(dfValue);
1492 : }
1493 :
1494 : #ifdef notused
1495 : template <> double GWKRoundValueT<double>(double dfValue)
1496 : {
1497 : return dfValue;
1498 : }
1499 : #endif
1500 :
1501 : /************************************************************************/
1502 : /* GWKClampValueT() */
1503 : /************************************************************************/
1504 :
1505 10366934 : template <class T> static CPL_INLINE T GWKClampValueT(double dfValue)
1506 : {
1507 10366934 : if (dfValue < cpl::NumericLimits<T>::min())
1508 3969 : return cpl::NumericLimits<T>::min();
1509 10375466 : else if (dfValue > cpl::NumericLimits<T>::max())
1510 18463 : return cpl::NumericLimits<T>::max();
1511 : else
1512 10345056 : return GWKRoundValueT<T>(dfValue);
1513 : }
1514 :
1515 718914 : template <> float GWKClampValueT<float>(double dfValue)
1516 : {
1517 718914 : return static_cast<float>(dfValue);
1518 : }
1519 :
1520 : #ifdef notused
1521 : template <> double GWKClampValueT<double>(double dfValue)
1522 : {
1523 : return dfValue;
1524 : }
1525 : #endif
1526 :
1527 : /************************************************************************/
1528 : /* AvoidNoData() */
1529 : /************************************************************************/
1530 :
1531 : template <class T>
1532 11865687 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1533 : GPtrDiff_t iDstOffset)
1534 : {
1535 11865687 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1536 11865687 : T *pDst = reinterpret_cast<T *>(pabyDst);
1537 :
1538 11865687 : if (poWK->padfDstNoDataReal != nullptr &&
1539 5729937 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1540 : {
1541 : if constexpr (cpl::NumericLimits<T>::is_integer)
1542 : {
1543 2637 : if (pDst[iDstOffset] ==
1544 2637 : static_cast<T>(cpl::NumericLimits<T>::lowest()))
1545 : {
1546 2509 : pDst[iDstOffset] =
1547 2509 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1548 : }
1549 : else
1550 128 : pDst[iDstOffset]--;
1551 : }
1552 : else
1553 : {
1554 64 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1555 : {
1556 : using std::nextafter;
1557 0 : pDst[iDstOffset] =
1558 0 : nextafter(pDst[iDstOffset], static_cast<T>(0));
1559 : }
1560 : else
1561 : {
1562 : using std::nextafter;
1563 64 : pDst[iDstOffset] =
1564 64 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1565 : }
1566 : }
1567 :
1568 2701 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1569 : {
1570 25 : const_cast<GDALWarpKernel *>(poWK)
1571 : ->bWarnedAboutDstNoDataReplacement = true;
1572 25 : CPLError(CE_Warning, CPLE_AppDefined,
1573 : "Value %g in the source dataset has been changed to %g "
1574 : "in the destination dataset to avoid being treated as "
1575 : "NoData. To avoid this, select a different NoData value "
1576 : "for the destination dataset.",
1577 25 : poWK->padfDstNoDataReal[iBand],
1578 25 : static_cast<double>(pDst[iDstOffset]));
1579 : }
1580 : }
1581 11865687 : }
1582 :
1583 : /************************************************************************/
1584 : /* GWKSetPixelValueRealT() */
1585 : /************************************************************************/
1586 :
1587 : template <class T>
1588 7159332 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1589 : GPtrDiff_t iDstOffset, double dfDensity,
1590 : T value)
1591 : {
1592 7159332 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1593 :
1594 : /* -------------------------------------------------------------------- */
1595 : /* If the source density is less than 100% we need to fetch the */
1596 : /* existing destination value, and mix it with the source to */
1597 : /* get the new "to apply" value. Also compute composite */
1598 : /* density. */
1599 : /* */
1600 : /* We avoid mixing if density is very near one or risk mixing */
1601 : /* in very extreme nodata values and causing odd results (#1610) */
1602 : /* -------------------------------------------------------------------- */
1603 7159332 : if (dfDensity < 0.9999)
1604 : {
1605 81504 : if (dfDensity < 0.0001)
1606 0 : return true;
1607 :
1608 81504 : double dfDstDensity = 1.0;
1609 :
1610 81504 : if (poWK->pafDstDensity != nullptr)
1611 80032 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1612 1472 : else if (poWK->panDstValid != nullptr &&
1613 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1614 0 : dfDstDensity = 0.0;
1615 :
1616 : // It seems like we also ought to be testing panDstValid[] here!
1617 :
1618 81504 : const double dfDstReal = pDst[iDstOffset];
1619 :
1620 : // The destination density is really only relative to the portion
1621 : // not occluded by the overlay.
1622 81504 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1623 :
1624 81504 : const double dfReal = (value * dfDensity + dfDstReal * dfDstInfluence) /
1625 81504 : (dfDensity + dfDstInfluence);
1626 :
1627 : /* --------------------------------------------------------------------
1628 : */
1629 : /* Actually apply the destination value. */
1630 : /* */
1631 : /* Avoid using the destination nodata value for integer datatypes
1632 : */
1633 : /* if by chance it is equal to the computed pixel value. */
1634 : /* --------------------------------------------------------------------
1635 : */
1636 81504 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1637 : }
1638 : else
1639 : {
1640 7077823 : pDst[iDstOffset] = value;
1641 : }
1642 :
1643 7159332 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1644 :
1645 7159332 : return true;
1646 : }
1647 :
1648 : /************************************************************************/
1649 : /* ClampRoundAndAvoidNoData() */
1650 : /************************************************************************/
1651 :
1652 : template <class T>
1653 4706375 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1654 : GPtrDiff_t iDstOffset, double dfReal)
1655 : {
1656 4706375 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1657 4706375 : T *pDst = reinterpret_cast<T *>(pabyDst);
1658 :
1659 : if constexpr (cpl::NumericLimits<T>::is_integer)
1660 : {
1661 : using std::floor;
1662 4223379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1663 1638 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1664 4221739 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1665 13640 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1666 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1667 13539 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1668 : else
1669 4194560 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1670 : }
1671 : else
1672 : {
1673 482996 : pDst[iDstOffset] = static_cast<T>(dfReal);
1674 : }
1675 :
1676 4706375 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1677 4706375 : }
1678 :
1679 : /************************************************************************/
1680 : /* GWKSetPixelValue() */
1681 : /************************************************************************/
1682 :
1683 3867640 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1684 : GPtrDiff_t iDstOffset, double dfDensity,
1685 : double dfReal, double dfImag)
1686 :
1687 : {
1688 3867640 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1689 :
1690 : /* -------------------------------------------------------------------- */
1691 : /* If the source density is less than 100% we need to fetch the */
1692 : /* existing destination value, and mix it with the source to */
1693 : /* get the new "to apply" value. Also compute composite */
1694 : /* density. */
1695 : /* */
1696 : /* We avoid mixing if density is very near one or risk mixing */
1697 : /* in very extreme nodata values and causing odd results (#1610) */
1698 : /* -------------------------------------------------------------------- */
1699 3867640 : if (dfDensity < 0.9999)
1700 : {
1701 800 : if (dfDensity < 0.0001)
1702 0 : return true;
1703 :
1704 800 : double dfDstDensity = 1.0;
1705 800 : if (poWK->pafDstDensity != nullptr)
1706 800 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1707 0 : else if (poWK->panDstValid != nullptr &&
1708 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1709 0 : dfDstDensity = 0.0;
1710 :
1711 800 : double dfDstReal = 0.0;
1712 800 : double dfDstImag = 0.0;
1713 : // It seems like we also ought to be testing panDstValid[] here!
1714 :
1715 : // TODO(schwehr): Factor out this repreated type of set.
1716 800 : switch (poWK->eWorkingDataType)
1717 : {
1718 0 : case GDT_Byte:
1719 0 : dfDstReal = pabyDst[iDstOffset];
1720 0 : dfDstImag = 0.0;
1721 0 : break;
1722 :
1723 0 : case GDT_Int8:
1724 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1725 0 : dfDstImag = 0.0;
1726 0 : break;
1727 :
1728 400 : case GDT_Int16:
1729 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1730 400 : dfDstImag = 0.0;
1731 400 : break;
1732 :
1733 400 : case GDT_UInt16:
1734 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1735 400 : dfDstImag = 0.0;
1736 400 : break;
1737 :
1738 0 : case GDT_Int32:
1739 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1740 0 : dfDstImag = 0.0;
1741 0 : break;
1742 :
1743 0 : case GDT_UInt32:
1744 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1745 0 : dfDstImag = 0.0;
1746 0 : break;
1747 :
1748 0 : case GDT_Int64:
1749 0 : dfDstReal = static_cast<double>(
1750 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1751 0 : dfDstImag = 0.0;
1752 0 : break;
1753 :
1754 0 : case GDT_UInt64:
1755 0 : dfDstReal = static_cast<double>(
1756 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1757 0 : dfDstImag = 0.0;
1758 0 : break;
1759 :
1760 0 : case GDT_Float16:
1761 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1762 0 : dfDstImag = 0.0;
1763 0 : break;
1764 :
1765 0 : case GDT_Float32:
1766 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
1767 0 : dfDstImag = 0.0;
1768 0 : break;
1769 :
1770 0 : case GDT_Float64:
1771 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1772 0 : dfDstImag = 0.0;
1773 0 : break;
1774 :
1775 0 : case GDT_CInt16:
1776 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1777 0 : dfDstImag =
1778 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1779 0 : break;
1780 :
1781 0 : case GDT_CInt32:
1782 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1783 0 : dfDstImag =
1784 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1785 0 : break;
1786 :
1787 0 : case GDT_CFloat16:
1788 : dfDstReal =
1789 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1790 : dfDstImag =
1791 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1792 0 : break;
1793 :
1794 0 : case GDT_CFloat32:
1795 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset * 2];
1796 0 : dfDstImag =
1797 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1];
1798 0 : break;
1799 :
1800 0 : case GDT_CFloat64:
1801 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1802 0 : dfDstImag =
1803 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1804 0 : break;
1805 :
1806 0 : case GDT_Unknown:
1807 : case GDT_TypeCount:
1808 0 : CPLAssert(false);
1809 : return false;
1810 : }
1811 :
1812 : // The destination density is really only relative to the portion
1813 : // not occluded by the overlay.
1814 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1815 :
1816 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1817 800 : (dfDensity + dfDstInfluence);
1818 :
1819 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1820 800 : (dfDensity + dfDstInfluence);
1821 : }
1822 :
1823 : /* -------------------------------------------------------------------- */
1824 : /* Actually apply the destination value. */
1825 : /* */
1826 : /* Avoid using the destination nodata value for integer datatypes */
1827 : /* if by chance it is equal to the computed pixel value. */
1828 : /* -------------------------------------------------------------------- */
1829 :
1830 3867640 : switch (poWK->eWorkingDataType)
1831 : {
1832 3141450 : case GDT_Byte:
1833 3141450 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
1834 3141450 : break;
1835 :
1836 0 : case GDT_Int8:
1837 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
1838 0 : break;
1839 :
1840 7470 : case GDT_Int16:
1841 7470 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
1842 7470 : break;
1843 :
1844 463 : case GDT_UInt16:
1845 463 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
1846 463 : break;
1847 :
1848 63 : case GDT_UInt32:
1849 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
1850 63 : break;
1851 :
1852 3470 : case GDT_Int32:
1853 3470 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
1854 3470 : break;
1855 :
1856 0 : case GDT_UInt64:
1857 0 : ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
1858 : dfReal);
1859 0 : break;
1860 :
1861 0 : case GDT_Int64:
1862 0 : ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
1863 : dfReal);
1864 0 : break;
1865 :
1866 0 : case GDT_Float16:
1867 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
1868 0 : break;
1869 :
1870 478957 : case GDT_Float32:
1871 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
1872 478957 : break;
1873 :
1874 147 : case GDT_Float64:
1875 147 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
1876 147 : break;
1877 :
1878 234178 : case GDT_CInt16:
1879 : {
1880 : typedef GInt16 T;
1881 234178 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1882 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1883 0 : cpl::NumericLimits<T>::min();
1884 234178 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1885 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1886 0 : cpl::NumericLimits<T>::max();
1887 : else
1888 234178 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1889 234178 : static_cast<T>(floor(dfReal + 0.5));
1890 234178 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1891 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1892 0 : cpl::NumericLimits<T>::min();
1893 234178 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1894 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1895 0 : cpl::NumericLimits<T>::max();
1896 : else
1897 234178 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1898 234178 : static_cast<T>(floor(dfImag + 0.5));
1899 234178 : break;
1900 : }
1901 :
1902 478 : case GDT_CInt32:
1903 : {
1904 : typedef GInt32 T;
1905 478 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1906 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1907 0 : cpl::NumericLimits<T>::min();
1908 478 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1909 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1910 0 : cpl::NumericLimits<T>::max();
1911 : else
1912 478 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1913 478 : static_cast<T>(floor(dfReal + 0.5));
1914 478 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1915 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1916 0 : cpl::NumericLimits<T>::min();
1917 478 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1918 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1919 0 : cpl::NumericLimits<T>::max();
1920 : else
1921 478 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1922 478 : static_cast<T>(floor(dfImag + 0.5));
1923 478 : break;
1924 : }
1925 :
1926 0 : case GDT_CFloat16:
1927 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
1928 0 : static_cast<GFloat16>(dfReal);
1929 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
1930 0 : static_cast<GFloat16>(dfImag);
1931 0 : break;
1932 :
1933 490 : case GDT_CFloat32:
1934 490 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
1935 490 : static_cast<float>(dfReal);
1936 490 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
1937 490 : static_cast<float>(dfImag);
1938 490 : break;
1939 :
1940 478 : case GDT_CFloat64:
1941 478 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
1942 478 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
1943 478 : break;
1944 :
1945 0 : case GDT_Unknown:
1946 : case GDT_TypeCount:
1947 0 : return false;
1948 : }
1949 :
1950 3867640 : return true;
1951 : }
1952 :
1953 : /************************************************************************/
1954 : /* GWKSetPixelValueReal() */
1955 : /************************************************************************/
1956 :
1957 1074360 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
1958 : GPtrDiff_t iDstOffset, double dfDensity,
1959 : double dfReal)
1960 :
1961 : {
1962 1074360 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1963 :
1964 : /* -------------------------------------------------------------------- */
1965 : /* If the source density is less than 100% we need to fetch the */
1966 : /* existing destination value, and mix it with the source to */
1967 : /* get the new "to apply" value. Also compute composite */
1968 : /* density. */
1969 : /* */
1970 : /* We avoid mixing if density is very near one or risk mixing */
1971 : /* in very extreme nodata values and causing odd results (#1610) */
1972 : /* -------------------------------------------------------------------- */
1973 1074360 : if (dfDensity < 0.9999)
1974 : {
1975 78172 : if (dfDensity < 0.0001)
1976 0 : return true;
1977 :
1978 78172 : double dfDstReal = 0.0;
1979 78172 : double dfDstDensity = 1.0;
1980 :
1981 78172 : if (poWK->pafDstDensity != nullptr)
1982 78172 : dfDstDensity = poWK->pafDstDensity[iDstOffset];
1983 0 : else if (poWK->panDstValid != nullptr &&
1984 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1985 0 : dfDstDensity = 0.0;
1986 :
1987 : // It seems like we also ought to be testing panDstValid[] here!
1988 :
1989 78172 : switch (poWK->eWorkingDataType)
1990 : {
1991 0 : case GDT_Byte:
1992 0 : dfDstReal = pabyDst[iDstOffset];
1993 0 : break;
1994 :
1995 0 : case GDT_Int8:
1996 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1997 0 : break;
1998 :
1999 300 : case GDT_Int16:
2000 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
2001 300 : break;
2002 :
2003 77872 : case GDT_UInt16:
2004 77872 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
2005 77872 : break;
2006 :
2007 0 : case GDT_Int32:
2008 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
2009 0 : break;
2010 :
2011 0 : case GDT_UInt32:
2012 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
2013 0 : break;
2014 :
2015 0 : case GDT_Int64:
2016 0 : dfDstReal = static_cast<double>(
2017 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
2018 0 : break;
2019 :
2020 0 : case GDT_UInt64:
2021 0 : dfDstReal = static_cast<double>(
2022 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
2023 0 : break;
2024 :
2025 0 : case GDT_Float16:
2026 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
2027 0 : break;
2028 :
2029 0 : case GDT_Float32:
2030 0 : dfDstReal = reinterpret_cast<float *>(pabyDst)[iDstOffset];
2031 0 : break;
2032 :
2033 0 : case GDT_Float64:
2034 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
2035 0 : break;
2036 :
2037 0 : case GDT_CInt16:
2038 : case GDT_CInt32:
2039 : case GDT_CFloat16:
2040 : case GDT_CFloat32:
2041 : case GDT_CFloat64:
2042 : case GDT_Unknown:
2043 : case GDT_TypeCount:
2044 0 : CPLAssert(false);
2045 : return false;
2046 : }
2047 :
2048 : // The destination density is really only relative to the portion
2049 : // not occluded by the overlay.
2050 78172 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2051 :
2052 78172 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2053 78172 : (dfDensity + dfDstInfluence);
2054 : }
2055 :
2056 : /* -------------------------------------------------------------------- */
2057 : /* Actually apply the destination value. */
2058 : /* */
2059 : /* Avoid using the destination nodata value for integer datatypes */
2060 : /* if by chance it is equal to the computed pixel value. */
2061 : /* -------------------------------------------------------------------- */
2062 :
2063 1074360 : switch (poWK->eWorkingDataType)
2064 : {
2065 916752 : case GDT_Byte:
2066 916752 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
2067 916752 : break;
2068 :
2069 0 : case GDT_Int8:
2070 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
2071 0 : break;
2072 :
2073 1117 : case GDT_Int16:
2074 1117 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
2075 1117 : break;
2076 :
2077 150735 : case GDT_UInt16:
2078 150735 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
2079 150735 : break;
2080 :
2081 347 : case GDT_UInt32:
2082 347 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
2083 347 : break;
2084 :
2085 1350 : case GDT_Int32:
2086 1350 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
2087 1350 : break;
2088 :
2089 32 : case GDT_UInt64:
2090 32 : ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
2091 : dfReal);
2092 32 : break;
2093 :
2094 132 : case GDT_Int64:
2095 132 : ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
2096 : dfReal);
2097 132 : break;
2098 :
2099 0 : case GDT_Float16:
2100 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
2101 0 : break;
2102 :
2103 3442 : case GDT_Float32:
2104 3442 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
2105 3442 : break;
2106 :
2107 450 : case GDT_Float64:
2108 450 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
2109 450 : break;
2110 :
2111 0 : case GDT_CInt16:
2112 : case GDT_CInt32:
2113 : case GDT_CFloat16:
2114 : case GDT_CFloat32:
2115 : case GDT_CFloat64:
2116 0 : return false;
2117 :
2118 0 : case GDT_Unknown:
2119 : case GDT_TypeCount:
2120 0 : CPLAssert(false);
2121 : return false;
2122 : }
2123 :
2124 1074360 : return true;
2125 : }
2126 :
2127 : /************************************************************************/
2128 : /* GWKGetPixelValue() */
2129 : /************************************************************************/
2130 :
2131 : /* It is assumed that panUnifiedSrcValid has been checked before */
2132 :
2133 29336100 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2134 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2135 : double *pdfReal, double *pdfImag)
2136 :
2137 : {
2138 29336100 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2139 :
2140 58672300 : if (poWK->papanBandSrcValid != nullptr &&
2141 29336100 : poWK->papanBandSrcValid[iBand] != nullptr &&
2142 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2143 : {
2144 0 : *pdfDensity = 0.0;
2145 0 : return false;
2146 : }
2147 :
2148 29336100 : *pdfReal = 0.0;
2149 29336100 : *pdfImag = 0.0;
2150 :
2151 : // TODO(schwehr): Fix casting.
2152 29336100 : switch (poWK->eWorkingDataType)
2153 : {
2154 28245600 : case GDT_Byte:
2155 28245600 : *pdfReal = pabySrc[iSrcOffset];
2156 28245600 : *pdfImag = 0.0;
2157 28245600 : break;
2158 :
2159 0 : case GDT_Int8:
2160 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2161 0 : *pdfImag = 0.0;
2162 0 : break;
2163 :
2164 28226 : case GDT_Int16:
2165 28226 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2166 28226 : *pdfImag = 0.0;
2167 28226 : break;
2168 :
2169 163 : case GDT_UInt16:
2170 163 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2171 163 : *pdfImag = 0.0;
2172 163 : break;
2173 :
2174 13726 : case GDT_Int32:
2175 13726 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2176 13726 : *pdfImag = 0.0;
2177 13726 : break;
2178 :
2179 63 : case GDT_UInt32:
2180 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2181 63 : *pdfImag = 0.0;
2182 63 : break;
2183 :
2184 0 : case GDT_Int64:
2185 0 : *pdfReal = static_cast<double>(
2186 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2187 0 : *pdfImag = 0.0;
2188 0 : break;
2189 :
2190 0 : case GDT_UInt64:
2191 0 : *pdfReal = static_cast<double>(
2192 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2193 0 : *pdfImag = 0.0;
2194 0 : break;
2195 :
2196 0 : case GDT_Float16:
2197 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2198 0 : *pdfImag = 0.0;
2199 0 : break;
2200 :
2201 1047220 : case GDT_Float32:
2202 1047220 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2203 1047220 : *pdfImag = 0.0;
2204 1047220 : break;
2205 :
2206 582 : case GDT_Float64:
2207 582 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2208 582 : *pdfImag = 0.0;
2209 582 : break;
2210 :
2211 130 : case GDT_CInt16:
2212 130 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2213 130 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2214 130 : break;
2215 :
2216 130 : case GDT_CInt32:
2217 130 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2218 130 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2219 130 : break;
2220 :
2221 0 : case GDT_CFloat16:
2222 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2223 0 : *pdfImag =
2224 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2225 0 : break;
2226 :
2227 178 : case GDT_CFloat32:
2228 178 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2];
2229 178 : *pdfImag = reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1];
2230 178 : break;
2231 :
2232 130 : case GDT_CFloat64:
2233 130 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2234 130 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2235 130 : break;
2236 :
2237 0 : case GDT_Unknown:
2238 : case GDT_TypeCount:
2239 0 : CPLAssert(false);
2240 : *pdfDensity = 0.0;
2241 : return false;
2242 : }
2243 :
2244 29336100 : if (poWK->pafUnifiedSrcDensity != nullptr)
2245 3015160 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2246 : else
2247 26321000 : *pdfDensity = 1.0;
2248 :
2249 29336100 : return *pdfDensity != 0.0;
2250 : }
2251 :
2252 : /************************************************************************/
2253 : /* GWKGetPixelValueReal() */
2254 : /************************************************************************/
2255 :
2256 151448 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2257 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2258 : double *pdfReal)
2259 :
2260 : {
2261 151448 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2262 :
2263 302898 : if (poWK->papanBandSrcValid != nullptr &&
2264 151450 : poWK->papanBandSrcValid[iBand] != nullptr &&
2265 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2266 : {
2267 0 : *pdfDensity = 0.0;
2268 0 : return false;
2269 : }
2270 :
2271 151448 : switch (poWK->eWorkingDataType)
2272 : {
2273 1 : case GDT_Byte:
2274 1 : *pdfReal = pabySrc[iSrcOffset];
2275 1 : break;
2276 :
2277 0 : case GDT_Int8:
2278 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2279 0 : break;
2280 :
2281 1 : case GDT_Int16:
2282 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2283 1 : break;
2284 :
2285 150357 : case GDT_UInt16:
2286 150357 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2287 150357 : break;
2288 :
2289 886 : case GDT_Int32:
2290 886 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2291 886 : break;
2292 :
2293 83 : case GDT_UInt32:
2294 83 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2295 83 : break;
2296 :
2297 16 : case GDT_Int64:
2298 16 : *pdfReal = static_cast<double>(
2299 16 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2300 16 : break;
2301 :
2302 16 : case GDT_UInt64:
2303 16 : *pdfReal = static_cast<double>(
2304 16 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2305 16 : break;
2306 :
2307 0 : case GDT_Float16:
2308 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2309 0 : break;
2310 :
2311 2 : case GDT_Float32:
2312 2 : *pdfReal = reinterpret_cast<float *>(pabySrc)[iSrcOffset];
2313 2 : break;
2314 :
2315 86 : case GDT_Float64:
2316 86 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2317 86 : break;
2318 :
2319 0 : case GDT_CInt16:
2320 : case GDT_CInt32:
2321 : case GDT_CFloat16:
2322 : case GDT_CFloat32:
2323 : case GDT_CFloat64:
2324 : case GDT_Unknown:
2325 : case GDT_TypeCount:
2326 0 : CPLAssert(false);
2327 : return false;
2328 : }
2329 :
2330 151448 : if (poWK->pafUnifiedSrcDensity != nullptr)
2331 150340 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2332 : else
2333 1108 : *pdfDensity = 1.0;
2334 :
2335 151448 : return *pdfDensity != 0.0;
2336 : }
2337 :
2338 : /************************************************************************/
2339 : /* GWKGetPixelRow() */
2340 : /************************************************************************/
2341 :
2342 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2343 : /* data-types. */
2344 :
2345 2354130 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2346 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2347 : double *padfDensity, double adfReal[],
2348 : double *padfImag)
2349 : {
2350 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2351 2354130 : const int nSrcLen = nHalfSrcLen * 2;
2352 2354130 : bool bHasValid = false;
2353 :
2354 2354130 : if (padfDensity != nullptr)
2355 : {
2356 : // Init the density.
2357 3346330 : for (int i = 0; i < nSrcLen; i += 2)
2358 : {
2359 2189790 : padfDensity[i] = 1.0;
2360 2189790 : padfDensity[i + 1] = 1.0;
2361 : }
2362 :
2363 1156540 : if (poWK->panUnifiedSrcValid != nullptr)
2364 : {
2365 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2366 : {
2367 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2368 2067740 : bHasValid = true;
2369 : else
2370 74323 : padfDensity[i] = 0.0;
2371 :
2372 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2373 2068400 : bHasValid = true;
2374 : else
2375 73668 : padfDensity[i + 1] = 0.0;
2376 : }
2377 :
2378 : // Reset or fail as needed.
2379 1139400 : if (bHasValid)
2380 1116590 : bHasValid = false;
2381 : else
2382 22806 : return false;
2383 : }
2384 :
2385 1133730 : if (poWK->papanBandSrcValid != nullptr &&
2386 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2387 : {
2388 0 : for (int i = 0; i < nSrcLen; i += 2)
2389 : {
2390 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2391 0 : bHasValid = true;
2392 : else
2393 0 : padfDensity[i] = 0.0;
2394 :
2395 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2396 0 : iSrcOffset + i + 1))
2397 0 : bHasValid = true;
2398 : else
2399 0 : padfDensity[i + 1] = 0.0;
2400 : }
2401 :
2402 : // Reset or fail as needed.
2403 0 : if (bHasValid)
2404 0 : bHasValid = false;
2405 : else
2406 0 : return false;
2407 : }
2408 : }
2409 :
2410 : // TODO(schwehr): Fix casting.
2411 : // Fetch data.
2412 2331320 : switch (poWK->eWorkingDataType)
2413 : {
2414 1121080 : case GDT_Byte:
2415 : {
2416 1121080 : GByte *pSrc =
2417 1121080 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2418 1121080 : pSrc += iSrcOffset;
2419 3243850 : for (int i = 0; i < nSrcLen; i += 2)
2420 : {
2421 2122770 : adfReal[i] = pSrc[i];
2422 2122770 : adfReal[i + 1] = pSrc[i + 1];
2423 : }
2424 1121080 : break;
2425 : }
2426 :
2427 0 : case GDT_Int8:
2428 : {
2429 0 : GInt8 *pSrc =
2430 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2431 0 : pSrc += iSrcOffset;
2432 0 : for (int i = 0; i < nSrcLen; i += 2)
2433 : {
2434 0 : adfReal[i] = pSrc[i];
2435 0 : adfReal[i + 1] = pSrc[i + 1];
2436 : }
2437 0 : break;
2438 : }
2439 :
2440 5614 : case GDT_Int16:
2441 : {
2442 5614 : GInt16 *pSrc =
2443 5614 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2444 5614 : pSrc += iSrcOffset;
2445 21492 : for (int i = 0; i < nSrcLen; i += 2)
2446 : {
2447 15878 : adfReal[i] = pSrc[i];
2448 15878 : adfReal[i + 1] = pSrc[i + 1];
2449 : }
2450 5614 : break;
2451 : }
2452 :
2453 4142 : case GDT_UInt16:
2454 : {
2455 4142 : GUInt16 *pSrc =
2456 4142 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2457 4142 : pSrc += iSrcOffset;
2458 18548 : for (int i = 0; i < nSrcLen; i += 2)
2459 : {
2460 14406 : adfReal[i] = pSrc[i];
2461 14406 : adfReal[i + 1] = pSrc[i + 1];
2462 : }
2463 4142 : break;
2464 : }
2465 :
2466 1158 : case GDT_Int32:
2467 : {
2468 1158 : GInt32 *pSrc =
2469 1158 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2470 1158 : pSrc += iSrcOffset;
2471 3048 : for (int i = 0; i < nSrcLen; i += 2)
2472 : {
2473 1890 : adfReal[i] = pSrc[i];
2474 1890 : adfReal[i + 1] = pSrc[i + 1];
2475 : }
2476 1158 : break;
2477 : }
2478 :
2479 778 : case GDT_UInt32:
2480 : {
2481 778 : GUInt32 *pSrc =
2482 778 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2483 778 : pSrc += iSrcOffset;
2484 2288 : for (int i = 0; i < nSrcLen; i += 2)
2485 : {
2486 1510 : adfReal[i] = pSrc[i];
2487 1510 : adfReal[i + 1] = pSrc[i + 1];
2488 : }
2489 778 : break;
2490 : }
2491 :
2492 218 : case GDT_Int64:
2493 : {
2494 218 : auto pSrc =
2495 218 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2496 218 : pSrc += iSrcOffset;
2497 436 : for (int i = 0; i < nSrcLen; i += 2)
2498 : {
2499 218 : adfReal[i] = static_cast<double>(pSrc[i]);
2500 218 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2501 : }
2502 218 : break;
2503 : }
2504 :
2505 28 : case GDT_UInt64:
2506 : {
2507 28 : auto pSrc =
2508 28 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2509 28 : pSrc += iSrcOffset;
2510 56 : for (int i = 0; i < nSrcLen; i += 2)
2511 : {
2512 28 : adfReal[i] = static_cast<double>(pSrc[i]);
2513 28 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2514 : }
2515 28 : break;
2516 : }
2517 :
2518 0 : case GDT_Float16:
2519 : {
2520 0 : GFloat16 *pSrc =
2521 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2522 0 : pSrc += iSrcOffset;
2523 0 : for (int i = 0; i < nSrcLen; i += 2)
2524 : {
2525 0 : adfReal[i] = pSrc[i];
2526 0 : adfReal[i + 1] = pSrc[i + 1];
2527 : }
2528 0 : break;
2529 : }
2530 :
2531 25102 : case GDT_Float32:
2532 : {
2533 25102 : float *pSrc =
2534 25102 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2535 25102 : pSrc += iSrcOffset;
2536 121403 : for (int i = 0; i < nSrcLen; i += 2)
2537 : {
2538 96301 : adfReal[i] = pSrc[i];
2539 96301 : adfReal[i + 1] = pSrc[i + 1];
2540 : }
2541 25102 : break;
2542 : }
2543 :
2544 968 : case GDT_Float64:
2545 : {
2546 968 : double *pSrc =
2547 968 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2548 968 : pSrc += iSrcOffset;
2549 2668 : for (int i = 0; i < nSrcLen; i += 2)
2550 : {
2551 1700 : adfReal[i] = pSrc[i];
2552 1700 : adfReal[i + 1] = pSrc[i + 1];
2553 : }
2554 968 : break;
2555 : }
2556 :
2557 1169410 : case GDT_CInt16:
2558 : {
2559 1169410 : GInt16 *pSrc =
2560 1169410 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2561 1169410 : pSrc += 2 * iSrcOffset;
2562 4676400 : for (int i = 0; i < nSrcLen; i += 2)
2563 : {
2564 3506990 : adfReal[i] = pSrc[2 * i];
2565 3506990 : padfImag[i] = pSrc[2 * i + 1];
2566 :
2567 3506990 : adfReal[i + 1] = pSrc[2 * i + 2];
2568 3506990 : padfImag[i + 1] = pSrc[2 * i + 3];
2569 : }
2570 1169410 : break;
2571 : }
2572 :
2573 940 : case GDT_CInt32:
2574 : {
2575 940 : GInt32 *pSrc =
2576 940 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2577 940 : pSrc += 2 * iSrcOffset;
2578 2612 : for (int i = 0; i < nSrcLen; i += 2)
2579 : {
2580 1672 : adfReal[i] = pSrc[2 * i];
2581 1672 : padfImag[i] = pSrc[2 * i + 1];
2582 :
2583 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2584 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2585 : }
2586 940 : break;
2587 : }
2588 :
2589 0 : case GDT_CFloat16:
2590 : {
2591 0 : GFloat16 *pSrc =
2592 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2593 0 : pSrc += 2 * iSrcOffset;
2594 0 : for (int i = 0; i < nSrcLen; i += 2)
2595 : {
2596 0 : adfReal[i] = pSrc[2 * i];
2597 0 : padfImag[i] = pSrc[2 * i + 1];
2598 :
2599 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2600 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2601 : }
2602 0 : break;
2603 : }
2604 :
2605 940 : case GDT_CFloat32:
2606 : {
2607 940 : float *pSrc =
2608 940 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2609 940 : pSrc += 2 * iSrcOffset;
2610 2612 : for (int i = 0; i < nSrcLen; i += 2)
2611 : {
2612 1672 : adfReal[i] = pSrc[2 * i];
2613 1672 : padfImag[i] = pSrc[2 * i + 1];
2614 :
2615 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2616 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2617 : }
2618 940 : break;
2619 : }
2620 :
2621 940 : case GDT_CFloat64:
2622 : {
2623 940 : double *pSrc =
2624 940 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2625 940 : pSrc += 2 * iSrcOffset;
2626 2612 : for (int i = 0; i < nSrcLen; i += 2)
2627 : {
2628 1672 : adfReal[i] = pSrc[2 * i];
2629 1672 : padfImag[i] = pSrc[2 * i + 1];
2630 :
2631 1672 : adfReal[i + 1] = pSrc[2 * i + 2];
2632 1672 : padfImag[i + 1] = pSrc[2 * i + 3];
2633 : }
2634 940 : break;
2635 : }
2636 :
2637 0 : case GDT_Unknown:
2638 : case GDT_TypeCount:
2639 0 : CPLAssert(false);
2640 : if (padfDensity)
2641 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2642 : return false;
2643 : }
2644 :
2645 2331320 : if (padfDensity == nullptr)
2646 1197590 : return true;
2647 :
2648 1133730 : if (poWK->pafUnifiedSrcDensity == nullptr)
2649 : {
2650 3234760 : for (int i = 0; i < nSrcLen; i += 2)
2651 : {
2652 : // Take into account earlier calcs.
2653 2113130 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2654 : {
2655 2073230 : padfDensity[i] = 1.0;
2656 2073230 : bHasValid = true;
2657 : }
2658 :
2659 2113130 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2660 : {
2661 2073880 : padfDensity[i + 1] = 1.0;
2662 2073880 : bHasValid = true;
2663 : }
2664 : }
2665 : }
2666 : else
2667 : {
2668 54348 : for (int i = 0; i < nSrcLen; i += 2)
2669 : {
2670 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2671 42243 : padfDensity[i] = poWK->pafUnifiedSrcDensity[iSrcOffset + i];
2672 42243 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD)
2673 41704 : bHasValid = true;
2674 :
2675 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2676 42243 : padfDensity[i + 1] =
2677 42243 : poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1];
2678 42243 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD)
2679 41598 : bHasValid = true;
2680 : }
2681 : }
2682 :
2683 1133730 : return bHasValid;
2684 : }
2685 :
2686 : /************************************************************************/
2687 : /* GWKGetPixelT() */
2688 : /************************************************************************/
2689 :
2690 : template <class T>
2691 7159332 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2692 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2693 :
2694 : {
2695 7159332 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2696 :
2697 16456570 : if ((poWK->panUnifiedSrcValid != nullptr &&
2698 14318624 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2699 7159332 : (poWK->papanBandSrcValid != nullptr &&
2700 21 : poWK->papanBandSrcValid[iBand] != nullptr &&
2701 21 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2702 : {
2703 9 : *pdfDensity = 0.0;
2704 9 : return false;
2705 : }
2706 :
2707 7159332 : *pValue = pSrc[iSrcOffset];
2708 :
2709 7159332 : if (poWK->pafUnifiedSrcDensity == nullptr)
2710 6974919 : *pdfDensity = 1.0;
2711 : else
2712 184414 : *pdfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
2713 :
2714 7159332 : return *pdfDensity != 0.0;
2715 : }
2716 :
2717 : /************************************************************************/
2718 : /* GWKBilinearResample() */
2719 : /* Set of bilinear interpolators */
2720 : /************************************************************************/
2721 :
2722 72824 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2723 : double dfSrcX, double dfSrcY,
2724 : double *pdfDensity, double *pdfReal,
2725 : double *pdfImag)
2726 :
2727 : {
2728 : // Save as local variables to avoid following pointers.
2729 72824 : const int nSrcXSize = poWK->nSrcXSize;
2730 72824 : const int nSrcYSize = poWK->nSrcYSize;
2731 :
2732 72824 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2733 72824 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2734 72824 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2735 72824 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2736 72824 : bool bShifted = false;
2737 :
2738 72824 : if (iSrcX == -1)
2739 : {
2740 292 : iSrcX = 0;
2741 292 : dfRatioX = 1;
2742 : }
2743 72824 : if (iSrcY == -1)
2744 : {
2745 7686 : iSrcY = 0;
2746 7686 : dfRatioY = 1;
2747 : }
2748 72824 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2749 :
2750 : // Shift so we don't overrun the array.
2751 72824 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2752 72764 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2753 72764 : iSrcOffset + nSrcXSize + 1)
2754 : {
2755 120 : bShifted = true;
2756 120 : --iSrcOffset;
2757 : }
2758 :
2759 72824 : double adfDensity[2] = {0.0, 0.0};
2760 72824 : double adfReal[2] = {0.0, 0.0};
2761 72824 : double adfImag[2] = {0.0, 0.0};
2762 72824 : double dfAccumulatorReal = 0.0;
2763 72824 : double dfAccumulatorImag = 0.0;
2764 72824 : double dfAccumulatorDensity = 0.0;
2765 72824 : double dfAccumulatorDivisor = 0.0;
2766 :
2767 72824 : const GPtrDiff_t nSrcPixels =
2768 72824 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2769 : // Get pixel row.
2770 72824 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2771 145648 : iSrcOffset < nSrcPixels &&
2772 72824 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2773 : adfImag))
2774 : {
2775 67168 : double dfMult1 = dfRatioX * dfRatioY;
2776 67168 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2777 :
2778 : // Shifting corrected.
2779 67168 : if (bShifted)
2780 : {
2781 120 : adfReal[0] = adfReal[1];
2782 120 : adfImag[0] = adfImag[1];
2783 120 : adfDensity[0] = adfDensity[1];
2784 : }
2785 :
2786 : // Upper Left Pixel.
2787 67168 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2788 67168 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2789 : {
2790 61738 : dfAccumulatorDivisor += dfMult1;
2791 :
2792 61738 : dfAccumulatorReal += adfReal[0] * dfMult1;
2793 61738 : dfAccumulatorImag += adfImag[0] * dfMult1;
2794 61738 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2795 : }
2796 :
2797 : // Upper Right Pixel.
2798 67168 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2799 66547 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2800 : {
2801 61273 : dfAccumulatorDivisor += dfMult2;
2802 :
2803 61273 : dfAccumulatorReal += adfReal[1] * dfMult2;
2804 61273 : dfAccumulatorImag += adfImag[1] * dfMult2;
2805 61273 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2806 : }
2807 : }
2808 :
2809 : // Get pixel row.
2810 72824 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2811 214350 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2812 68702 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2813 : adfReal, adfImag))
2814 : {
2815 63143 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2816 63143 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2817 :
2818 : // Shifting corrected
2819 63143 : if (bShifted)
2820 : {
2821 60 : adfReal[0] = adfReal[1];
2822 60 : adfImag[0] = adfImag[1];
2823 60 : adfDensity[0] = adfDensity[1];
2824 : }
2825 :
2826 : // Lower Left Pixel
2827 63143 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2828 63143 : adfDensity[0] > SRC_DENSITY_THRESHOLD)
2829 : {
2830 57864 : dfAccumulatorDivisor += dfMult1;
2831 :
2832 57864 : dfAccumulatorReal += adfReal[0] * dfMult1;
2833 57864 : dfAccumulatorImag += adfImag[0] * dfMult1;
2834 57864 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2835 : }
2836 :
2837 : // Lower Right Pixel.
2838 63143 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2839 62582 : adfDensity[1] > SRC_DENSITY_THRESHOLD)
2840 : {
2841 57605 : dfAccumulatorDivisor += dfMult2;
2842 :
2843 57605 : dfAccumulatorReal += adfReal[1] * dfMult2;
2844 57605 : dfAccumulatorImag += adfImag[1] * dfMult2;
2845 57605 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2846 : }
2847 : }
2848 :
2849 : /* -------------------------------------------------------------------- */
2850 : /* Return result. */
2851 : /* -------------------------------------------------------------------- */
2852 72824 : if (dfAccumulatorDivisor == 1.0)
2853 : {
2854 41767 : *pdfReal = dfAccumulatorReal;
2855 41767 : *pdfImag = dfAccumulatorImag;
2856 41767 : *pdfDensity = dfAccumulatorDensity;
2857 41767 : return false;
2858 : }
2859 31057 : else if (dfAccumulatorDivisor < 0.00001)
2860 : {
2861 0 : *pdfReal = 0.0;
2862 0 : *pdfImag = 0.0;
2863 0 : *pdfDensity = 0.0;
2864 0 : return false;
2865 : }
2866 : else
2867 : {
2868 31057 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2869 31057 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2870 31057 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2871 31057 : return true;
2872 : }
2873 : }
2874 :
2875 : template <class T>
2876 5116014 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2877 : int iBand, double dfSrcX,
2878 : double dfSrcY, T *pValue)
2879 :
2880 : {
2881 :
2882 5116014 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2883 5116014 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2884 5116014 : GPtrDiff_t iSrcOffset =
2885 5116014 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2886 5116014 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2887 5116014 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2888 :
2889 5116014 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2890 :
2891 5116014 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2892 5012847 : iSrcY + 1 < poWK->nSrcYSize)
2893 : {
2894 4988678 : const double dfAccumulator =
2895 4988678 : (pSrc[iSrcOffset] * dfRatioX +
2896 4988678 : pSrc[iSrcOffset + 1] * (1.0 - dfRatioX)) *
2897 : dfRatioY +
2898 4988678 : (pSrc[iSrcOffset + poWK->nSrcXSize] * dfRatioX +
2899 4988678 : pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * (1.0 - dfRatioX)) *
2900 4988678 : (1.0 - dfRatioY);
2901 :
2902 4988678 : *pValue = GWKRoundValueT<T>(dfAccumulator);
2903 :
2904 4988678 : return true;
2905 : }
2906 :
2907 127349 : double dfAccumulatorDivisor = 0.0;
2908 127349 : double dfAccumulator = 0.0;
2909 :
2910 : // Upper Left Pixel.
2911 127349 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
2912 53440 : iSrcY < poWK->nSrcYSize)
2913 : {
2914 53440 : const double dfMult = dfRatioX * dfRatioY;
2915 :
2916 53440 : dfAccumulatorDivisor += dfMult;
2917 :
2918 53440 : dfAccumulator += pSrc[iSrcOffset] * dfMult;
2919 : }
2920 :
2921 : // Upper Right Pixel.
2922 127349 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2923 61354 : iSrcY < poWK->nSrcYSize)
2924 : {
2925 61354 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
2926 :
2927 61354 : dfAccumulatorDivisor += dfMult;
2928 :
2929 61354 : dfAccumulator += pSrc[iSrcOffset + 1] * dfMult;
2930 : }
2931 :
2932 : // Lower Right Pixel.
2933 127349 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2934 97471 : iSrcY + 1 < poWK->nSrcYSize)
2935 : {
2936 72902 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2937 :
2938 72902 : dfAccumulatorDivisor += dfMult;
2939 :
2940 72902 : dfAccumulator += pSrc[iSrcOffset + 1 + poWK->nSrcXSize] * dfMult;
2941 : }
2942 :
2943 : // Lower Left Pixel.
2944 127349 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2945 89535 : iSrcY + 1 < poWK->nSrcYSize)
2946 : {
2947 64758 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
2948 :
2949 64758 : dfAccumulatorDivisor += dfMult;
2950 :
2951 64758 : dfAccumulator += pSrc[iSrcOffset + poWK->nSrcXSize] * dfMult;
2952 : }
2953 :
2954 : /* -------------------------------------------------------------------- */
2955 : /* Return result. */
2956 : /* -------------------------------------------------------------------- */
2957 127349 : double dfValue = 0.0;
2958 :
2959 127349 : if (dfAccumulatorDivisor < 0.00001)
2960 : {
2961 0 : *pValue = 0;
2962 0 : return false;
2963 : }
2964 127349 : else if (dfAccumulatorDivisor == 1.0)
2965 : {
2966 8767 : dfValue = dfAccumulator;
2967 : }
2968 : else
2969 : {
2970 118582 : dfValue = dfAccumulator / dfAccumulatorDivisor;
2971 : }
2972 :
2973 127349 : *pValue = GWKRoundValueT<T>(dfValue);
2974 :
2975 127349 : return true;
2976 : }
2977 :
2978 : /************************************************************************/
2979 : /* GWKCubicResample() */
2980 : /* Set of bicubic interpolators using cubic convolution. */
2981 : /************************************************************************/
2982 :
2983 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2984 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2985 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2986 :
// One-dimensional cubic convolution of four consecutive samples f0..f3.
// distance1, distance2 and distance3 are the offset from f1 raised to the
// first, second and third power (callers pre-compute the powers).
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm);
}
2995 :
2996 : /************************************************************************/
2997 : /* GWKCubicComputeWeights() */
2998 : /************************************************************************/
2999 :
// The four weights sum to 1, i.e.
// adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] + adfCoeffs[3]);
3001 :
// Fill coeffs[0..3] with the cubic convolution weights of the four samples
// surrounding a position located at fractional offset x after the second
// sample.
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    const T xHalf = T(0.5) * x;
    const T xTimes3 = T(3.0) * x;
    const T xSquaredHalf = xHalf * x;

    coeffs[0] = xHalf * (x * (2 - x) - 1);
    coeffs[1] = xSquaredHalf * (xTimes3 - 5) + 1;
    coeffs[2] = xHalf * (x * (4 - xTimes3) + 1);
    coeffs[3] = xSquaredHalf * (x - 1);
}
3014 :
// TODO(schwehr): Use an inline function.
// Dot product of the first four elements of v1 and v2. Both arguments may be
// arrays or pointers and may have different element types. Each argument is
// expanded four times, so do not pass expressions with side effects.
#define CONVOL4(v1, v2)                                                        \
    ((v1)[0] * (v2)[0] + (v1)[1] * (v2)[1] + (v1)[2] * (v2)[2] +               \
     (v1)[3] * (v2)[3])
3019 :
3020 : #if 0
3021 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
3022 : // instead of 17.
3023 : // TODO(schwehr): Use an inline function.
3024 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
3025 : { \
3026 : const double dfX = dfX_; \
3027 : dfHalfX = 0.5 * dfX; \
3028 : const double dfThreeX = 3.0 * dfX; \
3029 : const double dfXMinus1 = dfX - 1; \
3030 : \
3031 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
3032 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
3033 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
3034 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
3035 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
3036 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
3037 : }
3038 :
3039 : // TODO(schwehr): Use an inline function.
3040 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
3041 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
3042 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
3043 : #endif
3044 :
3045 299879 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3046 : double dfSrcX, double dfSrcY,
3047 : double *pdfDensity, double *pdfReal,
3048 : double *pdfImag)
3049 :
3050 : {
3051 299879 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3052 299879 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3053 299879 : GPtrDiff_t iSrcOffset =
3054 299879 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3055 299879 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3056 299879 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3057 299879 : double adfDensity[4] = {};
3058 299879 : double adfReal[4] = {};
3059 299879 : double adfImag[4] = {};
3060 :
3061 : // Get the bilinear interpolation at the image borders.
3062 299879 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3063 284412 : iSrcY + 2 >= poWK->nSrcYSize)
3064 24136 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3065 24136 : pdfDensity, pdfReal, pdfImag);
3066 :
3067 275743 : double adfValueDens[4] = {};
3068 275743 : double adfValueReal[4] = {};
3069 275743 : double adfValueImag[4] = {};
3070 :
3071 275743 : double adfCoeffsX[4] = {};
3072 275743 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3073 :
3074 1232410 : for (GPtrDiff_t i = -1; i < 3; i++)
3075 : {
3076 1003120 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3077 991507 : 2, adfDensity, adfReal, adfImag) ||
3078 991507 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3079 973867 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3080 2960190 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3081 965566 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
3082 : {
3083 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3084 46449 : pdfDensity, pdfReal, pdfImag);
3085 : }
3086 :
3087 956668 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3088 956668 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3089 956668 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3090 : }
3091 :
3092 : /* -------------------------------------------------------------------- */
3093 : /* For now, if we have any pixels missing in the kernel area, */
3094 : /* we fallback on using bilinear interpolation. Ideally we */
3095 : /* should do "weight adjustment" of our results similarly to */
3096 : /* what is done for the cubic spline and lanc. interpolators. */
3097 : /* -------------------------------------------------------------------- */
3098 :
3099 229294 : double adfCoeffsY[4] = {};
3100 229294 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3101 :
3102 229294 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3103 229294 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3104 229294 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3105 :
3106 229294 : return true;
3107 : }
3108 :
3109 : #ifdef USE_SSE2
3110 :
3111 : /************************************************************************/
3112 : /* XMMLoad4Values() */
3113 : /* */
3114 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3115 : /* m128 register. */
3116 : /************************************************************************/
3117 :
3118 949092 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3119 : {
3120 : unsigned int i;
3121 949092 : memcpy(&i, ptr, 4);
3122 1898180 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3123 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3124 : // 32-bit integers.
3125 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3126 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3127 : #else
3128 1898180 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3129 1898180 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3130 : #endif
3131 1898180 : return _mm_cvtepi32_ps(xmm_i);
3132 : }
3133 :
3134 5292 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3135 : {
3136 : GUInt64 i;
3137 5292 : memcpy(&i, ptr, 8);
3138 10584 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3139 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3140 : // 32-bit integers.
3141 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3142 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3143 : #else
3144 10584 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3145 : #endif
3146 10584 : return _mm_cvtepi32_ps(xmm_i);
3147 : }
3148 :
3149 : /************************************************************************/
3150 : /* XMMHorizontalAdd() */
3151 : /* */
3152 : /* Return the sum of the 4 floating points of the register. */
3153 : /************************************************************************/
3154 :
3155 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3156 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3157 : {
3158 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3159 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3160 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3161 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3162 : return _mm_cvtss_f32(sums);
3163 : }
3164 : #else
3165 238596 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3166 : {
3167 238596 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3168 238596 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3169 238596 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3170 238596 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3171 238596 : return _mm_cvtss_f32(sums);
3172 : }
3173 : #endif
3174 :
3175 : #endif // define USE_SSE2
3176 :
3177 : /************************************************************************/
3178 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3179 : /************************************************************************/
3180 :
// Note: if USE_SSE_CUBIC_IMPL is defined, only instantiate this for Byte and
// UInt16, because the SSE code makes a few assumptions about those types.
// We do not define USE_SSE_CUBIC_IMPL since, in practice, it gives zero
// performance benefit.
3185 :
3186 : template <class T>
3187 361 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3188 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3189 : double *pdfDensity, double *pdfReal)
3190 : {
3191 361 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3192 361 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3193 361 : const GPtrDiff_t iSrcOffset =
3194 361 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3195 :
3196 : // Get the bilinear interpolation at the image borders.
3197 361 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3198 361 : iSrcY + 2 >= poWK->nSrcYSize)
3199 : {
3200 0 : double adfImagIgnored[4] = {};
3201 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3202 0 : pdfDensity, pdfReal, adfImagIgnored);
3203 : }
3204 :
3205 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3206 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3207 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3208 :
3209 : // TODO(schwehr): Explain the magic numbers.
3210 : float afTemp[4 + 4 + 4 + 1];
3211 : float *pafAligned =
3212 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3213 : float *pafCoeffs = pafAligned;
3214 : float *pafDensity = pafAligned + 4;
3215 : float *pafValue = pafAligned + 8;
3216 :
3217 : const float fHalfDeltaX = 0.5f * fDeltaX;
3218 : const float fThreeDeltaX = 3.0f * fDeltaX;
3219 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3220 :
3221 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3222 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3223 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3224 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3225 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3226 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD);
3227 :
3228 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3229 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3230 : i++, iOffset += poWK->nSrcXSize)
3231 : {
3232 : const __m128 xmmDensity =
3233 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3234 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3235 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3236 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3237 :
3238 : const __m128 xmmValues =
3239 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3240 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3241 : }
3242 : if (_mm_movemask_ps(xmmMaskLowDensity))
3243 : {
3244 : double adfImagIgnored[4] = {};
3245 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3246 : pdfDensity, pdfReal, adfImagIgnored);
3247 : }
3248 :
3249 : const float fHalfDeltaY = 0.5f * fDeltaY;
3250 : const float fThreeDeltaY = 3.0f * fDeltaY;
3251 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3252 :
3253 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3254 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3255 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3256 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3257 :
3258 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3259 :
3260 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3261 : const __m128 xmmValue = _mm_load_ps(pafValue);
3262 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3263 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3264 :
3265 : // We did all above computations on float32 whereas the general case is
3266 : // float64. Not sure if one is fundamentally more correct than the other
3267 : // one, but we want our optimization to give the same result as the
3268 : // general case as much as possible, so if the resulting value is
3269 : // close to some_int_value + 0.5, redo the computation with the general
3270 : // case.
3271 : // Note: If other types than Byte or UInt16, will need changes.
3272 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3273 : return true;
3274 :
3275 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3276 :
3277 361 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3278 361 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3279 :
3280 361 : double adfValueDens[4] = {};
3281 361 : double adfValueReal[4] = {};
3282 :
3283 361 : double adfCoeffsX[4] = {};
3284 361 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3285 :
3286 361 : double adfCoeffsY[4] = {};
3287 361 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3288 :
3289 1433 : for (GPtrDiff_t i = -1; i < 3; i++)
3290 : {
3291 1177 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3292 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3293 1177 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] < SRC_DENSITY_THRESHOLD ||
3294 1089 : poWK->pafUnifiedSrcDensity[iOffset + 1] < SRC_DENSITY_THRESHOLD ||
3295 1089 : poWK->pafUnifiedSrcDensity[iOffset + 2] < SRC_DENSITY_THRESHOLD ||
3296 1089 : poWK->pafUnifiedSrcDensity[iOffset + 3] < SRC_DENSITY_THRESHOLD)
3297 : {
3298 105 : double adfImagIgnored[4] = {};
3299 105 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3300 : pdfDensity, pdfReal,
3301 105 : adfImagIgnored);
3302 : }
3303 : #endif
3304 :
3305 1072 : adfValueDens[i + 1] =
3306 1072 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3307 :
3308 1072 : adfValueReal[i + 1] = CONVOL4(
3309 : adfCoeffsX,
3310 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3311 : }
3312 :
3313 256 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3314 256 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3315 :
3316 256 : return true;
3317 : }
3318 :
3319 : /************************************************************************/
3320 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3321 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3322 : /************************************************************************/
3323 :
3324 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3325 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3326 : double *pdfDensity, double *pdfReal)
3327 :
3328 : {
3329 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3330 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3331 0 : const GPtrDiff_t iSrcOffset =
3332 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3333 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3334 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3335 :
3336 : // Get the bilinear interpolation at the image borders.
3337 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3338 0 : iSrcY + 2 >= poWK->nSrcYSize)
3339 : {
3340 0 : double adfImagIgnored[4] = {};
3341 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3342 0 : pdfDensity, pdfReal, adfImagIgnored);
3343 : }
3344 :
3345 0 : double adfCoeffsX[4] = {};
3346 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3347 :
3348 0 : double adfCoeffsY[4] = {};
3349 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3350 :
3351 0 : double adfValueDens[4] = {};
3352 0 : double adfValueReal[4] = {};
3353 0 : double adfDensity[4] = {};
3354 0 : double adfReal[4] = {};
3355 0 : double adfImagIgnored[4] = {};
3356 :
3357 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3358 : {
3359 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3360 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3361 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD ||
3362 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD ||
3363 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD ||
3364 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD)
3365 : {
3366 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3367 : pdfDensity, pdfReal,
3368 0 : adfImagIgnored);
3369 : }
3370 :
3371 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3372 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3373 : }
3374 :
3375 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3376 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3377 :
3378 0 : return true;
3379 : }
3380 :
3381 : template <class T>
3382 1906603 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3383 : int iBand, double dfSrcX,
3384 : double dfSrcY, T *pValue)
3385 :
3386 : {
3387 1906603 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3388 1906603 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3389 1906603 : const GPtrDiff_t iSrcOffset =
3390 1906603 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3391 1906603 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3392 1906603 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3393 1906603 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3394 1906603 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3395 :
3396 : // Get the bilinear interpolation at the image borders.
3397 1906603 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3398 1662527 : iSrcY + 2 >= poWK->nSrcYSize)
3399 303751 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3400 303751 : pValue);
3401 :
3402 1602852 : double adfCoeffs[4] = {};
3403 1602852 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3404 :
3405 1602852 : double adfValue[4] = {};
3406 :
3407 8014250 : for (GPtrDiff_t i = -1; i < 3; i++)
3408 : {
3409 6411406 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3410 :
3411 6411406 : adfValue[i + 1] = CONVOL4(
3412 : adfCoeffs,
3413 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3414 : }
3415 :
3416 : const double dfValue =
3417 1602852 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3418 : adfValue[1], adfValue[2], adfValue[3]);
3419 :
3420 1602852 : *pValue = GWKClampValueT<T>(dfValue);
3421 :
3422 1602852 : return true;
3423 : }
3424 :
3425 : /************************************************************************/
3426 : /* GWKLanczosSinc() */
3427 : /************************************************************************/
3428 :
3429 : /*
3430 : * Lanczos windowed sinc interpolation kernel with radius r.
3431 : * /
3432 : * | sinc(x) * sinc(x/r), if |x| < r
3433 : * L(x) = | 1, if x = 0 ,
3434 : * | 0, otherwise
3435 : * \
3436 : *
3437 : * where sinc(x) = sin(PI * x) / (PI * x).
3438 : */
3439 :
// Lanczos kernel value sinc(x) * sinc(x / 3) for radius 3; returns 1 at
// x == 0. The |x| < 3 support limit is not enforced here.
static double GWKLanczosSinc(double dfX)
{
    if (dfX == 0.0)
        return 1.0;

    const double dfAngle = M_PI * dfX;
    const double dfAngleOver3 = dfAngle / 3;
    const double dfDenominator = dfAngle * dfAngleOver3;

    // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x), the product
    // sin(dfAngle) * sin(dfAngleOver3) is obtained from a single sin() call.
    const double dfSin = sin(dfAngleOver3);
    const double dfSinSquared = dfSin * dfSin;
    const double dfNumerator = (3 - 4 * dfSinSquared) * dfSinSquared;

    return dfNumerator / dfDenominator;
}
3456 :
// Replace each of the four distances in padfValues by its radius-3 Lanczos
// kernel value (1 for a distance of exactly 0) and return the sum of the
// four resulting weights.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfValue = padfValues; pdfValue != padfValues + 4; ++pdfValue)
    {
        if (*pdfValue == 0.0)
        {
            *pdfValue = 1.0;
            continue;
        }

        const double dfAngle = M_PI * *pdfValue;
        const double dfAngleOver3 = dfAngle / 3;
        const double dfDenominator = dfAngle * dfAngleOver3;

        // Given that sin(3x) = 3 sin(x) - 4 sin^3 (x), the product
        // sin(dfAngle) * sin(dfAngleOver3) needs a single sin() call.
        const double dfSin = sin(dfAngleOver3);
        const double dfSinSquared = dfSin * dfSin;
        const double dfNumerator = (3 - 4 * dfSinSquared) * dfSinSquared;

        *pdfValue = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3481 :
3482 : /************************************************************************/
3483 : /* GWKBilinear() */
3484 : /************************************************************************/
3485 :
// Triangle (bilinear) kernel: 1 - |x| for |x| <= 1, zero elsewhere.
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3494 :
// Replace each of the four distances in padfValues by its triangle
// (bilinear) kernel weight, 1 - |x| for |x| <= 1 and 0 otherwise, and
// return the sum of the four weights.
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfDistance = fabs(padfValues[i]);
        padfValues[i] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3519 :
3520 : /************************************************************************/
3521 : /* GWKCubic() */
3522 : /************************************************************************/
3523 :
// Kernel-function adapter: forwards dfX to CubicKernel() (declared outside
// this section of the file) and returns its result unchanged.
static double GWKCubic(double dfX)
{
    return CubicKernel(dfX);
}
3528 :
// Replace each of the four distances in padfValues by its cubic convolution
// kernel weight and return the sum of the four weights.
//
// Per value x: x^2 * (1.5|x| - 2.5) + 1 for |x| <= 1,
// x^2 * (-0.5|x| + 2.5) - 4|x| + 2 for 1 < |x| <= 2, and 0 beyond.
static double GWKCubic4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfAbsX = fabs(padfValues[i]);
        const double dfX2 = padfValues[i] * padfValues[i];

        double dfWeight = 0.0;
        if (dfAbsX <= 1.0)
        {
            dfWeight = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            dfWeight = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }

        padfValues[i] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3570 :
3571 : /************************************************************************/
3572 : /* GWKBSpline() */
3573 : /************************************************************************/
3574 :
3575 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3576 : // Equation 8 with (B,C)=(1,0)
3577 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3578 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3579 :
// Cubic B-spline weight at abscissa x, written as a sum of truncated cubics:
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes when its base is strictly positive.
// The result is NOT multiplied by 1/6.
static double GWKBSpline(double x)
{
    const double dfXPlus2 = x + 2.0;
    if (!(dfXPlus2 > 0.0))
        return 0.0;

    const double dfXPlus1 = x + 1.0;
    const double dfXMinus1 = x - 1.0;

    // Contribution of the terms based at -1, 0 and +1, accumulated in the
    // same order as a nested evaluation would: innermost first.
    double dfInner = 0.0;
    if (dfXPlus1 > 0.0)
    {
        double dfFromZero = 0.0;
        if (x > 0.0)
        {
            const double dfFromOne =
                (dfXMinus1 > 0.0) ? -4.0 * dfXMinus1 * dfXMinus1 * dfXMinus1
                                  : 0.0;
            dfFromZero = dfFromOne + 6.0 * x * x * x;
        }
        dfInner = dfFromZero + -4.0 * dfXPlus1 * dfXPlus1 * dfXPlus1;
    }

    const double dfXPlus2Cubed = dfXPlus2 * dfXPlus2 * dfXPlus2;
    return dfInner + dfXPlus2Cubed;  // * 0.166666666666666666666
}
3603 :
// Evaluate the (unnormalized, i.e. not divided by 6) cubic B-spline kernel on
// four abscissas at once.
//
// On input padfValues[0..3] hold the abscissas. On output each entry is
// replaced by (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3, where each term only
// contributes when its base is strictly positive.
// Returns the sum of the four weights.
static double GWKBSpline4Values(double *padfValues)
{
    for (int i = 0; i < 4; i++)
    {
        const double x = padfValues[i];
        const double dfXPlus2 = x + 2.0;
        const double dfXPlus1 = x + 1.0;
        const double dfXMinus1 = x - 1.0;

        double dfWeight = 0.0;
        if (dfXPlus2 > 0.0)
        {
            // Terms based at -1, 0 and +1, accumulated innermost first.
            double dfInner = 0.0;
            if (dfXPlus1 > 0.0)
            {
                double dfFromZero = 0.0;
                if (x > 0.0)
                {
                    const double dfFromOne =
                        (dfXMinus1 > 0.0)
                            ? -4.0 * dfXMinus1 * dfXMinus1 * dfXMinus1
                            : 0.0;
                    dfFromZero = dfFromOne + 6.0 * x * x * x;
                }
                dfInner = dfFromZero + -4.0 * dfXPlus1 * dfXPlus1 * dfXPlus1;
            }
            dfWeight = dfInner + dfXPlus2 * dfXPlus2 * dfXPlus2;
        }
        padfValues[i] = dfWeight;  // * 0.166666666666666666666
    }
    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3633 : /************************************************************************/
3634 : /* GWKResampleWrkStruct */
3635 : /************************************************************************/
3636 :
3637 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3638 :
3639 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3640 : double dfSrcX, double dfSrcY,
3641 : double *pdfDensity, double *pdfReal,
3642 : double *pdfImag,
3643 : GWKResampleWrkStruct *psWrkStruct);
3644 :
3645 : struct _GWKResampleWrkStruct
3646 : {
3647 : pfnGWKResampleType pfnGWKResample;
3648 :
3649 : // Space for saved X weights.
3650 : double *padfWeightsX;
3651 : bool *pabCalcX;
3652 :
3653 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3654 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3655 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3656 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3657 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3658 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3659 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3660 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3661 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3662 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3663 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3664 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3665 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3666 :
3667 : // Space for saving a row of pixels.
3668 : double *padfRowDensity;
3669 : double *padfRowReal;
3670 : double *padfRowImag;
3671 : };
3672 :
3673 : /************************************************************************/
3674 : /* GWKResampleCreateWrkStruct() */
3675 : /************************************************************************/
3676 :
3677 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3678 : double dfSrcY, double *pdfDensity, double *pdfReal,
3679 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3680 :
3681 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3682 : double dfSrcX, double dfSrcY,
3683 : double *pdfDensity, double *pdfReal,
3684 : double *pdfImag,
3685 : GWKResampleWrkStruct *psWrkStruct);
3686 :
3687 352 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3688 : {
3689 352 : const int nXDist = (poWK->nXRadius + 1) * 2;
3690 352 : const int nYDist = (poWK->nYRadius + 1) * 2;
3691 :
3692 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3693 352 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3694 :
3695 : // Alloc space for saved X weights.
3696 352 : psWrkStruct->padfWeightsX =
3697 352 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3698 352 : psWrkStruct->pabCalcX =
3699 352 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3700 :
3701 352 : psWrkStruct->padfWeightsY =
3702 352 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3703 352 : psWrkStruct->iLastSrcX = -10;
3704 352 : psWrkStruct->iLastSrcY = -10;
3705 352 : psWrkStruct->dfLastDeltaX = -10;
3706 352 : psWrkStruct->dfLastDeltaY = -10;
3707 :
3708 : // Alloc space for saving a row of pixels.
3709 352 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3710 324 : poWK->panUnifiedSrcValid == nullptr &&
3711 312 : poWK->papanBandSrcValid == nullptr)
3712 : {
3713 312 : psWrkStruct->padfRowDensity = nullptr;
3714 : }
3715 : else
3716 : {
3717 40 : psWrkStruct->padfRowDensity =
3718 40 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3719 : }
3720 352 : psWrkStruct->padfRowReal =
3721 352 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3722 352 : psWrkStruct->padfRowImag =
3723 352 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3724 :
3725 352 : if (poWK->eResample == GRA_Lanczos)
3726 : {
3727 63 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3728 :
3729 63 : if (poWK->dfXScale < 1)
3730 : {
3731 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3732 4 : psWrkStruct->dfSinPiXScaleOver3 =
3733 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3734 4 : psWrkStruct->dfCosPiXScaleOver3);
3735 : // "Naive":
3736 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3737 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3738 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3739 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3740 4 : psWrkStruct->dfCosPiXScaleOver3 -
3741 4 : 3) *
3742 4 : psWrkStruct->dfCosPiXScaleOver3;
3743 4 : psWrkStruct->dfSinPiXScale = sqrt(
3744 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3745 : }
3746 :
3747 63 : if (poWK->dfYScale < 1)
3748 : {
3749 11 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3750 11 : psWrkStruct->dfSinPiYScaleOver3 =
3751 11 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3752 11 : psWrkStruct->dfCosPiYScaleOver3);
3753 : // "Naive":
3754 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3755 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3756 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3757 11 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3758 11 : psWrkStruct->dfCosPiYScaleOver3 -
3759 11 : 3) *
3760 11 : psWrkStruct->dfCosPiYScaleOver3;
3761 11 : psWrkStruct->dfSinPiYScale = sqrt(
3762 11 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3763 : }
3764 : }
3765 : else
3766 289 : psWrkStruct->pfnGWKResample = GWKResample;
3767 :
3768 352 : return psWrkStruct;
3769 : }
3770 :
3771 : /************************************************************************/
3772 : /* GWKResampleDeleteWrkStruct() */
3773 : /************************************************************************/
3774 :
3775 352 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3776 : {
3777 352 : CPLFree(psWrkStruct->padfWeightsX);
3778 352 : CPLFree(psWrkStruct->padfWeightsY);
3779 352 : CPLFree(psWrkStruct->pabCalcX);
3780 352 : CPLFree(psWrkStruct->padfRowDensity);
3781 352 : CPLFree(psWrkStruct->padfRowReal);
3782 352 : CPLFree(psWrkStruct->padfRowImag);
3783 352 : CPLFree(psWrkStruct);
3784 352 : }
3785 :
3786 : /************************************************************************/
3787 : /* GWKResample() */
3788 : /************************************************************************/
3789 :
3790 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3791 : double dfSrcY, double *pdfDensity, double *pdfReal,
3792 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3793 :
3794 : {
3795 : // Save as local variables to avoid following pointers in loops.
3796 239383 : const int nSrcXSize = poWK->nSrcXSize;
3797 239383 : const int nSrcYSize = poWK->nSrcYSize;
3798 :
3799 239383 : double dfAccumulatorReal = 0.0;
3800 239383 : double dfAccumulatorImag = 0.0;
3801 239383 : double dfAccumulatorDensity = 0.0;
3802 239383 : double dfAccumulatorWeight = 0.0;
3803 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3804 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3805 239383 : const GPtrDiff_t iSrcOffset =
3806 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3807 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3808 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3809 :
3810 239383 : const double dfXScale = poWK->dfXScale;
3811 239383 : const double dfYScale = poWK->dfYScale;
3812 :
3813 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
3814 :
3815 : // Space for saved X weights.
3816 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
3817 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
3818 :
3819 : // Space for saving a row of pixels.
3820 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
3821 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
3822 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
3823 :
3824 : // Mark as needing calculation (don't calculate the weights yet,
3825 : // because a mask may render it unnecessary).
3826 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
3827 :
3828 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3829 239383 : CPLAssert(pfnGetWeight);
3830 :
3831 : // Skip sampling over edge of image.
3832 239383 : int j = poWK->nFiltInitY;
3833 239383 : int jMax = poWK->nYRadius;
3834 239383 : if (iSrcY + j < 0)
3835 566 : j = -iSrcY;
3836 239383 : if (iSrcY + jMax >= nSrcYSize)
3837 662 : jMax = nSrcYSize - iSrcY - 1;
3838 :
3839 239383 : int iMin = poWK->nFiltInitX;
3840 239383 : int iMax = poWK->nXRadius;
3841 239383 : if (iSrcX + iMin < 0)
3842 566 : iMin = -iSrcX;
3843 239383 : if (iSrcX + iMax >= nSrcXSize)
3844 659 : iMax = nSrcXSize - iSrcX - 1;
3845 :
3846 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
3847 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
3848 :
3849 239383 : GPtrDiff_t iRowOffset =
3850 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3851 :
3852 : // Loop over pixel rows in the kernel.
3853 1445930 : for (; j <= jMax; ++j)
3854 : {
3855 1206540 : iRowOffset += nSrcXSize;
3856 :
3857 : // Get pixel values.
3858 : // We can potentially read extra elements after the "normal" end of the
3859 : // source arrays, but the contract of papabySrcImage[iBand],
3860 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3861 : // is to have WARP_EXTRA_ELTS reserved at their end.
3862 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3863 : padfRowDensity, padfRowReal, padfRowImag))
3864 72 : continue;
3865 :
3866 : // Calculate the Y weight.
3867 : double dfWeight1 = (bYScaleBelow1)
3868 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3869 1600 : : pfnGetWeight(j - dfDeltaY);
3870 :
3871 : // Iterate over pixels in row.
3872 1206470 : double dfAccumulatorRealLocal = 0.0;
3873 1206470 : double dfAccumulatorImagLocal = 0.0;
3874 1206470 : double dfAccumulatorDensityLocal = 0.0;
3875 1206470 : double dfAccumulatorWeightLocal = 0.0;
3876 :
3877 7317420 : for (int i = iMin; i <= iMax; ++i)
3878 : {
3879 : // Skip sampling if pixel has zero density.
3880 6110940 : if (padfRowDensity != nullptr &&
3881 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
3882 546 : continue;
3883 :
3884 6110400 : double dfWeight2 = 0.0;
3885 :
3886 : // Make or use a cached set of weights for this row.
3887 6110400 : if (pabCalcX[i - iMin])
3888 : {
3889 : // Use saved weight value instead of recomputing it.
3890 4903920 : dfWeight2 = padfWeightsX[i - iMin];
3891 : }
3892 : else
3893 : {
3894 : // Calculate & save the X weight.
3895 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
3896 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3897 1600 : : pfnGetWeight(i - dfDeltaX);
3898 :
3899 1206480 : pabCalcX[i - iMin] = true;
3900 : }
3901 :
3902 : // Accumulate!
3903 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
3904 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
3905 6110400 : if (padfRowDensity != nullptr)
3906 76731 : dfAccumulatorDensityLocal +=
3907 76731 : padfRowDensity[i - iMin] * dfWeight2;
3908 6110400 : dfAccumulatorWeightLocal += dfWeight2;
3909 : }
3910 :
3911 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3912 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3913 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3914 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3915 : }
3916 :
3917 239383 : if (dfAccumulatorWeight < 0.000001 ||
3918 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
3919 : {
3920 0 : *pdfDensity = 0.0;
3921 0 : return false;
3922 : }
3923 :
3924 : // Calculate the output taking into account weighting.
3925 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
3926 : {
3927 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3928 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3929 239380 : if (padfRowDensity != nullptr)
3930 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3931 : else
3932 237496 : *pdfDensity = 1.0;
3933 : }
3934 : else
3935 : {
3936 3 : *pdfReal = dfAccumulatorReal;
3937 3 : *pdfImag = dfAccumulatorImag;
3938 3 : if (padfRowDensity != nullptr)
3939 3 : *pdfDensity = dfAccumulatorDensity;
3940 : else
3941 0 : *pdfDensity = 1.0;
3942 : }
3943 :
3944 239383 : return true;
3945 : }
3946 :
3947 : /************************************************************************/
3948 : /* GWKResampleOptimizedLanczos() */
3949 : /************************************************************************/
3950 :
3951 617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3952 : double dfSrcX, double dfSrcY,
3953 : double *pdfDensity, double *pdfReal,
3954 : double *pdfImag,
3955 : GWKResampleWrkStruct *psWrkStruct)
3956 :
3957 : {
3958 : // Save as local variables to avoid following pointers in loops.
3959 617144 : const int nSrcXSize = poWK->nSrcXSize;
3960 617144 : const int nSrcYSize = poWK->nSrcYSize;
3961 :
3962 617144 : double dfAccumulatorReal = 0.0;
3963 617144 : double dfAccumulatorImag = 0.0;
3964 617144 : double dfAccumulatorDensity = 0.0;
3965 617144 : double dfAccumulatorWeight = 0.0;
3966 617144 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3967 617144 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3968 617144 : const GPtrDiff_t iSrcOffset =
3969 617144 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3970 617144 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3971 617144 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3972 :
3973 617144 : const double dfXScale = poWK->dfXScale;
3974 617144 : const double dfYScale = poWK->dfYScale;
3975 :
3976 : // Space for saved X weights.
3977 617144 : double *const padfWeightsXShifted =
3978 617144 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
3979 617144 : double *const padfWeightsYShifted =
3980 617144 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
3981 :
3982 : // Space for saving a row of pixels.
3983 617144 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
3984 617144 : double *const padfRowReal = psWrkStruct->padfRowReal;
3985 617144 : double *const padfRowImag = psWrkStruct->padfRowImag;
3986 :
3987 : // Skip sampling over edge of image.
3988 617144 : int jMin = poWK->nFiltInitY;
3989 617144 : int jMax = poWK->nYRadius;
3990 617144 : if (iSrcY + jMin < 0)
3991 16522 : jMin = -iSrcY;
3992 617144 : if (iSrcY + jMax >= nSrcYSize)
3993 5782 : jMax = nSrcYSize - iSrcY - 1;
3994 :
3995 617144 : int iMin = poWK->nFiltInitX;
3996 617144 : int iMax = poWK->nXRadius;
3997 617144 : if (iSrcX + iMin < 0)
3998 15797 : iMin = -iSrcX;
3999 617144 : if (iSrcX + iMax >= nSrcXSize)
4000 4657 : iMax = nSrcXSize - iSrcX - 1;
4001 :
4002 617144 : if (dfXScale < 1.0)
4003 : {
4004 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
4005 200179 : iMin++;
4006 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
4007 0 : iMax--;
4008 :
4009 : // clang-format off
4010 : /*
4011 : Naive version:
4012 : for (int i = iMin; i <= iMax; ++i)
4013 : {
4014 : psWrkStruct->padfWeightsXShifted[i] =
4015 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
4016 : }
4017 :
4018 : but given that:
4019 :
4020 : GWKLanczosSinc(x):
4021 : if (dfX == 0.0)
4022 : return 1.0;
4023 :
4024 : const double dfPIX = M_PI * dfX;
4025 : const double dfPIXoverR = dfPIX / 3;
4026 : const double dfPIX2overR = dfPIX * dfPIXoverR;
4027 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
4028 :
4029 : and
4030 : sin (a + b) = sin a cos b + cos a sin b.
4031 : cos (a + b) = cos a cos b - sin a sin b.
4032 :
4033 : we can skip any sin() computation within the loop
4034 : */
4035 : // clang-format on
4036 :
4037 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
4038 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4039 : {
4040 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4041 :
4042 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4043 71790 : double dfCosOver3 = cos(dfPIXover3);
4044 71790 : double dfSinOver3 = sin(dfPIXover3);
4045 :
4046 : // "Naive":
4047 : // double dfSin = sin( M_PI * dfX );
4048 : // double dfCos = cos( M_PI * dfX );
4049 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4050 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4051 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4052 :
4053 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4054 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4055 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4056 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4057 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4058 71790 : padfWeightsXShifted[iMin] =
4059 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4060 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
4061 : {
4062 1564690 : dfX += dfXScale;
4063 1564690 : const double dfNewSin =
4064 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4065 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4066 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
4067 1564690 : padfWeightsXShifted[i] =
4068 : dfX == 0
4069 1564690 : ? 1.0
4070 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4071 1564690 : const double dfNewCos =
4072 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4073 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4074 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
4075 1564690 : dfSin = dfNewSin;
4076 1564690 : dfCos = dfNewCos;
4077 1564690 : dfSinOver3 = dfNewSinOver3;
4078 1564690 : dfCosOver3 = dfNewCosOver3;
4079 : }
4080 :
4081 71790 : psWrkStruct->iLastSrcX = iSrcX;
4082 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4083 : }
4084 : }
4085 : else
4086 : {
4087 757542 : while (iMin - dfDeltaX < -3.0)
4088 343260 : iMin++;
4089 414282 : while (iMax - dfDeltaX > 3.0)
4090 0 : iMax--;
4091 :
4092 414282 : if (iSrcX != psWrkStruct->iLastSrcX ||
4093 209580 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4094 : {
4095 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4096 : // following trigonometric formulas.
4097 :
4098 : // TODO(schwehr): Move this somewhere where it can be rendered at
4099 : // LaTeX.
4100 : // clang-format off
4101 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4102 : // cos(M_PI * dfBase) * sin(M_PI * k)
4103 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4104 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4105 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4106 :
4107 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4108 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4109 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4110 : // clang-format on
4111 :
4112 414282 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4113 414282 : const double dfSin2PIDeltaXOver3 =
4114 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4115 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4116 414282 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4117 414282 : const double dfSinPIDeltaX =
4118 414282 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4119 414282 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4120 414282 : const double dfInvPI2Over3xSinPIDeltaX =
4121 : dfInvPI2Over3 * dfSinPIDeltaX;
4122 414282 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4123 414282 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4124 414282 : const double dfSinPIOver3 = 0.8660254037844386;
4125 414282 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4126 414282 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4127 : const double padfCst[] = {
4128 414282 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4129 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4130 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4131 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4132 414282 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4133 :
4134 2936860 : for (int i = iMin; i <= iMax; ++i)
4135 : {
4136 2522570 : const double dfX = i - dfDeltaX;
4137 2522570 : if (dfX == 0.0)
4138 58282 : padfWeightsXShifted[i] = 1.0;
4139 : else
4140 2464290 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4141 : #if DEBUG_VERBOSE
4142 : // TODO(schwehr): AlmostEqual.
4143 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4144 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4145 : #endif
4146 : }
4147 :
4148 414282 : psWrkStruct->iLastSrcX = iSrcX;
4149 414282 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4150 : }
4151 : }
4152 :
4153 617144 : if (dfYScale < 1.0)
4154 : {
4155 403116 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4156 200254 : jMin++;
4157 202862 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4158 0 : jMax--;
4159 :
4160 : // clang-format off
4161 : /*
4162 : Naive version:
4163 : for (int j = jMin; j <= jMax; ++j)
4164 : {
4165 : padfWeightsYShifted[j] =
4166 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4167 : }
4168 : */
4169 : // clang-format on
4170 :
4171 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4172 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4173 : {
4174 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4175 :
4176 383 : double dfPIYover3 = M_PI / 3 * dfY;
4177 383 : double dfCosOver3 = cos(dfPIYover3);
4178 383 : double dfSinOver3 = sin(dfPIYover3);
4179 :
4180 : // "Naive":
4181 : // double dfSin = sin( M_PI * dfY );
4182 : // double dfCos = cos( M_PI * dfY );
4183 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4184 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4185 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4186 :
4187 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4188 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4189 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4190 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4191 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4192 383 : padfWeightsYShifted[jMin] =
4193 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4194 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4195 : {
4196 6935 : dfY += dfYScale;
4197 6935 : const double dfNewSin =
4198 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4199 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4200 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4201 6935 : padfWeightsYShifted[j] =
4202 : dfY == 0
4203 6935 : ? 1.0
4204 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4205 6935 : const double dfNewCos =
4206 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4207 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4208 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4209 6935 : dfSin = dfNewSin;
4210 6935 : dfCos = dfNewCos;
4211 6935 : dfSinOver3 = dfNewSinOver3;
4212 6935 : dfCosOver3 = dfNewCosOver3;
4213 : }
4214 :
4215 383 : psWrkStruct->iLastSrcY = iSrcY;
4216 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4217 : }
4218 : }
4219 : else
4220 : {
4221 684742 : while (jMin - dfDeltaY < -3.0)
4222 270460 : jMin++;
4223 414282 : while (jMax - dfDeltaY > 3.0)
4224 0 : jMax--;
4225 :
4226 414282 : if (iSrcY != psWrkStruct->iLastSrcY ||
4227 413663 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4228 : {
4229 1132 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4230 1132 : const double dfSin2PIDeltaYOver3 =
4231 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4232 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4233 1132 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4234 1132 : const double dfSinPIDeltaY =
4235 1132 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4236 1132 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4237 1132 : const double dfInvPI2Over3xSinPIDeltaY =
4238 : dfInvPI2Over3 * dfSinPIDeltaY;
4239 1132 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4240 1132 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4241 1132 : const double dfSinPIOver3 = 0.8660254037844386;
4242 1132 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4243 1132 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4244 : const double padfCst[] = {
4245 1132 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4246 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4247 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4248 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4249 1132 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4250 :
4251 7917 : for (int j = jMin; j <= jMax; ++j)
4252 : {
4253 6785 : const double dfY = j - dfDeltaY;
4254 6785 : if (dfY == 0.0)
4255 460 : padfWeightsYShifted[j] = 1.0;
4256 : else
4257 6325 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4258 : #if DEBUG_VERBOSE
4259 : // TODO(schwehr): AlmostEqual.
4260 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4261 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4262 : #endif
4263 : }
4264 :
4265 1132 : psWrkStruct->iLastSrcY = iSrcY;
4266 1132 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4267 : }
4268 : }
4269 :
4270 : // If we have no density information, we can simply compute the
4271 : // accumulated weight.
4272 617144 : if (padfRowDensity == nullptr)
4273 : {
4274 617144 : double dfRowAccWeight = 0.0;
4275 7903490 : for (int i = iMin; i <= iMax; ++i)
4276 : {
4277 7286350 : dfRowAccWeight += padfWeightsXShifted[i];
4278 : }
4279 617144 : double dfColAccWeight = 0.0;
4280 7958040 : for (int j = jMin; j <= jMax; ++j)
4281 : {
4282 7340900 : dfColAccWeight += padfWeightsYShifted[j];
4283 : }
4284 617144 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4285 : }
4286 :
4287 : // Loop over pixel rows in the kernel.
4288 :
4289 617144 : if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
4290 616524 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4291 : !padfRowDensity)
4292 : {
4293 : // Optimization for Byte case without any masking/alpha
4294 :
4295 616524 : if (dfAccumulatorWeight < 0.000001)
4296 : {
4297 0 : *pdfDensity = 0.0;
4298 0 : return false;
4299 : }
4300 :
4301 616524 : const GByte *pSrc =
4302 616524 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4303 616524 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4304 :
4305 : #if defined(USE_SSE2)
4306 616524 : if (iMax - iMin + 1 == 6)
4307 : {
4308 : // This is just an optimized version of the general case in
4309 : // the else clause.
4310 :
4311 346854 : pSrc += iMin;
4312 346854 : int j = jMin;
4313 : const auto fourXWeights =
4314 346854 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4315 :
4316 : // Process 2 lines at the same time.
4317 1375860 : for (; j < jMax; j += 2)
4318 : {
4319 : const XMMReg4Double v_acc =
4320 1029000 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4321 : const XMMReg4Double v_acc2 =
4322 1029000 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4323 1029000 : const double dfRowAcc = v_acc.GetHorizSum();
4324 1029000 : const double dfRowAccEnd =
4325 1029000 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4326 1029000 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4327 1029000 : dfAccumulatorReal +=
4328 1029000 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4329 1029000 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4330 1029000 : const double dfRowAcc2End =
4331 1029000 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4332 1029000 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4333 1029000 : dfAccumulatorReal +=
4334 1029000 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4335 1029000 : pSrc += 2 * nSrcXSize;
4336 : }
4337 346854 : if (j == jMax)
4338 : {
4339 : // Process last line if there's an odd number of them.
4340 :
4341 : const XMMReg4Double v_acc =
4342 86045 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4343 86045 : const double dfRowAcc = v_acc.GetHorizSum();
4344 86045 : const double dfRowAccEnd =
4345 86045 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4346 86045 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4347 86045 : dfAccumulatorReal +=
4348 86045 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4349 : }
4350 : }
4351 : else
4352 : #endif
4353 : {
4354 5463580 : for (int j = jMin; j <= jMax; ++j)
4355 : {
4356 5193900 : int i = iMin;
4357 5193900 : double dfRowAcc1 = 0.0;
4358 5193900 : double dfRowAcc2 = 0.0;
4359 : // A bit of loop unrolling
4360 62750600 : for (; i < iMax; i += 2)
4361 : {
4362 57556700 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4363 57556700 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4364 : }
4365 5193900 : if (i == iMax)
4366 : {
4367 : // Process last column if there's an odd number of them.
4368 426183 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4369 : }
4370 :
4371 5193900 : dfAccumulatorReal +=
4372 5193900 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4373 5193900 : pSrc += nSrcXSize;
4374 : }
4375 : }
4376 :
4377 : // Calculate the output taking into account weighting.
4378 616524 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4379 : {
4380 569230 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4381 569230 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4382 569230 : *pdfDensity = 1.0;
4383 : }
4384 : else
4385 : {
4386 47294 : *pdfReal = dfAccumulatorReal;
4387 47294 : *pdfDensity = 1.0;
4388 : }
4389 :
4390 616524 : return true;
4391 : }
4392 :
4393 620 : GPtrDiff_t iRowOffset =
4394 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4395 :
4396 620 : int nCountValid = 0;
4397 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4398 :
4399 3560 : for (int j = jMin; j <= jMax; ++j)
4400 : {
4401 2940 : iRowOffset += nSrcXSize;
4402 :
4403 : // Get pixel values.
4404 : // We can potentially read extra elements after the "normal" end of the
4405 : // source arrays, but the contract of papabySrcImage[iBand],
4406 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4407 : // is to have WARP_EXTRA_ELTS reserved at their end.
4408 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4409 : padfRowDensity, padfRowReal, padfRowImag))
4410 0 : continue;
4411 :
4412 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4413 :
4414 : // Iterate over pixels in row.
4415 2940 : if (padfRowDensity != nullptr)
4416 : {
4417 0 : for (int i = iMin; i <= iMax; ++i)
4418 : {
4419 : // Skip sampling if pixel has zero density.
4420 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD)
4421 0 : continue;
4422 :
4423 0 : nCountValid++;
4424 :
4425 : // Use a cached set of weights for this row.
4426 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4427 :
4428 : // Accumulate!
4429 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4430 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4431 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4432 0 : dfAccumulatorWeight += dfWeight2;
4433 : }
4434 : }
4435 2940 : else if (bIsNonComplex)
4436 : {
4437 1764 : double dfRowAccReal = 0.0;
4438 10560 : for (int i = iMin; i <= iMax; ++i)
4439 : {
4440 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4441 :
4442 : // Accumulate!
4443 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4444 : }
4445 :
4446 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4447 : }
4448 : else
4449 : {
4450 1176 : double dfRowAccReal = 0.0;
4451 1176 : double dfRowAccImag = 0.0;
4452 7040 : for (int i = iMin; i <= iMax; ++i)
4453 : {
4454 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4455 :
4456 : // Accumulate!
4457 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4458 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4459 : }
4460 :
4461 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4462 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4463 : }
4464 : }
4465 :
4466 620 : if (dfAccumulatorWeight < 0.000001 ||
4467 0 : (padfRowDensity != nullptr &&
4468 0 : (dfAccumulatorDensity < 0.000001 ||
4469 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4470 : {
4471 0 : *pdfDensity = 0.0;
4472 0 : return false;
4473 : }
4474 :
4475 : // Calculate the output taking into account weighting.
4476 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4477 : {
4478 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4479 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4480 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4481 0 : if (padfRowDensity != nullptr)
4482 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4483 : else
4484 0 : *pdfDensity = 1.0;
4485 : }
4486 : else
4487 : {
4488 620 : *pdfReal = dfAccumulatorReal;
4489 620 : *pdfImag = dfAccumulatorImag;
4490 620 : if (padfRowDensity != nullptr)
4491 0 : *pdfDensity = dfAccumulatorDensity;
4492 : else
4493 620 : *pdfDensity = 1.0;
4494 : }
4495 :
4496 620 : return true;
4497 : }
4498 :
4499 : /************************************************************************/
4500 : /* GWKComputeWeights() */
4501 : /************************************************************************/
4502 :
4503 3747080 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4504 : double dfDeltaX, double dfXScale, int jMin,
4505 : int jMax, double dfDeltaY, double dfYScale,
4506 : double *padfWeightsHorizontal,
4507 : double *padfWeightsVertical, double &dfInvWeights)
4508 : {
4509 :
4510 3747080 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4511 3747080 : CPLAssert(pfnGetWeight);
4512 3747080 : const FilterFunc4ValuesType pfnGetWeight4Values =
4513 3747080 : apfGWKFilter4Values[eResample];
4514 3747080 : CPLAssert(pfnGetWeight4Values);
4515 :
4516 3747080 : int i = iMin; // Used after for.
4517 3747080 : int iC = 0; // Used after for.
4518 : // Not zero, but as close as possible to it, to avoid potential division by
4519 : // zero at end of function
4520 3747080 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
4521 8313540 : for (; i + 2 < iMax; i += 4, iC += 4)
4522 : {
4523 4566760 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4524 4566760 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4525 4566760 : padfWeightsHorizontal[iC + 2] =
4526 4566760 : padfWeightsHorizontal[iC + 1] + dfXScale;
4527 4566760 : padfWeightsHorizontal[iC + 3] =
4528 4566760 : padfWeightsHorizontal[iC + 2] + dfXScale;
4529 4566450 : dfAccumulatorWeightHorizontal +=
4530 4566760 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4531 : }
4532 3963760 : for (; i <= iMax; ++i, ++iC)
4533 : {
4534 220112 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4535 216980 : padfWeightsHorizontal[iC] = dfWeight;
4536 216980 : dfAccumulatorWeightHorizontal += dfWeight;
4537 : }
4538 :
4539 3743640 : int j = jMin; // Used after for.
4540 3743640 : int jC = 0; // Used after for.
4541 : // Not zero, but as close as possible to it, to avoid potential division by
4542 : // zero at end of function
4543 3743640 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4544 7890620 : for (; j + 2 < jMax; j += 4, jC += 4)
4545 : {
4546 4147250 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4547 4147250 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4548 4147250 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4549 4147250 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4550 4146970 : dfAccumulatorWeightVertical +=
4551 4147250 : pfnGetWeight4Values(padfWeightsVertical + jC);
4552 : }
4553 8247980 : for (; j <= jMax; ++j, ++jC)
4554 : {
4555 4505800 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4556 4504610 : padfWeightsVertical[jC] = dfWeight;
4557 4504610 : dfAccumulatorWeightVertical += dfWeight;
4558 : }
4559 :
4560 3742180 : dfInvWeights =
4561 3742180 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4562 3742180 : }
4563 :
4564 : /************************************************************************/
4565 : /* GWKResampleNoMasksT() */
4566 : /************************************************************************/
4567 :
4568 : template <class T>
4569 : static bool
4570 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4571 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4572 : double *padfWeightsVertical, double &dfInvWeights)
4573 :
4574 : {
4575 : // Commonly used; save locally.
4576 : const int nSrcXSize = poWK->nSrcXSize;
4577 : const int nSrcYSize = poWK->nSrcYSize;
4578 :
4579 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4580 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4581 : const GPtrDiff_t iSrcOffset =
4582 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4583 :
4584 : const int nXRadius = poWK->nXRadius;
4585 : const int nYRadius = poWK->nYRadius;
4586 :
4587 : // Politely refuse to process invalid coordinates or obscenely small image.
4588 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4589 : nYRadius > nSrcYSize)
4590 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4591 : pValue);
4592 :
4593 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
4594 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4595 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4596 :
4597 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4598 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4599 :
4600 : int iMin = 1 - nXRadius;
4601 : if (iSrcX + iMin < 0)
4602 : iMin = -iSrcX;
4603 : int iMax = nXRadius;
4604 : if (iSrcX + iMax >= nSrcXSize - 1)
4605 : iMax = nSrcXSize - 1 - iSrcX;
4606 :
4607 : int jMin = 1 - nYRadius;
4608 : if (iSrcY + jMin < 0)
4609 : jMin = -iSrcY;
4610 : int jMax = nYRadius;
4611 : if (iSrcY + jMax >= nSrcYSize - 1)
4612 : jMax = nSrcYSize - 1 - iSrcY;
4613 :
4614 : if (iBand == 0)
4615 : {
4616 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4617 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4618 : padfWeightsVertical, dfInvWeights);
4619 : }
4620 :
4621 : // Loop over all rows in the kernel.
4622 : double dfAccumulator = 0.0;
4623 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4624 : {
4625 : const GPtrDiff_t iSampJ =
4626 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4627 :
4628 : // Loop over all pixels in the row.
4629 : double dfAccumulatorLocal = 0.0;
4630 : double dfAccumulatorLocal2 = 0.0;
4631 : int iC = 0;
4632 : int i = iMin;
4633 : // Process by chunk of 4 cols.
4634 : for (; i + 2 < iMax; i += 4, iC += 4)
4635 : {
4636 : // Retrieve the pixel & accumulate.
4637 : dfAccumulatorLocal +=
4638 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4639 : dfAccumulatorLocal +=
4640 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4641 : dfAccumulatorLocal2 +=
4642 : pSrcBand[i + 2 + iSampJ] * padfWeightsHorizontal[iC + 2];
4643 : dfAccumulatorLocal2 +=
4644 : pSrcBand[i + 3 + iSampJ] * padfWeightsHorizontal[iC + 3];
4645 : }
4646 : dfAccumulatorLocal += dfAccumulatorLocal2;
4647 : if (i < iMax)
4648 : {
4649 : dfAccumulatorLocal +=
4650 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4651 : dfAccumulatorLocal +=
4652 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4653 : i += 2;
4654 : iC += 2;
4655 : }
4656 : if (i == iMax)
4657 : {
4658 : dfAccumulatorLocal +=
4659 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4660 : }
4661 :
4662 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4663 : }
4664 :
4665 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4666 :
4667 : return true;
4668 : }
4669 :
4670 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4671 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4672 : #if defined(USE_SSE2)
4673 :
4674 : /************************************************************************/
4675 : /* GWKResampleNoMasks_SSE2_T() */
4676 : /************************************************************************/
4677 :
4678 : template <class T>
4679 9180523 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4680 : double dfSrcX, double dfSrcY, T *pValue,
4681 : double *padfWeightsHorizontal,
4682 : double *padfWeightsVertical,
4683 : double &dfInvWeights)
4684 : {
4685 : // Commonly used; save locally.
4686 9180523 : const int nSrcXSize = poWK->nSrcXSize;
4687 9180523 : const int nSrcYSize = poWK->nSrcYSize;
4688 :
4689 9180523 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4690 9180523 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4691 9180523 : const GPtrDiff_t iSrcOffset =
4692 9180523 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4693 9180523 : const int nXRadius = poWK->nXRadius;
4694 9180523 : const int nYRadius = poWK->nYRadius;
4695 :
4696 : // Politely refuse to process invalid coordinates or obscenely small image.
4697 9180523 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4698 : nYRadius > nSrcYSize)
4699 7912 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4700 3 : pValue);
4701 :
4702 9172611 : const T *pSrcBand =
4703 9172611 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4704 :
4705 9172611 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4706 9172611 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4707 9172611 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4708 9159961 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4709 :
4710 9153591 : int iMin = 1 - nXRadius;
4711 9153591 : if (iSrcX + iMin < 0)
4712 43143 : iMin = -iSrcX;
4713 9153591 : int iMax = nXRadius;
4714 9153591 : if (iSrcX + iMax >= nSrcXSize - 1)
4715 38106 : iMax = nSrcXSize - 1 - iSrcX;
4716 :
4717 9153591 : int jMin = 1 - nYRadius;
4718 9153591 : if (iSrcY + jMin < 0)
4719 49554 : jMin = -iSrcY;
4720 9153591 : int jMax = nYRadius;
4721 9153591 : if (iSrcY + jMax >= nSrcYSize - 1)
4722 36028 : jMax = nSrcYSize - 1 - iSrcY;
4723 :
4724 9153591 : if (iBand == 0)
4725 : {
4726 3746231 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4727 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4728 : padfWeightsVertical, dfInvWeights);
4729 : }
4730 :
4731 9160371 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4732 : // Process by chunk of 4 rows.
4733 9160371 : int jC = 0;
4734 9160371 : int j = jMin;
4735 9160371 : double dfAccumulator = 0.0;
4736 19415293 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4737 : {
4738 : // Loop over all pixels in the row.
4739 10249022 : int iC = 0;
4740 10249022 : int i = iMin;
4741 : // Process by chunk of 4 cols.
4742 10249022 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4743 10212372 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4744 10228582 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4745 10245412 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4746 26713680 : for (; i + 2 < iMax; i += 4, iC += 4)
4747 : {
4748 : // Retrieve the pixel & accumulate.
4749 16483688 : XMMReg4Double v_pixels_1 =
4750 16483688 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4751 16508288 : XMMReg4Double v_pixels_2 =
4752 16508288 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4753 16501388 : XMMReg4Double v_pixels_3 =
4754 16501388 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4755 16486988 : XMMReg4Double v_pixels_4 =
4756 16486988 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4757 :
4758 16511088 : XMMReg4Double v_padfWeight =
4759 16511088 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4760 :
4761 16496688 : v_acc_1 += v_pixels_1 * v_padfWeight;
4762 16499988 : v_acc_2 += v_pixels_2 * v_padfWeight;
4763 16501988 : v_acc_3 += v_pixels_3 * v_padfWeight;
4764 16492088 : v_acc_4 += v_pixels_4 * v_padfWeight;
4765 : }
4766 :
4767 10230002 : if (i < iMax)
4768 : {
4769 142910 : XMMReg2Double v_pixels_1 =
4770 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4771 142910 : XMMReg2Double v_pixels_2 =
4772 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4773 142910 : XMMReg2Double v_pixels_3 =
4774 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4775 142910 : XMMReg2Double v_pixels_4 =
4776 142910 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4777 :
4778 142910 : XMMReg2Double v_padfWeight =
4779 142910 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4780 :
4781 142910 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4782 142910 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4783 142910 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4784 142910 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4785 :
4786 142910 : i += 2;
4787 142910 : iC += 2;
4788 : }
4789 :
4790 10230002 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4791 10246672 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4792 10230432 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4793 10234522 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4794 :
4795 10254932 : if (i == iMax)
4796 : {
4797 49195 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4798 49195 : padfWeightsHorizontal[iC];
4799 49195 : dfAccumulatorLocal_2 +=
4800 49195 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4801 49195 : padfWeightsHorizontal[iC];
4802 49195 : dfAccumulatorLocal_3 +=
4803 49195 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4804 49195 : padfWeightsHorizontal[iC];
4805 49195 : dfAccumulatorLocal_4 +=
4806 49195 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4807 49195 : padfWeightsHorizontal[iC];
4808 : }
4809 :
4810 10254932 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4811 10254932 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4812 10254932 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4813 10254932 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4814 : }
4815 22244341 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4816 : {
4817 : // Loop over all pixels in the row.
4818 13069140 : int iC = 0;
4819 13069140 : int i = iMin;
4820 : // Process by chunk of 4 cols.
4821 13069140 : XMMReg4Double v_acc = XMMReg4Double::Zero();
4822 26190563 : for (; i + 2 < iMax; i += 4, iC += 4)
4823 : {
4824 : // Retrieve the pixel & accumulate.
4825 13115123 : XMMReg4Double v_pixels =
4826 13115123 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4827 13133723 : XMMReg4Double v_padfWeight =
4828 13133723 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4829 :
4830 13109123 : v_acc += v_pixels * v_padfWeight;
4831 : }
4832 :
4833 13075540 : double dfAccumulatorLocal = v_acc.GetHorizSum();
4834 :
4835 13078040 : if (i < iMax)
4836 : {
4837 173964 : dfAccumulatorLocal +=
4838 173964 : pSrcBand[i + iSampJ] * padfWeightsHorizontal[iC];
4839 173964 : dfAccumulatorLocal +=
4840 173964 : pSrcBand[i + 1 + iSampJ] * padfWeightsHorizontal[iC + 1];
4841 173964 : i += 2;
4842 173964 : iC += 2;
4843 : }
4844 13078040 : if (i == iMax)
4845 : {
4846 33020 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4847 33020 : padfWeightsHorizontal[iC];
4848 : }
4849 :
4850 13078040 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4851 : }
4852 :
4853 9175171 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4854 :
4855 9124341 : return true;
4856 : }
4857 :
4858 : /************************************************************************/
4859 : /* GWKResampleNoMasksT<GByte>() */
4860 : /************************************************************************/
4861 :
4862 : template <>
4863 8583790 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4864 : double dfSrcX, double dfSrcY, GByte *pValue,
4865 : double *padfWeightsHorizontal,
4866 : double *padfWeightsVertical,
4867 : double &dfInvWeights)
4868 : {
4869 8583790 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4870 : padfWeightsHorizontal, padfWeightsVertical,
4871 8576300 : dfInvWeights);
4872 : }
4873 :
4874 : /************************************************************************/
4875 : /* GWKResampleNoMasksT<GInt16>() */
4876 : /************************************************************************/
4877 :
4878 : template <>
4879 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4880 : double dfSrcX, double dfSrcY, GInt16 *pValue,
4881 : double *padfWeightsHorizontal,
4882 : double *padfWeightsVertical,
4883 : double &dfInvWeights)
4884 : {
4885 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4886 : padfWeightsHorizontal, padfWeightsVertical,
4887 252563 : dfInvWeights);
4888 : }
4889 :
4890 : /************************************************************************/
4891 : /* GWKResampleNoMasksT<GUInt16>() */
4892 : /************************************************************************/
4893 :
4894 : template <>
4895 343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4896 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
4897 : double *padfWeightsHorizontal,
4898 : double *padfWeightsVertical,
4899 : double &dfInvWeights)
4900 : {
4901 343440 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4902 : padfWeightsHorizontal, padfWeightsVertical,
4903 343440 : dfInvWeights);
4904 : }
4905 :
4906 : /************************************************************************/
4907 : /* GWKResampleNoMasksT<float>() */
4908 : /************************************************************************/
4909 :
4910 : template <>
4911 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
4912 : double dfSrcX, double dfSrcY, float *pValue,
4913 : double *padfWeightsHorizontal,
4914 : double *padfWeightsVertical,
4915 : double &dfInvWeights)
4916 : {
4917 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4918 : padfWeightsHorizontal, padfWeightsVertical,
4919 2500 : dfInvWeights);
4920 : }
4921 :
4922 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4923 :
4924 : /************************************************************************/
4925 : /* GWKResampleNoMasksT<double>() */
4926 : /************************************************************************/
4927 :
4928 : template <>
4929 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
4930 : double dfSrcX, double dfSrcY, double *pValue,
4931 : double *padfWeightsHorizontal,
4932 : double *padfWeightsVertical,
4933 : double &dfInvWeights)
4934 : {
4935 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4936 : padfWeightsHorizontal, padfWeightsVertical,
4937 : dfInvWeights);
4938 : }
4939 :
4940 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4941 :
4942 : #endif /* defined(USE_SSE2) */
4943 :
4944 : /************************************************************************/
4945 : /* GWKRoundSourceCoordinates() */
4946 : /************************************************************************/
4947 :
4948 1000 : static void GWKRoundSourceCoordinates(
4949 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
4950 : double dfSrcCoordPrecision, double dfErrorThreshold,
4951 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
4952 : double dfDstY)
4953 : {
4954 1000 : double dfPct = 0.8;
4955 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
4956 : {
4957 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4958 : }
4959 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4960 :
4961 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
4962 : {
4963 500000 : const double dfXBefore = padfX[iDstX];
4964 500000 : const double dfYBefore = padfY[iDstX];
4965 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4966 : dfSrcCoordPrecision;
4967 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4968 : dfSrcCoordPrecision;
4969 :
4970 : // If we are in an uncertainty zone, go to non-approximated
4971 : // transformation.
4972 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4973 : // be at least 10 times greater than the approximation error.
4974 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4975 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
4976 : {
4977 180090 : padfX[iDstX] = iDstX + dfDstXOff;
4978 180090 : padfY[iDstX] = dfDstY;
4979 180090 : padfZ[iDstX] = 0.0;
4980 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
4981 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
4982 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4983 : dfSrcCoordPrecision;
4984 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4985 : dfSrcCoordPrecision;
4986 : }
4987 : }
4988 1000 : }
4989 :
4990 : /************************************************************************/
4991 : /* GWKOpenCLCase() */
4992 : /* */
4993 : /* This is identical to GWKGeneralCase(), but functions via */
4994 : /* OpenCL. This means we have vector optimization (SSE) and/or */
4995 : /* GPU optimization depending on our prefs. The code itself is */
4996 : /* general and not optimized, but by defining constants we can */
4997 : /* make some pretty darn good code on the fly. */
4998 : /************************************************************************/
4999 :
5000 : #if defined(HAVE_OPENCL)
5001 0 : static CPLErr GWKOpenCLCase(GDALWarpKernel *poWK)
5002 : {
5003 0 : const int nDstXSize = poWK->nDstXSize;
5004 0 : const int nDstYSize = poWK->nDstYSize;
5005 0 : const int nSrcXSize = poWK->nSrcXSize;
5006 0 : const int nSrcYSize = poWK->nSrcYSize;
5007 0 : const int nDstXOff = poWK->nDstXOff;
5008 0 : const int nDstYOff = poWK->nDstYOff;
5009 0 : const int nSrcXOff = poWK->nSrcXOff;
5010 0 : const int nSrcYOff = poWK->nSrcYOff;
5011 0 : bool bUseImag = false;
5012 :
5013 : cl_channel_type imageFormat;
5014 0 : switch (poWK->eWorkingDataType)
5015 : {
5016 0 : case GDT_Byte:
5017 0 : imageFormat = CL_UNORM_INT8;
5018 0 : break;
5019 0 : case GDT_UInt16:
5020 0 : imageFormat = CL_UNORM_INT16;
5021 0 : break;
5022 0 : case GDT_CInt16:
5023 0 : bUseImag = true;
5024 : [[fallthrough]];
5025 0 : case GDT_Int16:
5026 0 : imageFormat = CL_SNORM_INT16;
5027 0 : break;
5028 0 : case GDT_CFloat32:
5029 0 : bUseImag = true;
5030 : [[fallthrough]];
5031 0 : case GDT_Float32:
5032 0 : imageFormat = CL_FLOAT;
5033 0 : break;
5034 0 : default:
5035 : // No support for higher precision formats.
5036 0 : CPLDebug("OpenCL", "Unsupported resampling OpenCL data type %d.",
5037 0 : static_cast<int>(poWK->eWorkingDataType));
5038 0 : return CE_Warning;
5039 : }
5040 :
5041 : OCLResampAlg resampAlg;
5042 0 : switch (poWK->eResample)
5043 : {
5044 0 : case GRA_Bilinear:
5045 0 : resampAlg = OCL_Bilinear;
5046 0 : break;
5047 0 : case GRA_Cubic:
5048 0 : resampAlg = OCL_Cubic;
5049 0 : break;
5050 0 : case GRA_CubicSpline:
5051 0 : resampAlg = OCL_CubicSpline;
5052 0 : break;
5053 0 : case GRA_Lanczos:
5054 0 : resampAlg = OCL_Lanczos;
5055 0 : break;
5056 0 : default:
5057 : // No support for higher precision formats.
5058 0 : CPLDebug("OpenCL",
5059 : "Unsupported resampling OpenCL resampling alg %d.",
5060 0 : static_cast<int>(poWK->eResample));
5061 0 : return CE_Warning;
5062 : }
5063 :
5064 0 : struct oclWarper *warper = nullptr;
5065 : cl_int err;
5066 0 : CPLErr eErr = CE_None;
5067 :
5068 : // TODO(schwehr): Fix indenting.
5069 : try
5070 : {
5071 :
5072 : // Using a factor of 2 or 4 seems to have much less rounding error
5073 : // than 3 on the GPU.
5074 : // Then the rounding error can cause strange artifacts under the
5075 : // right conditions.
5076 0 : warper = GDALWarpKernelOpenCL_createEnv(
5077 : nSrcXSize, nSrcYSize, nDstXSize, nDstYSize, imageFormat,
5078 0 : poWK->nBands, 4, bUseImag, poWK->papanBandSrcValid != nullptr,
5079 : poWK->pafDstDensity, poWK->padfDstNoDataReal, resampAlg, &err);
5080 :
5081 0 : if (err != CL_SUCCESS || warper == nullptr)
5082 : {
5083 0 : eErr = CE_Warning;
5084 0 : if (warper != nullptr)
5085 0 : throw eErr;
5086 0 : return eErr;
5087 : }
5088 :
5089 0 : CPLDebug("GDAL",
5090 : "GDALWarpKernel()::GWKOpenCLCase() "
5091 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
5092 : nSrcXOff, nSrcYOff, nSrcXSize, nSrcYSize, nDstXOff, nDstYOff,
5093 : nDstXSize, nDstYSize);
5094 :
5095 0 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
5096 : {
5097 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
5098 0 : eErr = CE_Failure;
5099 0 : throw eErr;
5100 : }
5101 :
5102 : /* ====================================================================
5103 : */
5104 : /* Loop over bands. */
5105 : /* ====================================================================
5106 : */
5107 0 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5108 : {
5109 0 : if (poWK->papanBandSrcValid != nullptr &&
5110 0 : poWK->papanBandSrcValid[iBand] != nullptr)
5111 : {
5112 0 : GDALWarpKernelOpenCL_setSrcValid(
5113 : warper,
5114 0 : reinterpret_cast<int *>(poWK->papanBandSrcValid[iBand]),
5115 : iBand);
5116 0 : if (err != CL_SUCCESS)
5117 : {
5118 0 : CPLError(
5119 : CE_Failure, CPLE_AppDefined,
5120 : "OpenCL routines reported failure (%d) on line %d.",
5121 : static_cast<int>(err), __LINE__);
5122 0 : eErr = CE_Failure;
5123 0 : throw eErr;
5124 : }
5125 : }
5126 :
5127 0 : err = GDALWarpKernelOpenCL_setSrcImg(
5128 0 : warper, poWK->papabySrcImage[iBand], iBand);
5129 0 : if (err != CL_SUCCESS)
5130 : {
5131 0 : CPLError(CE_Failure, CPLE_AppDefined,
5132 : "OpenCL routines reported failure (%d) on line %d.",
5133 : static_cast<int>(err), __LINE__);
5134 0 : eErr = CE_Failure;
5135 0 : throw eErr;
5136 : }
5137 :
5138 0 : err = GDALWarpKernelOpenCL_setDstImg(
5139 0 : warper, poWK->papabyDstImage[iBand], iBand);
5140 0 : if (err != CL_SUCCESS)
5141 : {
5142 0 : CPLError(CE_Failure, CPLE_AppDefined,
5143 : "OpenCL routines reported failure (%d) on line %d.",
5144 : static_cast<int>(err), __LINE__);
5145 0 : eErr = CE_Failure;
5146 0 : throw eErr;
5147 : }
5148 : }
5149 :
5150 : /* --------------------------------------------------------------------
5151 : */
5152 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5153 : /* scanlines worth of positions. */
5154 : /* --------------------------------------------------------------------
5155 : */
5156 :
5157 : // For x, 2 *, because we cache the precomputed values at the end.
5158 : double *padfX =
5159 0 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5160 : double *padfY =
5161 0 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5162 : double *padfZ =
5163 0 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5164 : int *pabSuccess =
5165 0 : static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5166 0 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5167 0 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5168 0 : const double dfErrorThreshold = CPLAtof(CSLFetchNameValueDef(
5169 0 : poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5170 :
5171 : // Precompute values.
5172 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5173 0 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5174 :
5175 : /* ====================================================================
5176 : */
5177 : /* Loop over output lines. */
5178 : /* ====================================================================
5179 : */
5180 0 : for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; ++iDstY)
5181 : {
5182 : /* ----------------------------------------------------------------
5183 : */
5184 : /* Setup points to transform to source image space. */
5185 : /* ----------------------------------------------------------------
5186 : */
5187 0 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5188 0 : const double dfYConst = iDstY + 0.5 + poWK->nDstYOff;
5189 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5190 0 : padfY[iDstX] = dfYConst;
5191 0 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5192 :
5193 : /* ----------------------------------------------------------------
5194 : */
5195 : /* Transform the points from destination pixel/line
5196 : * coordinates*/
5197 : /* to source pixel/line coordinates. */
5198 : /* ----------------------------------------------------------------
5199 : */
5200 0 : poWK->pfnTransformer(poWK->pTransformerArg, TRUE, nDstXSize, padfX,
5201 : padfY, padfZ, pabSuccess);
5202 0 : if (dfSrcCoordPrecision > 0.0)
5203 : {
5204 0 : GWKRoundSourceCoordinates(
5205 : nDstXSize, padfX, padfY, padfZ, pabSuccess,
5206 : dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
5207 : poWK->pTransformerArg, 0.5 + nDstXOff,
5208 0 : iDstY + 0.5 + nDstYOff);
5209 : }
5210 :
5211 0 : err = GDALWarpKernelOpenCL_setCoordRow(
5212 : warper, padfX, padfY, nSrcXOff, nSrcYOff, pabSuccess, iDstY);
5213 0 : if (err != CL_SUCCESS)
5214 : {
5215 0 : CPLError(CE_Failure, CPLE_AppDefined,
5216 : "OpenCL routines reported failure (%d) on line %d.",
5217 : static_cast<int>(err), __LINE__);
5218 0 : eErr = CE_Failure;
5219 0 : break;
5220 : }
5221 :
5222 : // Update the valid & density masks because we don't do so in the
5223 : // kernel.
5224 0 : for (int iDstX = 0; iDstX < nDstXSize && eErr == CE_None; iDstX++)
5225 : {
5226 0 : const double dfX = padfX[iDstX];
5227 0 : const double dfY = padfY[iDstX];
5228 0 : const GPtrDiff_t iDstOffset =
5229 0 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5230 :
5231 : // See GWKGeneralCase() for appropriate commenting.
5232 0 : if (!pabSuccess[iDstX] || dfX < nSrcXOff || dfY < nSrcYOff)
5233 0 : continue;
5234 :
5235 0 : int iSrcX = static_cast<int>(dfX) - nSrcXOff;
5236 0 : int iSrcY = static_cast<int>(dfY) - nSrcYOff;
5237 :
5238 0 : if (iSrcX < 0 || iSrcX >= nSrcXSize || iSrcY < 0 ||
5239 : iSrcY >= nSrcYSize)
5240 0 : continue;
5241 :
5242 0 : GPtrDiff_t iSrcOffset =
5243 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
5244 0 : double dfDensity = 1.0;
5245 :
5246 0 : if (poWK->pafUnifiedSrcDensity != nullptr && iSrcX >= 0 &&
5247 0 : iSrcY >= 0 && iSrcX < nSrcXSize && iSrcY < nSrcYSize)
5248 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5249 :
5250 0 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5251 :
5252 : // Because this is on the bit-wise level, it can't be done well
5253 : // in OpenCL.
5254 0 : if (poWK->panDstValid != nullptr)
5255 0 : poWK->panDstValid[iDstOffset >> 5] |=
5256 0 : 0x01 << (iDstOffset & 0x1f);
5257 : }
5258 : }
5259 :
5260 0 : CPLFree(padfX);
5261 0 : CPLFree(padfY);
5262 0 : CPLFree(padfZ);
5263 0 : CPLFree(pabSuccess);
5264 :
5265 0 : if (eErr != CE_None)
5266 0 : throw eErr;
5267 :
5268 0 : err = GDALWarpKernelOpenCL_runResamp(
5269 : warper, poWK->pafUnifiedSrcDensity, poWK->panUnifiedSrcValid,
5270 : poWK->pafDstDensity, poWK->panDstValid, poWK->dfXScale,
5271 : poWK->dfYScale, poWK->dfXFilter, poWK->dfYFilter, poWK->nXRadius,
5272 : poWK->nYRadius, poWK->nFiltInitX, poWK->nFiltInitY);
5273 :
5274 0 : if (err != CL_SUCCESS)
5275 : {
5276 0 : CPLError(CE_Failure, CPLE_AppDefined,
5277 : "OpenCL routines reported failure (%d) on line %d.",
5278 : static_cast<int>(err), __LINE__);
5279 0 : eErr = CE_Failure;
5280 0 : throw eErr;
5281 : }
5282 :
5283 : /* ====================================================================
5284 : */
5285 : /* Loop over output lines. */
5286 : /* ====================================================================
5287 : */
5288 0 : for (int iDstY = 0; iDstY < nDstYSize && eErr == CE_None; iDstY++)
5289 : {
5290 0 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5291 : {
5292 0 : void *rowReal = nullptr;
5293 0 : void *rowImag = nullptr;
5294 0 : GByte *pabyDst = poWK->papabyDstImage[iBand];
5295 :
5296 0 : err = GDALWarpKernelOpenCL_getRow(warper, &rowReal, &rowImag,
5297 : iDstY, iBand);
5298 0 : if (err != CL_SUCCESS)
5299 : {
5300 0 : CPLError(
5301 : CE_Failure, CPLE_AppDefined,
5302 : "OpenCL routines reported failure (%d) on line %d.",
5303 : static_cast<int>(err), __LINE__);
5304 0 : eErr = CE_Failure;
5305 0 : throw eErr;
5306 : }
5307 :
5308 : // Copy the data from the warper to GDAL's memory.
5309 0 : switch (poWK->eWorkingDataType)
5310 : {
5311 0 : case GDT_Byte:
5312 0 : memcpy(&(pabyDst[iDstY * nDstXSize]), rowReal,
5313 : sizeof(GByte) * nDstXSize);
5314 0 : break;
5315 0 : case GDT_Int16:
5316 0 : memcpy(&(reinterpret_cast<GInt16 *>(
5317 0 : pabyDst)[iDstY * nDstXSize]),
5318 0 : rowReal, sizeof(GInt16) * nDstXSize);
5319 0 : break;
5320 0 : case GDT_UInt16:
5321 0 : memcpy(&(reinterpret_cast<GUInt16 *>(
5322 0 : pabyDst)[iDstY * nDstXSize]),
5323 0 : rowReal, sizeof(GUInt16) * nDstXSize);
5324 0 : break;
5325 0 : case GDT_Float32:
5326 0 : memcpy(&(reinterpret_cast<float *>(
5327 0 : pabyDst)[iDstY * nDstXSize]),
5328 0 : rowReal, sizeof(float) * nDstXSize);
5329 0 : break;
5330 0 : case GDT_CInt16:
5331 : {
5332 0 : GInt16 *pabyDstI16 = &(reinterpret_cast<GInt16 *>(
5333 0 : pabyDst)[iDstY * nDstXSize]);
5334 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5335 : {
5336 0 : pabyDstI16[iDstX * 2] =
5337 0 : static_cast<GInt16 *>(rowReal)[iDstX];
5338 0 : pabyDstI16[iDstX * 2 + 1] =
5339 0 : static_cast<GInt16 *>(rowImag)[iDstX];
5340 : }
5341 : }
5342 0 : break;
5343 0 : case GDT_CFloat32:
5344 : {
5345 0 : float *pabyDstF32 = &(reinterpret_cast<float *>(
5346 0 : pabyDst)[iDstY * nDstXSize]);
5347 0 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5348 : {
5349 0 : pabyDstF32[iDstX * 2] =
5350 0 : static_cast<float *>(rowReal)[iDstX];
5351 0 : pabyDstF32[iDstX * 2 + 1] =
5352 0 : static_cast<float *>(rowImag)[iDstX];
5353 : }
5354 : }
5355 0 : break;
5356 0 : default:
5357 : // No support for higher precision formats.
5358 0 : CPLError(CE_Failure, CPLE_AppDefined,
5359 : "Unsupported resampling OpenCL data type %d.",
5360 0 : static_cast<int>(poWK->eWorkingDataType));
5361 0 : eErr = CE_Failure;
5362 0 : throw eErr;
5363 : }
5364 : }
5365 : }
5366 : }
5367 0 : catch (const CPLErr &)
5368 : {
5369 : }
5370 :
5371 0 : if ((err = GDALWarpKernelOpenCL_deleteEnv(warper)) != CL_SUCCESS)
5372 : {
5373 0 : CPLError(CE_Failure, CPLE_AppDefined,
5374 : "OpenCL routines reported failure (%d) on line %d.",
5375 : static_cast<int>(err), __LINE__);
5376 0 : return CE_Failure;
5377 : }
5378 :
5379 0 : return eErr;
5380 : }
5381 : #endif /* defined(HAVE_OPENCL) */
5382 :
5383 : /************************************************************************/
5384 : /* GWKCheckAndComputeSrcOffsets() */
5385 : /************************************************************************/
5386 : static CPL_INLINE bool
5387 109634000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
5388 : int _iDstY, double *_padfX, double *_padfY,
5389 : int _nSrcXSize, int _nSrcYSize,
5390 : GPtrDiff_t &iSrcOffset)
5391 : {
5392 109634000 : const GDALWarpKernel *_poWK = psJob->poWK;
5393 109760000 : for (int iTry = 0; iTry < 2; ++iTry)
5394 : {
5395 109768000 : if (iTry == 1)
5396 : {
5397 : // If the source coordinate is slightly outside of the source raster
5398 : // retry to transform it alone, so that the exact coordinate
5399 : // transformer is used.
5400 :
5401 125881 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
5402 125881 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
5403 125881 : double dfZ = 0;
5404 125881 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
5405 125881 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
5406 125881 : _pabSuccess + _iDstX);
5407 : }
5408 109768000 : if (!_pabSuccess[_iDstX])
5409 3593470 : return false;
5410 :
5411 : // If this happens this is likely the symptom of a bug somewhere.
5412 106175000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
5413 : {
5414 : static bool bNanCoordFound = false;
5415 0 : if (!bNanCoordFound)
5416 : {
5417 0 : CPLDebug("WARP",
5418 : "GWKCheckAndComputeSrcOffsets(): "
5419 : "NaN coordinate found on point %d.",
5420 : _iDstX);
5421 0 : bNanCoordFound = true;
5422 : }
5423 0 : return false;
5424 : }
5425 :
5426 : /* --------------------------------------------------------------------
5427 : */
5428 : /* Figure out what pixel we want in our source raster, and skip */
5429 : /* further processing if it is well off the source image. */
5430 : /* --------------------------------------------------------------------
5431 : */
5432 : /* We test against the value before casting to avoid the */
5433 : /* problem of asymmetric truncation effects around zero. That is */
5434 : /* -0.5 will be 0 when cast to an int. */
5435 106132000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5436 : {
5437 : // If the source coordinate is slightly outside of the source raster
5438 : // retry to transform it alone, so that the exact coordinate
5439 : // transformer is used.
5440 4137570 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5441 21497 : continue;
5442 4116080 : return false;
5443 : }
5444 :
5445 101994000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5446 : {
5447 : // If the source coordinate is slightly outside of the source raster
5448 : // retry to transform it alone, so that the exact coordinate
5449 : // transformer is used.
5450 4793040 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5451 38555 : continue;
5452 4754490 : return false;
5453 : }
5454 :
5455 : // Check for potential overflow when casting from float to int, (if
5456 : // operating outside natural projection area, padfX/Y can be a very huge
5457 : // positive number before doing the actual conversion), as such cast is
5458 : // undefined behavior that can trigger exception with some compilers
5459 : // (see #6753)
5460 97201100 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5461 : {
5462 : // If the source coordinate is slightly outside of the source raster
5463 : // retry to transform it alone, so that the exact coordinate
5464 : // transformer is used.
5465 3503560 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5466 33295 : continue;
5467 3470270 : return false;
5468 : }
5469 93697500 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5470 : {
5471 : // If the source coordinate is slightly outside of the source raster
5472 : // retry to transform it alone, so that the exact coordinate
5473 : // transformer is used.
5474 3731490 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5475 32536 : continue;
5476 3698950 : return false;
5477 : }
5478 :
5479 89966000 : break;
5480 : }
5481 :
5482 89958300 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5483 89958300 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5484 89958300 : if (iSrcX == _nSrcXSize)
5485 0 : iSrcX--;
5486 89958300 : if (iSrcY == _nSrcYSize)
5487 0 : iSrcY--;
5488 :
5489 : // Those checks should normally be OK given the previous ones.
5490 89958300 : CPLAssert(iSrcX >= 0);
5491 89958300 : CPLAssert(iSrcY >= 0);
5492 89958300 : CPLAssert(iSrcX < _nSrcXSize);
5493 89958300 : CPLAssert(iSrcY < _nSrcYSize);
5494 :
5495 89958300 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5496 :
5497 89958300 : return true;
5498 : }
5499 :
5500 : /************************************************************************/
5501 : /* GWKOneSourceCornerFailsToReproject() */
5502 : /************************************************************************/
5503 :
5504 737 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5505 : {
5506 737 : GDALWarpKernel *poWK = psJob->poWK;
5507 2201 : for (int iY = 0; iY <= 1; ++iY)
5508 : {
5509 4398 : for (int iX = 0; iX <= 1; ++iX)
5510 : {
5511 2934 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5512 2934 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5513 2934 : double dfZTmp = 0;
5514 2934 : int nSuccess = FALSE;
5515 2934 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5516 : &dfYTmp, &dfZTmp, &nSuccess);
5517 2934 : if (!nSuccess)
5518 6 : return true;
5519 : }
5520 : }
5521 731 : return false;
5522 : }
5523 :
5524 : /************************************************************************/
5525 : /* GWKAdjustSrcOffsetOnEdge() */
5526 : /************************************************************************/
5527 :
5528 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5529 : GPtrDiff_t &iSrcOffset)
5530 : {
5531 9714 : GDALWarpKernel *poWK = psJob->poWK;
5532 9714 : const int nSrcXSize = poWK->nSrcXSize;
5533 9714 : const int nSrcYSize = poWK->nSrcYSize;
5534 :
5535 : // Check if the computed source position slightly altered
5536 : // fails to reproject. If so, then we are at the edge of
5537 : // the validity area, and it is worth checking neighbour
5538 : // source pixels for validity.
5539 9714 : int nSuccess = FALSE;
5540 : {
5541 9714 : double dfXTmp =
5542 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5543 9714 : double dfYTmp =
5544 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5545 9714 : double dfZTmp = 0;
5546 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5547 : &dfZTmp, &nSuccess);
5548 : }
5549 9714 : if (nSuccess)
5550 : {
5551 6996 : double dfXTmp =
5552 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5553 6996 : double dfYTmp =
5554 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5555 6996 : double dfZTmp = 0;
5556 6996 : nSuccess = FALSE;
5557 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5558 : &dfZTmp, &nSuccess);
5559 : }
5560 9714 : if (nSuccess)
5561 : {
5562 5624 : double dfXTmp =
5563 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5564 5624 : double dfYTmp =
5565 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5566 5624 : double dfZTmp = 0;
5567 5624 : nSuccess = FALSE;
5568 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5569 : &dfZTmp, &nSuccess);
5570 : }
5571 :
5572 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5573 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5574 : {
5575 1860 : iSrcOffset++;
5576 1860 : return true;
5577 : }
5578 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5579 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5580 : {
5581 1334 : iSrcOffset += nSrcXSize;
5582 1334 : return true;
5583 : }
5584 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5585 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5586 : {
5587 956 : iSrcOffset--;
5588 956 : return true;
5589 : }
5590 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5591 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5592 : {
5593 340 : iSrcOffset -= nSrcXSize;
5594 340 : return true;
5595 : }
5596 :
5597 5224 : return false;
5598 : }
5599 :
5600 : /************************************************************************/
5601 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5602 : /************************************************************************/
5603 :
5604 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5605 : GPtrDiff_t &iSrcOffset)
5606 : {
5607 0 : GDALWarpKernel *poWK = psJob->poWK;
5608 0 : const int nSrcXSize = poWK->nSrcXSize;
5609 0 : const int nSrcYSize = poWK->nSrcYSize;
5610 :
5611 : // Check if the computed source position slightly altered
5612 : // fails to reproject. If so, then we are at the edge of
5613 : // the validity area, and it is worth checking neighbour
5614 : // source pixels for validity.
5615 0 : int nSuccess = FALSE;
5616 : {
5617 0 : double dfXTmp =
5618 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5619 0 : double dfYTmp =
5620 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5621 0 : double dfZTmp = 0;
5622 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5623 : &dfZTmp, &nSuccess);
5624 : }
5625 0 : if (nSuccess)
5626 : {
5627 0 : double dfXTmp =
5628 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5629 0 : double dfYTmp =
5630 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5631 0 : double dfZTmp = 0;
5632 0 : nSuccess = FALSE;
5633 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5634 : &dfZTmp, &nSuccess);
5635 : }
5636 0 : if (nSuccess)
5637 : {
5638 0 : double dfXTmp =
5639 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5640 0 : double dfYTmp =
5641 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5642 0 : double dfZTmp = 0;
5643 0 : nSuccess = FALSE;
5644 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5645 : &dfZTmp, &nSuccess);
5646 : }
5647 :
5648 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5649 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >= SRC_DENSITY_THRESHOLD)
5650 : {
5651 0 : iSrcOffset++;
5652 0 : return true;
5653 : }
5654 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5655 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5656 : SRC_DENSITY_THRESHOLD)
5657 : {
5658 0 : iSrcOffset += nSrcXSize;
5659 0 : return true;
5660 : }
5661 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5662 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5663 : SRC_DENSITY_THRESHOLD)
5664 : {
5665 0 : iSrcOffset--;
5666 0 : return true;
5667 : }
5668 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5669 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5670 : SRC_DENSITY_THRESHOLD)
5671 : {
5672 0 : iSrcOffset -= nSrcXSize;
5673 0 : return true;
5674 : }
5675 :
5676 0 : return false;
5677 : }
5678 :
5679 : /************************************************************************/
5680 : /* GWKGeneralCase() */
5681 : /* */
5682 : /* This is the most general case. It attempts to handle all */
5683 : /* possible features with relatively little concern for */
5684 : /* efficiency. */
5685 : /************************************************************************/
5686 :
5687 243 : static void GWKGeneralCaseThread(void *pData)
5688 : {
5689 243 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5690 243 : GDALWarpKernel *poWK = psJob->poWK;
5691 243 : const int iYMin = psJob->iYMin;
5692 243 : const int iYMax = psJob->iYMax;
5693 : const double dfMultFactorVerticalShiftPipeline =
5694 243 : poWK->bApplyVerticalShift
5695 243 : ? CPLAtof(CSLFetchNameValueDef(
5696 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5697 : "1.0"))
5698 243 : : 0.0;
5699 :
5700 243 : int nDstXSize = poWK->nDstXSize;
5701 243 : int nSrcXSize = poWK->nSrcXSize;
5702 243 : int nSrcYSize = poWK->nSrcYSize;
5703 :
5704 : /* -------------------------------------------------------------------- */
5705 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5706 : /* scanlines worth of positions. */
5707 : /* -------------------------------------------------------------------- */
5708 : // For x, 2 *, because we cache the precomputed values at the end.
5709 : double *padfX =
5710 243 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5711 : double *padfY =
5712 243 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5713 : double *padfZ =
5714 243 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5715 243 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5716 :
5717 243 : const bool bUse4SamplesFormula =
5718 243 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5719 :
5720 243 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5721 243 : if (poWK->eResample != GRA_NearestNeighbour)
5722 : {
5723 224 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5724 : }
5725 243 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5726 243 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5727 243 : const double dfErrorThreshold = CPLAtof(
5728 243 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5729 :
5730 : const bool bOneSourceCornerFailsToReproject =
5731 243 : GWKOneSourceCornerFailsToReproject(psJob);
5732 :
5733 : // Precompute values.
5734 6513 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5735 6270 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5736 :
5737 : /* ==================================================================== */
5738 : /* Loop over output lines. */
5739 : /* ==================================================================== */
5740 6513 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5741 : {
5742 : /* --------------------------------------------------------------------
5743 : */
5744 : /* Setup points to transform to source image space. */
5745 : /* --------------------------------------------------------------------
5746 : */
5747 6270 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5748 6270 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5749 242830 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5750 236560 : padfY[iDstX] = dfY;
5751 6270 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5752 :
5753 : /* --------------------------------------------------------------------
5754 : */
5755 : /* Transform the points from destination pixel/line coordinates */
5756 : /* to source pixel/line coordinates. */
5757 : /* --------------------------------------------------------------------
5758 : */
5759 6270 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5760 : padfY, padfZ, pabSuccess);
5761 6270 : if (dfSrcCoordPrecision > 0.0)
5762 : {
5763 0 : GWKRoundSourceCoordinates(
5764 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5765 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5766 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5767 : }
5768 :
5769 : /* ====================================================================
5770 : */
5771 : /* Loop over pixels in output scanline. */
5772 : /* ====================================================================
5773 : */
5774 242830 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5775 : {
5776 236560 : GPtrDiff_t iSrcOffset = 0;
5777 236560 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5778 : padfX, padfY, nSrcXSize,
5779 : nSrcYSize, iSrcOffset))
5780 0 : continue;
5781 :
5782 : /* --------------------------------------------------------------------
5783 : */
5784 : /* Do not try to apply transparent/invalid source pixels to the
5785 : */
5786 : /* destination. This currently ignores the multi-pixel input
5787 : */
5788 : /* of bilinear and cubic resamples. */
5789 : /* --------------------------------------------------------------------
5790 : */
5791 236560 : double dfDensity = 1.0;
5792 :
5793 236560 : if (poWK->pafUnifiedSrcDensity != nullptr)
5794 : {
5795 1200 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5796 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD)
5797 : {
5798 0 : if (!bOneSourceCornerFailsToReproject)
5799 : {
5800 0 : continue;
5801 : }
5802 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5803 : psJob, iSrcOffset))
5804 : {
5805 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
5806 : }
5807 : else
5808 : {
5809 0 : continue;
5810 : }
5811 : }
5812 : }
5813 :
5814 236560 : if (poWK->panUnifiedSrcValid != nullptr &&
5815 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5816 : {
5817 0 : if (!bOneSourceCornerFailsToReproject)
5818 : {
5819 0 : continue;
5820 : }
5821 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5822 : {
5823 0 : continue;
5824 : }
5825 : }
5826 :
5827 : /* ====================================================================
5828 : */
5829 : /* Loop processing each band. */
5830 : /* ====================================================================
5831 : */
5832 236560 : bool bHasFoundDensity = false;
5833 :
5834 236560 : const GPtrDiff_t iDstOffset =
5835 236560 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5836 473120 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5837 : {
5838 236560 : double dfBandDensity = 0.0;
5839 236560 : double dfValueReal = 0.0;
5840 236560 : double dfValueImag = 0.0;
5841 :
5842 : /* --------------------------------------------------------------------
5843 : */
5844 : /* Collect the source value. */
5845 : /* --------------------------------------------------------------------
5846 : */
5847 236560 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5848 : nSrcYSize == 1)
5849 : {
5850 : // FALSE is returned if dfBandDensity == 0, which is
5851 : // checked below.
5852 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5853 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5854 : &dfValueImag));
5855 : }
5856 235992 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5857 : {
5858 648 : GWKBilinearResample4Sample(
5859 648 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5860 648 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5861 : &dfValueReal, &dfValueImag);
5862 : }
5863 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5864 : {
5865 248 : GWKCubicResample4Sample(
5866 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5867 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5868 : &dfValueReal, &dfValueImag);
5869 : }
5870 : else
5871 : #ifdef DEBUG
5872 : // Only useful for clang static analyzer.
5873 235096 : if (psWrkStruct != nullptr)
5874 : #endif
5875 : {
5876 235096 : psWrkStruct->pfnGWKResample(
5877 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5878 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5879 : &dfValueReal, &dfValueImag, psWrkStruct);
5880 : }
5881 :
5882 : // If we didn't find any valid inputs skip to next band.
5883 236560 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5884 0 : continue;
5885 :
5886 236560 : if (poWK->bApplyVerticalShift)
5887 : {
5888 0 : if (!std::isfinite(padfZ[iDstX]))
5889 0 : continue;
5890 : // Subtract padfZ[] since the coordinate transformation is
5891 : // from target to source
5892 0 : dfValueReal =
5893 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5894 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5895 : }
5896 :
5897 236560 : bHasFoundDensity = true;
5898 :
5899 : /* --------------------------------------------------------------------
5900 : */
5901 : /* We have a computed value from the source. Now apply it
5902 : * to */
5903 : /* the destination pixel. */
5904 : /* --------------------------------------------------------------------
5905 : */
5906 236560 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5907 : dfValueReal, dfValueImag);
5908 : }
5909 :
5910 236560 : if (!bHasFoundDensity)
5911 0 : continue;
5912 :
5913 : /* --------------------------------------------------------------------
5914 : */
5915 : /* Update destination density/validity masks. */
5916 : /* --------------------------------------------------------------------
5917 : */
5918 236560 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5919 :
5920 236560 : if (poWK->panDstValid != nullptr)
5921 : {
5922 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5923 : }
5924 : } /* Next iDstX */
5925 :
5926 : /* --------------------------------------------------------------------
5927 : */
5928 : /* Report progress to the user, and optionally cancel out. */
5929 : /* --------------------------------------------------------------------
5930 : */
5931 6270 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5932 0 : break;
5933 : }
5934 :
5935 : /* -------------------------------------------------------------------- */
5936 : /* Cleanup and return. */
5937 : /* -------------------------------------------------------------------- */
5938 243 : CPLFree(padfX);
5939 243 : CPLFree(padfY);
5940 243 : CPLFree(padfZ);
5941 243 : CPLFree(pabSuccess);
5942 243 : if (psWrkStruct)
5943 224 : GWKResampleDeleteWrkStruct(psWrkStruct);
5944 243 : }
5945 :
5946 243 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5947 : {
5948 243 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5949 : }
5950 :
5951 : /************************************************************************/
5952 : /* GWKRealCase() */
5953 : /* */
5954 : /* General case for non-complex data types. */
5955 : /************************************************************************/
5956 :
5957 155 : static void GWKRealCaseThread(void *pData)
5958 :
5959 : {
5960 155 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5961 155 : GDALWarpKernel *poWK = psJob->poWK;
5962 155 : const int iYMin = psJob->iYMin;
5963 155 : const int iYMax = psJob->iYMax;
5964 :
5965 155 : const int nDstXSize = poWK->nDstXSize;
5966 155 : const int nSrcXSize = poWK->nSrcXSize;
5967 155 : const int nSrcYSize = poWK->nSrcYSize;
5968 : const double dfMultFactorVerticalShiftPipeline =
5969 155 : poWK->bApplyVerticalShift
5970 155 : ? CPLAtof(CSLFetchNameValueDef(
5971 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5972 : "1.0"))
5973 155 : : 0.0;
5974 :
5975 : /* -------------------------------------------------------------------- */
5976 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5977 : /* scanlines worth of positions. */
5978 : /* -------------------------------------------------------------------- */
5979 :
5980 : // For x, 2 *, because we cache the precomputed values at the end.
5981 : double *padfX =
5982 155 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5983 : double *padfY =
5984 155 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5985 : double *padfZ =
5986 155 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5987 155 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5988 :
5989 155 : const bool bUse4SamplesFormula =
5990 155 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5991 :
5992 155 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5993 155 : if (poWK->eResample != GRA_NearestNeighbour)
5994 : {
5995 128 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5996 : }
5997 155 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5998 155 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5999 155 : const double dfErrorThreshold = CPLAtof(
6000 155 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6001 :
6002 450 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
6003 295 : poWK->papanBandSrcValid == nullptr &&
6004 140 : poWK->pafUnifiedSrcDensity != nullptr;
6005 :
6006 : const bool bOneSourceCornerFailsToReproject =
6007 155 : GWKOneSourceCornerFailsToReproject(psJob);
6008 :
6009 : // Precompute values.
6010 19572 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6011 19417 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6012 :
6013 : /* ==================================================================== */
6014 : /* Loop over output lines. */
6015 : /* ==================================================================== */
6016 22275 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6017 : {
6018 : /* --------------------------------------------------------------------
6019 : */
6020 : /* Setup points to transform to source image space. */
6021 : /* --------------------------------------------------------------------
6022 : */
6023 22120 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6024 22120 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6025 43558800 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6026 43536700 : padfY[iDstX] = dfY;
6027 22120 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6028 :
6029 : /* --------------------------------------------------------------------
6030 : */
6031 : /* Transform the points from destination pixel/line coordinates */
6032 : /* to source pixel/line coordinates. */
6033 : /* --------------------------------------------------------------------
6034 : */
6035 22120 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6036 : padfY, padfZ, pabSuccess);
6037 22120 : if (dfSrcCoordPrecision > 0.0)
6038 : {
6039 0 : GWKRoundSourceCoordinates(
6040 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6041 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6042 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6043 : }
6044 :
6045 : /* ====================================================================
6046 : */
6047 : /* Loop over pixels in output scanline. */
6048 : /* ====================================================================
6049 : */
6050 43558800 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6051 : {
6052 43536700 : GPtrDiff_t iSrcOffset = 0;
6053 43536700 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6054 : padfX, padfY, nSrcXSize,
6055 : nSrcYSize, iSrcOffset))
6056 42894000 : continue;
6057 :
6058 : /* --------------------------------------------------------------------
6059 : */
6060 : /* Do not try to apply transparent/invalid source pixels to the
6061 : */
6062 : /* destination. This currently ignores the multi-pixel input
6063 : */
6064 : /* of bilinear and cubic resamples. */
6065 : /* --------------------------------------------------------------------
6066 : */
6067 31480200 : double dfDensity = 1.0;
6068 :
6069 31480200 : if (poWK->pafUnifiedSrcDensity != nullptr)
6070 : {
6071 1360180 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
6072 1360180 : if (dfDensity < SRC_DENSITY_THRESHOLD)
6073 : {
6074 1308710 : if (!bOneSourceCornerFailsToReproject)
6075 : {
6076 1308710 : continue;
6077 : }
6078 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
6079 : psJob, iSrcOffset))
6080 : {
6081 0 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
6082 : }
6083 : else
6084 : {
6085 0 : continue;
6086 : }
6087 : }
6088 : }
6089 :
6090 59800100 : if (poWK->panUnifiedSrcValid != nullptr &&
6091 29628600 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6092 : {
6093 29531000 : if (!bOneSourceCornerFailsToReproject)
6094 : {
6095 29528700 : continue;
6096 : }
6097 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6098 : {
6099 0 : continue;
6100 : }
6101 : }
6102 :
6103 : /* ====================================================================
6104 : */
6105 : /* Loop processing each band. */
6106 : /* ====================================================================
6107 : */
6108 642736 : bool bHasFoundDensity = false;
6109 :
6110 642736 : const GPtrDiff_t iDstOffset =
6111 642736 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6112 1717090 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6113 : {
6114 1074360 : double dfBandDensity = 0.0;
6115 1074360 : double dfValueReal = 0.0;
6116 :
6117 : /* --------------------------------------------------------------------
6118 : */
6119 : /* Collect the source value. */
6120 : /* --------------------------------------------------------------------
6121 : */
6122 1074360 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
6123 : nSrcYSize == 1)
6124 : {
6125 : // FALSE is returned if dfBandDensity == 0, which is
6126 : // checked below.
6127 151448 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
6128 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
6129 : }
6130 922909 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
6131 : {
6132 1486 : double dfValueImagIgnored = 0.0;
6133 1486 : GWKBilinearResample4Sample(
6134 1486 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6135 1486 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6136 1486 : &dfValueReal, &dfValueImagIgnored);
6137 : }
6138 921423 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
6139 : {
6140 299992 : if (bSrcMaskIsDensity)
6141 : {
6142 361 : if (poWK->eWorkingDataType == GDT_Byte)
6143 : {
6144 361 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
6145 361 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6146 361 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6147 : &dfValueReal);
6148 : }
6149 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
6150 : {
6151 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
6152 0 : GUInt16>(poWK, iBand,
6153 0 : padfX[iDstX] - poWK->nSrcXOff,
6154 0 : padfY[iDstX] - poWK->nSrcYOff,
6155 : &dfBandDensity, &dfValueReal);
6156 : }
6157 : else
6158 : {
6159 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
6160 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6161 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6162 : &dfValueReal);
6163 : }
6164 : }
6165 : else
6166 : {
6167 299631 : double dfValueImagIgnored = 0.0;
6168 299631 : GWKCubicResample4Sample(
6169 299631 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6170 299631 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6171 : &dfValueReal, &dfValueImagIgnored);
6172 299992 : }
6173 : }
6174 : else
6175 : #ifdef DEBUG
6176 : // Only useful for clang static analyzer.
6177 621431 : if (psWrkStruct != nullptr)
6178 : #endif
6179 : {
6180 621431 : double dfValueImagIgnored = 0.0;
6181 621431 : psWrkStruct->pfnGWKResample(
6182 621431 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6183 621431 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
6184 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
6185 : }
6186 :
6187 : // If we didn't find any valid inputs skip to next band.
6188 1074360 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
6189 0 : continue;
6190 :
6191 1074360 : if (poWK->bApplyVerticalShift)
6192 : {
6193 0 : if (!std::isfinite(padfZ[iDstX]))
6194 0 : continue;
6195 : // Subtract padfZ[] since the coordinate transformation is
6196 : // from target to source
6197 0 : dfValueReal =
6198 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
6199 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
6200 : }
6201 :
6202 1074360 : bHasFoundDensity = true;
6203 :
6204 : /* --------------------------------------------------------------------
6205 : */
6206 : /* We have a computed value from the source. Now apply it
6207 : * to */
6208 : /* the destination pixel. */
6209 : /* --------------------------------------------------------------------
6210 : */
6211 1074360 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
6212 : dfValueReal);
6213 : }
6214 :
6215 642736 : if (!bHasFoundDensity)
6216 0 : continue;
6217 :
6218 : /* --------------------------------------------------------------------
6219 : */
6220 : /* Update destination density/validity masks. */
6221 : /* --------------------------------------------------------------------
6222 : */
6223 642736 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6224 :
6225 642736 : if (poWK->panDstValid != nullptr)
6226 : {
6227 101716 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6228 : }
6229 : } // Next iDstX.
6230 :
6231 : /* --------------------------------------------------------------------
6232 : */
6233 : /* Report progress to the user, and optionally cancel out. */
6234 : /* --------------------------------------------------------------------
6235 : */
6236 22120 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6237 0 : break;
6238 : }
6239 :
6240 : /* -------------------------------------------------------------------- */
6241 : /* Cleanup and return. */
6242 : /* -------------------------------------------------------------------- */
6243 155 : CPLFree(padfX);
6244 155 : CPLFree(padfY);
6245 155 : CPLFree(padfZ);
6246 155 : CPLFree(pabSuccess);
6247 155 : if (psWrkStruct)
6248 128 : GWKResampleDeleteWrkStruct(psWrkStruct);
6249 155 : }
6250 :
// Kernel entry point: passes poWK to GWKRun together with the name string
// "GWKRealCase" and GWKRealCaseThread as the worker function, and returns
// GWKRun's CPLErr unchanged.
6251 155 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
6252 : {
6253 155 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
6254 : }
6255 :
6256 : /************************************************************************/
6257 : /* GWKCubicResampleNoMasks4MultiBandT() */
6258 : /************************************************************************/
6259 :
6260 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
6261 : /* and enough SSE registers */
6262 : #if defined(USE_SSE2)
6263 :
// Weighted sum of a 4x4 neighbourhood held in four __m128 rows.
// Each rowN is multiplied lane-wise by weightsXYN, the four products are
// added lane-wise, and the resulting vector is reduced to a single float by
// XMMHorizontalAdd (defined elsewhere; presumably the sum of the four lanes).
6264 238596 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
6265 : const __m128 row2, const __m128 row3,
6266 : const __m128 weightsXY0,
6267 : const __m128 weightsXY1,
6268 : const __m128 weightsXY2,
6269 : const __m128 weightsXY3)
6270 : {
6271 1670170 : return XMMHorizontalAdd(_mm_add_ps(
6272 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
6273 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
6274 238596 : _mm_mul_ps(row3, weightsXY3))));
6275 : }
6276 :
// Cubic (4x4 window) resampling of one destination pixel for every band of
// the kernel, without consulting any source mask.
//
// T          : sample type of the source and destination band buffers.
// poWK       : warp kernel; papabySrcImage / papabyDstImage are reinterpreted
//              as arrays of T.
// dfSrcX/Y   : source position; the caller in this file passes the
//              transformed coordinate minus nSrcXOff / nSrcYOff.
// iDstOffset : index of the destination pixel inside each destination band.
//
// The X/Y weights are computed once and shared by all bands. When the 4x4
// window would not fit entirely inside the source buffer, each band is
// resampled with GWKBilinearResampleNoMasks4SampleT instead.
// On exit pafDstDensity[iDstOffset] is set to 1.0f when that array exists.
6277 : template <class T>
6278 81323 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
6279 : double dfSrcX, double dfSrcY,
6280 : const GPtrDiff_t iDstOffset)
6281 : {
// Shift by half a pixel, then truncate to get the integer source pixel.
6282 81323 : const double dfSrcXShifted = dfSrcX - 0.5;
6283 81323 : const int iSrcX = static_cast<int>(dfSrcXShifted);
6284 81323 : const double dfSrcYShifted = dfSrcY - 0.5;
6285 81323 : const int iSrcY = static_cast<int>(dfSrcYShifted);
6286 81323 : const GPtrDiff_t iSrcOffset =
6287 81323 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
6288 :
// The window spans columns iSrcX-1..iSrcX+2 and rows iSrcY-1..iSrcY+2.
// The test below also takes the fallback when iSrcX + 2 or iSrcY + 2 equals
// the last valid index.
6289 : // Get the bilinear interpolation at the image borders.
6290 81323 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
6291 80326 : iSrcY + 2 >= poWK->nSrcYSize)
6292 : {
6293 7164 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6294 : {
6295 : T value;
6296 5373 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
6297 : &value);
6298 5373 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6299 : value;
6300 1791 : }
6301 : }
6302 : else
6303 : {
// Fractional position inside the pixel, in single precision.
6304 79532 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
6305 79532 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
6306 :
6307 : float afCoeffsX[4];
6308 : float afCoeffsY[4];
6309 79532 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
6310 79532 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
// weightsXYn = (Y weight of row n, broadcast to all lanes) * (4 X weights),
// i.e. one row of the separable 4x4 weight matrix.
6311 79532 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
6312 : const auto weightsXY0 =
6313 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
6314 : const auto weightsXY1 =
6315 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
6316 : const auto weightsXY2 =
6317 159064 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
6318 : const auto weightsXY3 =
6319 79532 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
6320 :
// Offset of the top-left sample of the window: one row up, one column left.
6321 79532 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
6322 :
6323 79532 : int iBand = 0;
6324 : // Process 2 bands at a time
6325 159064 : for (; iBand + 1 < poWK->nBands; iBand += 2)
6326 : {
6327 79532 : const T *CPL_RESTRICT pBand0 =
6328 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6329 79532 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
6330 : const auto row1_0 =
6331 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6332 : const auto row2_0 =
6333 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6334 : const auto row3_0 =
6335 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6336 :
6337 79532 : const T *CPL_RESTRICT pBand1 =
6338 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
6339 79532 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
6340 : const auto row1_1 =
6341 79532 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
6342 : const auto row2_1 =
6343 79532 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
6344 : const auto row3_1 =
6345 79532 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
6346 :
6347 : const float fValue_0 =
6348 79532 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
6349 : weightsXY1, weightsXY2, weightsXY3);
6350 :
6351 : const float fValue_1 =
6352 79532 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
6353 : weightsXY1, weightsXY2, weightsXY3);
6354 :
6355 79532 : T *CPL_RESTRICT pDstBand0 =
6356 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6357 79532 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
6358 :
6359 79532 : T *CPL_RESTRICT pDstBand1 =
6360 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
6361 79532 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
6362 : }
// Odd band count: handle the one band left over by the pairwise loop.
6363 79532 : if (iBand < poWK->nBands)
6364 : {
6365 79532 : const T *CPL_RESTRICT pBand0 =
6366 79532 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
6367 79532 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
6368 : const auto row1 =
6369 79532 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
6370 : const auto row2 =
6371 79532 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
6372 : const auto row3 =
6373 79532 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
6374 :
6375 : const float fValue =
6376 79532 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
6377 : weightsXY2, weightsXY3);
6378 :
6379 79532 : T *CPL_RESTRICT pDstBand =
6380 79532 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
6381 79532 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
6382 : }
6383 : }
6384 :
6385 81323 : if (poWK->pafDstDensity)
6386 441 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6387 81323 : }
6388 :
6389 : #endif // defined(USE_SSE2)
6390 :
6391 : /************************************************************************/
6392 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
6393 : /************************************************************************/
6394 :
// Worker for the "no source masks" warp paths: resamples destination rows
// [psJob->iYMin, psJob->iYMax) and writes every band of each pixel that maps
// into the source buffer.
//
// T                   : sample type of the source/destination band buffers.
// eResample           : resampling algorithm, selected at compile time.
// bUse4SamplesFormula : when non-zero (and eResample is not nearest
//                       neighbour) the *4Sample* helpers are used instead of
//                       GWKResampleNoMasksT.
// pData               : pointer to the GWKJobStruct describing this job.
//
// The only destination side array touched besides the band buffers is
// pafDstDensity, which is set to 1.0f for written pixels when it exists.
6395 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
6396 1177 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
6397 :
6398 : {
6399 1177 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6400 1177 : GDALWarpKernel *poWK = psJob->poWK;
6401 1177 : const int iYMin = psJob->iYMin;
6402 1177 : const int iYMax = psJob->iYMax;
// Factor applied to padfZ[] in the vertical-shift formula below; the warp
// option is only parsed when bApplyVerticalShift is set.
6403 1159 : const double dfMultFactorVerticalShiftPipeline =
6404 1177 : poWK->bApplyVerticalShift
6405 18 : ? CPLAtof(CSLFetchNameValueDef(
6406 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6407 : "1.0"))
6408 : : 0.0;
6409 :
6410 1177 : const int nDstXSize = poWK->nDstXSize;
6411 1177 : const int nSrcXSize = poWK->nSrcXSize;
6412 1177 : const int nSrcYSize = poWK->nSrcYSize;
6413 :
6414 : /* -------------------------------------------------------------------- */
6415 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6416 : /* scanlines worth of positions. */
6417 : /* -------------------------------------------------------------------- */
6418 :
6419 : // For x, 2 *, because we cache the precomputed values at the end.
6420 : double *padfX =
6421 1177 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6422 : double *padfY =
6423 1177 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6424 : double *padfZ =
6425 1177 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6426 1177 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6427 :
// Scratch weight buffers of 1 + 2 * radius doubles each; they are only
// handed to GWKResampleNoMasksT (the non-4-sample path).
6428 1177 : const int nXRadius = poWK->nXRadius;
6429 : double *padfWeightsX =
6430 1177 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6431 : double *padfWeightsY = static_cast<double *>(
6432 1177 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6433 1177 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6434 1177 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6435 1177 : const double dfErrorThreshold = CPLAtof(
6436 1177 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6437 :
// Pixel-centre X coordinates are identical for every row, so they are stored
// once in the second half of padfX and copied to the first half per row.
6438 : // Precompute values.
6439 254841 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6440 253664 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6441 :
6442 : /* ==================================================================== */
6443 : /* Loop over output lines. */
6444 : /* ==================================================================== */
6445 130049 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6446 : {
6447 : /* --------------------------------------------------------------------
6448 : */
6449 : /* Setup points to transform to source image space. */
6450 : /* --------------------------------------------------------------------
6451 : */
6452 128873 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6453 128873 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6454 58394094 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6455 58265266 : padfY[iDstX] = dfY;
6456 128873 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6457 :
6458 : /* --------------------------------------------------------------------
6459 : */
6460 : /* Transform the points from destination pixel/line coordinates */
6461 : /* to source pixel/line coordinates. */
6462 : /* --------------------------------------------------------------------
6463 : */
6464 128873 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6465 : padfY, padfZ, pabSuccess);
// Optional post-processing of the transformed coordinates, enabled by a
// positive SRC_COORD_PRECISION warp option.
6466 128873 : if (dfSrcCoordPrecision > 0.0)
6467 : {
6468 1000 : GWKRoundSourceCoordinates(
6469 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6470 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6471 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6472 : }
6473 :
6474 : /* ====================================================================
6475 : */
6476 : /* Loop over pixels in output scanline. */
6477 : /* ====================================================================
6478 : */
6479 58195534 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6480 : {
// A false return means this destination pixel is skipped; on success
// iSrcOffset has been filled in (it is passed by reference).
6481 58066596 : GPtrDiff_t iSrcOffset = 0;
6482 58066596 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6483 : padfX, padfY, nSrcXSize,
6484 : nSrcYSize, iSrcOffset))
6485 6545209 : continue;
6486 :
6487 : /* ====================================================================
6488 : */
6489 : /* Loop processing each band. */
6490 : /* ====================================================================
6491 : */
6492 51759962 : const GPtrDiff_t iDstOffset =
6493 51759962 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6494 :
// SSE2 fast path: 4-sample cubic on GByte/GUInt16 with more than one band
// and no vertical shift handles all bands in one call, then moves on to the
// next pixel.
6495 : #if defined(USE_SSE2)
6496 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6497 : (std::is_same<T, GByte>::value ||
6498 : std::is_same<T, GUInt16>::value))
6499 : {
6500 752574 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6501 : {
6502 81323 : GWKCubicResampleNoMasks4MultiBandT<T>(
6503 81323 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6504 81323 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6505 :
6506 81323 : continue;
6507 : }
6508 : }
6509 : #endif // defined(USE_SSE2)
6510 :
// dfInvWeights is declared outside the band loop and passed to
// GWKResampleNoMasksT for every band of this pixel; it is unused in the
// other compile-time branches, hence [[maybe_unused]].
6511 51678639 : [[maybe_unused]] double dfInvWeights = 0;
6512 144421098 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6513 : {
6514 92899567 : T value = 0;
6515 : if constexpr (eResample == GRA_NearestNeighbour)
6516 : {
6517 77005949 : value = reinterpret_cast<T *>(
6518 77005949 : poWK->papabySrcImage[iBand])[iSrcOffset];
6519 : }
6520 : else if constexpr (bUse4SamplesFormula)
6521 : {
6522 : if constexpr (eResample == GRA_Bilinear)
6523 4806886 : GWKBilinearResampleNoMasks4SampleT(
6524 4806886 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6525 4806886 : padfY[iDstX] - poWK->nSrcYOff, &value);
6526 : else
6527 1906603 : GWKCubicResampleNoMasks4SampleT(
6528 1906603 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6529 1906603 : padfY[iDstX] - poWK->nSrcYOff, &value);
6530 : }
6531 : else
6532 : {
6533 9180129 : GWKResampleNoMasksT(
6534 9180129 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6535 9180129 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6536 : padfWeightsY, dfInvWeights);
6537 : }
6538 :
6539 92899487 : if (poWK->bApplyVerticalShift)
6540 : {
// A non-finite Z leaves this band of the destination pixel untouched.
6541 818 : if (!std::isfinite(padfZ[iDstX]))
6542 0 : continue;
6543 : // Subtract padfZ[] since the coordinate transformation is
6544 : // from target to source
6545 3 : value = GWKClampValueT<T>(
6546 818 : value * poWK->dfMultFactorVerticalShift -
6547 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6548 : }
6549 :
6550 92742327 : if (poWK->pafDstDensity)
6551 11712299 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6552 :
6553 92742327 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6554 : value;
6555 : }
6556 : }
6557 :
6558 : /* --------------------------------------------------------------------
6559 : */
6560 : /* Report progress to the user, and optionally cancel out. */
6561 : /* --------------------------------------------------------------------
6562 : */
// A non-zero return from the progress callback stops the row loop; the
// buffers below are still released.
6563 128873 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6564 1 : break;
6565 : }
6566 :
6567 : /* -------------------------------------------------------------------- */
6568 : /* Cleanup and return. */
6569 : /* -------------------------------------------------------------------- */
6570 1177 : CPLFree(padfX);
6571 1177 : CPLFree(padfY);
6572 1177 : CPLFree(padfZ);
6573 1177 : CPLFree(pabSuccess);
6574 1177 : CPLFree(padfWeightsX);
6575 1177 : CPLFree(padfWeightsY);
6576 1177 : }
6577 :
// Worker adapter that always uses the general (non 4-sample) code path: it
// forwards pData to GWKResampleNoMasksOrDstDensityOnlyThreadInternal with
// bUse4SamplesFormula fixed to FALSE.
6578 : template <class T, GDALResampleAlg eResample>
6579 921 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6580 : {
6581 921 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6582 : pData);
6583 921 : }
6584 :
// Worker adapter for bilinear and cubic resampling (enforced by the
// static_assert). It chooses at run time between the two compile-time
// variants of GWKResampleNoMasksOrDstDensityOnlyThreadInternal: the 4-sample
// variant when both dfXScale and dfYScale of the kernel are >= 0.95, the
// general variant otherwise.
6585 : template <class T, GDALResampleAlg eResample>
6586 256 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6587 :
6588 : {
6589 256 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6590 256 : GDALWarpKernel *poWK = psJob->poWK;
6591 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6592 256 : const bool bUse4SamplesFormula =
6593 256 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6594 256 : if (bUse4SamplesFormula)
6595 156 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6596 : pData);
6597 : else
6598 100 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6599 : pData);
6600 256 : }
6601 :
// Runs the no-mask nearest neighbour worker instantiated for GByte through
// GWKRun and returns GWKRun's CPLErr.
6602 866 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6603 : {
6604 866 : return GWKRun(
6605 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6606 866 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6607 : }
6608 :
// Runs the no-mask bilinear worker instantiated for GByte through GWKRun;
// the worker itself decides whether the 4-sample formula applies.
6609 126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6610 : {
6611 126 : return GWKRun(
6612 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6613 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6614 126 : GRA_Bilinear>);
6615 : }
6616 :
// Runs the no-mask cubic worker instantiated for GByte through GWKRun;
// the worker itself decides whether the 4-sample formula applies.
6617 72 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6618 : {
6619 72 : return GWKRun(
6620 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6621 72 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6622 : }
6623 :
// Runs the no-mask cubic worker instantiated for float through GWKRun;
// the worker itself decides whether the 4-sample formula applies.
6624 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6625 : {
6626 9 : return GWKRun(
6627 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6628 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6629 : }
6630 :
6631 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6632 :
// Runs the no-mask cubic worker instantiated for double through GWKRun.
// Only compiled when INSTANTIATE_FLOAT64_SSE2_IMPL is defined.
6633 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6634 : {
6635 : return GWKRun(
6636 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6637 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6638 : }
6639 : #endif
6640 :
// Runs the no-mask cubic spline worker instantiated for GByte through
// GWKRun. Cubic spline has no 4-sample variant here, so the general worker
// adapter is used.
6641 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6642 : {
6643 12 : return GWKRun(
6644 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6645 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6646 : }
6647 :
6648 : /************************************************************************/
6649 : /* GWKNearestThread() */
6650 : /* */
6651 : /* Nearest neighbour resampling using valid flags, templated on the */
6652 : /* sample type (GWKNearestByte() is the case for 8bit input data). */
6653 : /* Should be as fast as possible for this particular transformation */
6654 : /* type. */
6655 :
// Worker for nearest neighbour warping with source masks: processes
// destination rows [psJob->iYMin, psJob->iYMax).
//
// T     : sample type handed to GWKGetPixelT / GWKSetPixelValueRealT.
// pData : pointer to the GWKJobStruct describing this job.
//
// A destination pixel is skipped when it does not map into the source
// buffer, when panUnifiedSrcValid marks the source pixel invalid (unless an
// edge adjustment succeeds), or when pafUnifiedSrcDensity is below
// SRC_DENSITY_THRESHOLD.
6656 339 : template <class T> static void GWKNearestThread(void *pData)
6657 :
6658 : {
6659 339 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6660 339 : GDALWarpKernel *poWK = psJob->poWK;
6661 339 : const int iYMin = psJob->iYMin;
6662 339 : const int iYMax = psJob->iYMax;
// Factor applied to padfZ[] in the vertical-shift formula below; the warp
// option is only parsed when bApplyVerticalShift is set.
6663 339 : const double dfMultFactorVerticalShiftPipeline =
6664 339 : poWK->bApplyVerticalShift
6665 0 : ? CPLAtof(CSLFetchNameValueDef(
6666 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6667 : "1.0"))
6668 : : 0.0;
6669 :
6670 339 : const int nDstXSize = poWK->nDstXSize;
6671 339 : const int nSrcXSize = poWK->nSrcXSize;
6672 339 : const int nSrcYSize = poWK->nSrcYSize;
6673 :
6674 : /* -------------------------------------------------------------------- */
6675 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6676 : /* scanlines worth of positions. */
6677 : /* -------------------------------------------------------------------- */
6678 :
6679 : // For x, 2 *, because we cache the precomputed values at the end.
6680 : double *padfX =
6681 339 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6682 : double *padfY =
6683 339 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6684 : double *padfZ =
6685 339 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6686 339 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6687 :
6688 339 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6689 339 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6690 339 : const double dfErrorThreshold = CPLAtof(
6691 339 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6692 :
// Computed once per job; when true, an invalid source pixel gets a second
// chance through GWKAdjustSrcOffsetOnEdge below.
6693 : const bool bOneSourceCornerFailsToReproject =
6694 339 : GWKOneSourceCornerFailsToReproject(psJob);
6695 :
// Pixel-centre X coordinates are identical for every row, so they are stored
// once in the second half of padfX and copied to the first half per row.
6696 : // Precompute values.
6697 48911 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6698 48572 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6699 :
6700 : /* ==================================================================== */
6701 : /* Loop over output lines. */
6702 : /* ==================================================================== */
6703 36409 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6704 : {
6705 :
6706 : /* --------------------------------------------------------------------
6707 : */
6708 : /* Setup points to transform to source image space. */
6709 : /* --------------------------------------------------------------------
6710 : */
6711 36070 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6712 36070 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6713 7637615 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6714 7601550 : padfY[iDstX] = dfY;
6715 36070 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6716 :
6717 : /* --------------------------------------------------------------------
6718 : */
6719 : /* Transform the points from destination pixel/line coordinates */
6720 : /* to source pixel/line coordinates. */
6721 : /* --------------------------------------------------------------------
6722 : */
6723 36070 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6724 : padfY, padfZ, pabSuccess);
// Optional post-processing of the transformed coordinates, enabled by a
// positive SRC_COORD_PRECISION warp option.
6725 36070 : if (dfSrcCoordPrecision > 0.0)
6726 : {
6727 0 : GWKRoundSourceCoordinates(
6728 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6729 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6730 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6731 : }
6732 : /* ====================================================================
6733 : */
6734 : /* Loop over pixels in output scanline. */
6735 : /* ====================================================================
6736 : */
6737 7637615 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6738 : {
// A false return means this destination pixel is skipped; on success
// iSrcOffset has been filled in (it is passed by reference).
6739 7601550 : GPtrDiff_t iSrcOffset = 0;
6740 7601550 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6741 : padfX, padfY, nSrcXSize,
6742 : nSrcYSize, iSrcOffset))
6743 2117518 : continue;
6744 :
6745 : /* --------------------------------------------------------------------
6746 : */
6747 : /* Do not try to apply invalid source pixels to the dest. */
6748 : /* --------------------------------------------------------------------
6749 : */
6750 7419936 : if (poWK->panUnifiedSrcValid != nullptr &&
6751 931241 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6752 : {
6753 49670 : if (!bOneSourceCornerFailsToReproject)
6754 : {
6755 42185 : continue;
6756 : }
// GWKAdjustSrcOffsetOnEdge takes iSrcOffset by non-const reference
// (presumably moving it to a valid neighbouring pixel - confirm in its
// definition); a false return skips the pixel.
6757 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6758 : {
6759 5224 : continue;
6760 : }
6761 : }
6762 :
6763 : /* --------------------------------------------------------------------
6764 : */
6765 : /* Do not try to apply transparent source pixels to the
6766 : * destination.*/
6767 : /* --------------------------------------------------------------------
6768 : */
6769 6441284 : double dfDensity = 1.0;
6770 :
6771 6441284 : if (poWK->pafUnifiedSrcDensity != nullptr)
6772 : {
6773 1064945 : dfDensity = poWK->pafUnifiedSrcDensity[iSrcOffset];
6774 1064945 : if (dfDensity < SRC_DENSITY_THRESHOLD)
6775 957251 : continue;
6776 : }
6777 :
6778 : /* ====================================================================
6779 : */
6780 : /* Loop processing each band. */
6781 : /* ====================================================================
6782 : */
6783 :
6784 5484032 : const GPtrDiff_t iDstOffset =
6785 5484032 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6786 :
6787 12643414 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6788 : {
6789 7159332 : T value = 0;
6790 7159332 : double dfBandDensity = 0.0;
6791 :
6792 : /* --------------------------------------------------------------------
6793 : */
6794 : /* Collect the source value. */
6795 : /* --------------------------------------------------------------------
6796 : */
// The band is only written when GWKGetPixelT reports success.
6797 7159332 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6798 : &value))
6799 : {
6800 :
6801 7159332 : if (poWK->bApplyVerticalShift)
6802 : {
// A non-finite Z leaves this band of the destination pixel untouched.
6803 0 : if (!std::isfinite(padfZ[iDstX]))
6804 0 : continue;
6805 : // Subtract padfZ[] since the coordinate transformation
6806 : // is from target to source
6807 0 : value = GWKClampValueT<T>(
6808 0 : value * poWK->dfMultFactorVerticalShift -
6809 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6810 : }
6811 :
6812 7159332 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6813 : dfBandDensity, value);
6814 : }
6815 : }
6816 :
6817 : /* --------------------------------------------------------------------
6818 : */
6819 : /* Mark this pixel valid/opaque in the output. */
6820 : /* --------------------------------------------------------------------
6821 : */
// NOTE(review): density and validity are updated here even if no band was
// written above (GWKGetPixelT failed or Z was non-finite for every band);
// GWKRealCaseThread guards the same step with bHasFoundDensity. Confirm this
// difference is intended.
6822 5484032 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6823 :
6824 5484032 : if (poWK->panDstValid != nullptr)
6825 : {
6826 4854774 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6827 : }
6828 : } /* Next iDstX */
6829 :
6830 : /* --------------------------------------------------------------------
6831 : */
6832 : /* Report progress to the user, and optionally cancel out. */
6833 : /* --------------------------------------------------------------------
6834 : */
// A non-zero return from the progress callback stops the row loop; the
// buffers below are still released.
6835 36070 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6836 0 : break;
6837 : }
6838 :
6839 : /* -------------------------------------------------------------------- */
6840 : /* Cleanup and return. */
6841 : /* -------------------------------------------------------------------- */
6842 339 : CPLFree(padfX);
6843 339 : CPLFree(padfY);
6844 339 : CPLFree(padfZ);
6845 339 : CPLFree(pabSuccess);
6846 339 : }
6847 :
6848 274 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6849 : {
6850 274 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6851 : }
6852 :
6853 18 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6854 : {
6855 18 : return GWKRun(
6856 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6857 18 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6858 : }
6859 :
6860 18 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6861 : {
6862 18 : return GWKRun(
6863 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6864 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6865 18 : GRA_Bilinear>);
6866 : }
6867 :
6868 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6869 : {
6870 6 : return GWKRun(
6871 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6872 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6873 6 : GRA_Bilinear>);
6874 : }
6875 :
6876 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6877 : {
6878 5 : return GWKRun(
6879 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6880 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6881 5 : GRA_Bilinear>);
6882 : }
6883 :
6884 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6885 :
6886 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6887 : {
6888 : return GWKRun(
6889 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6890 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6891 : GRA_Bilinear>);
6892 : }
6893 : #endif
6894 :
6895 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6896 : {
6897 5 : return GWKRun(
6898 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6899 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6900 : }
6901 :
6902 12 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6903 : {
6904 12 : return GWKRun(
6905 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6906 12 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6907 : }
6908 :
6909 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6910 : {
6911 6 : return GWKRun(
6912 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6913 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6914 : }
6915 :
6916 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6917 : {
6918 5 : return GWKRun(
6919 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6920 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6921 : }
6922 :
6923 24 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6924 : {
6925 24 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6926 : }
6927 :
6928 0 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6929 : {
6930 0 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6931 : }
6932 :
6933 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6934 : {
6935 11 : return GWKRun(
6936 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6937 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6938 : }
6939 :
6940 37 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6941 : {
6942 37 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6943 : }
6944 :
6945 : /************************************************************************/
6946 : /* GWKAverageOrMode() */
6947 : /* */
6948 : /************************************************************************/
6949 :
6950 : static void GWKAverageOrModeThread(void *pData);
6951 :
6952 130 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6953 : {
6954 130 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6955 : }
6956 :
6957 : // Overall logic based on GWKGeneralCaseThread().
6958 130 : static void GWKAverageOrModeThread(void *pData)
6959 : {
6960 130 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6961 130 : GDALWarpKernel *poWK = psJob->poWK;
6962 130 : const int iYMin = psJob->iYMin;
6963 130 : const int iYMax = psJob->iYMax;
6964 : const double dfMultFactorVerticalShiftPipeline =
6965 130 : poWK->bApplyVerticalShift
6966 130 : ? CPLAtof(CSLFetchNameValueDef(
6967 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6968 : "1.0"))
6969 130 : : 0.0;
6970 :
6971 130 : const int nDstXSize = poWK->nDstXSize;
6972 130 : const int nSrcXSize = poWK->nSrcXSize;
6973 130 : const int nSrcYSize = poWK->nSrcYSize;
6974 :
6975 : /* -------------------------------------------------------------------- */
6976 : /* Find out which algorithm to use (small optim.) */
6977 : /* -------------------------------------------------------------------- */
6978 130 : int nAlgo = 0;
6979 :
6980 : // Only used for GRA_Mode
6981 130 : float *pafRealVals = nullptr;
6982 130 : float *pafCounts = nullptr;
6983 130 : int nBins = 0;
6984 130 : int nBinsOffset = 0;
6985 130 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
6986 :
6987 : // Only used with nAlgo = 6.
6988 130 : float quant = 0.5;
6989 :
6990 : // To control array allocation only when data type is complex
6991 130 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
6992 :
6993 130 : if (poWK->eResample == GRA_Average)
6994 : {
6995 71 : nAlgo = GWKAOM_Average;
6996 : }
6997 59 : else if (poWK->eResample == GRA_RMS)
6998 : {
6999 9 : nAlgo = GWKAOM_RMS;
7000 : }
7001 50 : else if (poWK->eResample == GRA_Mode)
7002 : {
7003 : // TODO check color table count > 256.
7004 23 : if (poWK->eWorkingDataType == GDT_Byte ||
7005 17 : poWK->eWorkingDataType == GDT_UInt16 ||
7006 17 : poWK->eWorkingDataType == GDT_Int16)
7007 : {
7008 14 : nAlgo = GWKAOM_Imode;
7009 :
7010 : // In the case of a paletted or non-paletted byte band,
7011 : // Input values are between 0 and 255.
7012 14 : if (poWK->eWorkingDataType == GDT_Byte)
7013 : {
7014 6 : nBins = 256;
7015 : }
7016 : // In the case of Int8, input values are between -128 and 127.
7017 8 : else if (poWK->eWorkingDataType == GDT_Int8)
7018 : {
7019 0 : nBins = 256;
7020 0 : nBinsOffset = 128;
7021 : }
7022 : // In the case of Int16, input values are between -32768 and 32767.
7023 8 : else if (poWK->eWorkingDataType == GDT_Int16)
7024 : {
7025 8 : nBins = 65536;
7026 8 : nBinsOffset = 32768;
7027 : }
7028 : // In the case of UInt16, input values are between 0 and 65535.
7029 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
7030 : {
7031 0 : nBins = 65536;
7032 : }
7033 : pafCounts =
7034 14 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7035 14 : if (pafCounts == nullptr)
7036 0 : return;
7037 : }
7038 : else
7039 : {
7040 9 : nAlgo = GWKAOM_Fmode;
7041 :
7042 9 : if (nSrcXSize > 0 && nSrcYSize > 0)
7043 : {
7044 : pafRealVals = static_cast<float *>(
7045 9 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7046 : pafCounts = static_cast<float *>(
7047 9 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
7048 9 : if (pafRealVals == nullptr || pafCounts == nullptr)
7049 : {
7050 0 : VSIFree(pafRealVals);
7051 0 : VSIFree(pafCounts);
7052 0 : return;
7053 : }
7054 : }
7055 : }
7056 : }
7057 27 : else if (poWK->eResample == GRA_Max)
7058 : {
7059 6 : nAlgo = GWKAOM_Max;
7060 : }
7061 21 : else if (poWK->eResample == GRA_Min)
7062 : {
7063 5 : nAlgo = GWKAOM_Min;
7064 : }
7065 16 : else if (poWK->eResample == GRA_Med)
7066 : {
7067 6 : nAlgo = GWKAOM_Quant;
7068 6 : quant = 0.5;
7069 : }
7070 10 : else if (poWK->eResample == GRA_Q1)
7071 : {
7072 5 : nAlgo = GWKAOM_Quant;
7073 5 : quant = 0.25;
7074 : }
7075 5 : else if (poWK->eResample == GRA_Q3)
7076 : {
7077 5 : nAlgo = GWKAOM_Quant;
7078 5 : quant = 0.75;
7079 : }
7080 : #ifdef disabled
7081 : else if (poWK->eResample == GRA_Sum)
7082 : {
7083 : nAlgo = GWKAOM_Sum;
7084 : }
7085 : #endif
7086 : else
7087 : {
7088 : // Other resample algorithms not permitted here.
7089 0 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7090 : "illegal resample");
7091 0 : return;
7092 : }
7093 :
7094 130 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread() using algo %d",
7095 : nAlgo);
7096 :
7097 : /* -------------------------------------------------------------------- */
7098 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7099 : /* scanlines worth of positions. */
7100 : /* -------------------------------------------------------------------- */
7101 :
7102 : double *padfX =
7103 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7104 : double *padfY =
7105 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7106 : double *padfZ =
7107 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7108 : double *padfX2 =
7109 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7110 : double *padfY2 =
7111 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7112 : double *padfZ2 =
7113 130 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7114 130 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7115 130 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7116 :
7117 130 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7118 130 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7119 130 : const double dfErrorThreshold = CPLAtof(
7120 130 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7121 :
7122 : const double dfExcludedValuesThreshold =
7123 130 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7124 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7125 130 : 100.0;
7126 : const double dfNodataValuesThreshold =
7127 130 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7128 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7129 130 : 100.0;
7130 :
7131 : const int nXMargin =
7132 130 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7133 : const int nYMargin =
7134 130 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7135 :
7136 : /* ==================================================================== */
7137 : /* Loop over output lines. */
7138 : /* ==================================================================== */
7139 6627 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7140 : {
7141 :
7142 : /* --------------------------------------------------------------------
7143 : */
7144 : /* Setup points to transform to source image space. */
7145 : /* --------------------------------------------------------------------
7146 : */
7147 1669840 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7148 : {
7149 1663340 : padfX[iDstX] = iDstX + poWK->nDstXOff;
7150 1663340 : padfY[iDstX] = iDstY + poWK->nDstYOff;
7151 1663340 : padfZ[iDstX] = 0.0;
7152 1663340 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
7153 1663340 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
7154 1663340 : padfZ2[iDstX] = 0.0;
7155 : }
7156 :
7157 : /* --------------------------------------------------------------------
7158 : */
7159 : /* Transform the points from destination pixel/line coordinates */
7160 : /* to source pixel/line coordinates. */
7161 : /* --------------------------------------------------------------------
7162 : */
7163 6497 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
7164 : padfY, padfZ, pabSuccess);
7165 6497 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
7166 : padfY2, padfZ2, pabSuccess2);
7167 :
7168 6497 : if (dfSrcCoordPrecision > 0.0)
7169 : {
7170 0 : GWKRoundSourceCoordinates(
7171 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
7172 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
7173 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
7174 0 : GWKRoundSourceCoordinates(
7175 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2,
7176 : dfSrcCoordPrecision, dfErrorThreshold, poWK->pfnTransformer,
7177 0 : psJob->pTransformerArg, 1.0 + poWK->nDstXOff,
7178 0 : iDstY + 1.0 + poWK->nDstYOff);
7179 : }
7180 :
7181 : /* ====================================================================
7182 : */
7183 : /* Loop over pixels in output scanline. */
7184 : /* ====================================================================
7185 : */
7186 1669840 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7187 : {
7188 1663340 : GPtrDiff_t iSrcOffset = 0;
7189 1663340 : double dfDensity = 1.0;
7190 1663340 : bool bHasFoundDensity = false;
7191 :
7192 1663340 : if (!pabSuccess[iDstX] || !pabSuccess2[iDstX])
7193 311460 : continue;
7194 :
7195 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
7196 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
7197 1663340 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
7198 1663320 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
7199 1663320 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
7200 1663300 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
7201 1663300 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
7202 1663300 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
7203 1663290 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
7204 1663290 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
7205 : {
7206 62 : continue;
7207 : }
7208 :
7209 1663280 : const GPtrDiff_t iDstOffset =
7210 1663280 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7211 :
7212 : // Compute corners in source crs.
7213 :
7214 : // The transformation might not have preserved ordering of
7215 : // coordinates so do the necessary swapping (#5433).
7216 : // NOTE: this is really an approximative fix. To do something
7217 : // more precise we would for example need to compute the
7218 : // transformation of coordinates in the
7219 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
7220 : // coordinates, and take the bounding box of the got source
7221 : // coordinates.
7222 :
7223 1663280 : if (padfX[iDstX] > padfX2[iDstX])
7224 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
7225 :
7226 : // Detect situations where the target pixel is close to the
7227 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
7228 : // close to the left-most and right-most columns of the source
7229 : // raster. The 2 value below was experimentally determined to
7230 : // avoid false-positives and false-negatives.
7231 : // Addresses https://github.com/OSGeo/gdal/issues/6478
7232 1663280 : bool bWrapOverX = false;
7233 1663280 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
7234 1663280 : if (poWK->nSrcXOff == 0 &&
7235 1663280 : padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
7236 14495 : (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale <
7237 : nThresholdWrapOverX)
7238 : {
7239 : // Check there is a discontinuity by checking at mid-pixel.
7240 : // NOTE: all this remains fragile. To confidently
7241 : // detect antimeridian warping we should probably try to access
7242 : // georeferenced coordinates, and not rely only on tests on
7243 : // image space coordinates. But accessing georeferenced
7244 : // coordinates from here is not trivial, and we would for example
7245 : // have to handle both geographic, Mercator, etc.
7246 : // Let's hope this heuristic is good enough for now.
7247 1041 : double x = iDstX + 0.5 + poWK->nDstXOff;
7248 1041 : double y = iDstY + poWK->nDstYOff;
7249 1041 : double z = 0;
7250 1041 : int bSuccess = FALSE;
7251 1041 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y,
7252 : &z, &bSuccess);
7253 1041 : if (bSuccess && x < padfX[iDstX])
7254 : {
7255 1008 : bWrapOverX = true;
7256 1008 : std::swap(padfX[iDstX], padfX2[iDstX]);
7257 1008 : padfX2[iDstX] += nSrcXSize;
7258 : }
7259 : }
7260 :
7261 1663280 : const double dfXMin = padfX[iDstX] - poWK->nSrcXOff;
7262 1663280 : const double dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
7263 1663280 : constexpr double EPS = 1e-10;
7264 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
7265 1663280 : if (!(dfXMax > -EPS && dfXMin < nSrcXSize + EPS))
7266 72 : continue;
7267 1663200 : int iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPS), 0.0));
7268 1663200 : int iSrcXMax = static_cast<int>(
7269 1663200 : std::min(ceil(dfXMax - EPS), static_cast<double>(INT_MAX)));
7270 1663200 : if (!bWrapOverX)
7271 1662200 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
7272 1663200 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
7273 472 : iSrcXMax++;
7274 :
7275 1663200 : if (padfY[iDstX] > padfY2[iDstX])
7276 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
7277 1663200 : const double dfYMin = padfY[iDstX] - poWK->nSrcYOff;
7278 1663200 : const double dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
7279 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
7280 1663200 : if (!(dfYMax > -EPS && dfYMin < nSrcYSize + EPS))
7281 36 : continue;
7282 1663170 : int iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPS), 0.0));
7283 : int iSrcYMax =
7284 1663170 : std::min(static_cast<int>(ceil(dfYMax - EPS)), nSrcYSize);
7285 1663170 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
7286 0 : iSrcYMax++;
7287 :
7288 : #define COMPUTE_WEIGHT_Y(iSrcY) \
7289 : ((iSrcY == iSrcYMin) \
7290 : ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin)) \
7291 : : (iSrcY + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax) \
7292 : : 1.0)
7293 :
7294 : #define COMPUTE_WEIGHT(iSrcX, dfWeightY) \
7295 : ((iSrcX == iSrcXMin) ? ((iSrcXMin + 1 == iSrcXMax) \
7296 : ? dfWeightY \
7297 : : dfWeightY * (1 - (dfXMin - iSrcXMin))) \
7298 : : (iSrcX + 1 == iSrcXMax) ? dfWeightY * (1 - (iSrcXMax - dfXMax)) \
7299 : : dfWeightY)
7300 :
7301 1663170 : bool bDone = false;
7302 :
7303 : // Special Average mode where we process all bands together,
7304 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7305 2267240 : if (nAlgo == GWKAOM_Average &&
7306 604073 : (!poWK->m_aadfExcludedValues.empty() ||
7307 393224 : dfNodataValuesThreshold < 1 - EPS) &&
7308 2267240 : !poWK->bApplyVerticalShift && !bIsComplex)
7309 : {
7310 393224 : double dfTotalWeightInvalid = 0.0;
7311 393224 : double dfTotalWeightExcluded = 0.0;
7312 393224 : double dfTotalWeightRegular = 0.0;
7313 786448 : std::vector<double> adfValueReal(poWK->nBands, 0);
7314 786448 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7315 : std::vector<int> anCountExcludedValues(
7316 393224 : poWK->m_aadfExcludedValues.size(), 0);
7317 :
7318 1572890 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7319 : {
7320 1179660 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7321 1179660 : iSrcOffset =
7322 1179660 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7323 5111860 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7324 : iSrcX++, iSrcOffset++)
7325 : {
7326 3932190 : if (bWrapOverX)
7327 0 : iSrcOffset =
7328 0 : (iSrcX % nSrcXSize) +
7329 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7330 :
7331 3932190 : const double dfWeight =
7332 3932190 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7333 3932190 : if (dfWeight <= 0)
7334 0 : continue;
7335 :
7336 3932200 : if (poWK->panUnifiedSrcValid != nullptr &&
7337 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7338 : {
7339 3 : dfTotalWeightInvalid += dfWeight;
7340 3 : continue;
7341 : }
7342 :
7343 3932190 : bool bAllValid = true;
7344 7274900 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7345 : {
7346 6160660 : double dfBandDensity = 0;
7347 6160660 : double dfValueImagTmp = 0;
7348 9503370 : if (!(GWKGetPixelValue(
7349 : poWK, iBand, iSrcOffset, &dfBandDensity,
7350 6160660 : &adfValueReal[iBand], &dfValueImagTmp) &&
7351 3342710 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7352 : {
7353 2817950 : bAllValid = false;
7354 2817950 : break;
7355 : }
7356 : }
7357 :
7358 3932190 : if (!bAllValid)
7359 : {
7360 2817950 : dfTotalWeightInvalid += dfWeight;
7361 2817950 : continue;
7362 : }
7363 :
7364 1114240 : bool bExcludedValueFound = false;
7365 2228350 : for (size_t i = 0;
7366 2228350 : i < poWK->m_aadfExcludedValues.size(); ++i)
7367 : {
7368 1114130 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7369 : {
7370 21 : bExcludedValueFound = true;
7371 21 : ++anCountExcludedValues[i];
7372 21 : dfTotalWeightExcluded += dfWeight;
7373 21 : break;
7374 : }
7375 : }
7376 1114240 : if (!bExcludedValueFound)
7377 : {
7378 : // Weighted incremental algorithm mean
7379 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7380 1114220 : dfTotalWeightRegular += dfWeight;
7381 4456870 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7382 : {
7383 3342650 : adfValueAveraged[iBand] +=
7384 6685300 : (dfWeight / dfTotalWeightRegular) *
7385 6685300 : (adfValueReal[iBand] -
7386 3342650 : adfValueAveraged[iBand]);
7387 : }
7388 : }
7389 : }
7390 : }
7391 :
7392 393224 : const double dfTotalWeight = dfTotalWeightInvalid +
7393 : dfTotalWeightExcluded +
7394 : dfTotalWeightRegular;
7395 393224 : if (dfTotalWeightInvalid > 0 &&
7396 : dfTotalWeightInvalid >=
7397 311293 : dfNodataValuesThreshold * dfTotalWeight)
7398 : {
7399 : // Do nothing: leave bHasFoundDensity set to false.
7400 : }
7401 81934 : else if (dfTotalWeightExcluded > 0 &&
7402 : dfTotalWeightExcluded >=
7403 6 : dfExcludedValuesThreshold * dfTotalWeight)
7404 : {
7405 : // Find the most represented excluded value tuple
7406 3 : size_t iExcludedValue = 0;
7407 3 : int nExcludedValueCount = 0;
7408 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7409 : ++i)
7410 : {
7411 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
7412 : {
7413 3 : iExcludedValue = i;
7414 3 : nExcludedValueCount = anCountExcludedValues[i];
7415 : }
7416 : }
7417 :
7418 3 : bHasFoundDensity = true;
7419 :
7420 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7421 : {
7422 9 : GWKSetPixelValue(
7423 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7424 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7425 : 0);
7426 3 : }
7427 : }
7428 81931 : else if (dfTotalWeightRegular > 0)
7429 : {
7430 81931 : bHasFoundDensity = true;
7431 :
7432 327720 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7433 : {
7434 245789 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7435 : /* dfBandDensity = */ 1.0,
7436 245789 : adfValueAveraged[iBand], 0);
7437 : }
7438 : }
7439 :
7440 : // Skip below loop on bands
7441 393224 : bDone = true;
7442 : }
7443 :
7444 : /* ====================================================================
7445 : */
7446 : /* Loop processing each band. */
7447 : /* ====================================================================
7448 : */
7449 :
7450 4439540 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7451 : {
7452 2776380 : double dfBandDensity = 0.0;
7453 2776380 : double dfValueReal = 0.0;
7454 2776380 : double dfValueImag = 0.0;
7455 2776380 : double dfValueRealTmp = 0.0;
7456 2776380 : double dfValueImagTmp = 0.0;
7457 :
7458 : /* --------------------------------------------------------------------
7459 : */
7460 : /* Collect the source value. */
7461 : /* --------------------------------------------------------------------
7462 : */
7463 :
7464 : // Loop over source lines and pixels - 3 possible algorithms.
7465 :
7466 : // poWK->eResample == GRA_Average.
7467 2776380 : if (nAlgo == GWKAOM_Average)
7468 : {
7469 300849 : double dfTotalWeight = 0.0;
7470 :
7471 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7472 : // in gcore/overview.cpp.
7473 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7474 : {
7475 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7476 330459 : iSrcOffset = iSrcXMin +
7477 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7478 803200 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7479 : iSrcX++, iSrcOffset++)
7480 : {
7481 472741 : if (bWrapOverX)
7482 630 : iSrcOffset =
7483 630 : (iSrcX % nSrcXSize) +
7484 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7485 :
7486 472745 : if (poWK->panUnifiedSrcValid != nullptr &&
7487 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7488 : iSrcOffset))
7489 : {
7490 1 : continue;
7491 : }
7492 :
7493 472740 : if (GWKGetPixelValue(
7494 : poWK, iBand, iSrcOffset, &dfBandDensity,
7495 945480 : &dfValueRealTmp, &dfValueImagTmp) &&
7496 472740 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7497 : {
7498 472740 : const double dfWeight =
7499 472740 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7500 472740 : if (dfWeight > 0)
7501 : {
7502 : // Weighted incremental algorithm mean
7503 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7504 472740 : dfTotalWeight += dfWeight;
7505 472740 : dfValueReal +=
7506 472740 : (dfWeight / dfTotalWeight) *
7507 472740 : (dfValueRealTmp - dfValueReal);
7508 472740 : if (bIsComplex)
7509 : {
7510 252 : dfValueImag +=
7511 252 : (dfWeight / dfTotalWeight) *
7512 252 : (dfValueImagTmp - dfValueImag);
7513 : }
7514 : }
7515 : }
7516 : }
7517 : }
7518 :
7519 300849 : if (dfTotalWeight > 0)
7520 : {
7521 300849 : if (poWK->bApplyVerticalShift)
7522 : {
7523 0 : if (!std::isfinite(padfZ[iDstX]))
7524 0 : continue;
7525 : // Subtract padfZ[] since the coordinate
7526 : // transformation is from target to source
7527 0 : dfValueReal =
7528 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7529 0 : padfZ[iDstX] *
7530 : dfMultFactorVerticalShiftPipeline;
7531 : }
7532 :
7533 300849 : dfBandDensity = 1;
7534 300849 : bHasFoundDensity = true;
7535 : }
7536 : } // GRA_Average.
7537 : // poWK->eResample == GRA_RMS.
7538 2776380 : if (nAlgo == GWKAOM_RMS)
7539 : {
7540 300416 : double dfTotalReal = 0.0;
7541 300416 : double dfTotalImag = 0.0;
7542 300416 : double dfTotalWeight = 0.0;
7543 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7544 : // in gcore/overview.cpp.
7545 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7546 : {
7547 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7548 330162 : iSrcOffset = iSrcXMin +
7549 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7550 802723 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7551 : iSrcX++, iSrcOffset++)
7552 : {
7553 472561 : if (bWrapOverX)
7554 630 : iSrcOffset =
7555 630 : (iSrcX % nSrcXSize) +
7556 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7557 :
7558 472561 : if (poWK->panUnifiedSrcValid != nullptr &&
7559 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7560 : iSrcOffset))
7561 : {
7562 0 : continue;
7563 : }
7564 :
7565 472561 : if (GWKGetPixelValue(
7566 : poWK, iBand, iSrcOffset, &dfBandDensity,
7567 945122 : &dfValueRealTmp, &dfValueImagTmp) &&
7568 472561 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7569 : {
7570 472561 : const double dfWeight =
7571 472561 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7572 472561 : dfTotalWeight += dfWeight;
7573 472561 : dfTotalReal +=
7574 472561 : dfValueRealTmp * dfValueRealTmp * dfWeight;
7575 472561 : if (bIsComplex)
7576 48 : dfTotalImag += dfValueImagTmp *
7577 48 : dfValueImagTmp * dfWeight;
7578 : }
7579 : }
7580 : }
7581 :
7582 300416 : if (dfTotalWeight > 0)
7583 : {
7584 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7585 :
7586 300416 : if (poWK->bApplyVerticalShift)
7587 : {
7588 0 : if (!std::isfinite(padfZ[iDstX]))
7589 0 : continue;
7590 : // Subtract padfZ[] since the coordinate
7591 : // transformation is from target to source
7592 0 : dfValueReal =
7593 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7594 0 : padfZ[iDstX] *
7595 : dfMultFactorVerticalShiftPipeline;
7596 : }
7597 :
7598 300416 : if (bIsComplex)
7599 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7600 :
7601 300416 : dfBandDensity = 1;
7602 300416 : bHasFoundDensity = true;
7603 : }
7604 : } // GRA_RMS.
7605 : #ifdef disabled
7606 : else if (nAlgo == GWKAOM_Sum)
7607 : // poWK->eResample == GRA_Sum
7608 : {
7609 : double dfTotalReal = 0.0;
7610 : double dfTotalImag = 0.0;
7611 : bool bFoundValid = false;
7612 :
7613 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7614 : {
7615 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7616 : iSrcOffset = iSrcXMin +
7617 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7618 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7619 : iSrcX++, iSrcOffset++)
7620 : {
7621 : if (bWrapOverX)
7622 : iSrcOffset =
7623 : (iSrcX % nSrcXSize) +
7624 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7625 :
7626 : if (poWK->panUnifiedSrcValid != nullptr &&
7627 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7628 : iSrcOffset))
7629 : {
7630 : continue;
7631 : }
7632 :
7633 : if (GWKGetPixelValue(
7634 : poWK, iBand, iSrcOffset, &dfBandDensity,
7635 : &dfValueRealTmp, &dfValueImagTmp) &&
7636 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7637 : {
7638 : const double dfWeight =
7639 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7640 : bFoundValid = true;
7641 : dfTotalReal += dfValueRealTmp * dfWeight;
7642 : if (bIsComplex)
7643 : {
7644 : dfTotalImag += dfValueImagTmp * dfWeight;
7645 : }
7646 : }
7647 : }
7648 : }
7649 :
7650 : if (bFoundValid)
7651 : {
7652 : dfValueReal = dfTotalReal;
7653 :
7654 : if (poWK->bApplyVerticalShift)
7655 : {
7656 : if (!std::isfinite(padfZ[iDstX]))
7657 : continue;
7658 : // Subtract padfZ[] since the coordinate
7659 : // transformation is from target to source
7660 : dfValueReal =
7661 : dfValueReal * poWK->dfMultFactorVerticalShift -
7662 : padfZ[iDstX] *
7663 : dfMultFactorVerticalShiftPipeline;
7664 : }
7665 :
7666 : if (bIsComplex)
7667 : {
7668 : dfValueImag = dfTotalImag;
7669 : }
7670 : dfBandDensity = 1;
7671 : bHasFoundDensity = true;
7672 : }
7673 : } // GRA_Sum.
7674 : #endif
7675 2475960 : else if (nAlgo == GWKAOM_Imode || nAlgo == GWKAOM_Fmode)
7676 : // poWK->eResample == GRA_Mode
7677 : {
7678 : // This code adapted from GDALDownsampleChunk32R_Mode() in
7679 : // gcore/overview.cpp.
7680 500026 : if (nAlgo == GWKAOM_Fmode) // int32 or float.
7681 : {
7682 : // Does it make sense to run a majority filter on
7683 : // floating point data? Regardless, it is supported here
7684 : // for the sake of compatibility. It won't look
7685 : // right on RGB images by the nature of the filter.
7686 3407 : nBins = 0;
7687 3407 : int iModeIndex = -1;
7688 :
7689 10228 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7690 : {
7691 6821 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7692 6821 : iSrcOffset =
7693 6821 : iSrcXMin +
7694 6821 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7695 20484 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7696 : iSrcX++, iSrcOffset++)
7697 : {
7698 13663 : if (bWrapOverX)
7699 0 : iSrcOffset =
7700 0 : (iSrcX % nSrcXSize) +
7701 0 : static_cast<GPtrDiff_t>(iSrcY) *
7702 0 : nSrcXSize;
7703 :
7704 13663 : if (poWK->panUnifiedSrcValid != nullptr &&
7705 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7706 : iSrcOffset))
7707 0 : continue;
7708 :
7709 13663 : if (GWKGetPixelValue(
7710 : poWK, iBand, iSrcOffset, &dfBandDensity,
7711 27326 : &dfValueRealTmp, &dfValueImagTmp) &&
7712 13663 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7713 : {
7714 13663 : const float fVal =
7715 13663 : static_cast<float>(dfValueRealTmp);
7716 13663 : const double dfWeight =
7717 13663 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7718 :
7719 : // Check array for existing entry.
7720 13663 : int i = 0;
7721 29135 : for (i = 0; i < nBins; ++i)
7722 : {
7723 17768 : if (pafRealVals[i] == fVal)
7724 : {
7725 :
7726 2296 : pafCounts[i] +=
7727 2296 : static_cast<float>(dfWeight);
7728 2296 : bool bValIsMaxCount =
7729 2296 : (pafCounts[i] >
7730 2296 : pafCounts[iModeIndex]);
7731 :
7732 2296 : if (!bValIsMaxCount &&
7733 1492 : pafCounts[i] ==
7734 1492 : pafCounts[iModeIndex])
7735 : {
7736 1487 : switch (eTieStrategy)
7737 : {
7738 1474 : case GWKTS_First:
7739 1474 : break;
7740 6 : case GWKTS_Min:
7741 6 : bValIsMaxCount =
7742 : fVal <
7743 : pafRealVals
7744 6 : [iModeIndex];
7745 6 : break;
7746 7 : case GWKTS_Max:
7747 7 : bValIsMaxCount =
7748 : fVal >
7749 : pafRealVals
7750 7 : [iModeIndex];
7751 7 : break;
7752 : }
7753 : }
7754 :
7755 2296 : if (bValIsMaxCount)
7756 : {
7757 807 : iModeIndex = i;
7758 : }
7759 :
7760 2296 : break;
7761 : }
7762 : }
7763 :
7764 : // Add to arr if entry not already there.
7765 13663 : if (i == nBins)
7766 : {
7767 11367 : pafRealVals[i] = fVal;
7768 11367 : pafCounts[i] =
7769 11367 : static_cast<float>(dfWeight);
7770 :
7771 11367 : if (iModeIndex < 0)
7772 3407 : iModeIndex = i;
7773 :
7774 11367 : ++nBins;
7775 : }
7776 : }
7777 : }
7778 : }
7779 :
7780 3407 : if (iModeIndex != -1)
7781 : {
7782 3407 : dfValueReal = pafRealVals[iModeIndex];
7783 :
7784 3407 : if (poWK->bApplyVerticalShift)
7785 : {
7786 0 : if (!std::isfinite(padfZ[iDstX]))
7787 0 : continue;
7788 : // Subtract padfZ[] since the coordinate
7789 : // transformation is from target to source
7790 0 : dfValueReal =
7791 0 : dfValueReal *
7792 0 : poWK->dfMultFactorVerticalShift -
7793 0 : padfZ[iDstX] *
7794 : dfMultFactorVerticalShiftPipeline;
7795 : }
7796 :
7797 3407 : dfBandDensity = 1;
7798 3407 : bHasFoundDensity = true;
7799 : }
7800 : }
7801 : else // byte or int16.
7802 : {
7803 496619 : float fMaxCount = 0.0f;
7804 496619 : int nMode = -1;
7805 496619 : bool bHasSourceValues = false;
7806 :
7807 496619 : memset(pafCounts, 0, nBins * sizeof(float));
7808 :
7809 1612550 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7810 : {
7811 1115930 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7812 1115930 : iSrcOffset =
7813 1115930 : iSrcXMin +
7814 1115930 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7815 4733150 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7816 : iSrcX++, iSrcOffset++)
7817 : {
7818 3617220 : if (bWrapOverX)
7819 630 : iSrcOffset =
7820 630 : (iSrcX % nSrcXSize) +
7821 630 : static_cast<GPtrDiff_t>(iSrcY) *
7822 630 : nSrcXSize;
7823 :
7824 3617220 : if (poWK->panUnifiedSrcValid != nullptr &&
7825 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7826 : iSrcOffset))
7827 0 : continue;
7828 :
7829 3617220 : if (GWKGetPixelValue(
7830 : poWK, iBand, iSrcOffset, &dfBandDensity,
7831 7234430 : &dfValueRealTmp, &dfValueImagTmp) &&
7832 3617220 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7833 : {
7834 3617220 : bHasSourceValues = true;
7835 3617220 : const int nVal =
7836 3617220 : static_cast<int>(dfValueRealTmp);
7837 3617220 : const int iBin = nVal + nBinsOffset;
7838 3617220 : const double dfWeight =
7839 3617220 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7840 :
7841 : // Sum the density.
7842 3617220 : pafCounts[iBin] +=
7843 3617220 : static_cast<float>(dfWeight);
7844 : // Is it the most common value so far?
7845 3617220 : bool bUpdateMode =
7846 3617220 : pafCounts[iBin] > fMaxCount;
7847 3617220 : if (!bUpdateMode &&
7848 778312 : pafCounts[iBin] == fMaxCount)
7849 : {
7850 218624 : switch (eTieStrategy)
7851 : {
7852 218616 : case GWKTS_First:
7853 218616 : break;
7854 4 : case GWKTS_Min:
7855 4 : bUpdateMode = nVal < nMode;
7856 4 : break;
7857 4 : case GWKTS_Max:
7858 4 : bUpdateMode = nVal > nMode;
7859 4 : break;
7860 : }
7861 : }
7862 3617220 : if (bUpdateMode)
7863 : {
7864 2838910 : nMode = nVal;
7865 2838910 : fMaxCount = pafCounts[iBin];
7866 : }
7867 : }
7868 : }
7869 : }
7870 :
7871 496619 : if (bHasSourceValues)
7872 : {
7873 496619 : dfValueReal = nMode;
7874 :
7875 496619 : if (poWK->bApplyVerticalShift)
7876 : {
7877 0 : if (!std::isfinite(padfZ[iDstX]))
7878 0 : continue;
7879 : // Subtract padfZ[] since the coordinate
7880 : // transformation is from target to source
7881 0 : dfValueReal =
7882 0 : dfValueReal *
7883 0 : poWK->dfMultFactorVerticalShift -
7884 0 : padfZ[iDstX] *
7885 : dfMultFactorVerticalShiftPipeline;
7886 : }
7887 :
7888 496619 : dfBandDensity = 1;
7889 496619 : bHasFoundDensity = true;
7890 : }
7891 500026 : }
7892 : } // GRA_Mode.
7893 1975930 : else if (nAlgo == GWKAOM_Max)
7894 : // poWK->eResample == GRA_Max.
7895 : {
7896 335037 : bool bFoundValid = false;
7897 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
7898 : // This code adapted from nAlgo 1 method, GRA_Average.
7899 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7900 : {
7901 952975 : iSrcOffset = iSrcXMin +
7902 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7903 4406540 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7904 : iSrcX++, iSrcOffset++)
7905 : {
7906 3453560 : if (bWrapOverX)
7907 630 : iSrcOffset =
7908 630 : (iSrcX % nSrcXSize) +
7909 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7910 :
7911 3456370 : if (poWK->panUnifiedSrcValid != nullptr &&
7912 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7913 : iSrcOffset))
7914 : {
7915 2446 : continue;
7916 : }
7917 :
7918 : // Returns pixel value if it is not no data.
7919 3451120 : if (GWKGetPixelValue(
7920 : poWK, iBand, iSrcOffset, &dfBandDensity,
7921 6902230 : &dfValueRealTmp, &dfValueImagTmp) &&
7922 3451120 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7923 : {
7924 3451120 : bFoundValid = true;
7925 3451120 : if (dfTotalReal < dfValueRealTmp)
7926 : {
7927 442642 : dfTotalReal = dfValueRealTmp;
7928 : }
7929 : }
7930 : }
7931 : }
7932 :
7933 335037 : if (bFoundValid)
7934 : {
7935 335037 : dfValueReal = dfTotalReal;
7936 :
7937 335037 : if (poWK->bApplyVerticalShift)
7938 : {
7939 0 : if (!std::isfinite(padfZ[iDstX]))
7940 0 : continue;
7941 : // Subtract padfZ[] since the coordinate
7942 : // transformation is from target to source
7943 0 : dfValueReal =
7944 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7945 0 : padfZ[iDstX] *
7946 : dfMultFactorVerticalShiftPipeline;
7947 : }
7948 :
7949 335037 : dfBandDensity = 1;
7950 335037 : bHasFoundDensity = true;
7951 : }
7952 : } // GRA_Max.
7953 1640900 : else if (nAlgo == GWKAOM_Min)
7954 : // poWK->eResample == GRA_Min.
7955 : {
7956 335012 : bool bFoundValid = false;
7957 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
7958 : // This code adapted from nAlgo 1 method, GRA_Average.
7959 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7960 : {
7961 952710 : iSrcOffset = iSrcXMin +
7962 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7963 4403460 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7964 : iSrcX++, iSrcOffset++)
7965 : {
7966 3450750 : if (bWrapOverX)
7967 630 : iSrcOffset =
7968 630 : (iSrcX % nSrcXSize) +
7969 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7970 :
7971 3450750 : if (poWK->panUnifiedSrcValid != nullptr &&
7972 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7973 : iSrcOffset))
7974 : {
7975 0 : continue;
7976 : }
7977 :
7978 : // Returns pixel value if it is not no data.
7979 3450750 : if (GWKGetPixelValue(
7980 : poWK, iBand, iSrcOffset, &dfBandDensity,
7981 6901500 : &dfValueRealTmp, &dfValueImagTmp) &&
7982 3450750 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7983 : {
7984 3450750 : bFoundValid = true;
7985 3450750 : if (dfTotalReal > dfValueRealTmp)
7986 : {
7987 443069 : dfTotalReal = dfValueRealTmp;
7988 : }
7989 : }
7990 : }
7991 : }
7992 :
7993 335012 : if (bFoundValid)
7994 : {
7995 335012 : dfValueReal = dfTotalReal;
7996 :
7997 335012 : if (poWK->bApplyVerticalShift)
7998 : {
7999 0 : if (!std::isfinite(padfZ[iDstX]))
8000 0 : continue;
8001 : // Subtract padfZ[] since the coordinate
8002 : // transformation is from target to source
8003 0 : dfValueReal =
8004 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8005 0 : padfZ[iDstX] *
8006 : dfMultFactorVerticalShiftPipeline;
8007 : }
8008 :
8009 335012 : dfBandDensity = 1;
8010 335012 : bHasFoundDensity = true;
8011 : }
8012 : } // GRA_Min.
8013 1305880 : else if (nAlgo == GWKAOM_Quant)
8014 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
8015 : {
8016 1005040 : bool bFoundValid = false;
8017 1005040 : std::vector<double> dfRealValuesTmp;
8018 :
8019 : // This code adapted from nAlgo 1 method, GRA_Average.
8020 3863170 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
8021 : {
8022 2858130 : iSrcOffset = iSrcXMin +
8023 2858130 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8024 13210400 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
8025 : iSrcX++, iSrcOffset++)
8026 : {
8027 10352300 : if (bWrapOverX)
8028 1890 : iSrcOffset =
8029 1890 : (iSrcX % nSrcXSize) +
8030 1890 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
8031 :
8032 10352300 : if (poWK->panUnifiedSrcValid != nullptr &&
8033 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
8034 : iSrcOffset))
8035 : {
8036 0 : continue;
8037 : }
8038 :
8039 : // Returns pixel value if it is not no data.
8040 10352300 : if (GWKGetPixelValue(
8041 : poWK, iBand, iSrcOffset, &dfBandDensity,
8042 20704500 : &dfValueRealTmp, &dfValueImagTmp) &&
8043 10352300 : dfBandDensity > BAND_DENSITY_THRESHOLD)
8044 : {
8045 10352300 : bFoundValid = true;
8046 10352300 : dfRealValuesTmp.push_back(dfValueRealTmp);
8047 : }
8048 : }
8049 : }
8050 :
8051 1005040 : if (bFoundValid)
8052 : {
8053 1005040 : std::sort(dfRealValuesTmp.begin(),
8054 : dfRealValuesTmp.end());
8055 : int quantIdx = static_cast<int>(
8056 1005040 : std::ceil(quant * dfRealValuesTmp.size() - 1));
8057 1005040 : dfValueReal = dfRealValuesTmp[quantIdx];
8058 :
8059 1005040 : if (poWK->bApplyVerticalShift)
8060 : {
8061 0 : if (!std::isfinite(padfZ[iDstX]))
8062 0 : continue;
8063 : // Subtract padfZ[] since the coordinate
8064 : // transformation is from target to source
8065 0 : dfValueReal =
8066 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
8067 0 : padfZ[iDstX] *
8068 : dfMultFactorVerticalShiftPipeline;
8069 : }
8070 :
8071 1005040 : dfBandDensity = 1;
8072 1005040 : bHasFoundDensity = true;
8073 1005040 : dfRealValuesTmp.clear();
8074 : }
8075 : } // Quantile.
8076 :
8077 : /* --------------------------------------------------------------------
8078 : */
8079 : /* We have a computed value from the source. Now apply it
8080 : * to */
8081 : /* the destination pixel. */
8082 : /* --------------------------------------------------------------------
8083 : */
8084 2776380 : if (bHasFoundDensity)
8085 : {
8086 : // TODO: Should we compute dfBandDensity in fct of
8087 : // nCount/nCount2, or use as a threshold to set the dest
8088 : // value?
8089 : // dfBandDensity = (float) nCount / nCount2;
8090 : // if( (float) nCount / nCount2 > 0.1 )
8091 : // or fix gdalwarp crop_to_cutline to crop partially
8092 : // overlapping pixels.
8093 2776380 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8094 : dfValueReal, dfValueImag);
8095 : }
8096 : }
8097 :
8098 1663170 : if (!bHasFoundDensity)
8099 311290 : continue;
8100 :
8101 : /* --------------------------------------------------------------------
8102 : */
8103 : /* Update destination density/validity masks. */
8104 : /* --------------------------------------------------------------------
8105 : */
8106 1351880 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8107 :
8108 1351880 : if (poWK->panDstValid != nullptr)
8109 : {
8110 74 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8111 : }
8112 : } /* Next iDstX */
8113 :
8114 : /* --------------------------------------------------------------------
8115 : */
8116 : /* Report progress to the user, and optionally cancel out. */
8117 : /* --------------------------------------------------------------------
8118 : */
8119 6497 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8120 0 : break;
8121 : }
8122 :
8123 : /* -------------------------------------------------------------------- */
8124 : /* Cleanup and return. */
8125 : /* -------------------------------------------------------------------- */
8126 130 : CPLFree(padfX);
8127 130 : CPLFree(padfY);
8128 130 : CPLFree(padfZ);
8129 130 : CPLFree(padfX2);
8130 130 : CPLFree(padfY2);
8131 130 : CPLFree(padfZ2);
8132 130 : CPLFree(pabSuccess);
8133 130 : CPLFree(pabSuccess2);
8134 130 : VSIFree(pafCounts);
8135 130 : VSIFree(pafRealVals);
8136 : }
8137 :
8138 : /************************************************************************/
8139 : /* getOrientation() */
8140 : /************************************************************************/
8141 :
typedef std::pair<double, double> XYPair;

// Returns the orientation of the turn made when walking p1 -> p2 -> p3:
//    1 if (p1,p2,p3) is clockwise oriented,
//   -1 if it is counter-clockwise oriented,
//    0 if the points are colinear (cross product smaller than 1e-20 in
//      absolute value).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    const double p1x = p1.first;
    const double p1y = p1.second;
    const double p2x = p2.first;
    const double p2y = p2.second;
    const double p3x = p3.first;
    const double p3y = p3.second;
    // Cross product of the two consecutive edge vectors (p2-p1) and (p3-p2).
    const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    if (std::abs(val) < 1e-20)
        return 0;
    else if (val > 0)
        return 1;
    else
        return -1;
}

/************************************************************************/
/*                              isConvex()                              */
/************************************************************************/

typedef std::vector<XYPair> XYPoly;

// Returns whether poly is convex, i.e. whether all its non-colinear turns
// have the same orientation.
//
// poly is expected to be closed (first point repeated as last point). The
// duplicated closing point is ignored and the vertices are walked cyclically,
// so that the turn at the first vertex (poly[n-2], poly[0], poly[1]) is
// checked too. Without that check, a concave polygon whose reflex vertex
// happens to be listed first would be reported as convex.
//
// Degenerate inputs with fewer than 3 distinct vertices (empty, point,
// segment) have no turn that could break convexity and return true.
static bool isConvex(const XYPoly &poly)
{
    const size_t n = poly.size();
    // Number of distinct vertices: drop the repeated closing point, if any.
    const size_t nVertices =
        (n >= 2 && poly.front() == poly.back()) ? n - 1 : n;
    if (nVertices < 3)
        return true;

    // Orientation of the first non-colinear turn met so far (0 = none yet).
    int last_orientation = 0;
    for (size_t i = 0; i < nVertices; ++i)
    {
        const int orientation =
            getOrientation(poly[i], poly[(i + 1) % nVertices],
                           poly[(i + 2) % nVertices]);
        if (orientation != 0)
        {
            if (last_orientation == 0)
                last_orientation = orientation;
            else if (orientation != last_orientation)
                return false;
        }
    }
    return true;
}
8191 :
8192 : /************************************************************************/
8193 : /* pointIntersectsConvexPoly() */
8194 : /************************************************************************/
8195 :
8196 : // Returns whether xy intersects poly, that must be closed and convex.
8197 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8198 : {
8199 6049100 : const size_t n = poly.size();
8200 6049100 : double dx1 = xy.first - poly[0].first;
8201 6049100 : double dy1 = xy.second - poly[0].second;
8202 6049100 : double dx2 = poly[1].first - poly[0].first;
8203 6049100 : double dy2 = poly[1].second - poly[0].second;
8204 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8205 :
8206 : // Check if the point remains on the same side (left/right) of all edges
8207 14556400 : for (size_t i = 2; i < n; i++)
8208 : {
8209 12793100 : dx1 = xy.first - poly[i - 1].first;
8210 12793100 : dy1 = xy.second - poly[i - 1].second;
8211 :
8212 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8213 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8214 :
8215 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8216 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8217 725558 : prevCrossProduct = crossProduct;
8218 12067500 : else if (prevCrossProduct * crossProduct < 0)
8219 4285760 : return false;
8220 : }
8221 :
8222 1763340 : return true;
8223 : }
8224 :
8225 : /************************************************************************/
8226 : /* getIntersection() */
8227 : /************************************************************************/
8228 :
8229 : /* Returns intersection of [p1,p2] with [p3,p4], if
8230 : * it is a single point, and the 2 segments are not colinear.
8231 : */
8232 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8233 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8234 : {
8235 11811000 : const double x1 = p1.first;
8236 11811000 : const double y1 = p1.second;
8237 11811000 : const double x2 = p2.first;
8238 11811000 : const double y2 = p2.second;
8239 11811000 : const double x3 = p3.first;
8240 11811000 : const double y3 = p3.second;
8241 11811000 : const double x4 = p4.first;
8242 11811000 : const double y4 = p4.second;
8243 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8244 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8245 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8246 9260780 : return false;
8247 :
8248 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8249 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8250 973924 : return false;
8251 :
8252 1576340 : const double t = t_num / denom;
8253 1576340 : xy.first = x1 + t * (x2 - x1);
8254 1576340 : xy.second = y1 + t * (y2 - y1);
8255 1576340 : return true;
8256 : }
8257 :
8258 : /************************************************************************/
8259 : /* getConvexPolyIntersection() */
8260 : /************************************************************************/
8261 :
8262 : // poly1 and poly2 must be closed and convex.
8263 : // The returned intersection will not necessary be closed.
8264 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8265 : XYPoly &intersection)
8266 : {
8267 785302 : intersection.clear();
8268 :
8269 : // Add all points of poly1 inside poly2
8270 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8271 : {
8272 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8273 1187430 : intersection.push_back(poly1[i]);
8274 : }
8275 785302 : if (intersection.size() == poly1.size() - 1)
8276 : {
8277 : // poly1 is inside poly2
8278 119100 : return;
8279 : }
8280 :
8281 : // Add all points of poly2 inside poly1
8282 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8283 : {
8284 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8285 575904 : intersection.push_back(poly2[i]);
8286 : }
8287 :
8288 : // Compute the intersection of all edges of both polygons
8289 726972 : XYPair xy;
8290 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8291 : {
8292 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8293 : {
8294 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8295 11631600 : poly2[i2 + 1], xy))
8296 : {
8297 1576230 : intersection.push_back(xy);
8298 : }
8299 : }
8300 : }
8301 :
8302 726972 : if (intersection.empty())
8303 60770 : return;
8304 :
8305 : // Find lowest-left point in intersection set
8306 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8307 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8308 3772450 : for (const auto &pair : intersection)
8309 : {
8310 3106240 : const double x = pair.first;
8311 3106240 : const double y = pair.second;
8312 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8313 : {
8314 1096040 : lowest_x = x;
8315 1096040 : lowest_y = y;
8316 : }
8317 : }
8318 :
8319 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8320 : {
8321 5737980 : const double p1x_diff = p1.first - lowest_x;
8322 5737980 : const double p1y_diff = p1.second - lowest_y;
8323 5737980 : const double p2x_diff = p2.first - lowest_x;
8324 5737980 : const double p2y_diff = p2.second - lowest_y;
8325 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8326 : {
8327 2655420 : if (p1x_diff >= 0)
8328 : {
8329 2655420 : if (p2x_diff >= 0)
8330 2655420 : return p1.first < p2.first;
8331 0 : return true;
8332 : }
8333 : else
8334 : {
8335 0 : if (p2x_diff >= 0)
8336 0 : return false;
8337 0 : return p1.first < p2.first;
8338 : }
8339 : }
8340 :
8341 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8342 1046960 : return p1.second < p2.second;
8343 :
8344 : double tan_p1;
8345 2035600 : if (p1x_diff == 0.0)
8346 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8347 : else
8348 1570980 : tan_p1 = p1y_diff / p1x_diff;
8349 :
8350 : double tan_p2;
8351 2035600 : if (p2x_diff == 0.0)
8352 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8353 : else
8354 1196080 : tan_p2 = p2y_diff / p2x_diff;
8355 :
8356 2035600 : if (tan_p1 >= 0)
8357 : {
8358 1904790 : if (tan_p2 >= 0)
8359 1881590 : return tan_p1 < tan_p2;
8360 : else
8361 23199 : return true;
8362 : }
8363 : else
8364 : {
8365 130806 : if (tan_p2 >= 0)
8366 103900 : return false;
8367 : else
8368 26906 : return tan_p1 < tan_p2;
8369 : }
8370 666202 : };
8371 :
8372 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8373 : // hull
8374 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8375 :
8376 : // Remove duplicated points
8377 666202 : size_t j = 1;
8378 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8379 : {
8380 2440040 : if (intersection[i] != intersection[i - 1])
8381 : {
8382 1452560 : if (j < i)
8383 545275 : intersection[j] = intersection[i];
8384 1452560 : ++j;
8385 : }
8386 : }
8387 666202 : intersection.resize(j);
8388 : }
8389 :
8390 : /************************************************************************/
8391 : /* getArea() */
8392 : /************************************************************************/
8393 :
8394 : // poly may or may not be closed.
8395 558521 : static double getArea(const XYPoly &poly)
8396 : {
8397 : // CPLAssert(poly.size() >= 2);
8398 558521 : const size_t nPointCount = poly.size();
8399 : double dfAreaSum =
8400 558521 : poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
8401 :
8402 1765140 : for (size_t i = 1; i < nPointCount - 1; i++)
8403 : {
8404 1206610 : dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
8405 : }
8406 :
8407 558521 : dfAreaSum += poly[nPointCount - 1].first *
8408 558521 : (poly[0].second - poly[nPointCount - 2].second);
8409 :
8410 558521 : return 0.5 * std::fabs(dfAreaSum);
8411 : }
8412 :
8413 : /************************************************************************/
8414 : /* GWKSumPreserving() */
8415 : /************************************************************************/
8416 :
8417 : static void GWKSumPreservingThread(void *pData);
8418 :
8419 18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8420 : {
8421 18 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8422 : }
8423 :
8424 18 : static void GWKSumPreservingThread(void *pData)
8425 : {
8426 18 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8427 18 : GDALWarpKernel *poWK = psJob->poWK;
8428 18 : const int iYMin = psJob->iYMin;
8429 18 : const int iYMax = psJob->iYMax;
8430 : const bool bIsAffineNoRotation =
8431 18 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8432 26 : poWK->pTransformerArg) &&
8433 : // for debug/testing purposes
8434 8 : CPLTestBool(
8435 18 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8436 :
8437 18 : const int nDstXSize = poWK->nDstXSize;
8438 18 : const int nSrcXSize = poWK->nSrcXSize;
8439 18 : const int nSrcYSize = poWK->nSrcYSize;
8440 :
8441 36 : std::vector<double> adfX0(nSrcXSize + 1);
8442 36 : std::vector<double> adfY0(nSrcXSize + 1);
8443 36 : std::vector<double> adfZ0(nSrcXSize + 1);
8444 36 : std::vector<double> adfX1(nSrcXSize + 1);
8445 36 : std::vector<double> adfY1(nSrcXSize + 1);
8446 36 : std::vector<double> adfZ1(nSrcXSize + 1);
8447 36 : std::vector<int> abSuccess0(nSrcXSize + 1);
8448 36 : std::vector<int> abSuccess1(nSrcXSize + 1);
8449 :
8450 : CPLRectObj sGlobalBounds;
8451 18 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8452 18 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8453 18 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8454 18 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8455 18 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8456 :
8457 : struct SourcePixel
8458 : {
8459 : int iSrcX;
8460 : int iSrcY;
8461 :
8462 : // Coordinates of source pixel in target pixel coordinates
8463 : double dfDstX0;
8464 : double dfDstY0;
8465 : double dfDstX1;
8466 : double dfDstY1;
8467 : double dfDstX2;
8468 : double dfDstY2;
8469 : double dfDstX3;
8470 : double dfDstY3;
8471 :
8472 : // Source pixel total area (might be larger than the one described
8473 : // by above coordinates, if the pixel was crossing the antimeridian
8474 : // and split)
8475 : double dfArea;
8476 : };
8477 :
8478 36 : std::vector<SourcePixel> sourcePixels;
8479 :
8480 36 : XYPoly discontinuityLeft(5);
8481 36 : XYPoly discontinuityRight(5);
8482 :
8483 : /* ==================================================================== */
8484 : /* First pass: transform the 4 corners of each potential */
8485 : /* contributing source pixel to target pixel coordinates. */
8486 : /* ==================================================================== */
8487 :
8488 : // Special case for top line
8489 : {
8490 18 : int iY = 0;
8491 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8492 : {
8493 1112 : adfX1[iX] = iX + poWK->nSrcXOff;
8494 1112 : adfY1[iX] = iY + poWK->nSrcYOff;
8495 1112 : adfZ1[iX] = 0;
8496 : }
8497 :
8498 18 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8499 : adfX1.data(), adfY1.data(), adfZ1.data(),
8500 : abSuccess1.data());
8501 :
8502 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8503 : {
8504 1112 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8505 0 : abSuccess1[iX] = FALSE;
8506 : else
8507 : {
8508 1112 : adfX1[iX] -= poWK->nDstXOff;
8509 1112 : adfY1[iX] -= poWK->nDstYOff;
8510 : }
8511 : }
8512 : }
8513 :
8514 413412 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8515 : {
8516 413412 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8517 205344 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8518 413412 : ? 1
8519 208068 : : -1;
8520 18 : };
8521 :
8522 : const auto FindDiscontinuity =
8523 80 : [poWK, psJob, getInsideXSign](
8524 : double dfXLeft, double dfXRight, double dfY,
8525 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8526 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8527 : {
8528 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8529 : {
8530 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8531 800 : double dfXMidReprojected = dfXMid;
8532 800 : dfYMidReprojected = dfY;
8533 800 : double dfZ = 0;
8534 800 : int nSuccess = 0;
8535 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8536 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8537 : &nSuccess);
8538 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8539 : {
8540 456 : dfXRight = dfXMid;
8541 456 : dfXMidReprojectedRight = dfXMidReprojected;
8542 : }
8543 : else
8544 : {
8545 344 : dfXLeft = dfXMid;
8546 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8547 : }
8548 : }
8549 80 : };
8550 :
8551 566 : for (int iY = 0; iY < nSrcYSize; ++iY)
8552 : {
8553 548 : std::swap(adfX0, adfX1);
8554 548 : std::swap(adfY0, adfY1);
8555 548 : std::swap(adfZ0, adfZ1);
8556 548 : std::swap(abSuccess0, abSuccess1);
8557 :
8558 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8559 : {
8560 103964 : adfX1[iX] = iX + poWK->nSrcXOff;
8561 103964 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8562 103964 : adfZ1[iX] = 0;
8563 : }
8564 :
8565 548 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8566 : adfX1.data(), adfY1.data(), adfZ1.data(),
8567 : abSuccess1.data());
8568 :
8569 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8570 : {
8571 103964 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8572 0 : abSuccess1[iX] = FALSE;
8573 : else
8574 : {
8575 103964 : adfX1[iX] -= poWK->nDstXOff;
8576 103964 : adfY1[iX] -= poWK->nDstYOff;
8577 : }
8578 : }
8579 :
8580 103964 : for (int iX = 0; iX < nSrcXSize; ++iX)
8581 : {
8582 206832 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8583 103416 : abSuccess1[iX + 1])
8584 : {
8585 : /* --------------------------------------------------------------------
8586 : */
8587 : /* Do not try to apply transparent source pixels to the
8588 : * destination.*/
8589 : /* --------------------------------------------------------------------
8590 : */
8591 103416 : const auto iSrcOffset =
8592 103416 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8593 105816 : if (poWK->panUnifiedSrcValid != nullptr &&
8594 2400 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8595 : {
8596 10971 : continue;
8597 : }
8598 :
8599 103410 : if (poWK->pafUnifiedSrcDensity != nullptr)
8600 : {
8601 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8602 : SRC_DENSITY_THRESHOLD)
8603 0 : continue;
8604 : }
8605 :
8606 : SourcePixel sp;
8607 103410 : sp.dfArea = 0;
8608 103410 : sp.dfDstX0 = adfX0[iX];
8609 103410 : sp.dfDstY0 = adfY0[iX];
8610 103410 : sp.dfDstX1 = adfX0[iX + 1];
8611 103410 : sp.dfDstY1 = adfY0[iX + 1];
8612 103410 : sp.dfDstX2 = adfX1[iX + 1];
8613 103410 : sp.dfDstY2 = adfY1[iX + 1];
8614 103410 : sp.dfDstX3 = adfX1[iX];
8615 103410 : sp.dfDstY3 = adfY1[iX];
8616 :
8617 : // Detect pixel that likely cross the anti-meridian and
8618 : // introduce a discontinuity when reprojected.
8619 :
8620 103410 : if (getInsideXSign(adfX0[iX]) !=
8621 103506 : getInsideXSign(adfX0[iX + 1]) &&
8622 164 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8623 68 : getInsideXSign(adfX0[iX + 1]) ==
8624 103574 : getInsideXSign(adfX1[iX + 1]) &&
8625 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8626 : 0)
8627 : {
8628 40 : double dfXMidReprojectedLeftTop = 0;
8629 40 : double dfXMidReprojectedRightTop = 0;
8630 40 : double dfYMidReprojectedTop = 0;
8631 40 : FindDiscontinuity(
8632 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8633 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8634 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8635 : dfYMidReprojectedTop);
8636 40 : double dfXMidReprojectedLeftBottom = 0;
8637 40 : double dfXMidReprojectedRightBottom = 0;
8638 40 : double dfYMidReprojectedBottom = 0;
8639 40 : FindDiscontinuity(
8640 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8641 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8642 : dfXMidReprojectedLeftBottom,
8643 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8644 :
8645 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8646 40 : discontinuityLeft[1] =
8647 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8648 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8649 40 : dfYMidReprojectedBottom);
8650 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8651 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8652 :
8653 40 : discontinuityRight[0] =
8654 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8655 40 : discontinuityRight[1] =
8656 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8657 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8658 40 : dfYMidReprojectedBottom);
8659 40 : discontinuityRight[3] =
8660 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8661 40 : discontinuityRight[4] =
8662 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8663 :
8664 40 : sp.dfArea = getArea(discontinuityLeft) +
8665 40 : getArea(discontinuityRight);
8666 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8667 : {
8668 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8669 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8670 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8671 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8672 : }
8673 : else
8674 : {
8675 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8676 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8677 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8678 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8679 : }
8680 : }
8681 :
8682 : // Bounding box of source pixel (expressed in target pixel
8683 : // coordinates)
8684 : CPLRectObj sRect;
8685 103410 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8686 103410 : std::min(sp.dfDstX2, sp.dfDstX3));
8687 103410 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8688 103410 : std::min(sp.dfDstY2, sp.dfDstY3));
8689 103410 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8690 103410 : std::max(sp.dfDstX2, sp.dfDstX3));
8691 103410 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8692 103410 : std::max(sp.dfDstY2, sp.dfDstY3));
8693 103410 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8694 101350 : sRect.miny < iYMax && sRect.maxy > iYMin))
8695 : {
8696 10852 : continue;
8697 : }
8698 :
8699 92558 : sp.iSrcX = iX;
8700 92558 : sp.iSrcY = iY;
8701 :
8702 92558 : if (!bIsAffineNoRotation)
8703 : {
8704 : // Check polygon validity (no self-crossing)
8705 89745 : XYPair xy;
8706 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8707 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8708 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8709 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8710 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8711 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8712 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8713 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8714 : {
8715 113 : continue;
8716 : }
8717 : }
8718 :
8719 92445 : CPLQuadTreeInsertWithBounds(
8720 : hQuadTree,
8721 : reinterpret_cast<void *>(
8722 92445 : static_cast<uintptr_t>(sourcePixels.size())),
8723 : &sRect);
8724 :
8725 92445 : sourcePixels.push_back(sp);
8726 : }
8727 : }
8728 : }
8729 :
8730 36 : std::vector<double> adfRealValue(poWK->nBands);
8731 36 : std::vector<double> adfImagValue(poWK->nBands);
8732 36 : std::vector<double> adfBandDensity(poWK->nBands);
8733 36 : std::vector<double> adfWeight(poWK->nBands);
8734 :
8735 : #ifdef CHECK_SUM_WITH_GEOS
8736 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8737 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8738 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8739 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8740 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8741 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8742 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8743 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8744 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8745 :
8746 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8747 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8748 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8749 : #endif
8750 :
8751 : const XYPoly xy1{
8752 36 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8753 36 : XYPoly xy2(5);
8754 36 : XYPoly xy2_triangle(4);
8755 36 : XYPoly intersection;
8756 :
8757 : /* ==================================================================== */
8758 : /* Loop over output lines. */
8759 : /* ==================================================================== */
8760 891 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8761 : {
8762 : CPLRectObj sRect;
8763 873 : sRect.miny = iDstY;
8764 873 : sRect.maxy = iDstY + 1;
8765 :
8766 : /* ====================================================================
8767 : */
8768 : /* Loop over pixels in output scanline. */
8769 : /* ====================================================================
8770 : */
8771 221042 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8772 : {
8773 220169 : sRect.minx = iDstX;
8774 220169 : sRect.maxx = iDstX + 1;
8775 220169 : int nSourcePixels = 0;
8776 : void **pahSourcePixel =
8777 220169 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8778 220169 : if (nSourcePixels == 0)
8779 : {
8780 1258 : CPLFree(pahSourcePixel);
8781 1262 : continue;
8782 : }
8783 :
8784 218911 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8785 218911 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8786 218911 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8787 218911 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
8788 218911 : double dfDensity = 0;
8789 218911 : double dfTotalWeight = 0;
8790 :
8791 : /* ====================================================================
8792 : */
8793 : /* Iterate over each contributing source pixel to add its
8794 : */
8795 : /* value weighted by the ratio of the area of its
8796 : * intersection */
8797 : /* with the target pixel divided by the area of the source
8798 : */
8799 : /* pixel. */
8800 : /* ====================================================================
8801 : */
8802 1020520 : for (int i = 0; i < nSourcePixels; ++i)
8803 : {
8804 801614 : const int iSourcePixel = static_cast<int>(
8805 801614 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8806 801614 : auto &sp = sourcePixels[iSourcePixel];
8807 :
8808 801614 : double dfWeight = 0.0;
8809 801614 : if (bIsAffineNoRotation)
8810 : {
8811 : // Optimization since the source pixel is a rectangle in
8812 : // target pixel coordinates
8813 16312 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8814 16312 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8815 16312 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8816 16312 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8817 16312 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8818 16312 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8819 16312 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8820 16312 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8821 16312 : dfWeight =
8822 16312 : ((dfIntersMaxX - dfIntersMinX) *
8823 16312 : (dfIntersMaxY - dfIntersMinY)) /
8824 16312 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8825 : }
8826 : else
8827 : {
8828 : // Compute the polygon of the source pixel in target pixel
8829 : // coordinates, and shifted to the target pixel (unit square
8830 : // coordinates)
8831 :
8832 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8833 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8834 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8835 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8836 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8837 :
8838 785302 : if (isConvex(xy2))
8839 : {
8840 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
8841 785302 : if (intersection.size() >= 3)
8842 : {
8843 468849 : dfWeight = getArea(intersection);
8844 : }
8845 : }
8846 : else
8847 : {
8848 : // Split xy2 into 2 triangles.
8849 0 : xy2_triangle[0] = xy2[0];
8850 0 : xy2_triangle[1] = xy2[1];
8851 0 : xy2_triangle[2] = xy2[2];
8852 0 : xy2_triangle[3] = xy2[0];
8853 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8854 : intersection);
8855 0 : if (intersection.size() >= 3)
8856 : {
8857 0 : dfWeight = getArea(intersection);
8858 : }
8859 :
8860 0 : xy2_triangle[1] = xy2[2];
8861 0 : xy2_triangle[2] = xy2[3];
8862 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8863 : intersection);
8864 0 : if (intersection.size() >= 3)
8865 : {
8866 0 : dfWeight += getArea(intersection);
8867 : }
8868 : }
8869 785302 : if (dfWeight > 0.0)
8870 : {
8871 468828 : if (sp.dfArea == 0)
8872 89592 : sp.dfArea = getArea(xy2);
8873 468828 : dfWeight /= sp.dfArea;
8874 : }
8875 :
8876 : #ifdef CHECK_SUM_WITH_GEOS
8877 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
8878 : sp.dfDstX0 - iDstX,
8879 : sp.dfDstY0 - iDstY);
8880 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
8881 : sp.dfDstX1 - iDstX,
8882 : sp.dfDstY1 - iDstY);
8883 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
8884 : sp.dfDstX2 - iDstX,
8885 : sp.dfDstY2 - iDstY);
8886 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
8887 : sp.dfDstX3 - iDstX,
8888 : sp.dfDstY3 - iDstY);
8889 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
8890 : sp.dfDstX0 - iDstX,
8891 : sp.dfDstY0 - iDstY);
8892 :
8893 : double dfWeightGEOS = 0.0;
8894 : auto hIntersection =
8895 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
8896 : if (hIntersection)
8897 : {
8898 : double dfIntersArea = 0.0;
8899 : if (GEOSArea_r(hGEOSContext, hIntersection,
8900 : &dfIntersArea) &&
8901 : dfIntersArea > 0)
8902 : {
8903 : double dfSourceArea = 0.0;
8904 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
8905 : {
8906 : dfWeightGEOS = dfIntersArea / dfSourceArea;
8907 : }
8908 : }
8909 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
8910 : }
8911 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
8912 : {
8913 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
8914 : dfWeight, dfWeightGEOS);
8915 : printf("xy2: "); // ok
8916 : for (const auto &xy : xy2)
8917 : printf("[%f, %f], ", xy.first, xy.second); // ok
8918 : printf("\n"); // ok
8919 : printf("intersection: "); // ok
8920 : for (const auto &xy : intersection)
8921 : printf("[%f, %f], ", xy.first, xy.second); // ok
8922 : printf("\n"); // ok
8923 : }
8924 : #endif
8925 : }
8926 801614 : if (dfWeight > 0.0)
8927 : {
8928 474099 : const GPtrDiff_t iSrcOffset =
8929 474099 : sp.iSrcX +
8930 474099 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
8931 474099 : dfTotalWeight += dfWeight;
8932 :
8933 474099 : if (poWK->pafUnifiedSrcDensity != nullptr)
8934 : {
8935 0 : dfDensity +=
8936 0 : dfWeight * poWK->pafUnifiedSrcDensity[iSrcOffset];
8937 : }
8938 : else
8939 : {
8940 474099 : dfDensity += dfWeight;
8941 : }
8942 :
8943 1818720 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8944 : {
8945 : // Returns pixel value if it is not no data.
8946 : double dfBandDensity;
8947 : double dfRealValue;
8948 : double dfImagValue;
8949 2689240 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
8950 : &dfBandDensity, &dfRealValue,
8951 : &dfImagValue) &&
8952 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
8953 : {
8954 0 : continue;
8955 : }
8956 :
8957 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
8958 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
8959 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
8960 1344620 : adfWeight[iBand] += dfWeight;
8961 : }
8962 : }
8963 : }
8964 :
8965 218911 : CPLFree(pahSourcePixel);
8966 :
8967 : /* --------------------------------------------------------------------
8968 : */
8969 : /* Update destination pixel value. */
8970 : /* --------------------------------------------------------------------
8971 : */
8972 218911 : bool bHasFoundDensity = false;
8973 218911 : const GPtrDiff_t iDstOffset =
8974 218911 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
8975 827822 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8976 : {
8977 608911 : if (adfWeight[iBand] > 0)
8978 : {
8979 : const double dfBandDensity =
8980 608907 : adfBandDensity[iBand] / adfWeight[iBand];
8981 608907 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
8982 : {
8983 608907 : bHasFoundDensity = true;
8984 608907 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8985 608907 : adfRealValue[iBand],
8986 608907 : adfImagValue[iBand]);
8987 : }
8988 : }
8989 : }
8990 :
8991 218911 : if (!bHasFoundDensity)
8992 4 : continue;
8993 :
8994 : /* --------------------------------------------------------------------
8995 : */
8996 : /* Update destination density/validity masks. */
8997 : /* --------------------------------------------------------------------
8998 : */
8999 218907 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
9000 :
9001 218907 : if (poWK->panDstValid != nullptr)
9002 : {
9003 11750 : CPLMaskSet(poWK->panDstValid, iDstOffset);
9004 : }
9005 : }
9006 :
9007 : /* --------------------------------------------------------------------
9008 : */
9009 : /* Report progress to the user, and optionally cancel out. */
9010 : /* --------------------------------------------------------------------
9011 : */
9012 873 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
9013 0 : break;
9014 : }
9015 :
9016 : #ifdef CHECK_SUM_WITH_GEOS
9017 : GEOSGeom_destroy_r(hGEOSContext, hP1);
9018 : GEOSGeom_destroy_r(hGEOSContext, hP2);
9019 : OGRGeometry::freeGEOSContext(hGEOSContext);
9020 : #endif
9021 18 : CPLQuadTreeDestroy(hQuadTree);
9022 18 : }
|