Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: High Performance Image Reprojector
4 : * Purpose: Implementation of the GDALWarpKernel class. Implements the actual
5 : * image warping for a "chunk" of input and output imagery already
6 : * loaded into memory.
7 : * Author: Frank Warmerdam, warmerdam@pobox.com
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2003, Frank Warmerdam <warmerdam@pobox.com>
11 : * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
12 : *
13 : * SPDX-License-Identifier: MIT
14 : ****************************************************************************/
15 :
16 : #include "cpl_port.h"
17 : #include "gdalwarper.h"
18 :
19 : #include <cfloat>
20 : #include <cmath>
21 : #include <cstddef>
22 : #include <cstdlib>
23 : #include <cstring>
24 :
25 : #include <algorithm>
26 : #include <limits>
27 : #include <mutex>
28 : #include <new>
29 : #include <utility>
30 : #include <vector>
31 :
32 : #include "cpl_atomic_ops.h"
33 : #include "cpl_conv.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_mask.h"
37 : #include "cpl_multiproc.h"
38 : #include "cpl_progress.h"
39 : #include "cpl_string.h"
40 : #include "cpl_vsi.h"
41 : #include "cpl_worker_thread_pool.h"
42 : #include "cpl_quad_tree.h"
43 : #include "gdal.h"
44 : #include "gdal_alg.h"
45 : #include "gdal_alg_priv.h"
46 : #include "gdal_thread_pool.h"
47 : #include "gdalresamplingkernels.h"
48 :
49 : // #define CHECK_SUM_WITH_GEOS
50 : #ifdef CHECK_SUM_WITH_GEOS
51 : #include "ogr_geometry.h"
52 : #include "ogr_geos.h"
53 : #endif
54 :
55 : #ifdef USE_NEON_OPTIMIZATIONS
56 : #include "include_sse2neon.h"
57 : #define USE_SSE2
58 :
59 : #include "gdalsse_priv.h"
60 :
61 : // We restrict to 64bit processors because they are guaranteed to have SSE2.
62 : // Could possibly be used too on 32bit, but we would need to check at runtime.
63 : #elif defined(__x86_64) || defined(_M_X64)
64 : #define USE_SSE2
65 :
66 : #include "gdalsse_priv.h"
67 :
68 : #if __SSE4_1__
69 : #include <smmintrin.h>
70 : #endif
71 :
72 : #if __SSE3__
73 : #include <pmmintrin.h>
74 : #endif
75 :
76 : #endif
77 :
// Small positive cut-off values (1e-10 and 1e-9) for band and source
// density figures, in double and float flavours.
// NOTE(review): the code using them is outside this part of the file;
// presumably densities below these values are treated as zero - confirm.
78 : constexpr double BAND_DENSITY_THRESHOLD = 0.0000000001;
79 : constexpr float SRC_DENSITY_THRESHOLD_FLOAT = 0.000000001f;
80 : constexpr double SRC_DENSITY_THRESHOLD_DOUBLE = 0.000000001;
81 :
82 : // #define INSTANTIATE_FLOAT64_SSE2_IMPL
83 :
// Kernel radius for each resampling algorithm, indexed by the numeric
// GDALResampleAlg value and returned by GWKGetFilterRadius().
// The table must hold exactly GRA_LAST_VALUE + 1 entries: a static_assert
// in GWKGetFilterRadius() enforces this, so add a row here whenever a new
// resampling algorithm is introduced. Slot 7 is a reserved placeholder.
84 : static const int anGWKFilterRadius[] = {
85 : 0, // Nearest neighbour
86 : 1, // Bilinear
87 : 2, // Cubic Convolution (Catmull-Rom)
88 : 2, // Cubic B-Spline
89 : 3, // Lanczos windowed sinc
90 : 0, // Average
91 : 0, // Mode
92 : 0, // Reserved GRA_Gauss=7
93 : 0, // Max
94 : 0, // Min
95 : 0, // Med
96 : 0, // Q1
97 : 0, // Q3
98 : 0, // Sum
99 : 0, // RMS
100 : };
101 :
// Forward declarations of the filter functions referenced by the table
// below. Each one maps a double to a double.
102 : static double GWKBilinear(double dfX);
103 : static double GWKCubic(double dfX);
104 : static double GWKBSpline(double dfX);
105 : static double GWKLanczosSinc(double dfX);
106 :
// Filter function for each resampling algorithm, indexed by the numeric
// GDALResampleAlg value and returned by GWKGetFilterFunc(). Algorithms
// without a filter function have a nullptr entry. The table must hold
// exactly GRA_LAST_VALUE + 1 entries (checked by a static_assert in
// GWKGetFilterFunc()).
107 : static const FilterFuncType apfGWKFilter[] = {
108 : nullptr, // Nearest neighbour
109 : GWKBilinear, // Bilinear
110 : GWKCubic, // Cubic Convolution (Catmull-Rom)
111 : GWKBSpline, // Cubic B-Spline
112 : GWKLanczosSinc, // Lanczos windowed sinc
113 : nullptr, // Average
114 : nullptr, // Mode
115 : nullptr, // Reserved GRA_Gauss=7
116 : nullptr, // Max
117 : nullptr, // Min
118 : nullptr, // Med
119 : nullptr, // Q1
120 : nullptr, // Q3
121 : nullptr, // Sum
122 : nullptr, // RMS
123 : };
124 :
// Forward declarations of the "4 values" variants of the filter functions.
// They take a pointer to an array of doubles and return a double.
// NOTE(review): judging by the name they handle 4 values per call, and the
// non-const pointer suggests the array may be modified in place - confirm
// against the definitions, which are outside this part of the file.
125 : // TODO(schwehr): Can we make these functions have a const * const arg?
126 : static double GWKBilinear4Values(double *padfVals);
127 : static double GWKCubic4Values(double *padfVals);
128 : static double GWKBSpline4Values(double *padfVals);
129 : static double GWKLanczosSinc4Values(double *padfVals);
130 :
// "4 values" filter function for each resampling algorithm, indexed by the
// numeric GDALResampleAlg value and returned by GWKGetFilterFunc4Values().
// Algorithms without such a function have a nullptr entry. The table must
// hold exactly GRA_LAST_VALUE + 1 entries (checked by a static_assert in
// GWKGetFilterFunc4Values()).
131 : static const FilterFunc4ValuesType apfGWKFilter4Values[] = {
132 : nullptr, // Nearest neighbour
133 : GWKBilinear4Values, // Bilinear
134 : GWKCubic4Values, // Cubic Convolution (Catmull-Rom)
135 : GWKBSpline4Values, // Cubic B-Spline
136 : GWKLanczosSinc4Values, // Lanczos windowed sinc
137 : nullptr, // Average
138 : nullptr, // Mode
139 : nullptr, // Reserved GRA_Gauss=7
140 : nullptr, // Max
141 : nullptr, // Min
142 : nullptr, // Med
143 : nullptr, // Q1
144 : nullptr, // Q3
145 : nullptr, // Sum
146 : nullptr, // RMS
147 : };
148 :
149 13137 : int GWKGetFilterRadius(GDALResampleAlg eResampleAlg)
150 : {
151 : static_assert(CPL_ARRAYSIZE(anGWKFilterRadius) == GRA_LAST_VALUE + 1,
152 : "Bad size of anGWKFilterRadius");
153 13137 : return anGWKFilterRadius[eResampleAlg];
154 : }
155 :
156 5027 : FilterFuncType GWKGetFilterFunc(GDALResampleAlg eResampleAlg)
157 : {
158 : static_assert(CPL_ARRAYSIZE(apfGWKFilter) == GRA_LAST_VALUE + 1,
159 : "Bad size of apfGWKFilter");
160 5027 : return apfGWKFilter[eResampleAlg];
161 : }
162 :
163 5028 : FilterFunc4ValuesType GWKGetFilterFunc4Values(GDALResampleAlg eResampleAlg)
164 : {
165 : static_assert(CPL_ARRAYSIZE(apfGWKFilter4Values) == GRA_LAST_VALUE + 1,
166 : "Bad size of apfGWKFilter4Values");
167 5028 : return apfGWKFilter4Values[eResampleAlg];
168 : }
169 :
// Forward declarations of the file-local warping entry points. Each takes
// the kernel to process and returns a CPLErr. The names encode the
// resampling method, the mask handling and the pixel type they target.
// The two "Double" variants only exist when INSTANTIATE_FLOAT64_SSE2_IMPL
// is defined.
// NOTE(review): the definitions and the code selecting among them are
// outside this part of the file.
170 : static CPLErr GWKGeneralCase(GDALWarpKernel *);
171 : static CPLErr GWKRealCase(GDALWarpKernel *poWK);
172 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
173 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
174 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
175 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
176 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
177 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
178 : #endif
179 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK);
180 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK);
181 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
182 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
183 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
184 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
185 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK);
186 : #endif
187 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
188 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK);
189 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK);
190 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK);
191 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK);
192 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK);
193 : static CPLErr GWKAverageOrMode(GDALWarpKernel *);
194 : static CPLErr GWKSumPreserving(GDALWarpKernel *);
195 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
196 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
197 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *);
198 :
199 : /************************************************************************/
200 : /* GWKJobStruct */
201 : /************************************************************************/
202 :
// Description of one unit of warping work handed to a worker function.
// GWKRun() fills one instance per worker thread; GWKGenericMonoThread()
// builds a single instance on the stack for the single-threaded path.
// The reference members alias state owned by a GWKThreadData, so a job must
// not outlive the GWKThreadData it was constructed from.
203 : struct GWKJobStruct
204 : {
// Shared mutex, locked by GWKProgressThread() around counter / stopFlag.
205 : std::mutex &mutex;
// Shared condition variable, notified by GWKProgressThread().
206 : std::condition_variable &cv;
// Progress counter private to the job; incremented by
// GWKProgressMonoThread() on the single-threaded path.
207 : int counterSingleThreaded = 0;
// Shared progress counter, incremented by GWKProgressThread().
208 : int &counter;
// Shared flag set to true when the computation must be interrupted.
209 : bool &stopFlag;
// Kernel this job works on.
210 : GDALWarpKernel *poWK = nullptr;
// Bounds of the destination lines assigned to the job: GWKRun() gives
// each job a slice of [0, nDstYSize), the single-threaded path gives
// [0, nDstYSize). iYMax is also the divisor used by
// GWKProgressMonoThread() to compute the completion ratio.
211 : int iYMin = 0;
212 : int iYMax = 0;
// Progress hook; returns TRUE if the computation must be interrupted.
// GWKRun() leaves it untouched when the kernel uses GDALDummyProgress.
213 : int (*pfnProgress)(GWKJobStruct *psJob) = nullptr;
// Transformer argument to use for this job. Set from the kernel on the
// single-threaded path and by ThreadFuncAdapter() on the threaded path.
214 : void *pTransformerArg = nullptr;
215 : // used by GWKRun() to assign the proper pTransformerArg
// Actual worker function, invoked by ThreadFuncAdapter() with the job as
// argument once pTransformerArg has been set.
216 : void (*pfnFunc)(void *) = nullptr;
217 :
// Bind the job to the shared synchronization state; all other members
// keep their default values until the caller fills them in.
218 2758 : GWKJobStruct(std::mutex &mutex_, std::condition_variable &cv_,
219 : int &counter_, bool &stopFlag_)
220 2758 : : mutex(mutex_), cv(cv_), counter(counter_), stopFlag(stopFlag_)
221 : {
222 2758 : }
223 : };
224 :
// State shared between GWKRun() and its worker jobs. Allocated by
// GWKThreadsCreate() and released by GWKThreadsEnd(); a throw-away instance
// is also used by GWKGenericMonoThread().
225 : struct GWKThreadData
226 : {
// Job queue created from the global thread pool by GWKThreadsCreate().
// Stays null when multi-threading is not enabled, in which case GWKRun()
// falls back to GWKGenericMonoThread().
227 : std::unique_ptr<CPLJobQueue> poJobQueue{};
// Job descriptors, nMaxThreads of them, reused by every GWKRun() call.
228 : std::unique_ptr<std::vector<GWKJobStruct>> threadJobs{};
// Upper bound on the number of jobs GWKRun() may start.
229 : int nMaxThreads{0};
// Progress counter incremented by GWKProgressThread() and polled by
// GWKRun() while it reports progress.
230 : int counter{0};
// Set to true to ask the workers to stop.
231 : bool stopFlag{false};
// Guards counter, stopFlag and the transformer bookkeeping below.
232 : std::mutex mutex{};
// Notified by GWKProgressThread(), waited on by GWKRun().
233 : std::condition_variable cv{};
// True while a worker is borrowing pTransformerArgInput (see
// ThreadFuncAdapter()).
234 : bool bTransformerArgInputAssignedToThread{false};
235 : void *pTransformerArgInput{
236 : nullptr}; // owned by calling layer. Not to be destroyed
// Transformer argument used by each worker, keyed by the value returned
// by CPLGetPID() in that worker. Entries still present when
// GWKThreadsEnd() runs are destroyed with GDALDestroyTransformer().
237 : std::map<GIntBig, void *> mapThreadToTransformerArg{};
// Number of jobs submitted by the current GWKRun() call.
238 : int nTotalThreadCountForThisRun = 0;
// Number of those jobs that have entered ThreadFuncAdapter() so far.
239 : int nCurThreadCountForThisRun = 0;
240 : };
241 :
242 : /************************************************************************/
243 : /* GWKProgressThread() */
244 : /************************************************************************/
245 :
246 : // Return TRUE if the computation must be interrupted.
247 36 : static int GWKProgressThread(GWKJobStruct *psJob)
248 : {
249 36 : bool stop = false;
250 : {
251 36 : std::lock_guard<std::mutex> lock(psJob->mutex);
252 36 : psJob->counter++;
253 36 : stop = psJob->stopFlag;
254 : }
255 36 : psJob->cv.notify_one();
256 :
257 36 : return stop;
258 : }
259 :
260 : /************************************************************************/
261 : /* GWKProgressMonoThread() */
262 : /************************************************************************/
263 :
264 : // Return TRUE if the computation must be interrupted.
265 358950 : static int GWKProgressMonoThread(GWKJobStruct *psJob)
266 : {
267 358950 : GDALWarpKernel *poWK = psJob->poWK;
268 358952 : if (!poWK->pfnProgress(poWK->dfProgressBase +
269 358950 : poWK->dfProgressScale *
270 358950 : (++psJob->counterSingleThreaded /
271 358950 : static_cast<double>(psJob->iYMax)),
272 : "", poWK->pProgress))
273 : {
274 2 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
275 1 : psJob->stopFlag = true;
276 1 : return TRUE;
277 : }
278 358950 : return FALSE;
279 : }
280 :
281 : /************************************************************************/
282 : /* GWKGenericMonoThread() */
283 : /************************************************************************/
284 :
285 2739 : static CPLErr GWKGenericMonoThread(GDALWarpKernel *poWK,
286 : void (*pfnFunc)(void *pUserData))
287 : {
288 2739 : GWKThreadData td;
289 :
290 : // NOTE: the mutex is not used.
291 2737 : GWKJobStruct job(td.mutex, td.cv, td.counter, td.stopFlag);
292 2736 : job.poWK = poWK;
293 2736 : job.iYMin = 0;
294 2736 : job.iYMax = poWK->nDstYSize;
295 2736 : job.pfnProgress = GWKProgressMonoThread;
296 2736 : job.pTransformerArg = poWK->pTransformerArg;
297 2736 : job.counterSingleThreaded = td.counter;
298 2736 : pfnFunc(&job);
299 2739 : td.counter = job.counterSingleThreaded;
300 :
301 5478 : return td.stopFlag ? CE_Failure : CE_None;
302 : }
303 :
304 : /************************************************************************/
305 : /* GWKThreadsCreate() */
306 : /************************************************************************/
307 :
308 1622 : void *GWKThreadsCreate(char **papszWarpOptions,
309 : GDALTransformerFunc /* pfnTransformer */,
310 : void *pTransformerArg)
311 : {
312 : const char *pszWarpThreads =
313 1622 : CSLFetchNameValue(papszWarpOptions, "NUM_THREADS");
314 1622 : if (pszWarpThreads == nullptr)
315 1605 : pszWarpThreads = CPLGetConfigOption("GDAL_NUM_THREADS", "1");
316 :
317 1622 : int nThreads = 0;
318 1622 : if (EQUAL(pszWarpThreads, "ALL_CPUS"))
319 3 : nThreads = CPLGetNumCPUs();
320 : else
321 1619 : nThreads = atoi(pszWarpThreads);
322 1622 : if (nThreads <= 1)
323 1600 : nThreads = 0;
324 1622 : if (nThreads > 128)
325 0 : nThreads = 128;
326 :
327 1622 : GWKThreadData *psThreadData = new GWKThreadData();
328 : auto poThreadPool =
329 1622 : nThreads > 0 ? GDALGetGlobalThreadPool(nThreads) : nullptr;
330 1622 : if (nThreads && poThreadPool)
331 : {
332 22 : psThreadData->nMaxThreads = nThreads;
333 22 : psThreadData->threadJobs.reset(new std::vector<GWKJobStruct>(
334 : nThreads,
335 22 : GWKJobStruct(psThreadData->mutex, psThreadData->cv,
336 44 : psThreadData->counter, psThreadData->stopFlag)));
337 :
338 22 : psThreadData->poJobQueue = poThreadPool->CreateJobQueue();
339 22 : psThreadData->pTransformerArgInput = pTransformerArg;
340 : }
341 :
342 1622 : return psThreadData;
343 : }
344 :
345 : /************************************************************************/
346 : /* GWKThreadsEnd() */
347 : /************************************************************************/
348 :
349 1622 : void GWKThreadsEnd(void *psThreadDataIn)
350 : {
351 1622 : if (psThreadDataIn == nullptr)
352 0 : return;
353 :
354 1622 : GWKThreadData *psThreadData = static_cast<GWKThreadData *>(psThreadDataIn);
355 1622 : if (psThreadData->poJobQueue)
356 : {
357 : // cppcheck-suppress constVariableReference
358 32 : for (auto &pair : psThreadData->mapThreadToTransformerArg)
359 : {
360 10 : CPLAssert(pair.second != psThreadData->pTransformerArgInput);
361 10 : GDALDestroyTransformer(pair.second);
362 : }
363 22 : psThreadData->poJobQueue.reset();
364 : }
365 1622 : delete psThreadData;
366 : }
367 :
368 : /************************************************************************/
369 : /* ThreadFuncAdapter() */
370 : /************************************************************************/
371 :
372 31 : static void ThreadFuncAdapter(void *pData)
373 : {
374 31 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
375 31 : GWKThreadData *psThreadData =
376 31 : static_cast<GWKThreadData *>(psJob->poWK->psThreadData);
377 :
378 : // Look if we have already a per-thread transformer
379 31 : void *pTransformerArg = nullptr;
380 31 : const GIntBig nThreadId = CPLGetPID();
381 :
382 : {
383 62 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
384 31 : ++psThreadData->nCurThreadCountForThisRun;
385 :
386 31 : auto oIter = psThreadData->mapThreadToTransformerArg.find(nThreadId);
387 31 : if (oIter != psThreadData->mapThreadToTransformerArg.end())
388 : {
389 0 : pTransformerArg = oIter->second;
390 : }
391 31 : else if (!psThreadData->bTransformerArgInputAssignedToThread &&
392 31 : psThreadData->nCurThreadCountForThisRun ==
393 31 : psThreadData->nTotalThreadCountForThisRun)
394 : {
395 : // If we are the last thread to be started, temporarily borrow the
396 : // original transformer
397 21 : psThreadData->bTransformerArgInputAssignedToThread = true;
398 21 : pTransformerArg = psThreadData->pTransformerArgInput;
399 21 : psThreadData->mapThreadToTransformerArg[nThreadId] =
400 : pTransformerArg;
401 : }
402 :
403 31 : if (pTransformerArg == nullptr)
404 : {
405 10 : CPLAssert(psThreadData->pTransformerArgInput != nullptr);
406 10 : CPLAssert(!psThreadData->bTransformerArgInputAssignedToThread);
407 : }
408 : }
409 :
410 : // If no transformer assigned to current thread, instantiate one
411 31 : if (pTransformerArg == nullptr)
412 : {
413 : // This somehow assumes that GDALCloneTransformer() is thread-safe
414 : // which should normally be the case.
415 : pTransformerArg =
416 10 : GDALCloneTransformer(psThreadData->pTransformerArgInput);
417 :
418 : // Lock for the stop flag and the transformer map.
419 10 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
420 10 : if (!pTransformerArg)
421 : {
422 0 : psJob->stopFlag = true;
423 0 : return;
424 : }
425 10 : psThreadData->mapThreadToTransformerArg[nThreadId] = pTransformerArg;
426 : }
427 :
428 31 : psJob->pTransformerArg = pTransformerArg;
429 31 : psJob->pfnFunc(pData);
430 :
431 : // Give back original transformer, if borrowed.
432 : {
433 62 : std::lock_guard<std::mutex> lock(psThreadData->mutex);
434 31 : if (psThreadData->bTransformerArgInputAssignedToThread &&
435 22 : pTransformerArg == psThreadData->pTransformerArgInput)
436 : {
437 : psThreadData->mapThreadToTransformerArg.erase(
438 21 : psThreadData->mapThreadToTransformerArg.find(nThreadId));
439 21 : psThreadData->bTransformerArgInputAssignedToThread = false;
440 : }
441 : }
442 : }
443 :
444 : /************************************************************************/
445 : /* GWKRun() */
446 : /************************************************************************/
447 :
448 2757 : static CPLErr GWKRun(GDALWarpKernel *poWK, const char *pszFuncName,
449 : void (*pfnFunc)(void *pUserData))
450 :
451 : {
452 2757 : const int nDstYSize = poWK->nDstYSize;
453 :
454 2757 : CPLDebug("GDAL",
455 : "GDALWarpKernel()::%s() "
456 : "Src=%d,%d,%dx%d Dst=%d,%d,%dx%d",
457 : pszFuncName, poWK->nSrcXOff, poWK->nSrcYOff, poWK->nSrcXSize,
458 : poWK->nSrcYSize, poWK->nDstXOff, poWK->nDstYOff, poWK->nDstXSize,
459 : poWK->nDstYSize);
460 :
461 2760 : if (!poWK->pfnProgress(poWK->dfProgressBase, "", poWK->pProgress))
462 : {
463 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
464 0 : return CE_Failure;
465 : }
466 :
467 2759 : GWKThreadData *psThreadData =
468 : static_cast<GWKThreadData *>(poWK->psThreadData);
469 2759 : if (psThreadData == nullptr || psThreadData->poJobQueue == nullptr)
470 : {
471 2737 : return GWKGenericMonoThread(poWK, pfnFunc);
472 : }
473 :
474 22 : int nThreads = std::min(psThreadData->nMaxThreads, nDstYSize / 2);
475 : // Config option mostly useful for tests to be able to test multithreading
476 : // with small rasters
477 : const int nWarpChunkSize =
478 21 : atoi(CPLGetConfigOption("WARP_THREAD_CHUNK_SIZE", "65536"));
479 21 : if (nWarpChunkSize > 0)
480 : {
481 19 : GIntBig nChunks =
482 19 : static_cast<GIntBig>(nDstYSize) * poWK->nDstXSize / nWarpChunkSize;
483 19 : if (nThreads > nChunks)
484 14 : nThreads = static_cast<int>(nChunks);
485 : }
486 21 : if (nThreads <= 0)
487 17 : nThreads = 1;
488 :
489 21 : CPLDebug("WARP", "Using %d threads", nThreads);
490 :
491 21 : auto &jobs = *psThreadData->threadJobs;
492 21 : CPLAssert(static_cast<int>(jobs.size()) >= nThreads);
493 : // Fill-in job structures.
494 52 : for (int i = 0; i < nThreads; ++i)
495 : {
496 31 : auto &job = jobs[i];
497 31 : job.poWK = poWK;
498 31 : job.iYMin =
499 31 : static_cast<int>(static_cast<int64_t>(i) * nDstYSize / nThreads);
500 31 : job.iYMax = static_cast<int>(static_cast<int64_t>(i + 1) * nDstYSize /
501 31 : nThreads);
502 31 : if (poWK->pfnProgress != GDALDummyProgress)
503 2 : job.pfnProgress = GWKProgressThread;
504 31 : job.pfnFunc = pfnFunc;
505 : }
506 :
507 : bool bStopFlag;
508 : {
509 21 : std::unique_lock<std::mutex> lock(psThreadData->mutex);
510 :
511 21 : psThreadData->nTotalThreadCountForThisRun = nThreads;
512 : // coverity[missing_lock]
513 21 : psThreadData->nCurThreadCountForThisRun = 0;
514 :
515 : // Start jobs.
516 52 : for (int i = 0; i < nThreads; ++i)
517 : {
518 31 : auto &job = jobs[i];
519 31 : psThreadData->poJobQueue->SubmitJob(ThreadFuncAdapter,
520 : static_cast<void *>(&job));
521 : }
522 :
523 : /* --------------------------------------------------------------------
524 : */
525 : /* Report progress. */
526 : /* --------------------------------------------------------------------
527 : */
528 21 : if (poWK->pfnProgress != GDALDummyProgress)
529 : {
530 15 : while (psThreadData->counter < nDstYSize)
531 : {
532 14 : psThreadData->cv.wait(lock);
533 14 : if (!poWK->pfnProgress(poWK->dfProgressBase +
534 14 : poWK->dfProgressScale *
535 14 : (psThreadData->counter /
536 14 : static_cast<double>(nDstYSize)),
537 : "", poWK->pProgress))
538 : {
539 1 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
540 1 : psThreadData->stopFlag = true;
541 1 : break;
542 : }
543 : }
544 : }
545 :
546 21 : bStopFlag = psThreadData->stopFlag;
547 : }
548 :
549 : /* -------------------------------------------------------------------- */
550 : /* Wait for all jobs to complete. */
551 : /* -------------------------------------------------------------------- */
552 21 : psThreadData->poJobQueue->WaitCompletion();
553 :
554 21 : return bStopFlag ? CE_Failure : CE_None;
555 : }
556 :
557 : /************************************************************************/
558 : /* ==================================================================== */
559 : /* GDALWarpKernel */
560 : /* ==================================================================== */
561 : /************************************************************************/
562 :
563 : /**
564 : * \class GDALWarpKernel "gdalwarper.h"
565 : *
566 : * Low level image warping class.
567 : *
568 : * This class is responsible for low level image warping for one
569 : * "chunk" of imagery. The class is essentially a structure with all
570 : * data members public - primarily so that new special-case functions
571 : * can be added without changing the class declaration.
572 : *
572 : * Applications are normally intended to interact with warping facilities
574 : * through the GDALWarpOperation class, though the GDALWarpKernel can in
575 : * theory be used directly if great care is taken in setting up the
576 : * control data.
577 : *
578 : * <h3>Design Issues</h3>
579 : *
580 : * The intention is that PerformWarp() would analyze the setup in terms
581 : * of the datatype, resampling type, and validity/density mask usage and
582 : * pick one of many specific implementations of the warping algorithm over
583 : * a continuum of optimization vs. generality. At one end there will be a
584 : * reference general purpose implementation of the algorithm that supports
585 : * any data type (working internally in double precision complex), all three
586 : * resampling types, and any or all of the validity/density masks. At the
587 : * other end would be highly optimized algorithms for common cases like
588 : * nearest neighbour resampling on GDT_Byte data with no masks.
589 : *
590 : * The full set of optimized versions have not been decided but we should
591 : * expect to have at least:
592 : * - One for each resampling algorithm for 8bit data with no masks.
593 : * - One for each resampling algorithm for float data with no masks.
594 : * - One for each resampling algorithm for float data with any/all masks
595 : * (essentially the generic case for just float data).
596 : * - One for each resampling algorithm for 8bit data with support for
597 : * input validity masks (per band or per pixel). This handles the common
598 : * case of nodata masking.
599 : * - One for each resampling algorithm for float data with support for
600 : * input validity masks (per band or per pixel). This handles the common
601 : * case of nodata masking.
602 : *
603 : * Some of the specializations would operate on all bands in one pass
604 : * (especially the ones without masking would do this), while others might
605 : * process each band individually to reduce code complexity.
606 : *
607 : * <h3>Masking Semantics</h3>
608 : *
609 : * A detailed explanation of the semantics of the validity and density masks,
610 : * and their effects on resampling kernels is needed here.
611 : */
612 :
613 : /************************************************************************/
614 : /* GDALWarpKernel Data Members */
615 : /************************************************************************/
616 :
617 : /**
618 : * \var GDALResampleAlg GDALWarpKernel::eResample;
619 : *
620 : * Resampling algorithm.
621 : *
622 : * The resampling algorithm to use. One of GRA_NearestNeighbour, GRA_Bilinear,
623 : * GRA_Cubic, GRA_CubicSpline, GRA_Lanczos, GRA_Average, GRA_RMS,
624 : * GRA_Mode or GRA_Sum.
625 : *
626 : * This field is required. GRA_NearestNeighbour may be used as a default
627 : * value.
628 : */
629 :
630 : /**
631 : * \var GDALDataType GDALWarpKernel::eWorkingDataType;
632 : *
633 : * Working pixel data type.
634 : *
635 : * The datatype of pixels in the source image (papabySrcImage) and
636 : * destination image (papabyDstImage) buffers. Note that operations on
637 : * some data types (such as GDT_Byte) may be much better optimized than other
638 : * less common cases.
639 : *
640 : * This field is required. It may not be GDT_Unknown.
641 : */
642 :
643 : /**
644 : * \var int GDALWarpKernel::nBands;
645 : *
646 : * Number of bands.
647 : *
648 : * The number of bands (layers) of imagery being warped. Determines the
649 : * number of entries in the papabySrcImage, papanBandSrcValid,
650 : * and papabyDstImage arrays.
651 : *
652 : * This field is required.
653 : */
654 :
655 : /**
656 : * \var int GDALWarpKernel::nSrcXSize;
657 : *
658 : * Source image width in pixels.
659 : *
660 : * This field is required.
661 : */
662 :
663 : /**
664 : * \var int GDALWarpKernel::nSrcYSize;
665 : *
666 : * Source image height in pixels.
667 : *
668 : * This field is required.
669 : */
670 :
671 : /**
672 : * \var double GDALWarpKernel::dfSrcXExtraSize;
673 : *
674 : * Number of pixels included in nSrcXSize that are present on the edges of
675 : * the area of interest to take into account the width of the kernel.
676 : *
677 : * This field is required.
678 : */
679 :
680 : /**
681 : * \var double GDALWarpKernel::dfSrcYExtraSize;
682 : *
683 : * Number of pixels included in nSrcYSize that are present on the edges of
684 : * the area of interest to take into account the height of the kernel.
685 : *
686 : * This field is required.
687 : */
688 :
689 : /**
690 : * \var GByte **GDALWarpKernel::papabySrcImage;
691 : *
692 : * Array of source image band data.
693 : *
694 : * This is an array of pointers (of size GDALWarpKernel::nBands)
695 : * to image data. Each individual band of image data is organized as a single
696 : * block of image data in left to right, then bottom to top order. The actual
697 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
698 : *
699 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
700 : * the second band with eWorkingDataType set to GDT_Float32 use code like
701 : * this:
702 : *
703 : * \code
704 : * float dfPixelValue;
705 : * int nBand = 2-1; // Band indexes are zero based.
706 : * int nPixel = 3; // Zero based.
707 : * int nLine = 4; // Zero based.
708 : *
709 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
710 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
711 : * assert( nBand >= 0 && nBand < poKern->nBands );
712 : * dfPixelValue = ((float *) poKern->papabySrcImage[nBand])
713 : * [nPixel + nLine * poKern->nSrcXSize];
714 : * \endcode
715 : *
716 : * This field is required.
717 : */
718 :
719 : /**
720 : * \var GUInt32 **GDALWarpKernel::papanBandSrcValid;
721 : *
722 : * Per band validity mask for source pixels.
723 : *
724 : * Array of pixel validity mask layers for each source band. Each of
725 : * the mask layers is the same size (in pixels) as the source image with
726 : * one bit per pixel. Note that it is legal (and common) for this to be
727 : * NULL indicating that none of the pixels are invalidated, or for some
728 : * band validity masks to be NULL in which case all pixels of the band are
729 : * valid. The following code can be used to test the validity of a particular
730 : * pixel.
731 : *
732 : * \code
733 : * int bIsValid = TRUE;
734 : * int nBand = 2-1; // Band indexes are zero based.
735 : * int nPixel = 3; // Zero based.
736 : * int nLine = 4; // Zero based.
737 : *
738 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
739 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
740 : * assert( nBand >= 0 && nBand < poKern->nBands );
741 : *
742 : * if( poKern->papanBandSrcValid != NULL
743 : * && poKern->papanBandSrcValid[nBand] != NULL )
744 : * {
745 : * GUInt32 *panBandMask = poKern->papanBandSrcValid[nBand];
746 : * int iPixelOffset = nPixel + nLine * poKern->nSrcXSize;
747 : *
748 : * bIsValid = CPLMaskGet(panBandMask, iPixelOffset);
749 : * }
750 : * \endcode
751 : */
752 :
753 : /**
754 : * \var GUInt32 *GDALWarpKernel::panUnifiedSrcValid;
755 : *
756 : * Per pixel validity mask for source pixels.
757 : *
758 : * A single validity mask layer that applies to the pixels of all source
759 : * bands. It is accessed similarly to papanBandSrcValid, but without the
760 : * extra level of band indirection.
761 : *
762 : * This pointer may be NULL indicating that all pixels are valid.
763 : *
764 : * Note that if both panUnifiedSrcValid, and papanBandSrcValid are available,
765 : * the pixel isn't considered to be valid unless both arrays indicate it is
766 : * valid.
767 : */
768 :
769 : /**
770 : * \var float *GDALWarpKernel::pafUnifiedSrcDensity;
771 : *
772 : * Per pixel density mask for source pixels.
773 : *
774 : * A single density mask layer that applies to the pixels of all source
775 : * bands. It contains values between 0.0 and 1.0 indicating the degree to
776 : * which this pixel should be allowed to contribute to the output result.
777 : *
778 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
779 : *
780 : * The density for a pixel may be accessed like this:
781 : *
782 : * \code
783 : * float fDensity = 1.0;
784 : * int nPixel = 3; // Zero based.
785 : * int nLine = 4; // Zero based.
786 : *
787 : * assert( nPixel >= 0 && nPixel < poKern->nSrcXSize );
788 : * assert( nLine >= 0 && nLine < poKern->nSrcYSize );
789 : * if( poKern->pafUnifiedSrcDensity != NULL )
790 : * fDensity = poKern->pafUnifiedSrcDensity
791 : * [nPixel + nLine * poKern->nSrcXSize];
792 : * \endcode
793 : */
794 :
795 : /**
796 : * \var int GDALWarpKernel::nDstXSize;
797 : *
798 : * Width of destination image in pixels.
799 : *
800 : * This field is required.
801 : */
802 :
803 : /**
804 : * \var int GDALWarpKernel::nDstYSize;
805 : *
806 : * Height of destination image in pixels.
807 : *
808 : * This field is required.
809 : */
810 :
811 : /**
812 : * \var GByte **GDALWarpKernel::papabyDstImage;
813 : *
814 : * Array of destination image band data.
815 : *
816 : * This is an array of pointers (of size GDALWarpKernel::nBands)
817 : * to image data. Each individual band of image data is organized as a single
818 : * block of image data in left to right, then bottom to top order. The actual
819 : * type of the image data is determined by GDALWarpKernel::eWorkingDataType.
820 : *
821 : * To access the pixel value for the (x=3, y=4) pixel (zero based) of
822 : * the second band with eWorkingDataType set to GDT_Float32 use code like
823 : * this:
824 : *
825 : * \code
826 : * float dfPixelValue;
827 : * int nBand = 2-1; // Band indexes are zero based.
828 : * int nPixel = 3; // Zero based.
829 : * int nLine = 4; // Zero based.
830 : *
831 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
832 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
833 : * assert( nBand >= 0 && nBand < poKern->nBands );
834 : * dfPixelValue = ((float *) poKern->papabyDstImage[nBand])
835 : * [nPixel + nLine * poKern->nDstXSize];
836 : * \endcode
837 : *
838 : * This field is required.
839 : */
840 :
841 : /**
842 : * \var GUInt32 *GDALWarpKernel::panDstValid;
843 : *
844 : * Per pixel validity mask for destination pixels.
845 : *
846 : * A single validity mask layer that applies to the pixels of all destination
847 : * bands. It is accessed similarly to panUnifiedSrcValid, but based
848 : * on the size of the destination image.
849 : *
850 : * This pointer may be NULL indicating that all pixels are valid.
851 : */
852 :
853 : /**
854 : * \var float *GDALWarpKernel::pafDstDensity;
855 : *
856 : * Per pixel density mask for destination pixels.
857 : *
858 : * A single density mask layer that applies to the pixels of all destination
859 : * bands. It contains values between 0.0 and 1.0.
860 : *
861 : * This pointer may be NULL indicating that all pixels have a density of 1.0.
862 : *
863 : * The density for a pixel may be accessed like this:
864 : *
865 : * \code
866 : * float fDensity = 1.0;
867 : * int nPixel = 3; // Zero based.
868 : * int nLine = 4; // Zero based.
869 : *
870 : * assert( nPixel >= 0 && nPixel < poKern->nDstXSize );
871 : * assert( nLine >= 0 && nLine < poKern->nDstYSize );
872 : * if( poKern->pafDstDensity != NULL )
873 : * fDensity = poKern->pafDstDensity[nPixel + nLine * poKern->nDstXSize];
874 : * \endcode
875 : */
876 :
877 : /**
878 : * \var int GDALWarpKernel::nSrcXOff;
879 : *
880 : * X offset to source pixel coordinates for transformation.
881 : *
882 : * See pfnTransformer.
883 : *
884 : * This field is required.
885 : */
886 :
887 : /**
888 : * \var int GDALWarpKernel::nSrcYOff;
889 : *
890 : * Y offset to source pixel coordinates for transformation.
891 : *
892 : * See pfnTransformer.
893 : *
894 : * This field is required.
895 : */
896 :
897 : /**
898 : * \var int GDALWarpKernel::nDstXOff;
899 : *
900 : * X offset to destination pixel coordinates for transformation.
901 : *
902 : * See pfnTransformer.
903 : *
904 : * This field is required.
905 : */
906 :
907 : /**
908 : * \var int GDALWarpKernel::nDstYOff;
909 : *
910 : * Y offset to destination pixel coordinates for transformation.
911 : *
912 : * See pfnTransformer.
913 : *
914 : * This field is required.
915 : */
916 :
917 : /**
918 : * \var GDALTransformerFunc GDALWarpKernel::pfnTransformer;
919 : *
920 : * Source/destination location transformer.
921 : *
922 : * The function to call to transform coordinates between source image
923 : * pixel/line coordinates and destination image pixel/line coordinates.
924 : * See GDALTransformerFunc() for details of the semantics of this function.
925 : *
926 : * The GDALWarpKernel algorithm will only ever use this transformer in
927 : * "destination to source" mode (bDstToSrc=TRUE), and will always pass
928 : * partial or complete scanlines of points in the destination image as
929 : * input. This means, among other things, that it is safe to use the
930 : * approximating transform GDALApproxTransform() as the transformation
931 : * function.
932 : *
933 : * Source and destination images may be subsets of a larger overall image.
934 : * The transformation algorithms will expect and return pixel/line coordinates
935 : * in terms of this larger image, so coordinates need to be offset by
936 : * the offsets specified in nSrcXOff, nSrcYOff, nDstXOff, and nDstYOff before
937 : * passing to pfnTransformer, and after return from it.
938 : *
939 : * The GDALWarpKernel::pTransformerArg value will be passed as the callback
940 : * data to this function when it is called.
941 : *
942 : * This field is required.
943 : */
944 :
945 : /**
946 : * \var void *GDALWarpKernel::pTransformerArg;
947 : *
948 : * Callback data for pfnTransformer.
949 : *
950 : * This field may be NULL if not required for the pfnTransformer being used.
951 : */
952 :
953 : /**
954 : * \var GDALProgressFunc GDALWarpKernel::pfnProgress;
955 : *
956 : * The function to call to report progress of the algorithm, and to check
957 : * for a requested termination of the operation. It operates according to
958 : * GDALProgressFunc() semantics.
959 : *
960 : * Generally speaking the progress function will be invoked for each
961 : * scanline of the destination buffer that has been processed.
962 : *
963 : * This field may be NULL (internally set to GDALDummyProgress()).
964 : */
965 :
966 : /**
967 : * \var void *GDALWarpKernel::pProgress;
968 : *
969 : * Callback data for pfnProgress.
970 : *
971 : * This field may be NULL if not required for the pfnProgress being used.
972 : */
973 :
974 : /************************************************************************/
975 : /* GDALWarpKernel() */
976 : /************************************************************************/
977 :
978 2790 : GDALWarpKernel::GDALWarpKernel()
979 : : papszWarpOptions(nullptr), eResample(GRA_NearestNeighbour),
980 : eWorkingDataType(GDT_Unknown), nBands(0), nSrcXSize(0), nSrcYSize(0),
981 : dfSrcXExtraSize(0.0), dfSrcYExtraSize(0.0), papabySrcImage(nullptr),
982 : papanBandSrcValid(nullptr), panUnifiedSrcValid(nullptr),
983 : pafUnifiedSrcDensity(nullptr), nDstXSize(0), nDstYSize(0),
984 : papabyDstImage(nullptr), panDstValid(nullptr), pafDstDensity(nullptr),
985 : dfXScale(1.0), dfYScale(1.0), dfXFilter(0.0), dfYFilter(0.0), nXRadius(0),
986 : nYRadius(0), nFiltInitX(0), nFiltInitY(0), nSrcXOff(0), nSrcYOff(0),
987 : nDstXOff(0), nDstYOff(0), pfnTransformer(nullptr),
988 : pTransformerArg(nullptr), pfnProgress(GDALDummyProgress),
989 : pProgress(nullptr), dfProgressBase(0.0), dfProgressScale(1.0),
990 : padfDstNoDataReal(nullptr), psThreadData(nullptr),
991 2790 : eTieStrategy(GWKTS_First)
992 : {
993 2788 : }
994 :
995 : /************************************************************************/
996 : /* ~GDALWarpKernel() */
997 : /************************************************************************/
998 :
999 2790 : GDALWarpKernel::~GDALWarpKernel()
1000 : {
1001 2790 : }
1002 :
1003 : /************************************************************************/
1004 : /* PerformWarp() */
1005 : /************************************************************************/
1006 :
1007 : /**
1008 : * \fn CPLErr GDALWarpKernel::PerformWarp();
1009 : *
1010 : * This method performs the warp described in the GDALWarpKernel.
1011 : *
1012 : * @return CE_None on success or CE_Failure if an error occurs.
1013 : */
1014 :
1015 2786 : CPLErr GDALWarpKernel::PerformWarp()
1016 :
1017 : {
1018 2786 : const CPLErr eErr = Validate();
1019 :
1020 2785 : if (eErr != CE_None)
1021 1 : return eErr;
1022 :
1023 : // See #2445 and #3079.
1024 2784 : if (nSrcXSize <= 0 || nSrcYSize <= 0)
1025 : {
1026 26 : if (!pfnProgress(dfProgressBase + dfProgressScale, "", pProgress))
1027 : {
1028 0 : CPLError(CE_Failure, CPLE_UserInterrupt, "User terminated");
1029 0 : return CE_Failure;
1030 : }
1031 25 : return CE_None;
1032 : }
1033 :
1034 : /* -------------------------------------------------------------------- */
1035 : /* Pre-calculate resampling scales and window sizes for filtering. */
1036 : /* -------------------------------------------------------------------- */
1037 :
1038 2758 : dfXScale = static_cast<double>(nDstXSize) / (nSrcXSize - dfSrcXExtraSize);
1039 2758 : dfYScale = static_cast<double>(nDstYSize) / (nSrcYSize - dfSrcYExtraSize);
1040 2758 : if (nSrcXSize >= nDstXSize && nSrcXSize <= nDstXSize + dfSrcXExtraSize)
1041 1365 : dfXScale = 1.0;
1042 2758 : if (nSrcYSize >= nDstYSize && nSrcYSize <= nDstYSize + dfSrcYExtraSize)
1043 1091 : dfYScale = 1.0;
1044 2758 : if (dfXScale < 1.0)
1045 : {
1046 591 : double dfXReciprocalScale = 1.0 / dfXScale;
1047 591 : const int nXReciprocalScale =
1048 591 : static_cast<int>(dfXReciprocalScale + 0.5);
1049 591 : if (fabs(dfXReciprocalScale - nXReciprocalScale) < 0.05)
1050 462 : dfXScale = 1.0 / nXReciprocalScale;
1051 : }
1052 2758 : if (dfYScale < 1.0)
1053 : {
1054 535 : double dfYReciprocalScale = 1.0 / dfYScale;
1055 535 : const int nYReciprocalScale =
1056 535 : static_cast<int>(dfYReciprocalScale + 0.5);
1057 535 : if (fabs(dfYReciprocalScale - nYReciprocalScale) < 0.05)
1058 378 : dfYScale = 1.0 / nYReciprocalScale;
1059 : }
1060 :
1061 : // XSCALE and YSCALE undocumented for now. Can help in some cases.
1062 : // Best would probably be a per-pixel scale computation.
1063 2758 : const char *pszXScale = CSLFetchNameValue(papszWarpOptions, "XSCALE");
1064 2760 : if (pszXScale != nullptr && !EQUAL(pszXScale, "FROM_GRID_SAMPLING"))
1065 1 : dfXScale = CPLAtof(pszXScale);
1066 2760 : const char *pszYScale = CSLFetchNameValue(papszWarpOptions, "YSCALE");
1067 2760 : if (pszYScale != nullptr)
1068 1 : dfYScale = CPLAtof(pszYScale);
1069 :
1070 : // If the xscale is significantly lower than the yscale, this is highly
1071 : // suspicious of a situation of wrapping a very large virtual file in
1072 : // geographic coordinates with left and right parts being close to the
1073 : // antimeridian. In that situation, the xscale computed by the above method
1074 : // is completely wrong. Prefer doing an average of a few sample points
1075 : // instead
1076 2760 : if ((dfYScale / dfXScale > 100 ||
1077 1 : (pszXScale != nullptr && EQUAL(pszXScale, "FROM_GRID_SAMPLING"))))
1078 : {
1079 : // Sample points along a grid
1080 4 : const int nPointsX = std::min(10, nDstXSize);
1081 4 : const int nPointsY = std::min(10, nDstYSize);
1082 4 : const int nPoints = 3 * nPointsX * nPointsY;
1083 8 : std::vector<double> padfX;
1084 8 : std::vector<double> padfY;
1085 8 : std::vector<double> padfZ(nPoints);
1086 8 : std::vector<int> pabSuccess(nPoints);
1087 44 : for (int iY = 0; iY < nPointsY; iY++)
1088 : {
1089 440 : for (int iX = 0; iX < nPointsX; iX++)
1090 : {
1091 400 : const double dfX =
1092 : nPointsX == 1
1093 400 : ? 0.0
1094 400 : : static_cast<double>(iX) * nDstXSize / (nPointsX - 1);
1095 400 : const double dfY =
1096 : nPointsY == 1
1097 400 : ? 0.0
1098 400 : : static_cast<double>(iY) * nDstYSize / (nPointsY - 1);
1099 :
1100 : // Reproject each destination sample point and its neighbours
1101 : // at (x+1,y) and (x,y+1), so as to get the local scale.
1102 400 : padfX.push_back(dfX);
1103 400 : padfY.push_back(dfY);
1104 :
1105 400 : padfX.push_back((iX == nPointsX - 1) ? dfX - 1 : dfX + 1);
1106 400 : padfY.push_back(dfY);
1107 :
1108 400 : padfX.push_back(dfX);
1109 400 : padfY.push_back((iY == nPointsY - 1) ? dfY - 1 : dfY + 1);
1110 : }
1111 : }
1112 4 : pfnTransformer(pTransformerArg, TRUE, nPoints, &padfX[0], &padfY[0],
1113 4 : &padfZ[0], &pabSuccess[0]);
1114 :
1115 : // Compute the xscale at each sampling point
1116 8 : std::vector<double> adfXScales;
1117 404 : for (int i = 0; i < nPoints; i += 3)
1118 : {
1119 400 : if (pabSuccess[i] && pabSuccess[i + 1] && pabSuccess[i + 2])
1120 : {
1121 : const double dfPointXScale =
1122 400 : 1.0 / std::max(std::abs(padfX[i + 1] - padfX[i]),
1123 800 : std::abs(padfX[i + 2] - padfX[i]));
1124 400 : adfXScales.push_back(dfPointXScale);
1125 : }
1126 : }
1127 :
1128 : // Sort by increasing xcale
1129 4 : std::sort(adfXScales.begin(), adfXScales.end());
1130 :
1131 4 : if (!adfXScales.empty())
1132 : {
1133 : // Compute the average of scales, but eliminate outliers small
1134 : // scales, if some samples are just along the discontinuity.
1135 4 : const double dfMaxPointXScale = adfXScales.back();
1136 4 : double dfSumPointXScale = 0;
1137 4 : int nCountPointScale = 0;
1138 404 : for (double dfPointXScale : adfXScales)
1139 : {
1140 400 : if (dfPointXScale > dfMaxPointXScale / 10)
1141 : {
1142 398 : dfSumPointXScale += dfPointXScale;
1143 398 : nCountPointScale++;
1144 : }
1145 : }
1146 4 : if (nCountPointScale > 0) // should always be true
1147 : {
1148 4 : const double dfXScaleFromSampling =
1149 4 : dfSumPointXScale / nCountPointScale;
1150 : #if DEBUG_VERBOSE
1151 : CPLDebug("WARP", "Correcting dfXScale from %f to %f", dfXScale,
1152 : dfXScaleFromSampling);
1153 : #endif
1154 4 : dfXScale = dfXScaleFromSampling;
1155 : }
1156 : }
1157 : }
1158 :
1159 : #if DEBUG_VERBOSE
1160 : CPLDebug("WARP", "dfXScale = %f, dfYScale = %f", dfXScale, dfYScale);
1161 : #endif
1162 :
1163 2760 : const int bUse4SamplesFormula = dfXScale >= 0.95 && dfYScale >= 0.95;
1164 :
1165 : // Safety check for callers that would use GDALWarpKernel without using
1166 : // GDALWarpOperation.
1167 2697 : if ((eResample == GRA_CubicSpline || eResample == GRA_Lanczos ||
1168 2634 : ((eResample == GRA_Cubic || eResample == GRA_Bilinear) &&
1169 5520 : !bUse4SamplesFormula)) &&
1170 390 : atoi(CSLFetchNameValueDef(papszWarpOptions, "EXTRA_ELTS", "0")) !=
1171 : WARP_EXTRA_ELTS)
1172 : {
1173 0 : CPLError(CE_Failure, CPLE_AppDefined,
1174 : "Source arrays must have WARP_EXTRA_ELTS extra elements at "
1175 : "their end. "
1176 : "See GDALWarpKernel class definition. If this condition is "
1177 : "fulfilled, define a EXTRA_ELTS=%d warp options",
1178 : WARP_EXTRA_ELTS);
1179 0 : return CE_Failure;
1180 : }
1181 :
1182 2760 : dfXFilter = anGWKFilterRadius[eResample];
1183 2760 : dfYFilter = anGWKFilterRadius[eResample];
1184 :
1185 2760 : nXRadius = dfXScale < 1.0 ? static_cast<int>(ceil(dfXFilter / dfXScale))
1186 2253 : : static_cast<int>(dfXFilter);
1187 2760 : nYRadius = dfYScale < 1.0 ? static_cast<int>(ceil(dfYFilter / dfYScale))
1188 2253 : : static_cast<int>(dfYFilter);
1189 :
1190 : // Filter window offset depends on the parity of the kernel radius.
1191 2760 : nFiltInitX = ((anGWKFilterRadius[eResample] + 1) % 2) - nXRadius;
1192 2760 : nFiltInitY = ((anGWKFilterRadius[eResample] + 1) % 2) - nYRadius;
1193 :
1194 2757 : bApplyVerticalShift =
1195 2760 : CPLFetchBool(papszWarpOptions, "APPLY_VERTICAL_SHIFT", false);
1196 2759 : dfMultFactorVerticalShift = CPLAtof(CSLFetchNameValueDef(
1197 2757 : papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT", "1.0"));
1198 :
1199 : /* -------------------------------------------------------------------- */
1200 : /* Set up resampling functions. */
1201 : /* -------------------------------------------------------------------- */
1202 2758 : if (CPLFetchBool(papszWarpOptions, "USE_GENERAL_CASE", false))
1203 12 : return GWKGeneralCase(this);
1204 :
1205 2748 : const bool bNoMasksOrDstDensityOnly =
1206 2741 : papanBandSrcValid == nullptr && panUnifiedSrcValid == nullptr &&
1207 5489 : pafUnifiedSrcDensity == nullptr && panDstValid == nullptr;
1208 :
1209 2748 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour &&
1210 : bNoMasksOrDstDensityOnly)
1211 936 : return GWKNearestNoMasksOrDstDensityOnlyByte(this);
1212 :
1213 1812 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Bilinear &&
1214 : bNoMasksOrDstDensityOnly)
1215 126 : return GWKBilinearNoMasksOrDstDensityOnlyByte(this);
1216 :
1217 1686 : if (eWorkingDataType == GDT_Byte && eResample == GRA_Cubic &&
1218 : bNoMasksOrDstDensityOnly)
1219 609 : return GWKCubicNoMasksOrDstDensityOnlyByte(this);
1220 :
1221 1077 : if (eWorkingDataType == GDT_Byte && eResample == GRA_CubicSpline &&
1222 : bNoMasksOrDstDensityOnly)
1223 12 : return GWKCubicSplineNoMasksOrDstDensityOnlyByte(this);
1224 :
1225 1065 : if (eWorkingDataType == GDT_Byte && eResample == GRA_NearestNeighbour)
1226 341 : return GWKNearestByte(this);
1227 :
1228 724 : if ((eWorkingDataType == GDT_Int16 || eWorkingDataType == GDT_UInt16) &&
1229 133 : eResample == GRA_NearestNeighbour && bNoMasksOrDstDensityOnly)
1230 14 : return GWKNearestNoMasksOrDstDensityOnlyShort(this);
1231 :
1232 710 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Cubic &&
1233 : bNoMasksOrDstDensityOnly)
1234 5 : return GWKCubicNoMasksOrDstDensityOnlyShort(this);
1235 :
1236 705 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_CubicSpline &&
1237 : bNoMasksOrDstDensityOnly)
1238 6 : return GWKCubicSplineNoMasksOrDstDensityOnlyShort(this);
1239 :
1240 699 : if ((eWorkingDataType == GDT_Int16) && eResample == GRA_Bilinear &&
1241 : bNoMasksOrDstDensityOnly)
1242 5 : return GWKBilinearNoMasksOrDstDensityOnlyShort(this);
1243 :
1244 694 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Cubic &&
1245 : bNoMasksOrDstDensityOnly)
1246 14 : return GWKCubicNoMasksOrDstDensityOnlyUShort(this);
1247 :
1248 680 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_CubicSpline &&
1249 : bNoMasksOrDstDensityOnly)
1250 5 : return GWKCubicSplineNoMasksOrDstDensityOnlyUShort(this);
1251 :
1252 675 : if ((eWorkingDataType == GDT_UInt16) && eResample == GRA_Bilinear &&
1253 : bNoMasksOrDstDensityOnly)
1254 6 : return GWKBilinearNoMasksOrDstDensityOnlyUShort(this);
1255 :
1256 669 : if (eWorkingDataType == GDT_Int16 && eResample == GRA_NearestNeighbour)
1257 26 : return GWKNearestShort(this);
1258 :
1259 643 : if (eWorkingDataType == GDT_UInt16 && eResample == GRA_NearestNeighbour)
1260 6 : return GWKNearestUnsignedShort(this);
1261 :
1262 637 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour &&
1263 : bNoMasksOrDstDensityOnly)
1264 11 : return GWKNearestNoMasksOrDstDensityOnlyFloat(this);
1265 :
1266 626 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_NearestNeighbour)
1267 44 : return GWKNearestFloat(this);
1268 :
1269 582 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Bilinear &&
1270 : bNoMasksOrDstDensityOnly)
1271 4 : return GWKBilinearNoMasksOrDstDensityOnlyFloat(this);
1272 :
1273 578 : if (eWorkingDataType == GDT_Float32 && eResample == GRA_Cubic &&
1274 : bNoMasksOrDstDensityOnly)
1275 9 : return GWKCubicNoMasksOrDstDensityOnlyFloat(this);
1276 :
1277 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
1278 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Bilinear &&
1279 : bNoMasksOrDstDensityOnly)
1280 : return GWKBilinearNoMasksOrDstDensityOnlyDouble(this);
1281 :
1282 : if (eWorkingDataType == GDT_Float64 && eResample == GRA_Cubic &&
1283 : bNoMasksOrDstDensityOnly)
1284 : return GWKCubicNoMasksOrDstDensityOnlyDouble(this);
1285 : #endif
1286 :
1287 569 : if (eResample == GRA_Average)
1288 77 : return GWKAverageOrMode(this);
1289 :
1290 492 : if (eResample == GRA_RMS)
1291 9 : return GWKAverageOrMode(this);
1292 :
1293 483 : if (eResample == GRA_Mode)
1294 45 : return GWKAverageOrMode(this);
1295 :
1296 438 : if (eResample == GRA_Max)
1297 6 : return GWKAverageOrMode(this);
1298 :
1299 432 : if (eResample == GRA_Min)
1300 5 : return GWKAverageOrMode(this);
1301 :
1302 427 : if (eResample == GRA_Med)
1303 6 : return GWKAverageOrMode(this);
1304 :
1305 421 : if (eResample == GRA_Q1)
1306 10 : return GWKAverageOrMode(this);
1307 :
1308 411 : if (eResample == GRA_Q3)
1309 5 : return GWKAverageOrMode(this);
1310 :
1311 406 : if (eResample == GRA_Sum)
1312 18 : return GWKSumPreserving(this);
1313 :
1314 388 : if (!GDALDataTypeIsComplex(eWorkingDataType))
1315 : {
1316 158 : return GWKRealCase(this);
1317 : }
1318 :
1319 227 : return GWKGeneralCase(this);
1320 : }
1321 :
1322 : /************************************************************************/
1323 : /* Validate() */
1324 : /************************************************************************/
1325 :
1326 : /**
1327 : * \fn CPLErr GDALWarpKernel::Validate()
1328 : *
1329 : * Check the settings in the GDALWarpKernel, and issue a CPLError()
1330 : * (and return CE_Failure) if the configuration is considered to be
1331 : * invalid for some reason.
1332 : *
1333 : * This method will also do some standard defaulting such as setting
1334 : * pfnProgress to GDALDummyProgress() if it is NULL.
1335 : *
1336 : * @return CE_None on success or CE_Failure if an error is detected.
1337 : */
1338 :
1339 2784 : CPLErr GDALWarpKernel::Validate()
1340 :
1341 : {
1342 2784 : if (static_cast<size_t>(eResample) >=
1343 : (sizeof(anGWKFilterRadius) / sizeof(anGWKFilterRadius[0])))
1344 : {
1345 0 : CPLError(CE_Failure, CPLE_AppDefined,
1346 : "Unsupported resampling method %d.",
1347 0 : static_cast<int>(eResample));
1348 0 : return CE_Failure;
1349 : }
1350 :
1351 : // Tuples of values (e.g. "<R>,<G>,<B>" or "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>)") that must
1352 : // be ignored as contributing source pixels during resampling. Only taken into account by
1353 : // Average currently
1354 : const char *pszExcludedValues =
1355 2784 : CSLFetchNameValue(papszWarpOptions, "EXCLUDED_VALUES");
1356 2786 : if (pszExcludedValues)
1357 : {
1358 : const CPLStringList aosTokens(
1359 15 : CSLTokenizeString2(pszExcludedValues, "(,)", 0));
1360 14 : if ((aosTokens.size() % nBands) != 0)
1361 : {
1362 1 : CPLError(CE_Failure, CPLE_AppDefined,
1363 : "EXCLUDED_VALUES should contain one or several tuples of "
1364 : "%d values formatted like <R>,<G>,<B> or "
1365 : "(<R1>,<G1>,<B1>),(<R2>,<G2>,<B2>) if there are multiple "
1366 : "tuples",
1367 : nBands);
1368 1 : return CE_Failure;
1369 : }
1370 26 : std::vector<double> adfTuple;
1371 52 : for (int i = 0; i < aosTokens.size(); ++i)
1372 : {
1373 39 : adfTuple.push_back(CPLAtof(aosTokens[i]));
1374 39 : if (((i + 1) % nBands) == 0)
1375 : {
1376 13 : m_aadfExcludedValues.push_back(adfTuple);
1377 13 : adfTuple.clear();
1378 : }
1379 : }
1380 : }
1381 :
1382 2784 : return CE_None;
1383 : }
1384 :
1385 : /************************************************************************/
1386 : /* GWKOverlayDensity() */
1387 : /* */
1388 : /* Compute the final density for the destination pixel. This */
1389 : /* is a function of the overlay density (passed in) and the */
1390 : /* original density. */
1391 : /************************************************************************/
1392 :
1393 9804390 : static void GWKOverlayDensity(const GDALWarpKernel *poWK, GPtrDiff_t iDstOffset,
1394 : double dfDensity)
1395 : {
1396 9804390 : if (dfDensity < 0.0001 || poWK->pafDstDensity == nullptr)
1397 7844130 : return;
1398 :
1399 1960260 : poWK->pafDstDensity[iDstOffset] =
1400 1960260 : 1.0f -
1401 1960260 : (1.0f - float(dfDensity)) * (1.0f - poWK->pafDstDensity[iDstOffset]);
1402 : }
1403 :
1404 : /************************************************************************/
1405 : /* GWKRoundValueT() */
1406 : /************************************************************************/
1407 :
1408 : template <class T, class U, bool is_signed> struct sGWKRoundValueT
1409 : {
1410 : static T eval(U);
1411 : };
1412 :
1413 : template <class T, class U> struct sGWKRoundValueT<T, U, true> /* signed */
1414 : {
1415 791525 : static T eval(U value)
1416 : {
1417 791525 : return static_cast<T>(floor(value + U(0.5)));
1418 : }
1419 : };
1420 :
1421 : template <class T, class U> struct sGWKRoundValueT<T, U, false> /* unsigned */
1422 : {
1423 110266097 : static T eval(U value)
1424 : {
1425 110266097 : return static_cast<T>(value + U(0.5));
1426 : }
1427 : };
1428 :
1429 110641522 : template <class T, class U> static T GWKRoundValueT(U value)
1430 : {
1431 110641522 : return sGWKRoundValueT<T, U, cpl::NumericLimits<T>::is_signed>::eval(value);
1432 : }
1433 :
1434 268974 : template <> float GWKRoundValueT<float, double>(double value)
1435 : {
1436 268974 : return static_cast<float>(value);
1437 : }
1438 :
1439 : #ifdef notused
1440 : template <> double GWKRoundValueT<double, double>(double value)
1441 : {
1442 : return value;
1443 : }
1444 : #endif
1445 :
1446 : /************************************************************************/
1447 : /* GWKClampValueT() */
1448 : /************************************************************************/
1449 :
1450 105973034 : template <class T, class U> static CPL_INLINE T GWKClampValueT(U value)
1451 : {
1452 105973034 : if (value < static_cast<U>(cpl::NumericLimits<T>::min()))
1453 481927 : return cpl::NumericLimits<T>::min();
1454 105470426 : else if (value > static_cast<U>(cpl::NumericLimits<T>::max()))
1455 682785 : return cpl::NumericLimits<T>::max();
1456 : else
1457 104795726 : return GWKRoundValueT<T, U>(value);
1458 : }
1459 :
1460 718914 : template <> float GWKClampValueT<float, double>(double dfValue)
1461 : {
1462 718914 : return static_cast<float>(dfValue);
1463 : }
1464 :
1465 : #ifdef notused
1466 : template <> double GWKClampValueT<double, double>(double dfValue)
1467 : {
1468 : return dfValue;
1469 : }
1470 : #endif
1471 :
1472 : /************************************************************************/
1473 : /* AvoidNoData() */
1474 : /************************************************************************/
1475 :
1476 : template <class T>
1477 14880669 : inline void AvoidNoData(const GDALWarpKernel *poWK, int iBand,
1478 : GPtrDiff_t iDstOffset)
1479 : {
1480 14880669 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1481 14880669 : T *pDst = reinterpret_cast<T *>(pabyDst);
1482 :
1483 14880669 : if (poWK->padfDstNoDataReal != nullptr &&
1484 6831481 : poWK->padfDstNoDataReal[iBand] == static_cast<double>(pDst[iDstOffset]))
1485 : {
1486 : if constexpr (cpl::NumericLimits<T>::is_integer)
1487 : {
1488 2637 : if (pDst[iDstOffset] ==
1489 2637 : static_cast<T>(cpl::NumericLimits<T>::lowest()))
1490 : {
1491 2509 : pDst[iDstOffset] =
1492 2509 : static_cast<T>(cpl::NumericLimits<T>::lowest() + 1);
1493 : }
1494 : else
1495 128 : pDst[iDstOffset]--;
1496 : }
1497 : else
1498 : {
1499 64 : if (pDst[iDstOffset] == cpl::NumericLimits<T>::max())
1500 : {
1501 : using std::nextafter;
1502 0 : pDst[iDstOffset] =
1503 0 : nextafter(pDst[iDstOffset], static_cast<T>(0));
1504 : }
1505 : else
1506 : {
1507 : using std::nextafter;
1508 64 : pDst[iDstOffset] =
1509 64 : nextafter(pDst[iDstOffset], cpl::NumericLimits<T>::max());
1510 : }
1511 : }
1512 :
1513 2701 : if (!poWK->bWarnedAboutDstNoDataReplacement)
1514 : {
1515 25 : const_cast<GDALWarpKernel *>(poWK)
1516 : ->bWarnedAboutDstNoDataReplacement = true;
1517 25 : CPLError(CE_Warning, CPLE_AppDefined,
1518 : "Value %g in the source dataset has been changed to %g "
1519 : "in the destination dataset to avoid being treated as "
1520 : "NoData. To avoid this, select a different NoData value "
1521 : "for the destination dataset.",
1522 25 : poWK->padfDstNoDataReal[iBand],
1523 25 : static_cast<double>(pDst[iDstOffset]));
1524 : }
1525 : }
1526 14880669 : }
1527 :
1528 : /************************************************************************/
1529 : /* GWKSetPixelValueRealT() */
1530 : /************************************************************************/
1531 :
1532 : template <class T>
1533 9788354 : static bool GWKSetPixelValueRealT(const GDALWarpKernel *poWK, int iBand,
1534 : GPtrDiff_t iDstOffset, double dfDensity,
1535 : T value)
1536 : {
1537 9788354 : T *pDst = reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
1538 :
1539 : /* -------------------------------------------------------------------- */
1540 : /* If the source density is less than 100% we need to fetch the */
1541 : /* existing destination value, and mix it with the source to */
1542 : /* get the new "to apply" value. Also compute composite */
1543 : /* density. */
1544 : /* */
1545 : /* We avoid mixing if density is very near one or risk mixing */
1546 : /* in very extreme nodata values and causing odd results (#1610) */
1547 : /* -------------------------------------------------------------------- */
1548 9788354 : if (dfDensity < 0.9999)
1549 : {
1550 945508 : if (dfDensity < 0.0001)
1551 0 : return true;
1552 :
1553 945508 : double dfDstDensity = 1.0;
1554 :
1555 945508 : if (poWK->pafDstDensity != nullptr)
1556 944036 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1557 1472 : else if (poWK->panDstValid != nullptr &&
1558 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1559 0 : dfDstDensity = 0.0;
1560 :
1561 : // It seems like we also ought to be testing panDstValid[] here!
1562 :
1563 945508 : const double dfDstReal = static_cast<double>(pDst[iDstOffset]);
1564 :
1565 : // The destination density is really only relative to the portion
1566 : // not occluded by the overlay.
1567 945508 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1568 :
1569 945508 : const double dfReal =
1570 945508 : (double(value) * dfDensity + dfDstReal * dfDstInfluence) /
1571 945508 : (dfDensity + dfDstInfluence);
1572 :
1573 : /* --------------------------------------------------------------------
1574 : */
1575 : /* Actually apply the destination value. */
1576 : /* */
1577 : /* Avoid using the destination nodata value for integer datatypes
1578 : */
1579 : /* if by chance it is equal to the computed pixel value. */
1580 : /* --------------------------------------------------------------------
1581 : */
1582 945508 : pDst[iDstOffset] = GWKClampValueT<T>(dfReal);
1583 : }
1584 : else
1585 : {
1586 8842853 : pDst[iDstOffset] = value;
1587 : }
1588 :
1589 9788354 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1590 :
1591 9788354 : return true;
1592 : }
1593 :
1594 : /************************************************************************/
1595 : /* ClampRoundAndAvoidNoData() */
1596 : /************************************************************************/
1597 :
1598 : template <class T>
1599 5092335 : inline void ClampRoundAndAvoidNoData(const GDALWarpKernel *poWK, int iBand,
1600 : GPtrDiff_t iDstOffset, double dfReal)
1601 : {
1602 5092335 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1603 5092335 : T *pDst = reinterpret_cast<T *>(pabyDst);
1604 :
1605 : if constexpr (cpl::NumericLimits<T>::is_integer)
1606 : {
1607 : using std::floor;
1608 4609439 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::lowest()))
1609 5308 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::lowest());
1610 4604139 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1611 23628 : pDst[iDstOffset] = static_cast<T>(cpl::NumericLimits<T>::max());
1612 : else if constexpr (cpl::NumericLimits<T>::is_signed)
1613 9834 : pDst[iDstOffset] = static_cast<T>(floor(dfReal + 0.5));
1614 : else
1615 4570675 : pDst[iDstOffset] = static_cast<T>(dfReal + 0.5);
1616 : }
1617 : else
1618 : {
1619 482896 : pDst[iDstOffset] = static_cast<T>(dfReal);
1620 : }
1621 :
1622 5092335 : AvoidNoData<T>(poWK, iBand, iDstOffset);
1623 5092335 : }
1624 :
1625 : /************************************************************************/
1626 : /* GWKSetPixelValue() */
1627 : /************************************************************************/
1628 :
1629 4012410 : static bool GWKSetPixelValue(const GDALWarpKernel *poWK, int iBand,
1630 : GPtrDiff_t iDstOffset, double dfDensity,
1631 : double dfReal, double dfImag)
1632 :
1633 : {
1634 4012410 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1635 :
1636 : /* -------------------------------------------------------------------- */
1637 : /* If the source density is less than 100% we need to fetch the */
1638 : /* existing destination value, and mix it with the source to */
1639 : /* get the new "to apply" value. Also compute composite */
1640 : /* density. */
1641 : /* */
1642 : /* We avoid mixing if density is very near one or risk mixing */
1643 : /* in very extreme nodata values and causing odd results (#1610) */
1644 : /* -------------------------------------------------------------------- */
1645 4012410 : if (dfDensity < 0.9999)
1646 : {
1647 800 : if (dfDensity < 0.0001)
1648 0 : return true;
1649 :
1650 800 : double dfDstDensity = 1.0;
1651 800 : if (poWK->pafDstDensity != nullptr)
1652 800 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1653 0 : else if (poWK->panDstValid != nullptr &&
1654 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1655 0 : dfDstDensity = 0.0;
1656 :
1657 800 : double dfDstReal = 0.0;
1658 800 : double dfDstImag = 0.0;
1659 : // It seems like we also ought to be testing panDstValid[] here!
1660 :
1661 : // TODO(schwehr): Factor out this repreated type of set.
1662 800 : switch (poWK->eWorkingDataType)
1663 : {
1664 0 : case GDT_Byte:
1665 0 : dfDstReal = pabyDst[iDstOffset];
1666 0 : dfDstImag = 0.0;
1667 0 : break;
1668 :
1669 0 : case GDT_Int8:
1670 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1671 0 : dfDstImag = 0.0;
1672 0 : break;
1673 :
1674 400 : case GDT_Int16:
1675 400 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1676 400 : dfDstImag = 0.0;
1677 400 : break;
1678 :
1679 400 : case GDT_UInt16:
1680 400 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1681 400 : dfDstImag = 0.0;
1682 400 : break;
1683 :
1684 0 : case GDT_Int32:
1685 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1686 0 : dfDstImag = 0.0;
1687 0 : break;
1688 :
1689 0 : case GDT_UInt32:
1690 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1691 0 : dfDstImag = 0.0;
1692 0 : break;
1693 :
1694 0 : case GDT_Int64:
1695 0 : dfDstReal = static_cast<double>(
1696 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1697 0 : dfDstImag = 0.0;
1698 0 : break;
1699 :
1700 0 : case GDT_UInt64:
1701 0 : dfDstReal = static_cast<double>(
1702 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1703 0 : dfDstImag = 0.0;
1704 0 : break;
1705 :
1706 0 : case GDT_Float16:
1707 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1708 0 : dfDstImag = 0.0;
1709 0 : break;
1710 :
1711 0 : case GDT_Float32:
1712 0 : dfDstReal =
1713 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1714 0 : dfDstImag = 0.0;
1715 0 : break;
1716 :
1717 0 : case GDT_Float64:
1718 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1719 0 : dfDstImag = 0.0;
1720 0 : break;
1721 :
1722 0 : case GDT_CInt16:
1723 0 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2];
1724 0 : dfDstImag =
1725 0 : reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset * 2 + 1];
1726 0 : break;
1727 :
1728 0 : case GDT_CInt32:
1729 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2];
1730 0 : dfDstImag =
1731 0 : reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset * 2 + 1];
1732 0 : break;
1733 :
1734 0 : case GDT_CFloat16:
1735 : dfDstReal =
1736 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2];
1737 : dfDstImag =
1738 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1];
1739 0 : break;
1740 :
1741 0 : case GDT_CFloat32:
1742 0 : dfDstReal =
1743 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset * 2]);
1744 0 : dfDstImag = double(
1745 0 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1]);
1746 0 : break;
1747 :
1748 0 : case GDT_CFloat64:
1749 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset * 2];
1750 0 : dfDstImag =
1751 0 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1];
1752 0 : break;
1753 :
1754 0 : case GDT_Unknown:
1755 : case GDT_TypeCount:
1756 0 : CPLAssert(false);
1757 : return false;
1758 : }
1759 :
1760 : // The destination density is really only relative to the portion
1761 : // not occluded by the overlay.
1762 800 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
1763 :
1764 800 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
1765 800 : (dfDensity + dfDstInfluence);
1766 :
1767 800 : dfImag = (dfImag * dfDensity + dfDstImag * dfDstInfluence) /
1768 800 : (dfDensity + dfDstInfluence);
1769 : }
1770 :
1771 : /* -------------------------------------------------------------------- */
1772 : /* Actually apply the destination value. */
1773 : /* */
1774 : /* Avoid using the destination nodata value for integer datatypes */
1775 : /* if by chance it is equal to the computed pixel value. */
1776 : /* -------------------------------------------------------------------- */
1777 :
1778 4012410 : switch (poWK->eWorkingDataType)
1779 : {
1780 3290010 : case GDT_Byte:
1781 3290010 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
1782 3290010 : break;
1783 :
1784 0 : case GDT_Int8:
1785 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
1786 0 : break;
1787 :
1788 7472 : case GDT_Int16:
1789 7472 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
1790 7472 : break;
1791 :
1792 464 : case GDT_UInt16:
1793 464 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
1794 464 : break;
1795 :
1796 63 : case GDT_UInt32:
1797 63 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
1798 63 : break;
1799 :
1800 63 : case GDT_Int32:
1801 63 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
1802 63 : break;
1803 :
1804 0 : case GDT_UInt64:
1805 0 : ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
1806 : dfReal);
1807 0 : break;
1808 :
1809 0 : case GDT_Int64:
1810 0 : ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
1811 : dfReal);
1812 0 : break;
1813 :
1814 0 : case GDT_Float16:
1815 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
1816 0 : break;
1817 :
1818 478957 : case GDT_Float32:
1819 478957 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
1820 478957 : break;
1821 :
1822 147 : case GDT_Float64:
1823 147 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
1824 147 : break;
1825 :
1826 234079 : case GDT_CInt16:
1827 : {
1828 : typedef GInt16 T;
1829 234079 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1830 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1831 0 : cpl::NumericLimits<T>::min();
1832 234079 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1833 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1834 0 : cpl::NumericLimits<T>::max();
1835 : else
1836 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1837 234079 : static_cast<T>(floor(dfReal + 0.5));
1838 234079 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1839 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1840 0 : cpl::NumericLimits<T>::min();
1841 234079 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1842 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1843 0 : cpl::NumericLimits<T>::max();
1844 : else
1845 234079 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1846 234079 : static_cast<T>(floor(dfImag + 0.5));
1847 234079 : break;
1848 : }
1849 :
1850 379 : case GDT_CInt32:
1851 : {
1852 : typedef GInt32 T;
1853 379 : if (dfReal < static_cast<double>(cpl::NumericLimits<T>::min()))
1854 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1855 0 : cpl::NumericLimits<T>::min();
1856 379 : else if (dfReal > static_cast<double>(cpl::NumericLimits<T>::max()))
1857 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1858 0 : cpl::NumericLimits<T>::max();
1859 : else
1860 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2] =
1861 379 : static_cast<T>(floor(dfReal + 0.5));
1862 379 : if (dfImag < static_cast<double>(cpl::NumericLimits<T>::min()))
1863 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1864 0 : cpl::NumericLimits<T>::min();
1865 379 : else if (dfImag > static_cast<double>(cpl::NumericLimits<T>::max()))
1866 0 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1867 0 : cpl::NumericLimits<T>::max();
1868 : else
1869 379 : reinterpret_cast<T *>(pabyDst)[iDstOffset * 2 + 1] =
1870 379 : static_cast<T>(floor(dfImag + 0.5));
1871 379 : break;
1872 : }
1873 :
1874 0 : case GDT_CFloat16:
1875 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2] =
1876 0 : static_cast<GFloat16>(dfReal);
1877 0 : reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset * 2 + 1] =
1878 0 : static_cast<GFloat16>(dfImag);
1879 0 : break;
1880 :
1881 394 : case GDT_CFloat32:
1882 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2] =
1883 394 : static_cast<float>(dfReal);
1884 394 : reinterpret_cast<float *>(pabyDst)[iDstOffset * 2 + 1] =
1885 394 : static_cast<float>(dfImag);
1886 394 : break;
1887 :
1888 380 : case GDT_CFloat64:
1889 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2] = dfReal;
1890 380 : reinterpret_cast<double *>(pabyDst)[iDstOffset * 2 + 1] = dfImag;
1891 380 : break;
1892 :
1893 0 : case GDT_Unknown:
1894 : case GDT_TypeCount:
1895 0 : return false;
1896 : }
1897 :
1898 4012410 : return true;
1899 : }
1900 :
1901 : /************************************************************************/
1902 : /* GWKSetPixelValueReal() */
1903 : /************************************************************************/
1904 :
1905 1315160 : static bool GWKSetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
1906 : GPtrDiff_t iDstOffset, double dfDensity,
1907 : double dfReal)
1908 :
1909 : {
1910 1315160 : GByte *pabyDst = poWK->papabyDstImage[iBand];
1911 :
1912 : /* -------------------------------------------------------------------- */
1913 : /* If the source density is less than 100% we need to fetch the */
1914 : /* existing destination value, and mix it with the source to */
1915 : /* get the new "to apply" value. Also compute composite */
1916 : /* density. */
1917 : /* */
1918 : /* We avoid mixing if density is very near one or risk mixing */
1919 : /* in very extreme nodata values and causing odd results (#1610) */
1920 : /* -------------------------------------------------------------------- */
1921 1315160 : if (dfDensity < 0.9999)
1922 : {
1923 600 : if (dfDensity < 0.0001)
1924 0 : return true;
1925 :
1926 600 : double dfDstReal = 0.0;
1927 600 : double dfDstDensity = 1.0;
1928 :
1929 600 : if (poWK->pafDstDensity != nullptr)
1930 600 : dfDstDensity = double(poWK->pafDstDensity[iDstOffset]);
1931 0 : else if (poWK->panDstValid != nullptr &&
1932 0 : !CPLMaskGet(poWK->panDstValid, iDstOffset))
1933 0 : dfDstDensity = 0.0;
1934 :
1935 : // It seems like we also ought to be testing panDstValid[] here!
1936 :
1937 600 : switch (poWK->eWorkingDataType)
1938 : {
1939 0 : case GDT_Byte:
1940 0 : dfDstReal = pabyDst[iDstOffset];
1941 0 : break;
1942 :
1943 0 : case GDT_Int8:
1944 0 : dfDstReal = reinterpret_cast<GInt8 *>(pabyDst)[iDstOffset];
1945 0 : break;
1946 :
1947 300 : case GDT_Int16:
1948 300 : dfDstReal = reinterpret_cast<GInt16 *>(pabyDst)[iDstOffset];
1949 300 : break;
1950 :
1951 300 : case GDT_UInt16:
1952 300 : dfDstReal = reinterpret_cast<GUInt16 *>(pabyDst)[iDstOffset];
1953 300 : break;
1954 :
1955 0 : case GDT_Int32:
1956 0 : dfDstReal = reinterpret_cast<GInt32 *>(pabyDst)[iDstOffset];
1957 0 : break;
1958 :
1959 0 : case GDT_UInt32:
1960 0 : dfDstReal = reinterpret_cast<GUInt32 *>(pabyDst)[iDstOffset];
1961 0 : break;
1962 :
1963 0 : case GDT_Int64:
1964 0 : dfDstReal = static_cast<double>(
1965 0 : reinterpret_cast<std::int64_t *>(pabyDst)[iDstOffset]);
1966 0 : break;
1967 :
1968 0 : case GDT_UInt64:
1969 0 : dfDstReal = static_cast<double>(
1970 0 : reinterpret_cast<std::uint64_t *>(pabyDst)[iDstOffset]);
1971 0 : break;
1972 :
1973 0 : case GDT_Float16:
1974 0 : dfDstReal = reinterpret_cast<GFloat16 *>(pabyDst)[iDstOffset];
1975 0 : break;
1976 :
1977 0 : case GDT_Float32:
1978 0 : dfDstReal =
1979 0 : double(reinterpret_cast<float *>(pabyDst)[iDstOffset]);
1980 0 : break;
1981 :
1982 0 : case GDT_Float64:
1983 0 : dfDstReal = reinterpret_cast<double *>(pabyDst)[iDstOffset];
1984 0 : break;
1985 :
1986 0 : case GDT_CInt16:
1987 : case GDT_CInt32:
1988 : case GDT_CFloat16:
1989 : case GDT_CFloat32:
1990 : case GDT_CFloat64:
1991 : case GDT_Unknown:
1992 : case GDT_TypeCount:
1993 0 : CPLAssert(false);
1994 : return false;
1995 : }
1996 :
1997 : // The destination density is really only relative to the portion
1998 : // not occluded by the overlay.
1999 600 : const double dfDstInfluence = (1.0 - dfDensity) * dfDstDensity;
2000 :
2001 600 : dfReal = (dfReal * dfDensity + dfDstReal * dfDstInfluence) /
2002 600 : (dfDensity + dfDstInfluence);
2003 : }
2004 :
2005 : /* -------------------------------------------------------------------- */
2006 : /* Actually apply the destination value. */
2007 : /* */
2008 : /* Avoid using the destination nodata value for integer datatypes */
2009 : /* if by chance it is equal to the computed pixel value. */
2010 : /* -------------------------------------------------------------------- */
2011 :
2012 1315160 : switch (poWK->eWorkingDataType)
2013 : {
2014 1308310 : case GDT_Byte:
2015 1308310 : ClampRoundAndAvoidNoData<GByte>(poWK, iBand, iDstOffset, dfReal);
2016 1308310 : break;
2017 :
2018 0 : case GDT_Int8:
2019 0 : ClampRoundAndAvoidNoData<GInt8>(poWK, iBand, iDstOffset, dfReal);
2020 0 : break;
2021 :
2022 1117 : case GDT_Int16:
2023 1117 : ClampRoundAndAvoidNoData<GInt16>(poWK, iBand, iDstOffset, dfReal);
2024 1117 : break;
2025 :
2026 379 : case GDT_UInt16:
2027 379 : ClampRoundAndAvoidNoData<GUInt16>(poWK, iBand, iDstOffset, dfReal);
2028 379 : break;
2029 :
2030 347 : case GDT_UInt32:
2031 347 : ClampRoundAndAvoidNoData<GUInt32>(poWK, iBand, iDstOffset, dfReal);
2032 347 : break;
2033 :
2034 1150 : case GDT_Int32:
2035 1150 : ClampRoundAndAvoidNoData<GInt32>(poWK, iBand, iDstOffset, dfReal);
2036 1150 : break;
2037 :
2038 32 : case GDT_UInt64:
2039 32 : ClampRoundAndAvoidNoData<std::uint64_t>(poWK, iBand, iDstOffset,
2040 : dfReal);
2041 32 : break;
2042 :
2043 32 : case GDT_Int64:
2044 32 : ClampRoundAndAvoidNoData<std::int64_t>(poWK, iBand, iDstOffset,
2045 : dfReal);
2046 32 : break;
2047 :
2048 0 : case GDT_Float16:
2049 0 : ClampRoundAndAvoidNoData<GFloat16>(poWK, iBand, iDstOffset, dfReal);
2050 0 : break;
2051 :
2052 3442 : case GDT_Float32:
2053 3442 : ClampRoundAndAvoidNoData<float>(poWK, iBand, iDstOffset, dfReal);
2054 3442 : break;
2055 :
2056 350 : case GDT_Float64:
2057 350 : ClampRoundAndAvoidNoData<double>(poWK, iBand, iDstOffset, dfReal);
2058 350 : break;
2059 :
2060 0 : case GDT_CInt16:
2061 : case GDT_CInt32:
2062 : case GDT_CFloat16:
2063 : case GDT_CFloat32:
2064 : case GDT_CFloat64:
2065 0 : return false;
2066 :
2067 0 : case GDT_Unknown:
2068 : case GDT_TypeCount:
2069 0 : CPLAssert(false);
2070 : return false;
2071 : }
2072 :
2073 1315160 : return true;
2074 : }
2075 :
2076 : /************************************************************************/
2077 : /* GWKGetPixelValue() */
2078 : /************************************************************************/
2079 :
2080 : /* It is assumed that panUnifiedSrcValid has been checked before */
2081 :
2082 30506400 : static bool GWKGetPixelValue(const GDALWarpKernel *poWK, int iBand,
2083 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2084 : double *pdfReal, double *pdfImag)
2085 :
2086 : {
2087 30506400 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2088 :
2089 61012700 : if (poWK->papanBandSrcValid != nullptr &&
2090 30506400 : poWK->papanBandSrcValid[iBand] != nullptr &&
2091 0 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2092 : {
2093 0 : *pdfDensity = 0.0;
2094 0 : return false;
2095 : }
2096 :
2097 30506400 : *pdfReal = 0.0;
2098 30506400 : *pdfImag = 0.0;
2099 :
2100 : // TODO(schwehr): Fix casting.
2101 30506400 : switch (poWK->eWorkingDataType)
2102 : {
2103 29429400 : case GDT_Byte:
2104 29429400 : *pdfReal = pabySrc[iSrcOffset];
2105 29429400 : *pdfImag = 0.0;
2106 29429400 : break;
2107 :
2108 0 : case GDT_Int8:
2109 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2110 0 : *pdfImag = 0.0;
2111 0 : break;
2112 :
2113 28232 : case GDT_Int16:
2114 28232 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2115 28232 : *pdfImag = 0.0;
2116 28232 : break;
2117 :
2118 166 : case GDT_UInt16:
2119 166 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2120 166 : *pdfImag = 0.0;
2121 166 : break;
2122 :
2123 63 : case GDT_Int32:
2124 63 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2125 63 : *pdfImag = 0.0;
2126 63 : break;
2127 :
2128 63 : case GDT_UInt32:
2129 63 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2130 63 : *pdfImag = 0.0;
2131 63 : break;
2132 :
2133 0 : case GDT_Int64:
2134 0 : *pdfReal = static_cast<double>(
2135 0 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2136 0 : *pdfImag = 0.0;
2137 0 : break;
2138 :
2139 0 : case GDT_UInt64:
2140 0 : *pdfReal = static_cast<double>(
2141 0 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2142 0 : *pdfImag = 0.0;
2143 0 : break;
2144 :
2145 0 : case GDT_Float16:
2146 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2147 0 : *pdfImag = 0.0;
2148 0 : break;
2149 :
2150 1047220 : case GDT_Float32:
2151 1047220 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2152 1047220 : *pdfImag = 0.0;
2153 1047220 : break;
2154 :
2155 582 : case GDT_Float64:
2156 582 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2157 582 : *pdfImag = 0.0;
2158 582 : break;
2159 :
2160 133 : case GDT_CInt16:
2161 133 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2];
2162 133 : *pdfImag = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset * 2 + 1];
2163 133 : break;
2164 :
2165 133 : case GDT_CInt32:
2166 133 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2];
2167 133 : *pdfImag = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset * 2 + 1];
2168 133 : break;
2169 :
2170 0 : case GDT_CFloat16:
2171 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2];
2172 0 : *pdfImag =
2173 0 : reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset * 2 + 1];
2174 0 : break;
2175 :
2176 194 : case GDT_CFloat32:
2177 194 : *pdfReal =
2178 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2]);
2179 194 : *pdfImag =
2180 194 : double(reinterpret_cast<float *>(pabySrc)[iSrcOffset * 2 + 1]);
2181 194 : break;
2182 :
2183 138 : case GDT_CFloat64:
2184 138 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2];
2185 138 : *pdfImag = reinterpret_cast<double *>(pabySrc)[iSrcOffset * 2 + 1];
2186 138 : break;
2187 :
2188 0 : case GDT_Unknown:
2189 : case GDT_TypeCount:
2190 0 : CPLAssert(false);
2191 : *pdfDensity = 0.0;
2192 : return false;
2193 : }
2194 :
2195 30506400 : if (poWK->pafUnifiedSrcDensity != nullptr)
2196 4194800 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2197 : else
2198 26311600 : *pdfDensity = 1.0;
2199 :
2200 30506400 : return *pdfDensity != 0.0;
2201 : }
2202 :
2203 : /************************************************************************/
2204 : /* GWKGetPixelValueReal() */
2205 : /************************************************************************/
2206 :
2207 1092 : static bool GWKGetPixelValueReal(const GDALWarpKernel *poWK, int iBand,
2208 : GPtrDiff_t iSrcOffset, double *pdfDensity,
2209 : double *pdfReal)
2210 :
2211 : {
2212 1092 : GByte *pabySrc = poWK->papabySrcImage[iBand];
2213 :
2214 2186 : if (poWK->papanBandSrcValid != nullptr &&
2215 1094 : poWK->papanBandSrcValid[iBand] != nullptr &&
2216 2 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset))
2217 : {
2218 0 : *pdfDensity = 0.0;
2219 0 : return false;
2220 : }
2221 :
2222 1092 : switch (poWK->eWorkingDataType)
2223 : {
2224 1 : case GDT_Byte:
2225 1 : *pdfReal = pabySrc[iSrcOffset];
2226 1 : break;
2227 :
2228 0 : case GDT_Int8:
2229 0 : *pdfReal = reinterpret_cast<GInt8 *>(pabySrc)[iSrcOffset];
2230 0 : break;
2231 :
2232 1 : case GDT_Int16:
2233 1 : *pdfReal = reinterpret_cast<GInt16 *>(pabySrc)[iSrcOffset];
2234 1 : break;
2235 :
2236 1 : case GDT_UInt16:
2237 1 : *pdfReal = reinterpret_cast<GUInt16 *>(pabySrc)[iSrcOffset];
2238 1 : break;
2239 :
2240 886 : case GDT_Int32:
2241 886 : *pdfReal = reinterpret_cast<GInt32 *>(pabySrc)[iSrcOffset];
2242 886 : break;
2243 :
2244 83 : case GDT_UInt32:
2245 83 : *pdfReal = reinterpret_cast<GUInt32 *>(pabySrc)[iSrcOffset];
2246 83 : break;
2247 :
2248 16 : case GDT_Int64:
2249 16 : *pdfReal = static_cast<double>(
2250 16 : reinterpret_cast<std::int64_t *>(pabySrc)[iSrcOffset]);
2251 16 : break;
2252 :
2253 16 : case GDT_UInt64:
2254 16 : *pdfReal = static_cast<double>(
2255 16 : reinterpret_cast<std::uint64_t *>(pabySrc)[iSrcOffset]);
2256 16 : break;
2257 :
2258 0 : case GDT_Float16:
2259 0 : *pdfReal = reinterpret_cast<GFloat16 *>(pabySrc)[iSrcOffset];
2260 0 : break;
2261 :
2262 2 : case GDT_Float32:
2263 2 : *pdfReal = double(reinterpret_cast<float *>(pabySrc)[iSrcOffset]);
2264 2 : break;
2265 :
2266 86 : case GDT_Float64:
2267 86 : *pdfReal = reinterpret_cast<double *>(pabySrc)[iSrcOffset];
2268 86 : break;
2269 :
2270 0 : case GDT_CInt16:
2271 : case GDT_CInt32:
2272 : case GDT_CFloat16:
2273 : case GDT_CFloat32:
2274 : case GDT_CFloat64:
2275 : case GDT_Unknown:
2276 : case GDT_TypeCount:
2277 0 : CPLAssert(false);
2278 : return false;
2279 : }
2280 :
2281 1092 : if (poWK->pafUnifiedSrcDensity != nullptr)
2282 0 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2283 : else
2284 1092 : *pdfDensity = 1.0;
2285 :
2286 1092 : return *pdfDensity != 0.0;
2287 : }
2288 :
2289 : /************************************************************************/
2290 : /* GWKGetPixelRow() */
2291 : /************************************************************************/
2292 :
2293 : /* It is assumed that adfImag[] is set to 0 by caller code for non-complex */
2294 : /* data-types. */
2295 :
2296 2368030 : static bool GWKGetPixelRow(const GDALWarpKernel *poWK, int iBand,
2297 : GPtrDiff_t iSrcOffset, int nHalfSrcLen,
2298 : double *padfDensity, double adfReal[],
2299 : double *padfImag)
2300 : {
2301 : // We know that nSrcLen is even, so we can *always* unroll loops 2x.
2302 2368030 : const int nSrcLen = nHalfSrcLen * 2;
2303 2368030 : bool bHasValid = false;
2304 :
2305 2368030 : if (padfDensity != nullptr)
2306 : {
2307 : // Init the density.
2308 3380670 : for (int i = 0; i < nSrcLen; i += 2)
2309 : {
2310 2210230 : padfDensity[i] = 1.0;
2311 2210230 : padfDensity[i + 1] = 1.0;
2312 : }
2313 :
2314 1170440 : if (poWK->panUnifiedSrcValid != nullptr)
2315 : {
2316 3281460 : for (int i = 0; i < nSrcLen; i += 2)
2317 : {
2318 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i))
2319 2067740 : bHasValid = true;
2320 : else
2321 74323 : padfDensity[i] = 0.0;
2322 :
2323 2142070 : if (CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + i + 1))
2324 2068400 : bHasValid = true;
2325 : else
2326 73668 : padfDensity[i + 1] = 0.0;
2327 : }
2328 :
2329 : // Reset or fail as needed.
2330 1139400 : if (bHasValid)
2331 1116590 : bHasValid = false;
2332 : else
2333 22806 : return false;
2334 : }
2335 :
2336 1147640 : if (poWK->papanBandSrcValid != nullptr &&
2337 0 : poWK->papanBandSrcValid[iBand] != nullptr)
2338 : {
2339 0 : for (int i = 0; i < nSrcLen; i += 2)
2340 : {
2341 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset + i))
2342 0 : bHasValid = true;
2343 : else
2344 0 : padfDensity[i] = 0.0;
2345 :
2346 0 : if (CPLMaskGet(poWK->papanBandSrcValid[iBand],
2347 0 : iSrcOffset + i + 1))
2348 0 : bHasValid = true;
2349 : else
2350 0 : padfDensity[i + 1] = 0.0;
2351 : }
2352 :
2353 : // Reset or fail as needed.
2354 0 : if (bHasValid)
2355 0 : bHasValid = false;
2356 : else
2357 0 : return false;
2358 : }
2359 : }
2360 :
2361 : // TODO(schwehr): Fix casting.
2362 : // Fetch data.
2363 2345230 : switch (poWK->eWorkingDataType)
2364 : {
2365 1136510 : case GDT_Byte:
2366 : {
2367 1136510 : GByte *pSrc =
2368 1136510 : reinterpret_cast<GByte *>(poWK->papabySrcImage[iBand]);
2369 1136510 : pSrc += iSrcOffset;
2370 3281230 : for (int i = 0; i < nSrcLen; i += 2)
2371 : {
2372 2144720 : adfReal[i] = pSrc[i];
2373 2144720 : adfReal[i + 1] = pSrc[i + 1];
2374 : }
2375 1136510 : break;
2376 : }
2377 :
2378 0 : case GDT_Int8:
2379 : {
2380 0 : GInt8 *pSrc =
2381 0 : reinterpret_cast<GInt8 *>(poWK->papabySrcImage[iBand]);
2382 0 : pSrc += iSrcOffset;
2383 0 : for (int i = 0; i < nSrcLen; i += 2)
2384 : {
2385 0 : adfReal[i] = pSrc[i];
2386 0 : adfReal[i + 1] = pSrc[i + 1];
2387 : }
2388 0 : break;
2389 : }
2390 :
2391 5614 : case GDT_Int16:
2392 : {
2393 5614 : GInt16 *pSrc =
2394 5614 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2395 5614 : pSrc += iSrcOffset;
2396 21492 : for (int i = 0; i < nSrcLen; i += 2)
2397 : {
2398 15878 : adfReal[i] = pSrc[i];
2399 15878 : adfReal[i + 1] = pSrc[i + 1];
2400 : }
2401 5614 : break;
2402 : }
2403 :
2404 4142 : case GDT_UInt16:
2405 : {
2406 4142 : GUInt16 *pSrc =
2407 4142 : reinterpret_cast<GUInt16 *>(poWK->papabySrcImage[iBand]);
2408 4142 : pSrc += iSrcOffset;
2409 18548 : for (int i = 0; i < nSrcLen; i += 2)
2410 : {
2411 14406 : adfReal[i] = pSrc[i];
2412 14406 : adfReal[i + 1] = pSrc[i + 1];
2413 : }
2414 4142 : break;
2415 : }
2416 :
2417 778 : case GDT_Int32:
2418 : {
2419 778 : GInt32 *pSrc =
2420 778 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2421 778 : pSrc += iSrcOffset;
2422 2288 : for (int i = 0; i < nSrcLen; i += 2)
2423 : {
2424 1510 : adfReal[i] = pSrc[i];
2425 1510 : adfReal[i + 1] = pSrc[i + 1];
2426 : }
2427 778 : break;
2428 : }
2429 :
2430 778 : case GDT_UInt32:
2431 : {
2432 778 : GUInt32 *pSrc =
2433 778 : reinterpret_cast<GUInt32 *>(poWK->papabySrcImage[iBand]);
2434 778 : pSrc += iSrcOffset;
2435 2288 : for (int i = 0; i < nSrcLen; i += 2)
2436 : {
2437 1510 : adfReal[i] = pSrc[i];
2438 1510 : adfReal[i + 1] = pSrc[i + 1];
2439 : }
2440 778 : break;
2441 : }
2442 :
2443 28 : case GDT_Int64:
2444 : {
2445 28 : auto pSrc =
2446 28 : reinterpret_cast<std::int64_t *>(poWK->papabySrcImage[iBand]);
2447 28 : pSrc += iSrcOffset;
2448 56 : for (int i = 0; i < nSrcLen; i += 2)
2449 : {
2450 28 : adfReal[i] = static_cast<double>(pSrc[i]);
2451 28 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2452 : }
2453 28 : break;
2454 : }
2455 :
2456 28 : case GDT_UInt64:
2457 : {
2458 28 : auto pSrc =
2459 28 : reinterpret_cast<std::uint64_t *>(poWK->papabySrcImage[iBand]);
2460 28 : pSrc += iSrcOffset;
2461 56 : for (int i = 0; i < nSrcLen; i += 2)
2462 : {
2463 28 : adfReal[i] = static_cast<double>(pSrc[i]);
2464 28 : adfReal[i + 1] = static_cast<double>(pSrc[i + 1]);
2465 : }
2466 28 : break;
2467 : }
2468 :
2469 0 : case GDT_Float16:
2470 : {
2471 0 : GFloat16 *pSrc =
2472 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2473 0 : pSrc += iSrcOffset;
2474 0 : for (int i = 0; i < nSrcLen; i += 2)
2475 : {
2476 0 : adfReal[i] = pSrc[i];
2477 0 : adfReal[i + 1] = pSrc[i + 1];
2478 : }
2479 0 : break;
2480 : }
2481 :
2482 25102 : case GDT_Float32:
2483 : {
2484 25102 : float *pSrc =
2485 25102 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2486 25102 : pSrc += iSrcOffset;
2487 121403 : for (int i = 0; i < nSrcLen; i += 2)
2488 : {
2489 96301 : adfReal[i] = double(pSrc[i]);
2490 96301 : adfReal[i + 1] = double(pSrc[i + 1]);
2491 : }
2492 25102 : break;
2493 : }
2494 :
2495 778 : case GDT_Float64:
2496 : {
2497 778 : double *pSrc =
2498 778 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2499 778 : pSrc += iSrcOffset;
2500 2288 : for (int i = 0; i < nSrcLen; i += 2)
2501 : {
2502 1510 : adfReal[i] = pSrc[i];
2503 1510 : adfReal[i + 1] = pSrc[i + 1];
2504 : }
2505 778 : break;
2506 : }
2507 :
2508 1169220 : case GDT_CInt16:
2509 : {
2510 1169220 : GInt16 *pSrc =
2511 1169220 : reinterpret_cast<GInt16 *>(poWK->papabySrcImage[iBand]);
2512 1169220 : pSrc += 2 * iSrcOffset;
2513 4676020 : for (int i = 0; i < nSrcLen; i += 2)
2514 : {
2515 3506800 : adfReal[i] = pSrc[2 * i];
2516 3506800 : padfImag[i] = pSrc[2 * i + 1];
2517 :
2518 3506800 : adfReal[i + 1] = pSrc[2 * i + 2];
2519 3506800 : padfImag[i + 1] = pSrc[2 * i + 3];
2520 : }
2521 1169220 : break;
2522 : }
2523 :
2524 750 : case GDT_CInt32:
2525 : {
2526 750 : GInt32 *pSrc =
2527 750 : reinterpret_cast<GInt32 *>(poWK->papabySrcImage[iBand]);
2528 750 : pSrc += 2 * iSrcOffset;
2529 2232 : for (int i = 0; i < nSrcLen; i += 2)
2530 : {
2531 1482 : adfReal[i] = pSrc[2 * i];
2532 1482 : padfImag[i] = pSrc[2 * i + 1];
2533 :
2534 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2535 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2536 : }
2537 750 : break;
2538 : }
2539 :
2540 0 : case GDT_CFloat16:
2541 : {
2542 0 : GFloat16 *pSrc =
2543 0 : reinterpret_cast<GFloat16 *>(poWK->papabySrcImage[iBand]);
2544 0 : pSrc += 2 * iSrcOffset;
2545 0 : for (int i = 0; i < nSrcLen; i += 2)
2546 : {
2547 0 : adfReal[i] = pSrc[2 * i];
2548 0 : padfImag[i] = pSrc[2 * i + 1];
2549 :
2550 0 : adfReal[i + 1] = pSrc[2 * i + 2];
2551 0 : padfImag[i + 1] = pSrc[2 * i + 3];
2552 : }
2553 0 : break;
2554 : }
2555 :
2556 750 : case GDT_CFloat32:
2557 : {
2558 750 : float *pSrc =
2559 750 : reinterpret_cast<float *>(poWK->papabySrcImage[iBand]);
2560 750 : pSrc += 2 * iSrcOffset;
2561 2232 : for (int i = 0; i < nSrcLen; i += 2)
2562 : {
2563 1482 : adfReal[i] = double(pSrc[2 * i]);
2564 1482 : padfImag[i] = double(pSrc[2 * i + 1]);
2565 :
2566 1482 : adfReal[i + 1] = double(pSrc[2 * i + 2]);
2567 1482 : padfImag[i + 1] = double(pSrc[2 * i + 3]);
2568 : }
2569 750 : break;
2570 : }
2571 :
2572 750 : case GDT_CFloat64:
2573 : {
2574 750 : double *pSrc =
2575 750 : reinterpret_cast<double *>(poWK->papabySrcImage[iBand]);
2576 750 : pSrc += 2 * iSrcOffset;
2577 2232 : for (int i = 0; i < nSrcLen; i += 2)
2578 : {
2579 1482 : adfReal[i] = pSrc[2 * i];
2580 1482 : padfImag[i] = pSrc[2 * i + 1];
2581 :
2582 1482 : adfReal[i + 1] = pSrc[2 * i + 2];
2583 1482 : padfImag[i + 1] = pSrc[2 * i + 3];
2584 : }
2585 750 : break;
2586 : }
2587 :
2588 0 : case GDT_Unknown:
2589 : case GDT_TypeCount:
2590 0 : CPLAssert(false);
2591 : if (padfDensity)
2592 : memset(padfDensity, 0, nSrcLen * sizeof(double));
2593 : return false;
2594 : }
2595 :
2596 2345230 : if (padfDensity == nullptr)
2597 1197590 : return true;
2598 :
2599 1147640 : if (poWK->pafUnifiedSrcDensity == nullptr)
2600 : {
2601 3253380 : for (int i = 0; i < nSrcLen; i += 2)
2602 : {
2603 : // Take into account earlier calcs.
2604 2125710 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2605 : {
2606 2085800 : padfDensity[i] = 1.0;
2607 2085800 : bHasValid = true;
2608 : }
2609 :
2610 2125710 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2611 : {
2612 2086460 : padfDensity[i + 1] = 1.0;
2613 2086460 : bHasValid = true;
2614 : }
2615 : }
2616 : }
2617 : else
2618 : {
2619 70068 : for (int i = 0; i < nSrcLen; i += 2)
2620 : {
2621 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2622 50103 : padfDensity[i] =
2623 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i]);
2624 50103 : if (padfDensity[i] > SRC_DENSITY_THRESHOLD_DOUBLE)
2625 49252 : bHasValid = true;
2626 :
2627 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2628 50103 : padfDensity[i + 1] =
2629 50103 : double(poWK->pafUnifiedSrcDensity[iSrcOffset + i + 1]);
2630 50103 : if (padfDensity[i + 1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2631 49170 : bHasValid = true;
2632 : }
2633 : }
2634 :
2635 1147640 : return bHasValid;
2636 : }
2637 :
2638 : /************************************************************************/
2639 : /* GWKGetPixelT() */
2640 : /************************************************************************/
2641 :
2642 : template <class T>
2643 9798646 : static bool GWKGetPixelT(const GDALWarpKernel *poWK, int iBand,
2644 : GPtrDiff_t iSrcOffset, double *pdfDensity, T *pValue)
2645 :
2646 : {
2647 9798646 : T *pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2648 :
2649 22324997 : if ((poWK->panUnifiedSrcValid != nullptr &&
2650 19597272 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset)) ||
2651 9798646 : (poWK->papanBandSrcValid != nullptr &&
2652 589836 : poWK->papanBandSrcValid[iBand] != nullptr &&
2653 589836 : !CPLMaskGet(poWK->papanBandSrcValid[iBand], iSrcOffset)))
2654 : {
2655 9 : *pdfDensity = 0.0;
2656 9 : return false;
2657 : }
2658 :
2659 9798636 : *pValue = pSrc[iSrcOffset];
2660 :
2661 9798636 : if (poWK->pafUnifiedSrcDensity == nullptr)
2662 8676283 : *pdfDensity = 1.0;
2663 : else
2664 1122362 : *pdfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
2665 :
2666 9798636 : return *pdfDensity != 0.0;
2667 : }
2668 :
2669 : /************************************************************************/
2670 : /* GWKBilinearResample() */
2671 : /* Set of bilinear interpolators */
2672 : /************************************************************************/
2673 :
2674 76488 : static bool GWKBilinearResample4Sample(const GDALWarpKernel *poWK, int iBand,
2675 : double dfSrcX, double dfSrcY,
2676 : double *pdfDensity, double *pdfReal,
2677 : double *pdfImag)
2678 :
2679 : {
2680 : // Save as local variables to avoid following pointers.
2681 76488 : const int nSrcXSize = poWK->nSrcXSize;
2682 76488 : const int nSrcYSize = poWK->nSrcYSize;
2683 :
2684 76488 : int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2685 76488 : int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2686 76488 : double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2687 76488 : double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2688 76488 : bool bShifted = false;
2689 :
2690 76488 : if (iSrcX == -1)
2691 : {
2692 1534 : iSrcX = 0;
2693 1534 : dfRatioX = 1;
2694 : }
2695 76488 : if (iSrcY == -1)
2696 : {
2697 7734 : iSrcY = 0;
2698 7734 : dfRatioY = 1;
2699 : }
2700 76488 : GPtrDiff_t iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
2701 :
2702 : // Shift so we don't overrun the array.
2703 76488 : if (static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize == iSrcOffset + 1 ||
2704 76430 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize ==
2705 76430 : iSrcOffset + nSrcXSize + 1)
2706 : {
2707 110 : bShifted = true;
2708 110 : --iSrcOffset;
2709 : }
2710 :
2711 76488 : double adfDensity[2] = {0.0, 0.0};
2712 76488 : double adfReal[2] = {0.0, 0.0};
2713 76488 : double adfImag[2] = {0.0, 0.0};
2714 76488 : double dfAccumulatorReal = 0.0;
2715 76488 : double dfAccumulatorImag = 0.0;
2716 76488 : double dfAccumulatorDensity = 0.0;
2717 76488 : double dfAccumulatorDivisor = 0.0;
2718 :
2719 76488 : const GPtrDiff_t nSrcPixels =
2720 76488 : static_cast<GPtrDiff_t>(nSrcXSize) * nSrcYSize;
2721 : // Get pixel row.
2722 76488 : if (iSrcY >= 0 && iSrcY < nSrcYSize && iSrcOffset >= 0 &&
2723 152976 : iSrcOffset < nSrcPixels &&
2724 76488 : GWKGetPixelRow(poWK, iBand, iSrcOffset, 1, adfDensity, adfReal,
2725 : adfImag))
2726 : {
2727 70544 : double dfMult1 = dfRatioX * dfRatioY;
2728 70544 : double dfMult2 = (1.0 - dfRatioX) * dfRatioY;
2729 :
2730 : // Shifting corrected.
2731 70544 : if (bShifted)
2732 : {
2733 110 : adfReal[0] = adfReal[1];
2734 110 : adfImag[0] = adfImag[1];
2735 110 : adfDensity[0] = adfDensity[1];
2736 : }
2737 :
2738 : // Upper Left Pixel.
2739 70544 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2740 70544 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
2741 : {
2742 65090 : dfAccumulatorDivisor += dfMult1;
2743 :
2744 65090 : dfAccumulatorReal += adfReal[0] * dfMult1;
2745 65090 : dfAccumulatorImag += adfImag[0] * dfMult1;
2746 65090 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2747 : }
2748 :
2749 : // Upper Right Pixel.
2750 70544 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2751 69889 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2752 : {
2753 64615 : dfAccumulatorDivisor += dfMult2;
2754 :
2755 64615 : dfAccumulatorReal += adfReal[1] * dfMult2;
2756 64615 : dfAccumulatorImag += adfImag[1] * dfMult2;
2757 64615 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2758 : }
2759 : }
2760 :
2761 : // Get pixel row.
2762 76488 : if (iSrcY + 1 >= 0 && iSrcY + 1 < nSrcYSize &&
2763 225392 : iSrcOffset + nSrcXSize >= 0 && iSrcOffset + nSrcXSize < nSrcPixels &&
2764 72416 : GWKGetPixelRow(poWK, iBand, iSrcOffset + nSrcXSize, 1, adfDensity,
2765 : adfReal, adfImag))
2766 : {
2767 66857 : double dfMult1 = dfRatioX * (1.0 - dfRatioY);
2768 66857 : double dfMult2 = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2769 :
2770 : // Shifting corrected
2771 66857 : if (bShifted)
2772 : {
2773 52 : adfReal[0] = adfReal[1];
2774 52 : adfImag[0] = adfImag[1];
2775 52 : adfDensity[0] = adfDensity[1];
2776 : }
2777 :
2778 : // Lower Left Pixel
2779 66857 : if (iSrcX >= 0 && iSrcX < nSrcXSize &&
2780 66857 : adfDensity[0] > SRC_DENSITY_THRESHOLD_DOUBLE)
2781 : {
2782 61578 : dfAccumulatorDivisor += dfMult1;
2783 :
2784 61578 : dfAccumulatorReal += adfReal[0] * dfMult1;
2785 61578 : dfAccumulatorImag += adfImag[0] * dfMult1;
2786 61578 : dfAccumulatorDensity += adfDensity[0] * dfMult1;
2787 : }
2788 :
2789 : // Lower Right Pixel.
2790 66857 : if (iSrcX + 1 >= 0 && iSrcX + 1 < nSrcXSize &&
2791 66260 : adfDensity[1] > SRC_DENSITY_THRESHOLD_DOUBLE)
2792 : {
2793 61283 : dfAccumulatorDivisor += dfMult2;
2794 :
2795 61283 : dfAccumulatorReal += adfReal[1] * dfMult2;
2796 61283 : dfAccumulatorImag += adfImag[1] * dfMult2;
2797 61283 : dfAccumulatorDensity += adfDensity[1] * dfMult2;
2798 : }
2799 : }
2800 :
2801 : /* -------------------------------------------------------------------- */
2802 : /* Return result. */
2803 : /* -------------------------------------------------------------------- */
2804 76488 : if (dfAccumulatorDivisor == 1.0)
2805 : {
2806 44969 : *pdfReal = dfAccumulatorReal;
2807 44969 : *pdfImag = dfAccumulatorImag;
2808 44969 : *pdfDensity = dfAccumulatorDensity;
2809 44969 : return false;
2810 : }
2811 31519 : else if (dfAccumulatorDivisor < 0.00001)
2812 : {
2813 0 : *pdfReal = 0.0;
2814 0 : *pdfImag = 0.0;
2815 0 : *pdfDensity = 0.0;
2816 0 : return false;
2817 : }
2818 : else
2819 : {
2820 31519 : *pdfReal = dfAccumulatorReal / dfAccumulatorDivisor;
2821 31519 : *pdfImag = dfAccumulatorImag / dfAccumulatorDivisor;
2822 31519 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorDivisor;
2823 31519 : return true;
2824 : }
2825 : }
2826 :
2827 : template <class T>
2828 6544500 : static bool GWKBilinearResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
2829 : int iBand, double dfSrcX,
2830 : double dfSrcY, T *pValue)
2831 :
2832 : {
2833 :
2834 6544500 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
2835 6544500 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
2836 6544500 : GPtrDiff_t iSrcOffset =
2837 6544500 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
2838 6544500 : const double dfRatioX = 1.5 - (dfSrcX - iSrcX);
2839 6544500 : const double dfRatioY = 1.5 - (dfSrcY - iSrcY);
2840 :
2841 6544500 : const T *const pSrc = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
2842 :
2843 6544500 : if (iSrcX >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2844 4420828 : iSrcY + 1 < poWK->nSrcYSize)
2845 : {
2846 4377610 : const double dfAccumulator =
2847 4377610 : (double(pSrc[iSrcOffset]) * dfRatioX +
2848 4377610 : double(pSrc[iSrcOffset + 1]) * (1.0 - dfRatioX)) *
2849 : dfRatioY +
2850 4377610 : (double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfRatioX +
2851 4377610 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) *
2852 4377610 : (1.0 - dfRatioX)) *
2853 4377610 : (1.0 - dfRatioY);
2854 :
2855 4377610 : *pValue = GWKRoundValueT<T>(dfAccumulator);
2856 :
2857 4377560 : return true;
2858 : }
2859 :
2860 2166890 : double dfAccumulatorDivisor = 0.0;
2861 2166890 : double dfAccumulator = 0.0;
2862 :
2863 : // Upper Left Pixel.
2864 2166890 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY >= 0 &&
2865 253821 : iSrcY < poWK->nSrcYSize)
2866 : {
2867 253821 : const double dfMult = dfRatioX * dfRatioY;
2868 :
2869 253821 : dfAccumulatorDivisor += dfMult;
2870 :
2871 253821 : dfAccumulator += double(pSrc[iSrcOffset]) * dfMult;
2872 : }
2873 :
2874 : // Upper Right Pixel.
2875 2166890 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY >= 0 &&
2876 1877980 : iSrcY < poWK->nSrcYSize)
2877 : {
2878 1858270 : const double dfMult = (1.0 - dfRatioX) * dfRatioY;
2879 :
2880 1858270 : dfAccumulatorDivisor += dfMult;
2881 :
2882 1858270 : dfAccumulator += double(pSrc[iSrcOffset + 1]) * dfMult;
2883 : }
2884 :
2885 : // Lower Right Pixel.
2886 2166890 : if (iSrcX + 1 >= 0 && iSrcX + 1 < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2887 2003834 : iSrcY + 1 < poWK->nSrcYSize)
2888 : {
2889 1927512 : const double dfMult = (1.0 - dfRatioX) * (1.0 - dfRatioY);
2890 :
2891 1927512 : dfAccumulatorDivisor += dfMult;
2892 :
2893 1927512 : dfAccumulator +=
2894 1927512 : double(pSrc[iSrcOffset + 1 + poWK->nSrcXSize]) * dfMult;
2895 : }
2896 :
2897 : // Lower Left Pixel.
2898 2166890 : if (iSrcX >= 0 && iSrcX < poWK->nSrcXSize && iSrcY + 1 >= 0 &&
2899 345069 : iSrcY + 1 < poWK->nSrcYSize)
2900 : {
2901 268052 : const double dfMult = dfRatioX * (1.0 - dfRatioY);
2902 :
2903 268052 : dfAccumulatorDivisor += dfMult;
2904 :
2905 268052 : dfAccumulator += double(pSrc[iSrcOffset + poWK->nSrcXSize]) * dfMult;
2906 : }
2907 :
2908 : /* -------------------------------------------------------------------- */
2909 : /* Return result. */
2910 : /* -------------------------------------------------------------------- */
2911 2166890 : double dfValue = 0.0;
2912 :
2913 2166890 : if (dfAccumulatorDivisor < 0.00001)
2914 : {
2915 0 : *pValue = 0;
2916 0 : return false;
2917 : }
2918 2166890 : else if (dfAccumulatorDivisor == 1.0)
2919 : {
2920 7320 : dfValue = dfAccumulator;
2921 : }
2922 : else
2923 : {
2924 2159568 : dfValue = dfAccumulator / dfAccumulatorDivisor;
2925 : }
2926 :
2927 2166890 : *pValue = GWKRoundValueT<T>(dfValue);
2928 :
2929 2214420 : return true;
2930 : }
2931 :
2932 : /************************************************************************/
2933 : /* GWKCubicResample() */
2934 : /* Set of bicubic interpolators using cubic convolution. */
2935 : /************************************************************************/
2936 :
2937 : // http://verona.fi-p.unam.mx/boris/practicas/CubConvInterp.pdf Formula 18
2938 : // or http://en.wikipedia.org/wiki/Cubic_Hermite_spline : CINTx(p_1,p0,p1,p2)
2939 : // http://en.wikipedia.org/wiki/Bicubic_interpolation: matrix notation
2940 :
// One-dimensional cubic convolution of four equally spaced samples
// f0..f3, evaluated between f1 and f2. distance1, distance2 and distance3
// are the fractional offset from f1 and its square and cube, as supplied by
// the caller.
template <typename T>
static inline T CubicConvolution(T distance1, T distance2, T distance3, T f0,
                                 T f1, T f2, T f3)
{
    const T linearTerm = distance1 * (f2 - f0);
    const T quadraticTerm = distance2 * (2 * f0 - 5 * f1 + 4 * f2 - f3);
    const T cubicTerm = distance3 * (3 * (f1 - f2) + f3 - f0);

    return (f1 + T(0.5) * (linearTerm + quadraticTerm + cubicTerm));
}
2949 :
2950 : /************************************************************************/
2951 : /* GWKCubicComputeWeights() */
2952 : /************************************************************************/
2953 :
2954 : // adfCoeffs[2] = 1.0 - (adfCoeffs[0] + adfCoeffs[1] - adfCoeffs[3]);
2955 :
// Fill coeffs[0..3] with the cubic convolution weights applied to the four
// consecutive samples surrounding a position located at fractional offset x
// after the second sample. For x == 0 the weights are {0, 1, 0, 0} and for
// x == 1 they are {0, 0, 1, 0}.
template <typename T>
static inline void GWKCubicComputeWeights(T x, T coeffs[4])
{
    const T xTimesHalf = T(0.5) * x;
    const T xTimesThree = T(3.0) * x;
    const T xSquaredTimesHalf = xTimesHalf * x;

    // Outer samples.
    coeffs[0] = xTimesHalf * (-1 + x * (2 - x));
    coeffs[3] = xSquaredTimesHalf * (-1 + x);

    // Inner samples.
    coeffs[1] = 1 + xSquaredTimesHalf * (-5 + xTimesThree);
    coeffs[2] = xTimesHalf * (1 + x * (4 - xTimesThree));
}
2968 :
// Dot product of four double weights (v1) with four samples of type T (v2),
// each sample being converted to double before the multiplication.
template <typename T> inline double CONVOL4(const double v1[4], const T v2[4])
{
    const double dfTerm0 = v1[0] * double(v2[0]);
    const double dfTerm1 = v1[1] * double(v2[1]);
    const double dfTerm2 = v1[2] * double(v2[2]);
    const double dfTerm3 = v1[3] * double(v2[3]);

    return dfTerm0 + dfTerm1 + dfTerm2 + dfTerm3;
}
2974 :
// The two macros below are compiled out (#if 0) and kept for reference only.
// Compared with GWKCubicComputeWeights(), the weights computed here omit the
// common 0.5 * x factor and the constant 1 of the second weight; that factor
// is returned separately in dfHalfX and re-applied by CONVOL4_Optim2MAX,
// which also adds the (v)[1] sample back.
2975 : #if 0
2976 : // Optimal (in theory...) for max 2 convolutions: 14 multiplications
2977 : // instead of 17.
2978 : // TODO(schwehr): Use an inline function.
2979 : #define GWKCubicComputeWeights_Optim2MAX(dfX_, adfCoeffs, dfHalfX) \
2980 : { \
2981 : const double dfX = dfX_; \
2982 : dfHalfX = 0.5 * dfX; \
2983 : const double dfThreeX = 3.0 * dfX; \
2984 : const double dfXMinus1 = dfX - 1; \
2985 : \
2986 : adfCoeffs[0] = -1 + dfX * (2 - dfX); \
2987 : adfCoeffs[1] = dfX * (-5 + dfThreeX); \
2988 : /*adfCoeffs[2] = 1 + dfX * (4 - dfThreeX);*/ \
2989 : adfCoeffs[2] = -dfXMinus1 - adfCoeffs[1]; \
2990 : /*adfCoeffs[3] = dfX * (-1 + dfX); */ \
2991 : adfCoeffs[3] = dfXMinus1 - adfCoeffs[0]; \
2992 : }
2993 :
// Weighted sum of the four samples in v using the reduced weights above:
// v[1] + dfHalfX * (dot product of adfCoeffs and v).
2994 : // TODO(schwehr): Use an inline function.
2995 : #define CONVOL4_Optim2MAX(adfCoeffs, v, dfHalfX) \
2996 : ((v)[1] + (dfHalfX) * ((adfCoeffs)[0] * (v)[0] + (adfCoeffs)[1] * (v)[1] + \
2997 : (adfCoeffs)[2] * (v)[2] + (adfCoeffs)[3] * (v)[3]))
2998 : #endif
2999 :
3000 302045 : static bool GWKCubicResample4Sample(const GDALWarpKernel *poWK, int iBand,
3001 : double dfSrcX, double dfSrcY,
3002 : double *pdfDensity, double *pdfReal,
3003 : double *pdfImag)
3004 :
3005 : {
3006 302045 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3007 302045 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3008 302045 : GPtrDiff_t iSrcOffset =
3009 302045 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3010 302045 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3011 302045 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3012 302045 : double adfDensity[4] = {};
3013 302045 : double adfReal[4] = {};
3014 302045 : double adfImag[4] = {};
3015 :
3016 : // Get the bilinear interpolation at the image borders.
3017 302045 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3018 286140 : iSrcY + 2 >= poWK->nSrcYSize)
3019 24670 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3020 24670 : pdfDensity, pdfReal, pdfImag);
3021 :
3022 277375 : double adfValueDens[4] = {};
3023 277375 : double adfValueReal[4] = {};
3024 277375 : double adfValueImag[4] = {};
3025 :
3026 277375 : double adfCoeffsX[4] = {};
3027 277375 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3028 :
3029 1240570 : for (GPtrDiff_t i = -1; i < 3; i++)
3030 : {
3031 1009640 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3032 998035 : 2, adfDensity, adfReal, adfImag) ||
3033 998035 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3034 980395 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3035 2979770 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3036 972094 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3037 : {
3038 46449 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3039 46449 : pdfDensity, pdfReal, pdfImag);
3040 : }
3041 :
3042 963196 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3043 963196 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3044 963196 : adfValueImag[i + 1] = CONVOL4(adfCoeffsX, adfImag);
3045 : }
3046 :
3047 : /* -------------------------------------------------------------------- */
3048 : /* For now, if we have any pixels missing in the kernel area, */
3049 : /* we fallback on using bilinear interpolation. Ideally we */
3050 : /* should do "weight adjustment" of our results similarly to */
3051 : /* what is done for the cubic spline and lanc. interpolators. */
3052 : /* -------------------------------------------------------------------- */
3053 :
3054 230926 : double adfCoeffsY[4] = {};
3055 230926 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3056 :
3057 230926 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3058 230926 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3059 230926 : *pdfImag = CONVOL4(adfCoeffsY, adfValueImag);
3060 :
3061 230926 : return true;
3062 : }
3063 :
3064 : #ifdef USE_SSE2
3065 :
3066 : /************************************************************************/
3067 : /* XMMLoad4Values() */
3068 : /* */
3069 : /* Load 4 packed byte or uint16, cast them to float and put them in a */
3070 : /* m128 register. */
3071 : /************************************************************************/
3072 :
3073 364563000 : static CPL_INLINE __m128 XMMLoad4Values(const GByte *ptr)
3074 : {
3075 : unsigned int i;
3076 364563000 : memcpy(&i, ptr, 4);
3077 729126000 : __m128i xmm_i = _mm_cvtsi32_si128(i);
3078 : // Zero extend 4 packed unsigned 8-bit integers in a to packed
3079 : // 32-bit integers.
3080 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3081 : xmm_i = _mm_cvtepu8_epi32(xmm_i);
3082 : #else
3083 729126000 : xmm_i = _mm_unpacklo_epi8(xmm_i, _mm_setzero_si128());
3084 729126000 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3085 : #endif
3086 729126000 : return _mm_cvtepi32_ps(xmm_i);
3087 : }
3088 :
3089 791724 : static CPL_INLINE __m128 XMMLoad4Values(const GUInt16 *ptr)
3090 : {
3091 : GUInt64 i;
3092 791724 : memcpy(&i, ptr, 8);
3093 1583450 : __m128i xmm_i = _mm_cvtsi64_si128(i);
3094 : // Zero extend 4 packed unsigned 16-bit integers in a to packed
3095 : // 32-bit integers.
3096 : #if defined(__SSE4_1__) || defined(__AVX__) || defined(USE_NEON_OPTIMIZATIONS)
3097 : xmm_i = _mm_cvtepu16_epi32(xmm_i);
3098 : #else
3099 1583450 : xmm_i = _mm_unpacklo_epi16(xmm_i, _mm_setzero_si128());
3100 : #endif
3101 1583450 : return _mm_cvtepi32_ps(xmm_i);
3102 : }
3103 :
3104 : /************************************************************************/
3105 : /* XMMHorizontalAdd() */
3106 : /* */
3107 : /* Return the sum of the 4 floating points of the register. */
3108 : /************************************************************************/
3109 :
3110 : #if defined(__SSE3__) || defined(USE_NEON_OPTIMIZATIONS)
3111 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3112 : {
3113 : __m128 shuf = _mm_movehdup_ps(v); // (v3 , v3 , v1 , v1)
3114 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v3+v2, v1+v1, v1+v0)
3115 : shuf = _mm_movehl_ps(shuf, sums); // (v3 , v3 , v3+v3, v3+v2)
3116 : sums = _mm_add_ss(sums, shuf); // (v1+v0)+(v3+v2)
3117 : return _mm_cvtss_f32(sums);
3118 : }
3119 : #else
3120 94717100 : static CPL_INLINE float XMMHorizontalAdd(__m128 v)
3121 : {
3122 95064600 : __m128 shuf = _mm_movehl_ps(v, v); // (v3 , v2 , v3 , v2)
3123 95064600 : __m128 sums = _mm_add_ps(v, shuf); // (v3+v3, v2+v2, v3+v1, v2+v0)
3124 95064600 : shuf = _mm_shuffle_ps(sums, sums, 1); // (v2+v0, v2+v0, v2+v0, v3+v1)
3125 95134200 : sums = _mm_add_ss(sums, shuf); // (v2+v0)+(v3+v1)
3126 95134200 : return _mm_cvtss_f32(sums);
3127 : }
3128 : #endif
3129 :
3130 : #endif // define USE_SSE2
3131 :
3132 : /************************************************************************/
3133 : /* GWKCubicResampleSrcMaskIsDensity4SampleRealT() */
3134 : /************************************************************************/
3135 :
3136 : // Note: if USE_SSE_CUBIC_IMPL, only instantiate that for Byte and UInt16,
3137 : // because there are a few assumptions about those types.
3138 : // We do not define USE_SSE_CUBIC_IMPL since in practice, it gives zero
3139 : // perf benefit.
3140 :
3141 : template <class T>
3142 389755 : static CPL_INLINE bool GWKCubicResampleSrcMaskIsDensity4SampleRealT(
3143 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3144 : double *pdfDensity, double *pdfReal)
3145 : {
3146 389755 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3147 389755 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3148 389755 : const GPtrDiff_t iSrcOffset =
3149 389755 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3150 :
3151 : // Get the bilinear interpolation at the image borders.
3152 389755 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3153 387271 : iSrcY + 2 >= poWK->nSrcYSize)
3154 : {
3155 2484 : double adfImagIgnored[4] = {};
3156 2484 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3157 2484 : pdfDensity, pdfReal, adfImagIgnored);
3158 : }
3159 :
3160 : #if defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3161 : const float fDeltaX = static_cast<float>(dfSrcX) - 0.5f - iSrcX;
3162 : const float fDeltaY = static_cast<float>(dfSrcY) - 0.5f - iSrcY;
3163 :
3164 : // TODO(schwehr): Explain the magic numbers.
3165 : float afTemp[4 + 4 + 4 + 1];
3166 : float *pafAligned =
3167 : reinterpret_cast<float *>(afTemp + ((size_t)afTemp & 0xf));
3168 : float *pafCoeffs = pafAligned;
3169 : float *pafDensity = pafAligned + 4;
3170 : float *pafValue = pafAligned + 8;
3171 :
3172 : const float fHalfDeltaX = 0.5f * fDeltaX;
3173 : const float fThreeDeltaX = 3.0f * fDeltaX;
3174 : const float fHalfDeltaX2 = fHalfDeltaX * fDeltaX;
3175 :
3176 : pafCoeffs[0] = fHalfDeltaX * (-1 + fDeltaX * (2 - fDeltaX));
3177 : pafCoeffs[1] = 1 + fHalfDeltaX2 * (-5 + fThreeDeltaX);
3178 : pafCoeffs[2] = fHalfDeltaX * (1 + fDeltaX * (4 - fThreeDeltaX));
3179 : pafCoeffs[3] = fHalfDeltaX2 * (-1 + fDeltaX);
3180 : __m128 xmmCoeffs = _mm_load_ps(pafCoeffs);
3181 : const __m128 xmmThreshold = _mm_load1_ps(&SRC_DENSITY_THRESHOLD_FLOAT);
3182 :
3183 : __m128 xmmMaskLowDensity = _mm_setzero_ps();
3184 : for (GPtrDiff_t i = -1, iOffset = iSrcOffset - poWK->nSrcXSize - 1; i < 3;
3185 : i++, iOffset += poWK->nSrcXSize)
3186 : {
3187 : const __m128 xmmDensity =
3188 : _mm_loadu_ps(poWK->pafUnifiedSrcDensity + iOffset);
3189 : xmmMaskLowDensity = _mm_or_ps(xmmMaskLowDensity,
3190 : _mm_cmplt_ps(xmmDensity, xmmThreshold));
3191 : pafDensity[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3192 :
3193 : const __m128 xmmValues =
3194 : XMMLoad4Values(((T *)poWK->papabySrcImage[iBand]) + iOffset);
3195 : pafValue[i + 1] = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValues));
3196 : }
3197 : if (_mm_movemask_ps(xmmMaskLowDensity))
3198 : {
3199 : double adfImagIgnored[4] = {};
3200 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3201 : pdfDensity, pdfReal, adfImagIgnored);
3202 : }
3203 :
3204 : const float fHalfDeltaY = 0.5f * fDeltaY;
3205 : const float fThreeDeltaY = 3.0f * fDeltaY;
3206 : const float fHalfDeltaY2 = fHalfDeltaY * fDeltaY;
3207 :
3208 : pafCoeffs[0] = fHalfDeltaY * (-1 + fDeltaY * (2 - fDeltaY));
3209 : pafCoeffs[1] = 1 + fHalfDeltaY2 * (-5 + fThreeDeltaY);
3210 : pafCoeffs[2] = fHalfDeltaY * (1 + fDeltaY * (4 - fThreeDeltaY));
3211 : pafCoeffs[3] = fHalfDeltaY2 * (-1 + fDeltaY);
3212 :
3213 : xmmCoeffs = _mm_load_ps(pafCoeffs);
3214 :
3215 : const __m128 xmmDensity = _mm_load_ps(pafDensity);
3216 : const __m128 xmmValue = _mm_load_ps(pafValue);
3217 : *pdfDensity = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmDensity));
3218 : *pdfReal = XMMHorizontalAdd(_mm_mul_ps(xmmCoeffs, xmmValue));
3219 :
3220 : // We did all above computations on float32 whereas the general case is
3221 : // float64. Not sure if one is fundamentally more correct than the other
3222 : // one, but we want our optimization to give the same result as the
3223 : // general case as much as possible, so if the resulting value is
3224 : // close to some_int_value + 0.5, redo the computation with the general
3225 : // case.
3226 : // Note: If other types than Byte or UInt16, will need changes.
3227 : if (fabs(*pdfReal - static_cast<int>(*pdfReal) - 0.5) > .007)
3228 : return true;
3229 :
3230 : #endif // defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2)
3231 :
3232 387271 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3233 387271 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3234 :
3235 387271 : double adfValueDens[4] = {};
3236 387271 : double adfValueReal[4] = {};
3237 :
3238 387271 : double adfCoeffsX[4] = {};
3239 387271 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3240 :
3241 387271 : double adfCoeffsY[4] = {};
3242 387271 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3243 :
3244 1930200 : for (GPtrDiff_t i = -1; i < 3; i++)
3245 : {
3246 1544480 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3247 : #if !(defined(USE_SSE_CUBIC_IMPL) && defined(USE_SSE2))
3248 1544480 : if (poWK->pafUnifiedSrcDensity[iOffset + 0] <
3249 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3250 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 1] <
3251 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3252 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 2] <
3253 1542940 : SRC_DENSITY_THRESHOLD_FLOAT ||
3254 1542940 : poWK->pafUnifiedSrcDensity[iOffset + 3] <
3255 : SRC_DENSITY_THRESHOLD_FLOAT)
3256 : {
3257 1551 : double adfImagIgnored[4] = {};
3258 1551 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3259 : pdfDensity, pdfReal,
3260 1551 : adfImagIgnored);
3261 : }
3262 : #endif
3263 :
3264 3085860 : adfValueDens[i + 1] =
3265 1542930 : CONVOL4(adfCoeffsX, poWK->pafUnifiedSrcDensity + iOffset);
3266 :
3267 1542930 : adfValueReal[i + 1] = CONVOL4(
3268 : adfCoeffsX,
3269 1542930 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3270 : }
3271 :
3272 385720 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3273 385720 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3274 :
3275 385720 : return true;
3276 : }
3277 :
3278 : /************************************************************************/
3279 : /* GWKCubicResampleSrcMaskIsDensity4SampleReal() */
3280 : /* Bi-cubic when source has and only has pafUnifiedSrcDensity. */
3281 : /************************************************************************/
3282 :
3283 0 : static bool GWKCubicResampleSrcMaskIsDensity4SampleReal(
3284 : const GDALWarpKernel *poWK, int iBand, double dfSrcX, double dfSrcY,
3285 : double *pdfDensity, double *pdfReal)
3286 :
3287 : {
3288 0 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3289 0 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3290 0 : const GPtrDiff_t iSrcOffset =
3291 0 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3292 0 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3293 0 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3294 :
3295 : // Get the bilinear interpolation at the image borders.
3296 0 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3297 0 : iSrcY + 2 >= poWK->nSrcYSize)
3298 : {
3299 0 : double adfImagIgnored[4] = {};
3300 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3301 0 : pdfDensity, pdfReal, adfImagIgnored);
3302 : }
3303 :
3304 0 : double adfCoeffsX[4] = {};
3305 0 : GWKCubicComputeWeights(dfDeltaX, adfCoeffsX);
3306 :
3307 0 : double adfCoeffsY[4] = {};
3308 0 : GWKCubicComputeWeights(dfDeltaY, adfCoeffsY);
3309 :
3310 0 : double adfValueDens[4] = {};
3311 0 : double adfValueReal[4] = {};
3312 0 : double adfDensity[4] = {};
3313 0 : double adfReal[4] = {};
3314 0 : double adfImagIgnored[4] = {};
3315 :
3316 0 : for (GPtrDiff_t i = -1; i < 3; i++)
3317 : {
3318 0 : if (!GWKGetPixelRow(poWK, iBand, iSrcOffset + i * poWK->nSrcXSize - 1,
3319 0 : 2, adfDensity, adfReal, adfImagIgnored) ||
3320 0 : adfDensity[0] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3321 0 : adfDensity[1] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3322 0 : adfDensity[2] < SRC_DENSITY_THRESHOLD_DOUBLE ||
3323 0 : adfDensity[3] < SRC_DENSITY_THRESHOLD_DOUBLE)
3324 : {
3325 0 : return GWKBilinearResample4Sample(poWK, iBand, dfSrcX, dfSrcY,
3326 : pdfDensity, pdfReal,
3327 0 : adfImagIgnored);
3328 : }
3329 :
3330 0 : adfValueDens[i + 1] = CONVOL4(adfCoeffsX, adfDensity);
3331 0 : adfValueReal[i + 1] = CONVOL4(adfCoeffsX, adfReal);
3332 : }
3333 :
3334 0 : *pdfDensity = CONVOL4(adfCoeffsY, adfValueDens);
3335 0 : *pdfReal = CONVOL4(adfCoeffsY, adfValueReal);
3336 :
3337 0 : return true;
3338 : }
3339 :
3340 : template <class T>
3341 2231485 : static bool GWKCubicResampleNoMasks4SampleT(const GDALWarpKernel *poWK,
3342 : int iBand, double dfSrcX,
3343 : double dfSrcY, T *pValue)
3344 :
3345 : {
3346 2231485 : const int iSrcX = static_cast<int>(dfSrcX - 0.5);
3347 2231485 : const int iSrcY = static_cast<int>(dfSrcY - 0.5);
3348 2231485 : const GPtrDiff_t iSrcOffset =
3349 2231485 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
3350 2231485 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3351 2231485 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3352 2231485 : const double dfDeltaY2 = dfDeltaY * dfDeltaY;
3353 2231485 : const double dfDeltaY3 = dfDeltaY2 * dfDeltaY;
3354 :
3355 : // Get the bilinear interpolation at the image borders.
3356 2231485 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
3357 1814944 : iSrcY + 2 >= poWK->nSrcYSize)
3358 488548 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
3359 488548 : pValue);
3360 :
3361 1742937 : double adfCoeffs[4] = {};
3362 1742937 : GWKCubicComputeWeights(dfDeltaX, adfCoeffs);
3363 :
3364 1742937 : double adfValue[4] = {};
3365 :
3366 8714670 : for (GPtrDiff_t i = -1; i < 3; i++)
3367 : {
3368 6971746 : const GPtrDiff_t iOffset = iSrcOffset + i * poWK->nSrcXSize - 1;
3369 :
3370 6971746 : adfValue[i + 1] = CONVOL4(
3371 : adfCoeffs,
3372 6971746 : reinterpret_cast<T *>(poWK->papabySrcImage[iBand]) + iOffset);
3373 : }
3374 :
3375 : const double dfValue =
3376 1742937 : CubicConvolution(dfDeltaY, dfDeltaY2, dfDeltaY3, adfValue[0],
3377 : adfValue[1], adfValue[2], adfValue[3]);
3378 :
3379 1742937 : *pValue = GWKClampValueT<T>(dfValue);
3380 :
3381 1742937 : return true;
3382 : }
3383 :
3384 : /************************************************************************/
3385 : /* GWKLanczosSinc() */
3386 : /************************************************************************/
3387 :
3388 : /*
3389 : * Lanczos windowed sinc interpolation kernel with radius r.
3390 : * /
3391 : * | sinc(x) * sinc(x/r), if |x| < r
3392 : * L(x) = | 1, if x = 0 ,
3393 : * | 0, otherwise
3394 : * \
3395 : *
3396 : * where sinc(x) = sin(PI * x) / (PI * x).
3397 : */
3398 :
// Evaluate sinc(x) * sinc(x / 3), i.e. the Lanczos kernel with the radius
// hard-coded to 3, at dfX. Returns 1.0 for dfX == 0. No test against the
// kernel radius is made here.
static double GWKLanczosSinc(double dfX)
{
    if (dfX != 0.0)
    {
        const double dfPiX = M_PI * dfX;
        const double dfPiXOver3 = dfPiX / 3;

        // With s = sin(PI * x / 3) and sin(3a) = 3 sin(a) - 4 sin^3(a):
        //   sin(PI * x) * sin(PI * x / 3) = (3 - 4 s^2) * s^2
        // so a single call to sin() is enough.
        const double dfS = sin(dfPiXOver3);
        const double dfS2 = dfS * dfS;
        const double dfNumerator = (3 - 4 * dfS2) * dfS2;

        const double dfDenominator = dfPiX * dfPiXOver3;
        return dfNumerator / dfDenominator;
    }

    return 1.0;
}
3415 :
// Replace, in place, each of the 4 entries of padfValues by
// sinc(x) * sinc(x / 3) (Lanczos kernel with the radius hard-coded to 3;
// 1.0 for an entry equal to 0) and return the sum of the 4 results.
static double GWKLanczosSinc4Values(double *padfValues)
{
    for (double *pdfValue = padfValues; pdfValue != padfValues + 4; ++pdfValue)
    {
        if (*pdfValue == 0.0)
        {
            *pdfValue = 1.0;
            continue;
        }

        const double dfPiX = M_PI * (*pdfValue);
        const double dfPiXOver3 = dfPiX / 3;

        // With s = sin(PI * x / 3) and sin(3a) = 3 sin(a) - 4 sin^3(a):
        //   sin(PI * x) * sin(PI * x / 3) = (3 - 4 s^2) * s^2
        const double dfS = sin(dfPiXOver3);
        const double dfS2 = dfS * dfS;
        const double dfNumerator = (3 - 4 * dfS2) * dfS2;

        const double dfDenominator = dfPiX * dfPiXOver3;
        *pdfValue = dfNumerator / dfDenominator;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3440 :
3441 : /************************************************************************/
3442 : /* GWKBilinear() */
3443 : /************************************************************************/
3444 :
// Bilinear (triangle) kernel: 1 - |x| for |x| <= 1, 0 elsewhere.
static double GWKBilinear(double dfX)
{
    const double dfDistance = fabs(dfX);
    return (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
}
3453 :
// Replace, in place, each of the 4 entries of padfValues by the bilinear
// (triangle) kernel value 1 - |x| (0 when |x| > 1) and return the sum of
// the 4 results.
static double GWKBilinear4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfDistance = fabs(padfValues[i]);
        padfValues[i] = (dfDistance <= 1.0) ? 1 - dfDistance : 0.0;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3478 :
3479 : /************************************************************************/
3480 : /* GWKCubic() */
3481 : /************************************************************************/
3482 :
3483 4357000 : static double GWKCubic(double dfX)
3484 : {
3485 4357000 : return CubicKernel(dfX);
3486 : }
3487 :
// Replace, in place, each of the 4 entries of padfValues by the value of
// the piecewise cubic kernel
//      1.5 |x|^3 - 2.5 |x|^2 + 1             for |x| <= 1
//     -0.5 |x|^3 + 2.5 |x|^2 - 4 |x| + 2     for 1 < |x| <= 2
//      0                                      otherwise
// and return the sum of the 4 results.
static double GWKCubic4Values(double *padfValues)
{
    for (int i = 0; i < 4; ++i)
    {
        const double dfX = padfValues[i];
        const double dfAbsX = fabs(dfX);
        const double dfX2 = dfX * dfX;

        double dfWeight = 0.0;
        if (dfAbsX <= 1.0)
        {
            dfWeight = dfX2 * (1.5 * dfAbsX - 2.5) + 1.0;
        }
        else if (dfAbsX <= 2.0)
        {
            dfWeight = dfX2 * (-0.5 * dfAbsX + 2.5) - 4.0 * dfAbsX + 2.0;
        }

        padfValues[i] = dfWeight;
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3529 :
3530 : /************************************************************************/
3531 : /* GWKBSpline() */
3532 : /************************************************************************/
3533 :
3534 : // https://www.cs.utexas.edu/~fussell/courses/cs384g-fall2013/lectures/mitchell/Mitchell.pdf
3535 : // Equation 8 with (B,C)=(1,0)
3536 : // 1/6 * ( 3 * |x|^3 - 6 * |x|^2 + 4) |x| < 1
3537 : // 1/6 * ( -|x|^3 + 6 |x|^2 - 12|x| + 8) |x| >= 1 and |x| < 2
3538 :
// Cubic B-spline kernel, evaluated as a sum of one-sided cubes:
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// where each term only contributes when its base is strictly positive.
// The 1/6 normalisation factor of the formula is NOT applied here, so the
// returned value is six times the kernel value (e.g. 4.0 at x == 0).
static double GWKBSpline(double x)
{
    const double dfPlus2 = x + 2.0;
    // Left of the support: nothing contributes.
    if (!(dfPlus2 > 0.0))
        return 0.0;
    const double dfPlus2Cubed = dfPlus2 * dfPlus2 * dfPlus2;

    // The remaining terms are nested: each one can only be active when the
    // previous (larger) base is positive as well.
    double dfPartial = 0.0;
    const double dfPlus1 = x + 1.0;
    if (dfPlus1 > 0.0)
    {
        double dfCore = 0.0;
        if (x > 0.0)
        {
            const double dfMinus1 = x - 1.0;
            const double dfTail =
                (dfMinus1 > 0.0) ? -4.0 * dfMinus1 * dfMinus1 * dfMinus1 : 0.0;
            dfCore = dfTail + 6.0 * x * x * x;
        }
        dfPartial = dfCore + -4.0 * dfPlus1 * dfPlus1 * dfPlus1;
    }

    return dfPartial + dfPlus2Cubed;  // * 0.166666666666666666666
}
3562 :
// Four-at-a-time variant of the cubic B-spline kernel.
//
// Each of padfValues[0..3] is replaced in place by
//   (x+2)^3 - 4 (x+1)^3 + 6 x^3 - 4 (x-1)^3
// with every term dropped when its base is not strictly positive. As in the
// scalar version, the 1/6 factor is not applied. Returns the sum of the four
// resulting values.
static double GWKBSpline4Values(double *padfValues)
{
    for (int iVal = 0; iVal < 4; ++iVal)
    {
        const double dfX = padfValues[iVal];
        const double dfPlus2 = dfX + 2.0;

        // Left of the support: the weight is zero.
        if (!(dfPlus2 > 0.0))
        {
            padfValues[iVal] = 0.0;
            continue;
        }
        const double dfPlus2Cubed = dfPlus2 * dfPlus2 * dfPlus2;

        double dfPartial = 0.0;
        const double dfPlus1 = dfX + 1.0;
        if (dfPlus1 > 0.0)
        {
            double dfCore = 0.0;
            if (dfX > 0.0)
            {
                const double dfMinus1 = dfX - 1.0;
                const double dfTail =
                    (dfMinus1 > 0.0) ? -4.0 * dfMinus1 * dfMinus1 * dfMinus1
                                     : 0.0;
                dfCore = dfTail + 6.0 * dfX * dfX * dfX;
            }
            dfPartial = dfCore + -4.0 * dfPlus1 * dfPlus1 * dfPlus1;
        }

        padfValues[iVal] = dfPartial + dfPlus2Cubed;  // * 0.166666666666666666666
    }

    return padfValues[0] + padfValues[1] + padfValues[2] + padfValues[3];
}
3592 : /************************************************************************/
3593 : /* GWKResampleWrkStruct */
3594 : /************************************************************************/
3595 :
3596 : typedef struct _GWKResampleWrkStruct GWKResampleWrkStruct;
3597 :
3598 : typedef bool (*pfnGWKResampleType)(const GDALWarpKernel *poWK, int iBand,
3599 : double dfSrcX, double dfSrcY,
3600 : double *pdfDensity, double *pdfReal,
3601 : double *pdfImag,
3602 : GWKResampleWrkStruct *psWrkStruct);
3603 :
3604 : struct _GWKResampleWrkStruct
3605 : {
3606 : pfnGWKResampleType pfnGWKResample;
3607 :
3608 : // Space for saved X weights.
3609 : double *padfWeightsX;
3610 : bool *pabCalcX;
3611 :
3612 : double *padfWeightsY; // Only used by GWKResampleOptimizedLanczos.
3613 : int iLastSrcX; // Only used by GWKResampleOptimizedLanczos.
3614 : int iLastSrcY; // Only used by GWKResampleOptimizedLanczos.
3615 : double dfLastDeltaX; // Only used by GWKResampleOptimizedLanczos.
3616 : double dfLastDeltaY; // Only used by GWKResampleOptimizedLanczos.
3617 : double dfCosPiXScale; // Only used by GWKResampleOptimizedLanczos.
3618 : double dfSinPiXScale; // Only used by GWKResampleOptimizedLanczos.
3619 : double dfCosPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3620 : double dfSinPiXScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3621 : double dfCosPiYScale; // Only used by GWKResampleOptimizedLanczos.
3622 : double dfSinPiYScale; // Only used by GWKResampleOptimizedLanczos.
3623 : double dfCosPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3624 : double dfSinPiYScaleOver3; // Only used by GWKResampleOptimizedLanczos.
3625 :
3626 : // Space for saving a row of pixels.
3627 : double *padfRowDensity;
3628 : double *padfRowReal;
3629 : double *padfRowImag;
3630 : };
3631 :
3632 : /************************************************************************/
3633 : /* GWKResampleCreateWrkStruct() */
3634 : /************************************************************************/
3635 :
3636 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3637 : double dfSrcY, double *pdfDensity, double *pdfReal,
3638 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct);
3639 :
3640 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3641 : double dfSrcX, double dfSrcY,
3642 : double *pdfDensity, double *pdfReal,
3643 : double *pdfImag,
3644 : GWKResampleWrkStruct *psWrkStruct);
3645 :
3646 357 : static GWKResampleWrkStruct *GWKResampleCreateWrkStruct(GDALWarpKernel *poWK)
3647 : {
3648 357 : const int nXDist = (poWK->nXRadius + 1) * 2;
3649 357 : const int nYDist = (poWK->nYRadius + 1) * 2;
3650 :
3651 : GWKResampleWrkStruct *psWrkStruct = static_cast<GWKResampleWrkStruct *>(
3652 357 : CPLCalloc(1, sizeof(GWKResampleWrkStruct)));
3653 :
3654 : // Alloc space for saved X weights.
3655 357 : psWrkStruct->padfWeightsX =
3656 357 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3657 357 : psWrkStruct->pabCalcX =
3658 357 : static_cast<bool *>(CPLMalloc(nXDist * sizeof(bool)));
3659 :
3660 357 : psWrkStruct->padfWeightsY =
3661 357 : static_cast<double *>(CPLCalloc(nYDist, sizeof(double)));
3662 357 : psWrkStruct->iLastSrcX = -10;
3663 357 : psWrkStruct->iLastSrcY = -10;
3664 357 : psWrkStruct->dfLastDeltaX = -10;
3665 357 : psWrkStruct->dfLastDeltaY = -10;
3666 :
3667 : // Alloc space for saving a row of pixels.
3668 357 : if (poWK->pafUnifiedSrcDensity == nullptr &&
3669 323 : poWK->panUnifiedSrcValid == nullptr &&
3670 310 : poWK->papanBandSrcValid == nullptr)
3671 : {
3672 310 : psWrkStruct->padfRowDensity = nullptr;
3673 : }
3674 : else
3675 : {
3676 47 : psWrkStruct->padfRowDensity =
3677 47 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3678 : }
3679 357 : psWrkStruct->padfRowReal =
3680 357 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3681 357 : psWrkStruct->padfRowImag =
3682 357 : static_cast<double *>(CPLCalloc(nXDist, sizeof(double)));
3683 :
3684 357 : if (poWK->eResample == GRA_Lanczos)
3685 : {
3686 63 : psWrkStruct->pfnGWKResample = GWKResampleOptimizedLanczos;
3687 :
3688 63 : if (poWK->dfXScale < 1)
3689 : {
3690 4 : psWrkStruct->dfCosPiXScaleOver3 = cos(M_PI / 3 * poWK->dfXScale);
3691 4 : psWrkStruct->dfSinPiXScaleOver3 =
3692 4 : sqrt(1 - psWrkStruct->dfCosPiXScaleOver3 *
3693 4 : psWrkStruct->dfCosPiXScaleOver3);
3694 : // "Naive":
3695 : // const double dfCosPiXScale = cos( M_PI * dfXScale );
3696 : // const double dfSinPiXScale = sin( M_PI * dfXScale );
3697 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3698 4 : psWrkStruct->dfCosPiXScale = (4 * psWrkStruct->dfCosPiXScaleOver3 *
3699 4 : psWrkStruct->dfCosPiXScaleOver3 -
3700 4 : 3) *
3701 4 : psWrkStruct->dfCosPiXScaleOver3;
3702 4 : psWrkStruct->dfSinPiXScale = sqrt(
3703 4 : 1 - psWrkStruct->dfCosPiXScale * psWrkStruct->dfCosPiXScale);
3704 : }
3705 :
3706 63 : if (poWK->dfYScale < 1)
3707 : {
3708 11 : psWrkStruct->dfCosPiYScaleOver3 = cos(M_PI / 3 * poWK->dfYScale);
3709 11 : psWrkStruct->dfSinPiYScaleOver3 =
3710 11 : sqrt(1 - psWrkStruct->dfCosPiYScaleOver3 *
3711 11 : psWrkStruct->dfCosPiYScaleOver3);
3712 : // "Naive":
3713 : // const double dfCosPiYScale = cos( M_PI * dfYScale );
3714 : // const double dfSinPiYScale = sin( M_PI * dfYScale );
3715 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and x between 0 and M_PI
3716 11 : psWrkStruct->dfCosPiYScale = (4 * psWrkStruct->dfCosPiYScaleOver3 *
3717 11 : psWrkStruct->dfCosPiYScaleOver3 -
3718 11 : 3) *
3719 11 : psWrkStruct->dfCosPiYScaleOver3;
3720 11 : psWrkStruct->dfSinPiYScale = sqrt(
3721 11 : 1 - psWrkStruct->dfCosPiYScale * psWrkStruct->dfCosPiYScale);
3722 : }
3723 : }
3724 : else
3725 294 : psWrkStruct->pfnGWKResample = GWKResample;
3726 :
3727 357 : return psWrkStruct;
3728 : }
3729 :
3730 : /************************************************************************/
3731 : /* GWKResampleDeleteWrkStruct() */
3732 : /************************************************************************/
3733 :
3734 357 : static void GWKResampleDeleteWrkStruct(GWKResampleWrkStruct *psWrkStruct)
3735 : {
3736 357 : CPLFree(psWrkStruct->padfWeightsX);
3737 357 : CPLFree(psWrkStruct->padfWeightsY);
3738 357 : CPLFree(psWrkStruct->pabCalcX);
3739 357 : CPLFree(psWrkStruct->padfRowDensity);
3740 357 : CPLFree(psWrkStruct->padfRowReal);
3741 357 : CPLFree(psWrkStruct->padfRowImag);
3742 357 : CPLFree(psWrkStruct);
3743 357 : }
3744 :
3745 : /************************************************************************/
3746 : /* GWKResample() */
3747 : /************************************************************************/
3748 :
3749 239383 : static bool GWKResample(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
3750 : double dfSrcY, double *pdfDensity, double *pdfReal,
3751 : double *pdfImag, GWKResampleWrkStruct *psWrkStruct)
3752 :
3753 : {
3754 : // Save as local variables to avoid following pointers in loops.
3755 239383 : const int nSrcXSize = poWK->nSrcXSize;
3756 239383 : const int nSrcYSize = poWK->nSrcYSize;
3757 :
3758 239383 : double dfAccumulatorReal = 0.0;
3759 239383 : double dfAccumulatorImag = 0.0;
3760 239383 : double dfAccumulatorDensity = 0.0;
3761 239383 : double dfAccumulatorWeight = 0.0;
3762 239383 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3763 239383 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3764 239383 : const GPtrDiff_t iSrcOffset =
3765 239383 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3766 239383 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3767 239383 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3768 :
3769 239383 : const double dfXScale = poWK->dfXScale;
3770 239383 : const double dfYScale = poWK->dfYScale;
3771 :
3772 239383 : const int nXDist = (poWK->nXRadius + 1) * 2;
3773 :
3774 : // Space for saved X weights.
3775 239383 : double *padfWeightsX = psWrkStruct->padfWeightsX;
3776 239383 : bool *pabCalcX = psWrkStruct->pabCalcX;
3777 :
3778 : // Space for saving a row of pixels.
3779 239383 : double *padfRowDensity = psWrkStruct->padfRowDensity;
3780 239383 : double *padfRowReal = psWrkStruct->padfRowReal;
3781 239383 : double *padfRowImag = psWrkStruct->padfRowImag;
3782 :
3783 : // Mark as needing calculation (don't calculate the weights yet,
3784 : // because a mask may render it unnecessary).
3785 239383 : memset(pabCalcX, false, nXDist * sizeof(bool));
3786 :
3787 239383 : FilterFuncType pfnGetWeight = apfGWKFilter[poWK->eResample];
3788 239383 : CPLAssert(pfnGetWeight);
3789 :
3790 : // Skip sampling over edge of image.
3791 239383 : int j = poWK->nFiltInitY;
3792 239383 : int jMax = poWK->nYRadius;
3793 239383 : if (iSrcY + j < 0)
3794 566 : j = -iSrcY;
3795 239383 : if (iSrcY + jMax >= nSrcYSize)
3796 662 : jMax = nSrcYSize - iSrcY - 1;
3797 :
3798 239383 : int iMin = poWK->nFiltInitX;
3799 239383 : int iMax = poWK->nXRadius;
3800 239383 : if (iSrcX + iMin < 0)
3801 566 : iMin = -iSrcX;
3802 239383 : if (iSrcX + iMax >= nSrcXSize)
3803 659 : iMax = nSrcXSize - iSrcX - 1;
3804 :
3805 239383 : const int bXScaleBelow1 = (dfXScale < 1.0);
3806 239383 : const int bYScaleBelow1 = (dfYScale < 1.0);
3807 :
3808 239383 : GPtrDiff_t iRowOffset =
3809 239383 : iSrcOffset + static_cast<GPtrDiff_t>(j - 1) * nSrcXSize + iMin;
3810 :
3811 : // Loop over pixel rows in the kernel.
3812 1445930 : for (; j <= jMax; ++j)
3813 : {
3814 1206540 : iRowOffset += nSrcXSize;
3815 :
3816 : // Get pixel values.
3817 : // We can potentially read extra elements after the "normal" end of the
3818 : // source arrays, but the contract of papabySrcImage[iBand],
3819 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
3820 : // is to have WARP_EXTRA_ELTS reserved at their end.
3821 1206540 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
3822 : padfRowDensity, padfRowReal, padfRowImag))
3823 72 : continue;
3824 :
3825 : // Calculate the Y weight.
3826 : double dfWeight1 = (bYScaleBelow1)
3827 1206470 : ? pfnGetWeight((j - dfDeltaY) * dfYScale)
3828 1600 : : pfnGetWeight(j - dfDeltaY);
3829 :
3830 : // Iterate over pixels in row.
3831 1206470 : double dfAccumulatorRealLocal = 0.0;
3832 1206470 : double dfAccumulatorImagLocal = 0.0;
3833 1206470 : double dfAccumulatorDensityLocal = 0.0;
3834 1206470 : double dfAccumulatorWeightLocal = 0.0;
3835 :
3836 7317420 : for (int i = iMin; i <= iMax; ++i)
3837 : {
3838 : // Skip sampling if pixel has zero density.
3839 6110940 : if (padfRowDensity != nullptr &&
3840 77277 : padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
3841 546 : continue;
3842 :
3843 6110400 : double dfWeight2 = 0.0;
3844 :
3845 : // Make or use a cached set of weights for this row.
3846 6110400 : if (pabCalcX[i - iMin])
3847 : {
3848 : // Use saved weight value instead of recomputing it.
3849 4903920 : dfWeight2 = padfWeightsX[i - iMin];
3850 : }
3851 : else
3852 : {
3853 : // Calculate & save the X weight.
3854 1206480 : padfWeightsX[i - iMin] = dfWeight2 =
3855 1206480 : (bXScaleBelow1) ? pfnGetWeight((i - dfDeltaX) * dfXScale)
3856 1600 : : pfnGetWeight(i - dfDeltaX);
3857 :
3858 1206480 : pabCalcX[i - iMin] = true;
3859 : }
3860 :
3861 : // Accumulate!
3862 6110400 : dfAccumulatorRealLocal += padfRowReal[i - iMin] * dfWeight2;
3863 6110400 : dfAccumulatorImagLocal += padfRowImag[i - iMin] * dfWeight2;
3864 6110400 : if (padfRowDensity != nullptr)
3865 76731 : dfAccumulatorDensityLocal +=
3866 76731 : padfRowDensity[i - iMin] * dfWeight2;
3867 6110400 : dfAccumulatorWeightLocal += dfWeight2;
3868 : }
3869 :
3870 1206470 : dfAccumulatorReal += dfAccumulatorRealLocal * dfWeight1;
3871 1206470 : dfAccumulatorImag += dfAccumulatorImagLocal * dfWeight1;
3872 1206470 : dfAccumulatorDensity += dfAccumulatorDensityLocal * dfWeight1;
3873 1206470 : dfAccumulatorWeight += dfAccumulatorWeightLocal * dfWeight1;
3874 : }
3875 :
3876 239383 : if (dfAccumulatorWeight < 0.000001 ||
3877 1887 : (padfRowDensity != nullptr && dfAccumulatorDensity < 0.000001))
3878 : {
3879 0 : *pdfDensity = 0.0;
3880 0 : return false;
3881 : }
3882 :
3883 : // Calculate the output taking into account weighting.
3884 239383 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
3885 : {
3886 239380 : *pdfReal = dfAccumulatorReal / dfAccumulatorWeight;
3887 239380 : *pdfImag = dfAccumulatorImag / dfAccumulatorWeight;
3888 239380 : if (padfRowDensity != nullptr)
3889 1884 : *pdfDensity = dfAccumulatorDensity / dfAccumulatorWeight;
3890 : else
3891 237496 : *pdfDensity = 1.0;
3892 : }
3893 : else
3894 : {
3895 3 : *pdfReal = dfAccumulatorReal;
3896 3 : *pdfImag = dfAccumulatorImag;
3897 3 : if (padfRowDensity != nullptr)
3898 3 : *pdfDensity = dfAccumulatorDensity;
3899 : else
3900 0 : *pdfDensity = 1.0;
3901 : }
3902 :
3903 239383 : return true;
3904 : }
3905 :
3906 : /************************************************************************/
3907 : /* GWKResampleOptimizedLanczos() */
3908 : /************************************************************************/
3909 :
3910 617144 : static bool GWKResampleOptimizedLanczos(const GDALWarpKernel *poWK, int iBand,
3911 : double dfSrcX, double dfSrcY,
3912 : double *pdfDensity, double *pdfReal,
3913 : double *pdfImag,
3914 : GWKResampleWrkStruct *psWrkStruct)
3915 :
3916 : {
3917 : // Save as local variables to avoid following pointers in loops.
3918 617144 : const int nSrcXSize = poWK->nSrcXSize;
3919 617144 : const int nSrcYSize = poWK->nSrcYSize;
3920 :
3921 617144 : double dfAccumulatorReal = 0.0;
3922 617144 : double dfAccumulatorImag = 0.0;
3923 617144 : double dfAccumulatorDensity = 0.0;
3924 617144 : double dfAccumulatorWeight = 0.0;
3925 617144 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
3926 617144 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
3927 617144 : const GPtrDiff_t iSrcOffset =
3928 617144 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
3929 617144 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
3930 617144 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
3931 :
3932 617144 : const double dfXScale = poWK->dfXScale;
3933 617144 : const double dfYScale = poWK->dfYScale;
3934 :
3935 : // Space for saved X weights.
3936 617144 : double *const padfWeightsXShifted =
3937 617144 : psWrkStruct->padfWeightsX - poWK->nFiltInitX;
3938 617144 : double *const padfWeightsYShifted =
3939 617144 : psWrkStruct->padfWeightsY - poWK->nFiltInitY;
3940 :
3941 : // Space for saving a row of pixels.
3942 617144 : double *const padfRowDensity = psWrkStruct->padfRowDensity;
3943 617144 : double *const padfRowReal = psWrkStruct->padfRowReal;
3944 617144 : double *const padfRowImag = psWrkStruct->padfRowImag;
3945 :
3946 : // Skip sampling over edge of image.
3947 617144 : int jMin = poWK->nFiltInitY;
3948 617144 : int jMax = poWK->nYRadius;
3949 617144 : if (iSrcY + jMin < 0)
3950 16522 : jMin = -iSrcY;
3951 617144 : if (iSrcY + jMax >= nSrcYSize)
3952 5782 : jMax = nSrcYSize - iSrcY - 1;
3953 :
3954 617144 : int iMin = poWK->nFiltInitX;
3955 617144 : int iMax = poWK->nXRadius;
3956 617144 : if (iSrcX + iMin < 0)
3957 15797 : iMin = -iSrcX;
3958 617144 : if (iSrcX + iMax >= nSrcXSize)
3959 4657 : iMax = nSrcXSize - iSrcX - 1;
3960 :
3961 617144 : if (dfXScale < 1.0)
3962 : {
3963 403041 : while ((iMin - dfDeltaX) * dfXScale < -3.0)
3964 200179 : iMin++;
3965 202862 : while ((iMax - dfDeltaX) * dfXScale > 3.0)
3966 0 : iMax--;
3967 :
3968 : // clang-format off
3969 : /*
3970 : Naive version:
3971 : for (int i = iMin; i <= iMax; ++i)
3972 : {
3973 : psWrkStruct->padfWeightsXShifted[i] =
3974 : GWKLanczosSinc((i - dfDeltaX) * dfXScale);
3975 : }
3976 :
3977 : but given that:
3978 :
3979 : GWKLanczosSinc(x):
3980 : if (dfX == 0.0)
3981 : return 1.0;
3982 :
3983 : const double dfPIX = M_PI * dfX;
3984 : const double dfPIXoverR = dfPIX / 3;
3985 : const double dfPIX2overR = dfPIX * dfPIXoverR;
3986 : return sin(dfPIX) * sin(dfPIXoverR) / dfPIX2overR;
3987 :
3988 : and
3989 : sin (a + b) = sin a cos b + cos a sin b.
3990 : cos (a + b) = cos a cos b - sin a sin b.
3991 :
3992 : we can skip any sin() computation within the loop
3993 : */
3994 : // clang-format on
3995 :
3996 202862 : if (iSrcX != psWrkStruct->iLastSrcX ||
3997 131072 : dfDeltaX != psWrkStruct->dfLastDeltaX)
3998 : {
3999 71790 : double dfX = (iMin - dfDeltaX) * dfXScale;
4000 :
4001 71790 : double dfPIXover3 = M_PI / 3 * dfX;
4002 71790 : double dfCosOver3 = cos(dfPIXover3);
4003 71790 : double dfSinOver3 = sin(dfPIXover3);
4004 :
4005 : // "Naive":
4006 : // double dfSin = sin( M_PI * dfX );
4007 : // double dfCos = cos( M_PI * dfX );
4008 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4009 71790 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4010 71790 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4011 :
4012 71790 : const double dfCosPiXScaleOver3 = psWrkStruct->dfCosPiXScaleOver3;
4013 71790 : const double dfSinPiXScaleOver3 = psWrkStruct->dfSinPiXScaleOver3;
4014 71790 : const double dfCosPiXScale = psWrkStruct->dfCosPiXScale;
4015 71790 : const double dfSinPiXScale = psWrkStruct->dfSinPiXScale;
4016 71790 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4017 71790 : padfWeightsXShifted[iMin] =
4018 71790 : dfX == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfX * dfX);
4019 1636480 : for (int i = iMin + 1; i <= iMax; ++i)
4020 : {
4021 1564690 : dfX += dfXScale;
4022 1564690 : const double dfNewSin =
4023 1564690 : dfSin * dfCosPiXScale + dfCos * dfSinPiXScale;
4024 1564690 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiXScaleOver3 +
4025 1564690 : dfCosOver3 * dfSinPiXScaleOver3;
4026 1564690 : padfWeightsXShifted[i] =
4027 : dfX == 0
4028 1564690 : ? 1.0
4029 1564690 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfX * dfX);
4030 1564690 : const double dfNewCos =
4031 1564690 : dfCos * dfCosPiXScale - dfSin * dfSinPiXScale;
4032 1564690 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiXScaleOver3 -
4033 1564690 : dfSinOver3 * dfSinPiXScaleOver3;
4034 1564690 : dfSin = dfNewSin;
4035 1564690 : dfCos = dfNewCos;
4036 1564690 : dfSinOver3 = dfNewSinOver3;
4037 1564690 : dfCosOver3 = dfNewCosOver3;
4038 : }
4039 :
4040 71790 : psWrkStruct->iLastSrcX = iSrcX;
4041 71790 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4042 : }
4043 : }
4044 : else
4045 : {
4046 757542 : while (iMin - dfDeltaX < -3.0)
4047 343260 : iMin++;
4048 414282 : while (iMax - dfDeltaX > 3.0)
4049 0 : iMax--;
4050 :
4051 414282 : if (iSrcX != psWrkStruct->iLastSrcX ||
4052 209580 : dfDeltaX != psWrkStruct->dfLastDeltaX)
4053 : {
4054 : // Optimisation of GWKLanczosSinc(i - dfDeltaX) based on the
4055 : // following trigonometric formulas.
4056 :
4057 : // TODO(schwehr): Move this somewhere where it can be rendered at
4058 : // LaTeX.
4059 : // clang-format off
4060 : // sin(M_PI * (dfBase + k)) = sin(M_PI * dfBase) * cos(M_PI * k) +
4061 : // cos(M_PI * dfBase) * sin(M_PI * k)
4062 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k) + dfCosPIBase * sin(M_PI * k)
4063 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * cos(M_PI * k)
4064 : // sin(M_PI * (dfBase + k)) = dfSinPIBase * (((k % 2) == 0) ? 1 : -1)
4065 :
4066 : // sin(M_PI / dfR * (dfBase + k)) = sin(M_PI / dfR * dfBase) * cos(M_PI / dfR * k) +
4067 : // cos(M_PI / dfR * dfBase) * sin(M_PI / dfR * k)
4068 : // sin(M_PI / dfR * (dfBase + k)) = dfSinPIBaseOverR * cos(M_PI / dfR * k) + dfCosPIBaseOverR * sin(M_PI / dfR * k)
4069 : // clang-format on
4070 :
4071 414282 : const double dfSinPIDeltaXOver3 = sin((-M_PI / 3.0) * dfDeltaX);
4072 414282 : const double dfSin2PIDeltaXOver3 =
4073 : dfSinPIDeltaXOver3 * dfSinPIDeltaXOver3;
4074 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaX < PI/2.
4075 414282 : const double dfCosPIDeltaXOver3 = sqrt(1.0 - dfSin2PIDeltaXOver3);
4076 414282 : const double dfSinPIDeltaX =
4077 414282 : (3.0 - 4 * dfSin2PIDeltaXOver3) * dfSinPIDeltaXOver3;
4078 414282 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4079 414282 : const double dfInvPI2Over3xSinPIDeltaX =
4080 : dfInvPI2Over3 * dfSinPIDeltaX;
4081 414282 : const double dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 =
4082 414282 : -0.5 * dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3;
4083 414282 : const double dfSinPIOver3 = 0.8660254037844386;
4084 414282 : const double dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3 =
4085 414282 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaX * dfCosPIDeltaXOver3;
4086 : const double padfCst[] = {
4087 414282 : dfInvPI2Over3xSinPIDeltaX * dfSinPIDeltaXOver3,
4088 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 -
4089 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3,
4090 414282 : dfInvPI2Over3xSinPIDeltaXxm0d5SinPIDeltaXOver3 +
4091 414282 : dfInvPI2Over3xSinPIDeltaXxSinPIOver3xCosPIDeltaXOver3};
4092 :
4093 2936860 : for (int i = iMin; i <= iMax; ++i)
4094 : {
4095 2522570 : const double dfX = i - dfDeltaX;
4096 2522570 : if (dfX == 0.0)
4097 58282 : padfWeightsXShifted[i] = 1.0;
4098 : else
4099 2464290 : padfWeightsXShifted[i] = padfCst[(i + 3) % 3] / (dfX * dfX);
4100 : #if DEBUG_VERBOSE
4101 : // TODO(schwehr): AlmostEqual.
4102 : // CPLAssert(fabs(padfWeightsX[i-poWK->nFiltInitX] -
4103 : // GWKLanczosSinc(dfX, 3.0)) < 1e-10);
4104 : #endif
4105 : }
4106 :
4107 414282 : psWrkStruct->iLastSrcX = iSrcX;
4108 414282 : psWrkStruct->dfLastDeltaX = dfDeltaX;
4109 : }
4110 : }
4111 :
4112 617144 : if (dfYScale < 1.0)
4113 : {
4114 403116 : while ((jMin - dfDeltaY) * dfYScale < -3.0)
4115 200254 : jMin++;
4116 202862 : while ((jMax - dfDeltaY) * dfYScale > 3.0)
4117 0 : jMax--;
4118 :
4119 : // clang-format off
4120 : /*
4121 : Naive version:
4122 : for (int j = jMin; j <= jMax; ++j)
4123 : {
4124 : padfWeightsYShifted[j] =
4125 : GWKLanczosSinc((j - dfDeltaY) * dfYScale);
4126 : }
4127 : */
4128 : // clang-format on
4129 :
4130 202862 : if (iSrcY != psWrkStruct->iLastSrcY ||
4131 202479 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4132 : {
4133 383 : double dfY = (jMin - dfDeltaY) * dfYScale;
4134 :
4135 383 : double dfPIYover3 = M_PI / 3 * dfY;
4136 383 : double dfCosOver3 = cos(dfPIYover3);
4137 383 : double dfSinOver3 = sin(dfPIYover3);
4138 :
4139 : // "Naive":
4140 : // double dfSin = sin( M_PI * dfY );
4141 : // double dfCos = cos( M_PI * dfY );
4142 : // but given that cos(3x) = 4 cos^3(x) - 3 cos(x) and sin(3x) = 3 sin(x) - 4 sin^3 (x).
4143 383 : double dfSin = (3 - 4 * dfSinOver3 * dfSinOver3) * dfSinOver3;
4144 383 : double dfCos = (4 * dfCosOver3 * dfCosOver3 - 3) * dfCosOver3;
4145 :
4146 383 : const double dfCosPiYScaleOver3 = psWrkStruct->dfCosPiYScaleOver3;
4147 383 : const double dfSinPiYScaleOver3 = psWrkStruct->dfSinPiYScaleOver3;
4148 383 : const double dfCosPiYScale = psWrkStruct->dfCosPiYScale;
4149 383 : const double dfSinPiYScale = psWrkStruct->dfSinPiYScale;
4150 383 : constexpr double THREE_PI_PI = 3 * M_PI * M_PI;
4151 383 : padfWeightsYShifted[jMin] =
4152 383 : dfY == 0 ? 1.0 : THREE_PI_PI * dfSin * dfSinOver3 / (dfY * dfY);
4153 7318 : for (int j = jMin + 1; j <= jMax; ++j)
4154 : {
4155 6935 : dfY += dfYScale;
4156 6935 : const double dfNewSin =
4157 6935 : dfSin * dfCosPiYScale + dfCos * dfSinPiYScale;
4158 6935 : const double dfNewSinOver3 = dfSinOver3 * dfCosPiYScaleOver3 +
4159 6935 : dfCosOver3 * dfSinPiYScaleOver3;
4160 6935 : padfWeightsYShifted[j] =
4161 : dfY == 0
4162 6935 : ? 1.0
4163 6935 : : THREE_PI_PI * dfNewSin * dfNewSinOver3 / (dfY * dfY);
4164 6935 : const double dfNewCos =
4165 6935 : dfCos * dfCosPiYScale - dfSin * dfSinPiYScale;
4166 6935 : const double dfNewCosOver3 = dfCosOver3 * dfCosPiYScaleOver3 -
4167 6935 : dfSinOver3 * dfSinPiYScaleOver3;
4168 6935 : dfSin = dfNewSin;
4169 6935 : dfCos = dfNewCos;
4170 6935 : dfSinOver3 = dfNewSinOver3;
4171 6935 : dfCosOver3 = dfNewCosOver3;
4172 : }
4173 :
4174 383 : psWrkStruct->iLastSrcY = iSrcY;
4175 383 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4176 : }
4177 : }
4178 : else
4179 : {
4180 684742 : while (jMin - dfDeltaY < -3.0)
4181 270460 : jMin++;
4182 414282 : while (jMax - dfDeltaY > 3.0)
4183 0 : jMax--;
4184 :
4185 414282 : if (iSrcY != psWrkStruct->iLastSrcY ||
4186 413663 : dfDeltaY != psWrkStruct->dfLastDeltaY)
4187 : {
4188 1132 : const double dfSinPIDeltaYOver3 = sin((-M_PI / 3.0) * dfDeltaY);
4189 1132 : const double dfSin2PIDeltaYOver3 =
4190 : dfSinPIDeltaYOver3 * dfSinPIDeltaYOver3;
4191 : // Ok to use sqrt(1-sin^2) since M_PI / 3 * dfDeltaY < PI/2.
4192 1132 : const double dfCosPIDeltaYOver3 = sqrt(1.0 - dfSin2PIDeltaYOver3);
4193 1132 : const double dfSinPIDeltaY =
4194 1132 : (3.0 - 4.0 * dfSin2PIDeltaYOver3) * dfSinPIDeltaYOver3;
4195 1132 : const double dfInvPI2Over3 = 3.0 / (M_PI * M_PI);
4196 1132 : const double dfInvPI2Over3xSinPIDeltaY =
4197 : dfInvPI2Over3 * dfSinPIDeltaY;
4198 1132 : const double dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 =
4199 1132 : -0.5 * dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3;
4200 1132 : const double dfSinPIOver3 = 0.8660254037844386;
4201 1132 : const double dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3 =
4202 1132 : dfSinPIOver3 * dfInvPI2Over3xSinPIDeltaY * dfCosPIDeltaYOver3;
4203 : const double padfCst[] = {
4204 1132 : dfInvPI2Over3xSinPIDeltaY * dfSinPIDeltaYOver3,
4205 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 -
4206 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3,
4207 1132 : dfInvPI2Over3xSinPIDeltaYxm0d5SinPIDeltaYOver3 +
4208 1132 : dfInvPI2Over3xSinPIDeltaYxSinPIOver3xCosPIDeltaYOver3};
4209 :
4210 7917 : for (int j = jMin; j <= jMax; ++j)
4211 : {
4212 6785 : const double dfY = j - dfDeltaY;
4213 6785 : if (dfY == 0.0)
4214 460 : padfWeightsYShifted[j] = 1.0;
4215 : else
4216 6325 : padfWeightsYShifted[j] = padfCst[(j + 3) % 3] / (dfY * dfY);
4217 : #if DEBUG_VERBOSE
4218 : // TODO(schwehr): AlmostEqual.
4219 : // CPLAssert(fabs(padfWeightsYShifted[j] -
4220 : // GWKLanczosSinc(dfY, 3.0)) < 1e-10);
4221 : #endif
4222 : }
4223 :
4224 1132 : psWrkStruct->iLastSrcY = iSrcY;
4225 1132 : psWrkStruct->dfLastDeltaY = dfDeltaY;
4226 : }
4227 : }
4228 :
4229 : // If we have no density information, we can simply compute the
4230 : // accumulated weight.
4231 617144 : if (padfRowDensity == nullptr)
4232 : {
4233 617144 : double dfRowAccWeight = 0.0;
4234 7903490 : for (int i = iMin; i <= iMax; ++i)
4235 : {
4236 7286350 : dfRowAccWeight += padfWeightsXShifted[i];
4237 : }
4238 617144 : double dfColAccWeight = 0.0;
4239 7958040 : for (int j = jMin; j <= jMax; ++j)
4240 : {
4241 7340900 : dfColAccWeight += padfWeightsYShifted[j];
4242 : }
4243 617144 : dfAccumulatorWeight = dfRowAccWeight * dfColAccWeight;
4244 : }
4245 :
4246 : // Loop over pixel rows in the kernel.
4247 :
4248 617144 : if (poWK->eWorkingDataType == GDT_Byte && !poWK->panUnifiedSrcValid &&
4249 616524 : !poWK->papanBandSrcValid && !poWK->pafUnifiedSrcDensity &&
4250 : !padfRowDensity)
4251 : {
4252 : // Optimization for Byte case without any masking/alpha
4253 :
4254 616524 : if (dfAccumulatorWeight < 0.000001)
4255 : {
4256 0 : *pdfDensity = 0.0;
4257 0 : return false;
4258 : }
4259 :
4260 616524 : const GByte *pSrc =
4261 616524 : reinterpret_cast<const GByte *>(poWK->papabySrcImage[iBand]);
4262 616524 : pSrc += iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4263 :
4264 : #if defined(USE_SSE2)
4265 616524 : if (iMax - iMin + 1 == 6)
4266 : {
4267 : // This is just an optimized version of the general case in
4268 : // the else clause.
4269 :
4270 346854 : pSrc += iMin;
4271 346854 : int j = jMin;
4272 : const auto fourXWeights =
4273 346854 : XMMReg4Double::Load4Val(padfWeightsXShifted + iMin);
4274 :
4275 : // Process 2 lines at the same time.
4276 1375860 : for (; j < jMax; j += 2)
4277 : {
4278 : const XMMReg4Double v_acc =
4279 1029000 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4280 : const XMMReg4Double v_acc2 =
4281 1029000 : XMMReg4Double::Load4Val(pSrc + nSrcXSize) * fourXWeights;
4282 1029000 : const double dfRowAcc = v_acc.GetHorizSum();
4283 1029000 : const double dfRowAccEnd =
4284 1029000 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4285 1029000 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4286 1029000 : dfAccumulatorReal +=
4287 1029000 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4288 1029000 : const double dfRowAcc2 = v_acc2.GetHorizSum();
4289 1029000 : const double dfRowAcc2End =
4290 1029000 : pSrc[nSrcXSize + 4] * padfWeightsXShifted[iMin + 4] +
4291 1029000 : pSrc[nSrcXSize + 5] * padfWeightsXShifted[iMin + 5];
4292 1029000 : dfAccumulatorReal +=
4293 1029000 : (dfRowAcc2 + dfRowAcc2End) * padfWeightsYShifted[j + 1];
4294 1029000 : pSrc += 2 * nSrcXSize;
4295 : }
4296 346854 : if (j == jMax)
4297 : {
4298 : // Process last line if there's an odd number of them.
4299 :
4300 : const XMMReg4Double v_acc =
4301 86045 : XMMReg4Double::Load4Val(pSrc) * fourXWeights;
4302 86045 : const double dfRowAcc = v_acc.GetHorizSum();
4303 86045 : const double dfRowAccEnd =
4304 86045 : pSrc[4] * padfWeightsXShifted[iMin + 4] +
4305 86045 : pSrc[5] * padfWeightsXShifted[iMin + 5];
4306 86045 : dfAccumulatorReal +=
4307 86045 : (dfRowAcc + dfRowAccEnd) * padfWeightsYShifted[j];
4308 : }
4309 : }
4310 : else
4311 : #endif
4312 : {
4313 5463580 : for (int j = jMin; j <= jMax; ++j)
4314 : {
4315 5193900 : int i = iMin;
4316 5193900 : double dfRowAcc1 = 0.0;
4317 5193900 : double dfRowAcc2 = 0.0;
4318 : // A bit of loop unrolling
4319 62750600 : for (; i < iMax; i += 2)
4320 : {
4321 57556700 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4322 57556700 : dfRowAcc2 += pSrc[i + 1] * padfWeightsXShifted[i + 1];
4323 : }
4324 5193900 : if (i == iMax)
4325 : {
4326 : // Process last column if there's an odd number of them.
4327 426183 : dfRowAcc1 += pSrc[i] * padfWeightsXShifted[i];
4328 : }
4329 :
4330 5193900 : dfAccumulatorReal +=
4331 5193900 : (dfRowAcc1 + dfRowAcc2) * padfWeightsYShifted[j];
4332 5193900 : pSrc += nSrcXSize;
4333 : }
4334 : }
4335 :
4336 : // Calculate the output taking into account weighting.
4337 616524 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4338 : {
4339 569230 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4340 569230 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4341 569230 : *pdfDensity = 1.0;
4342 : }
4343 : else
4344 : {
4345 47294 : *pdfReal = dfAccumulatorReal;
4346 47294 : *pdfDensity = 1.0;
4347 : }
4348 :
4349 616524 : return true;
4350 : }
4351 :
4352 620 : GPtrDiff_t iRowOffset =
4353 620 : iSrcOffset + static_cast<GPtrDiff_t>(jMin - 1) * nSrcXSize + iMin;
4354 :
4355 620 : int nCountValid = 0;
4356 620 : const bool bIsNonComplex = !GDALDataTypeIsComplex(poWK->eWorkingDataType);
4357 :
4358 3560 : for (int j = jMin; j <= jMax; ++j)
4359 : {
4360 2940 : iRowOffset += nSrcXSize;
4361 :
4362 : // Get pixel values.
4363 : // We can potentially read extra elements after the "normal" end of the
4364 : // source arrays, but the contract of papabySrcImage[iBand],
4365 : // papanBandSrcValid[iBand], panUnifiedSrcValid and pafUnifiedSrcDensity
4366 : // is to have WARP_EXTRA_ELTS reserved at their end.
4367 2940 : if (!GWKGetPixelRow(poWK, iBand, iRowOffset, (iMax - iMin + 2) / 2,
4368 : padfRowDensity, padfRowReal, padfRowImag))
4369 0 : continue;
4370 :
4371 2940 : const double dfWeight1 = padfWeightsYShifted[j];
4372 :
4373 : // Iterate over pixels in row.
4374 2940 : if (padfRowDensity != nullptr)
4375 : {
4376 0 : for (int i = iMin; i <= iMax; ++i)
4377 : {
4378 : // Skip sampling if pixel has zero density.
4379 0 : if (padfRowDensity[i - iMin] < SRC_DENSITY_THRESHOLD_DOUBLE)
4380 0 : continue;
4381 :
4382 0 : nCountValid++;
4383 :
4384 : // Use a cached set of weights for this row.
4385 0 : const double dfWeight2 = dfWeight1 * padfWeightsXShifted[i];
4386 :
4387 : // Accumulate!
4388 0 : dfAccumulatorReal += padfRowReal[i - iMin] * dfWeight2;
4389 0 : dfAccumulatorImag += padfRowImag[i - iMin] * dfWeight2;
4390 0 : dfAccumulatorDensity += padfRowDensity[i - iMin] * dfWeight2;
4391 0 : dfAccumulatorWeight += dfWeight2;
4392 : }
4393 : }
4394 2940 : else if (bIsNonComplex)
4395 : {
4396 1764 : double dfRowAccReal = 0.0;
4397 10560 : for (int i = iMin; i <= iMax; ++i)
4398 : {
4399 8796 : const double dfWeight2 = padfWeightsXShifted[i];
4400 :
4401 : // Accumulate!
4402 8796 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4403 : }
4404 :
4405 1764 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4406 : }
4407 : else
4408 : {
4409 1176 : double dfRowAccReal = 0.0;
4410 1176 : double dfRowAccImag = 0.0;
4411 7040 : for (int i = iMin; i <= iMax; ++i)
4412 : {
4413 5864 : const double dfWeight2 = padfWeightsXShifted[i];
4414 :
4415 : // Accumulate!
4416 5864 : dfRowAccReal += padfRowReal[i - iMin] * dfWeight2;
4417 5864 : dfRowAccImag += padfRowImag[i - iMin] * dfWeight2;
4418 : }
4419 :
4420 1176 : dfAccumulatorReal += dfRowAccReal * dfWeight1;
4421 1176 : dfAccumulatorImag += dfRowAccImag * dfWeight1;
4422 : }
4423 : }
4424 :
4425 620 : if (dfAccumulatorWeight < 0.000001 ||
4426 0 : (padfRowDensity != nullptr &&
4427 0 : (dfAccumulatorDensity < 0.000001 ||
4428 0 : nCountValid < (jMax - jMin + 1) * (iMax - iMin + 1) / 2)))
4429 : {
4430 0 : *pdfDensity = 0.0;
4431 0 : return false;
4432 : }
4433 :
4434 : // Calculate the output taking into account weighting.
4435 620 : if (dfAccumulatorWeight < 0.99999 || dfAccumulatorWeight > 1.00001)
4436 : {
4437 0 : const double dfInvAcc = 1.0 / dfAccumulatorWeight;
4438 0 : *pdfReal = dfAccumulatorReal * dfInvAcc;
4439 0 : *pdfImag = dfAccumulatorImag * dfInvAcc;
4440 0 : if (padfRowDensity != nullptr)
4441 0 : *pdfDensity = dfAccumulatorDensity * dfInvAcc;
4442 : else
4443 0 : *pdfDensity = 1.0;
4444 : }
4445 : else
4446 : {
4447 620 : *pdfReal = dfAccumulatorReal;
4448 620 : *pdfImag = dfAccumulatorImag;
4449 620 : if (padfRowDensity != nullptr)
4450 0 : *pdfDensity = dfAccumulatorDensity;
4451 : else
4452 620 : *pdfDensity = 1.0;
4453 : }
4454 :
4455 620 : return true;
4456 : }
4457 :
4458 : /************************************************************************/
4459 : /* GWKComputeWeights() */
4460 : /************************************************************************/
4461 :
// Computes the separable filter weights of one resampling window.
//
// eResample selects the scalar filter function (apfGWKFilter) and its
// 4-values-at-a-time variant (apfGWKFilter4Values); both must be non-null.
// For every i in [iMin, iMax] the filter is evaluated at
// (i - dfDeltaX) * dfXScale and stored in padfWeightsHorizontal, and for
// every j in [jMin, jMax] at (j - dfDeltaY) * dfYScale and stored in
// padfWeightsVertical. Both output arrays are indexed from 0 (entry 0
// corresponds to iMin / jMin), not by i / j.
//
// On return dfInvWeights holds 1 / (sum of horizontal weights * sum of
// vertical weights), i.e. the factor normalising the 2D weighted sum.
4462 3874310 : static void GWKComputeWeights(GDALResampleAlg eResample, int iMin, int iMax,
4463 : double dfDeltaX, double dfXScale, int jMin,
4464 : int jMax, double dfDeltaY, double dfYScale,
4465 : double *padfWeightsHorizontal,
4466 : double *padfWeightsVertical, double &dfInvWeights)
4467 : {
4468 :
4469 3874310 : const FilterFuncType pfnGetWeight = apfGWKFilter[eResample];
4470 3874310 : CPLAssert(pfnGetWeight);
4471 3874310 : const FilterFunc4ValuesType pfnGetWeight4Values =
4472 3874310 : apfGWKFilter4Values[eResample];
4473 3874310 : CPLAssert(pfnGetWeight4Values);
4474 :
4475 3874310 : int i = iMin; // Used after for.
4476 3874310 : int iC = 0; // Used after for.
4477 : // Not zero, but as close as possible to it, to avoid potential division by
4478 : // zero at end of function
4479 3874310 : double dfAccumulatorWeightHorizontal = cpl::NumericLimits<double>::min();
// Groups of 4 columns (i .. i + 3 must all be <= iMax): the 4 filter
// arguments are written into the output array, then handed to the 4-values
// filter whose return value is added to the running sum.
// NOTE(review): this presumably relies on pfnGetWeight4Values replacing the
// 4 arguments in place with their weights and returning the sum of those
// weights - confirm against the filter implementations.
4480 8701700 : for (; i + 2 < iMax; i += 4, iC += 4)
4481 : {
4482 4824760 : padfWeightsHorizontal[iC] = (i - dfDeltaX) * dfXScale;
4483 4824760 : padfWeightsHorizontal[iC + 1] = padfWeightsHorizontal[iC] + dfXScale;
4484 4824760 : padfWeightsHorizontal[iC + 2] =
4485 4824760 : padfWeightsHorizontal[iC + 1] + dfXScale;
4486 4824760 : padfWeightsHorizontal[iC + 3] =
4487 4824760 : padfWeightsHorizontal[iC + 2] + dfXScale;
4488 4827380 : dfAccumulatorWeightHorizontal +=
4489 4824760 : pfnGetWeight4Values(padfWeightsHorizontal + iC);
4490 : }
// Remaining (at most 3) columns, one scalar filter evaluation each.
4491 4092900 : for (; i <= iMax; ++i, ++iC)
4492 : {
4493 223187 : const double dfWeight = pfnGetWeight((i - dfDeltaX) * dfXScale);
4494 215966 : padfWeightsHorizontal[iC] = dfWeight;
4495 215966 : dfAccumulatorWeightHorizontal += dfWeight;
4496 : }
4497 :
// Same two-step scheme for the vertical weights.
4498 3869710 : int j = jMin; // Used after for.
4499 3869710 : int jC = 0; // Used after for.
4500 : // Not zero, but as close as possible to it, to avoid potential division by
4501 : // zero at end of function
4502 3869710 : double dfAccumulatorWeightVertical = cpl::NumericLimits<double>::min();
4503 8146200 : for (; j + 2 < jMax; j += 4, jC += 4)
4504 : {
4505 4273930 : padfWeightsVertical[jC] = (j - dfDeltaY) * dfYScale;
4506 4273930 : padfWeightsVertical[jC + 1] = padfWeightsVertical[jC] + dfYScale;
4507 4273930 : padfWeightsVertical[jC + 2] = padfWeightsVertical[jC + 1] + dfYScale;
4508 4273930 : padfWeightsVertical[jC + 3] = padfWeightsVertical[jC + 2] + dfYScale;
4509 4276490 : dfAccumulatorWeightVertical +=
4510 4273930 : pfnGetWeight4Values(padfWeightsVertical + jC);
4511 : }
4512 8377240 : for (; j <= jMax; ++j, ++jC)
4513 : {
4514 4505510 : const double dfWeight = pfnGetWeight((j - dfDeltaY) * dfYScale);
4515 4504960 : padfWeightsVertical[jC] = dfWeight;
4516 4504960 : dfAccumulatorWeightVertical += dfWeight;
4517 : }
4518 :
// Normalisation factor for the separable 2D kernel.
4519 3871720 : dfInvWeights =
4520 3871720 : 1. / (dfAccumulatorWeightHorizontal * dfAccumulatorWeightVertical);
4521 3871720 : }
4522 :
4523 : /************************************************************************/
4524 : /* GWKResampleNoMasksT() */
4525 : /************************************************************************/
4526 :
// Generic (non-SIMD) kernel resampling of one output value for band iBand
// at source position (dfSrcX, dfSrcY). Source pixels are read directly from
// poWK->papabySrcImage[iBand] interpreted as T; no validity or density mask
// is consulted in this function.
//
// padfWeightsHorizontal / padfWeightsVertical / dfInvWeights are scratch
// state shared between bands: they are (re)computed only when iBand == 0 and
// reused unchanged for any other band.
// NOTE(review): this presumably requires the caller to process band 0 first
// for each position - confirm against the call site.
//
// Writes the clamped result to *pValue and returns true, except when the
// position/raster-size guard triggers, in which case the result of
// GWKBilinearResampleNoMasks4SampleT() is returned instead.
4527 : template <class T>
4528 : static bool
4529 : GWKResampleNoMasksT(const GDALWarpKernel *poWK, int iBand, double dfSrcX,
4530 : double dfSrcY, T *pValue, double *padfWeightsHorizontal,
4531 : double *padfWeightsVertical, double &dfInvWeights)
4532 :
4533 : {
4534 : // Commonly used; save locally.
4535 : const int nSrcXSize = poWK->nSrcXSize;
4536 : const int nSrcYSize = poWK->nSrcYSize;
4537 :
// Integer pixel obtained by flooring the position shifted by half a pixel,
// and its linear offset in the band buffer (row stride = nSrcXSize).
4538 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4539 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4540 : const GPtrDiff_t iSrcOffset =
4541 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4542 :
4543 : const int nXRadius = poWK->nXRadius;
4544 : const int nYRadius = poWK->nYRadius;
4545 :
4546 : // Politely refuse to process invalid coordinates or obscenely small image.
4547 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4548 : nYRadius > nSrcYSize)
4549 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4550 : pValue);
4551 :
4552 : T *pSrcBand = reinterpret_cast<T *>(poWK->papabySrcImage[iBand]);
// Sub-pixel offset of the sample position relative to (iSrcX, iSrcY).
4553 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4554 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4555 :
// The scale passed to the filter is capped at 1.0.
4556 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4557 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4558 :
// Kernel window [iMin, iMax] x [jMin, jMax], relative to (iSrcX, iSrcY),
// clipped so that every accessed pixel lies inside the source raster.
4559 : int iMin = 1 - nXRadius;
4560 : if (iSrcX + iMin < 0)
4561 : iMin = -iSrcX;
4562 : int iMax = nXRadius;
4563 : if (iSrcX + iMax >= nSrcXSize - 1)
4564 : iMax = nSrcXSize - 1 - iSrcX;
4565 :
4566 : int jMin = 1 - nYRadius;
4567 : if (iSrcY + jMin < 0)
4568 : jMin = -iSrcY;
4569 : int jMax = nYRadius;
4570 : if (iSrcY + jMax >= nSrcYSize - 1)
4571 : jMax = nSrcYSize - 1 - iSrcY;
4572 :
4573 : if (iBand == 0)
4574 : {
4575 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4576 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4577 : padfWeightsVertical, dfInvWeights);
4578 : }
4579 :
4580 : // Loop over all rows in the kernel.
4581 : double dfAccumulator = 0.0;
4582 : for (int jC = 0, j = jMin; j <= jMax; ++j, ++jC)
4583 : {
4584 : const GPtrDiff_t iSampJ =
4585 : iSrcOffset + static_cast<GPtrDiff_t>(j) * nSrcXSize;
4586 :
4587 : // Loop over all pixels in the row.
// Two partial sums are kept for the 4-column chunks and merged afterwards.
4588 : double dfAccumulatorLocal = 0.0;
4589 : double dfAccumulatorLocal2 = 0.0;
4590 : int iC = 0;
4591 : int i = iMin;
4592 : // Process by chunk of 4 cols.
4593 : for (; i + 2 < iMax; i += 4, iC += 4)
4594 : {
4595 : // Retrieve the pixel & accumulate.
4596 : dfAccumulatorLocal +=
4597 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4598 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4599 : padfWeightsHorizontal[iC + 1];
4600 : dfAccumulatorLocal2 += double(pSrcBand[i + 2 + iSampJ]) *
4601 : padfWeightsHorizontal[iC + 2];
4602 : dfAccumulatorLocal2 += double(pSrcBand[i + 3 + iSampJ]) *
4603 : padfWeightsHorizontal[iC + 3];
4604 : }
4605 : dfAccumulatorLocal += dfAccumulatorLocal2;
// Tail: a pair of columns if at least two remain...
4606 : if (i < iMax)
4607 : {
4608 : dfAccumulatorLocal +=
4609 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4610 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4611 : padfWeightsHorizontal[iC + 1];
4612 : i += 2;
4613 : iC += 2;
4614 : }
// ...then the single last column, if any.
4615 : if (i == iMax)
4616 : {
4617 : dfAccumulatorLocal +=
4618 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4619 : }
4620 :
4621 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4622 : }
4623 :
// Normalise by the total weight and clamp to the range of T.
4624 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4625 :
4626 : return true;
4627 : }
4628 :
4629 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
4630 : /* Could possibly be used too on 32bit, but we would need to check at runtime */
4631 : #if defined(USE_SSE2)
4632 :
4633 : /************************************************************************/
4634 : /* GWKResampleNoMasks_SSE2_T() */
4635 : /************************************************************************/
4636 :
// SIMD variant of GWKResampleNoMasksT(): same window computation, same
// weights (computed only when iBand == 0) and same normalisation/clamping of
// the result, but the weighted sums are accumulated with XMMReg4Double /
// XMMReg2Double registers, 4 rows and 4 columns at a time where possible.
//
// Returns true after writing *pValue, or the result of
// GWKBilinearResampleNoMasks4SampleT() when the position/raster-size guard
// triggers.
//
// NOTE(review): Load4Val()/Load2Val() presumably load consecutive T values
// and convert them to double lanes, and AddToLow() presumably adds a 2-lane
// register into the low lanes of a 4-lane one - confirm in gdalsse_priv.h.
4637 : template <class T>
4638 9549263 : static bool GWKResampleNoMasks_SSE2_T(const GDALWarpKernel *poWK, int iBand,
4639 : double dfSrcX, double dfSrcY, T *pValue,
4640 : double *padfWeightsHorizontal,
4641 : double *padfWeightsVertical,
4642 : double &dfInvWeights)
4643 : {
4644 : // Commonly used; save locally.
4645 9549263 : const int nSrcXSize = poWK->nSrcXSize;
4646 9549263 : const int nSrcYSize = poWK->nSrcYSize;
4647 :
// Integer pixel obtained by flooring the position shifted by half a pixel,
// and its linear offset in the band buffer (row stride = nSrcXSize).
4648 9549263 : const int iSrcX = static_cast<int>(floor(dfSrcX - 0.5));
4649 9549263 : const int iSrcY = static_cast<int>(floor(dfSrcY - 0.5));
4650 9549263 : const GPtrDiff_t iSrcOffset =
4651 9549263 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
4652 9549263 : const int nXRadius = poWK->nXRadius;
4653 9549263 : const int nYRadius = poWK->nYRadius;
4654 :
4655 : // Politely refuse to process invalid coordinates or obscenely small image.
4656 9549263 : if (iSrcX >= nSrcXSize || iSrcY >= nSrcYSize || nXRadius > nSrcXSize ||
4657 : nYRadius > nSrcYSize)
4658 453 : return GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
4659 3 : pValue);
4660 :
4661 9548801 : const T *pSrcBand =
4662 9548801 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
4663 :
// Sub-pixel offset of the sample position; filter scale capped at 1.0.
4664 9548801 : const double dfDeltaX = dfSrcX - 0.5 - iSrcX;
4665 9548801 : const double dfDeltaY = dfSrcY - 0.5 - iSrcY;
4666 9548801 : const double dfXScale = std::min(poWK->dfXScale, 1.0);
4667 9534021 : const double dfYScale = std::min(poWK->dfYScale, 1.0);
4668 :
// Kernel window [iMin, iMax] x [jMin, jMax], relative to (iSrcX, iSrcY),
// clipped so that every accessed pixel lies inside the source raster.
4669 9537951 : int iMin = 1 - nXRadius;
4670 9537951 : if (iSrcX + iMin < 0)
4671 46218 : iMin = -iSrcX;
4672 9537951 : int iMax = nXRadius;
4673 9537951 : if (iSrcX + iMax >= nSrcXSize - 1)
4674 42714 : iMax = nSrcXSize - 1 - iSrcX;
4675 :
4676 9537951 : int jMin = 1 - nYRadius;
4677 9537951 : if (iSrcY + jMin < 0)
4678 49554 : jMin = -iSrcY;
4679 9537951 : int jMax = nYRadius;
4680 9537951 : if (iSrcY + jMax >= nSrcYSize - 1)
4681 35683 : jMax = nSrcYSize - 1 - iSrcY;
4682 :
4683 9537951 : if (iBand == 0)
4684 : {
4685 3875081 : GWKComputeWeights(poWK->eResample, iMin, iMax, dfDeltaX, dfXScale, jMin,
4686 : jMax, dfDeltaY, dfYScale, padfWeightsHorizontal,
4687 : padfWeightsVertical, dfInvWeights);
4688 : }
4689 :
// iSampJ is the buffer offset of column iSrcX on the current kernel row; it
// is advanced by whole rows in the loop increments below.
4690 9542561 : GPtrDiff_t iSampJ = iSrcOffset + static_cast<GPtrDiff_t>(jMin) * nSrcXSize;
4691 : // Process by chunk of 4 rows.
4692 9542561 : int jC = 0;
4693 9542561 : int j = jMin;
4694 9542561 : double dfAccumulator = 0.0;
4695 20162193 : for (; j + 2 < jMax; j += 4, iSampJ += 4 * nSrcXSize, jC += 4)
4696 : {
4697 : // Loop over all pixels in the row.
4698 10639512 : int iC = 0;
4699 10639512 : int i = iMin;
4700 : // Process by chunk of 4 cols.
// One accumulator per row; all 4 rows share the same horizontal weights.
4701 10639512 : XMMReg4Double v_acc_1 = XMMReg4Double::Zero();
4702 10614862 : XMMReg4Double v_acc_2 = XMMReg4Double::Zero();
4703 10626782 : XMMReg4Double v_acc_3 = XMMReg4Double::Zero();
4704 10633642 : XMMReg4Double v_acc_4 = XMMReg4Double::Zero();
4705 27911180 : for (; i + 2 < iMax; i += 4, iC += 4)
4706 : {
4707 : // Retrieve the pixel & accumulate.
4708 17275988 : XMMReg4Double v_pixels_1 =
4709 17275988 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4710 17269688 : XMMReg4Double v_pixels_2 =
4711 17269688 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + nSrcXSize);
4712 17284088 : XMMReg4Double v_pixels_3 =
4713 17284088 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4714 17286588 : XMMReg4Double v_pixels_4 =
4715 17286588 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4716 :
4717 17289588 : XMMReg4Double v_padfWeight =
4718 17289588 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4719 :
4720 17258688 : v_acc_1 += v_pixels_1 * v_padfWeight;
4721 17261688 : v_acc_2 += v_pixels_2 * v_padfWeight;
4722 17264988 : v_acc_3 += v_pixels_3 * v_padfWeight;
4723 17274488 : v_acc_4 += v_pixels_4 * v_padfWeight;
4724 : }
4725 :
// Tail: a pair of columns if at least two remain, handled with 2-lane loads.
4726 10635222 : if (i < iMax)
4727 : {
4728 145982 : XMMReg2Double v_pixels_1 =
4729 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ);
4730 145982 : XMMReg2Double v_pixels_2 =
4731 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + nSrcXSize);
4732 145982 : XMMReg2Double v_pixels_3 =
4733 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 2 * nSrcXSize);
4734 145982 : XMMReg2Double v_pixels_4 =
4735 145982 : XMMReg2Double::Load2Val(pSrcBand + i + iSampJ + 3 * nSrcXSize);
4736 :
4737 145982 : XMMReg2Double v_padfWeight =
4738 145982 : XMMReg2Double::Load2Val(padfWeightsHorizontal + iC);
4739 :
4740 145982 : v_acc_1.AddToLow(v_pixels_1 * v_padfWeight);
4741 145982 : v_acc_2.AddToLow(v_pixels_2 * v_padfWeight);
4742 145982 : v_acc_3.AddToLow(v_pixels_3 * v_padfWeight);
4743 145982 : v_acc_4.AddToLow(v_pixels_4 * v_padfWeight);
4744 :
4745 145982 : i += 2;
4746 145982 : iC += 2;
4747 : }
4748 :
// Reduce each row accumulator to a scalar.
4749 10635222 : double dfAccumulatorLocal_1 = v_acc_1.GetHorizSum();
4750 10622102 : double dfAccumulatorLocal_2 = v_acc_2.GetHorizSum();
4751 10635762 : double dfAccumulatorLocal_3 = v_acc_3.GetHorizSum();
4752 10637242 : double dfAccumulatorLocal_4 = v_acc_4.GetHorizSum();
4753 :
// Single last column, if any, handled in scalar code.
4754 10619652 : if (i == iMax)
4755 : {
4756 52267 : dfAccumulatorLocal_1 += static_cast<double>(pSrcBand[i + iSampJ]) *
4757 52267 : padfWeightsHorizontal[iC];
4758 52267 : dfAccumulatorLocal_2 +=
4759 52267 : static_cast<double>(pSrcBand[i + iSampJ + nSrcXSize]) *
4760 52267 : padfWeightsHorizontal[iC];
4761 52267 : dfAccumulatorLocal_3 +=
4762 52267 : static_cast<double>(pSrcBand[i + iSampJ + 2 * nSrcXSize]) *
4763 52267 : padfWeightsHorizontal[iC];
4764 52267 : dfAccumulatorLocal_4 +=
4765 52267 : static_cast<double>(pSrcBand[i + iSampJ + 3 * nSrcXSize]) *
4766 52267 : padfWeightsHorizontal[iC];
4767 : }
4768 :
// Apply the vertical weight of each of the 4 rows.
4769 10619652 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal_1;
4770 10619652 : dfAccumulator += padfWeightsVertical[jC + 1] * dfAccumulatorLocal_2;
4771 10619652 : dfAccumulator += padfWeightsVertical[jC + 2] * dfAccumulatorLocal_3;
4772 10619652 : dfAccumulator += padfWeightsVertical[jC + 3] * dfAccumulatorLocal_4;
4773 : }
// Remaining (at most 3) rows, processed one at a time.
4774 22624341 : for (; j <= jMax; ++j, iSampJ += nSrcXSize, ++jC)
4775 : {
4776 : // Loop over all pixels in the row.
4777 13063940 : int iC = 0;
4778 13063940 : int i = iMin;
4779 : // Process by chunk of 4 cols.
4780 13063940 : XMMReg4Double v_acc = XMMReg4Double::Zero();
4781 26105363 : for (; i + 2 < iMax; i += 4, iC += 4)
4782 : {
4783 : // Retrieve the pixel & accumulate.
4784 13070023 : XMMReg4Double v_pixels =
4785 13070023 : XMMReg4Double::Load4Val(pSrcBand + i + iSampJ);
4786 13041223 : XMMReg4Double v_padfWeight =
4787 13041223 : XMMReg4Double::Load4Val(padfWeightsHorizontal + iC);
4788 :
4789 13081023 : v_acc += v_pixels * v_padfWeight;
4790 : }
4791 :
4792 13035340 : double dfAccumulatorLocal = v_acc.GetHorizSum();
4793 :
// Scalar tail: a pair of columns, then a single last column.
4794 13101640 : if (i < iMax)
4795 : {
4796 173976 : dfAccumulatorLocal +=
4797 173976 : double(pSrcBand[i + iSampJ]) * padfWeightsHorizontal[iC];
4798 173976 : dfAccumulatorLocal += double(pSrcBand[i + 1 + iSampJ]) *
4799 173976 : padfWeightsHorizontal[iC + 1];
4800 173976 : i += 2;
4801 173976 : iC += 2;
4802 : }
4803 13101640 : if (i == iMax)
4804 : {
4805 33032 : dfAccumulatorLocal += static_cast<double>(pSrcBand[i + iSampJ]) *
4806 33032 : padfWeightsHorizontal[iC];
4807 : }
4808 :
4809 13101640 : dfAccumulator += padfWeightsVertical[jC] * dfAccumulatorLocal;
4810 : }
4811 :
// Normalise by the total weight and clamp to the range of T.
4812 9560501 : *pValue = GWKClampValueT<T>(dfAccumulator * dfInvWeights);
4813 :
4814 9538851 : return true;
4815 : }
4816 :
4817 : /************************************************************************/
4818 : /* GWKResampleNoMasksT<GByte>() */
4819 : /************************************************************************/
4820 :
// Specialization for GByte: forwards every argument unchanged to the SIMD
// implementation GWKResampleNoMasks_SSE2_T() and returns its result.
4821 : template <>
4822 8964750 : bool GWKResampleNoMasksT<GByte>(const GDALWarpKernel *poWK, int iBand,
4823 : double dfSrcX, double dfSrcY, GByte *pValue,
4824 : double *padfWeightsHorizontal,
4825 : double *padfWeightsVertical,
4826 : double &dfInvWeights)
4827 : {
4828 8964750 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4829 : padfWeightsHorizontal, padfWeightsVertical,
4830 8934350 : dfInvWeights);
4831 : }
4832 :
4833 : /************************************************************************/
4834 : /* GWKResampleNoMasksT<GInt16>() */
4835 : /************************************************************************/
4836 :
// Specialization for GInt16: forwards every argument unchanged to the SIMD
// implementation GWKResampleNoMasks_SSE2_T() and returns its result.
4837 : template <>
4838 252563 : bool GWKResampleNoMasksT<GInt16>(const GDALWarpKernel *poWK, int iBand,
4839 : double dfSrcX, double dfSrcY, GInt16 *pValue,
4840 : double *padfWeightsHorizontal,
4841 : double *padfWeightsVertical,
4842 : double &dfInvWeights)
4843 : {
4844 252563 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4845 : padfWeightsHorizontal, padfWeightsVertical,
4846 252563 : dfInvWeights);
4847 : }
4848 :
4849 : /************************************************************************/
4850 : /* GWKResampleNoMasksT<GUInt16>() */
4851 : /************************************************************************/
4852 :
// Specialization for GUInt16: forwards every argument unchanged to the SIMD
// implementation GWKResampleNoMasks_SSE2_T() and returns its result.
4853 : template <>
4854 343440 : bool GWKResampleNoMasksT<GUInt16>(const GDALWarpKernel *poWK, int iBand,
4855 : double dfSrcX, double dfSrcY, GUInt16 *pValue,
4856 : double *padfWeightsHorizontal,
4857 : double *padfWeightsVertical,
4858 : double &dfInvWeights)
4859 : {
4860 343440 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4861 : padfWeightsHorizontal, padfWeightsVertical,
4862 343440 : dfInvWeights);
4863 : }
4864 :
4865 : /************************************************************************/
4866 : /* GWKResampleNoMasksT<float>() */
4867 : /************************************************************************/
4868 :
// Specialization for float: forwards every argument unchanged to the SIMD
// implementation GWKResampleNoMasks_SSE2_T() and returns its result.
4869 : template <>
4870 2500 : bool GWKResampleNoMasksT<float>(const GDALWarpKernel *poWK, int iBand,
4871 : double dfSrcX, double dfSrcY, float *pValue,
4872 : double *padfWeightsHorizontal,
4873 : double *padfWeightsVertical,
4874 : double &dfInvWeights)
4875 : {
4876 2500 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4877 : padfWeightsHorizontal, padfWeightsVertical,
4878 2500 : dfInvWeights);
4879 : }
4880 :
4881 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
4882 :
4883 : /************************************************************************/
4884 : /* GWKResampleNoMasksT<double>() */
4885 : /************************************************************************/
4886 :
// Specialization for double, only compiled when
// INSTANTIATE_FLOAT64_SSE2_IMPL is defined: forwards every argument
// unchanged to GWKResampleNoMasks_SSE2_T() and returns its result.
4887 : template <>
4888 : bool GWKResampleNoMasksT<double>(const GDALWarpKernel *poWK, int iBand,
4889 : double dfSrcX, double dfSrcY, double *pValue,
4890 : double *padfWeightsHorizontal,
4891 : double *padfWeightsVertical,
4892 : double &dfInvWeights)
4893 : {
4894 : return GWKResampleNoMasks_SSE2_T(poWK, iBand, dfSrcX, dfSrcY, pValue,
4895 : padfWeightsHorizontal, padfWeightsVertical,
4896 : dfInvWeights);
4897 : }
4898 :
4899 : #endif /* INSTANTIATE_FLOAT64_SSE2_IMPL */
4900 :
4901 : #endif /* defined(USE_SSE2) */
4902 :
4903 : /************************************************************************/
4904 : /* GWKRoundSourceCoordinates() */
4905 : /************************************************************************/
4906 :
// Rounds the source coordinates of one destination row to multiples of
// dfSrcCoordPrecision.
//
// For each of the nDstXSize entries, padfX[i] and padfY[i] are replaced by
// the nearest multiple of dfSrcCoordPrecision. When the rounding moved
// either coordinate by more than the "exact transform" threshold, the point
// is recomputed from its destination position (iDstX + dfDstXOff, dfDstY)
// with a single-point call to pfnTransformer, then rounded again.
//
// That call overwrites padfX/padfY/padfZ[iDstX] and pabSuccess[iDstX]; the
// success flag is not inspected here.
// NOTE(review): the TRUE argument is presumably the bDstToSrc flag of
// GDALTransformerFunc - confirm.
4907 1000 : static void GWKRoundSourceCoordinates(
4908 : int nDstXSize, double *padfX, double *padfY, double *padfZ, int *pabSuccess,
4909 : double dfSrcCoordPrecision, double dfErrorThreshold,
4910 : GDALTransformerFunc pfnTransformer, void *pTransformerArg, double dfDstXOff,
4911 : double dfDstY)
4912 : {
// Fraction of half the precision step beyond which the rounded value is not
// trusted: 0.8 by default, or 1 - 2 * dfErrorThreshold / dfSrcCoordPrecision
// when the precision is at least 10 times the (positive) error threshold.
4913 1000 : double dfPct = 0.8;
4914 1000 : if (dfErrorThreshold > 0 && dfSrcCoordPrecision / dfErrorThreshold >= 10.0)
4915 : {
4916 1000 : dfPct = 1.0 - 2 * 1.0 / (dfSrcCoordPrecision / dfErrorThreshold);
4917 : }
4918 1000 : const double dfExactTransformThreshold = 0.5 * dfPct * dfSrcCoordPrecision;
4919 :
4920 501000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
4921 : {
// Keep the unrounded values to measure how far rounding moved the point.
4922 500000 : const double dfXBefore = padfX[iDstX];
4923 500000 : const double dfYBefore = padfY[iDstX];
4924 500000 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4925 : dfSrcCoordPrecision;
4926 500000 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4927 : dfSrcCoordPrecision;
4928 :
4929 : // If we are in an uncertainty zone, go to non-approximated
4930 : // transformation.
4931 : // Due to the 80% of half-precision threshold, dfSrcCoordPrecision must
4932 : // be at least 10 times greater than the approximation error.
4933 500000 : if (fabs(dfXBefore - padfX[iDstX]) > dfExactTransformThreshold ||
4934 399914 : fabs(dfYBefore - padfY[iDstX]) > dfExactTransformThreshold)
4935 : {
// Re-seed the entry with its destination coordinates, transform that single
// point, then round the result.
4936 180090 : padfX[iDstX] = iDstX + dfDstXOff;
4937 180090 : padfY[iDstX] = dfDstY;
4938 180090 : padfZ[iDstX] = 0.0;
4939 180090 : pfnTransformer(pTransformerArg, TRUE, 1, padfX + iDstX,
4940 180090 : padfY + iDstX, padfZ + iDstX, pabSuccess + iDstX);
4941 180090 : padfX[iDstX] = floor(padfX[iDstX] / dfSrcCoordPrecision + 0.5) *
4942 : dfSrcCoordPrecision;
4943 180090 : padfY[iDstX] = floor(padfY[iDstX] / dfSrcCoordPrecision + 0.5) *
4944 : dfSrcCoordPrecision;
4945 : }
4946 : }
4947 1000 : }
4948 :
4949 : /************************************************************************/
4950 : /* GWKCheckAndComputeSrcOffsets() */
4951 : /************************************************************************/
// Validates the source coordinate computed for destination column _iDstX
// and converts it into a linear offset inside the source buffer.
//
// Returns false when the point failed to transform, has a NaN coordinate, or
// falls outside the source window
// [nSrcXOff, nSrcXOff + _nSrcXSize] x [nSrcYOff, nSrcYOff + _nSrcYSize].
// A point that is less than one pixel outside on the first attempt is
// re-transformed alone (overwriting _padfX/_padfY/_pabSuccess[_iDstX]) and
// checked once more.
//
// On success iSrcOffset = iSrcX + iSrcY * _nSrcXSize, with iSrcX/iSrcY
// relative to nSrcXOff/nSrcYOff, and true is returned. iSrcOffset is left
// untouched on failure.
4952 : static CPL_INLINE bool
4953 147681000 : GWKCheckAndComputeSrcOffsets(GWKJobStruct *psJob, int *_pabSuccess, int _iDstX,
4954 : int _iDstY, double *_padfX, double *_padfY,
4955 : int _nSrcXSize, int _nSrcYSize,
4956 : GPtrDiff_t &iSrcOffset)
4957 : {
4958 147681000 : const GDALWarpKernel *_poWK = psJob->poWK;
// At most two attempts: every "continue" below is guarded by iTry == 0, so
// the second pass always ends in a return or the final break.
4959 147885000 : for (int iTry = 0; iTry < 2; ++iTry)
4960 : {
4961 148220000 : if (iTry == 1)
4962 : {
4963 : // If the source coordinate is slightly outside of the source raster
4964 : // retry to transform it alone, so that the exact coordinate
4965 : // transformer is used.
4966 :
// Destination pixel centre, expressed in full destination raster coordinates.
4967 204492 : _padfX[_iDstX] = _iDstX + 0.5 + _poWK->nDstXOff;
4968 204492 : _padfY[_iDstX] = _iDstY + 0.5 + _poWK->nDstYOff;
4969 204492 : double dfZ = 0;
4970 204492 : _poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1,
4971 204492 : _padfX + _iDstX, _padfY + _iDstX, &dfZ,
4972 204492 : _pabSuccess + _iDstX);
4973 : }
4974 148220000 : if (!_pabSuccess[_iDstX])
4975 3614790 : return false;
4976 :
4977 : // If this happens this is likely the symptom of a bug somewhere.
4978 144605000 : if (std::isnan(_padfX[_iDstX]) || std::isnan(_padfY[_iDstX]))
4979 : {
// The flag limits the debug message to its first occurrence.
// NOTE(review): this function-local static is read and written without
// synchronization; if the function can run on several worker threads at
// once that is a data race - confirm whether it matters here.
4980 : static bool bNanCoordFound = false;
4981 0 : if (!bNanCoordFound)
4982 : {
4983 0 : CPLDebug("WARP",
4984 : "GWKCheckAndComputeSrcOffsets(): "
4985 : "NaN coordinate found on point %d.",
4986 : _iDstX);
4987 0 : bNanCoordFound = true;
4988 : }
4989 0 : return false;
4990 : }
4991 :
4992 : /* --------------------------------------------------------------------
4993 : */
4994 : /* Figure out what pixel we want in our source raster, and skip */
4995 : /* further processing if it is well off the source image. */
4996 : /* --------------------------------------------------------------------
4997 : */
4998 : /* We test against the value before casting to avoid the */
4999 : /* problem of asymmetric truncation effects around zero. That is */
5000 : /* -0.5 will be 0 when cast to an int. */
5001 144460000 : if (_padfX[_iDstX] < _poWK->nSrcXOff)
5002 : {
5003 : // If the source coordinate is slightly outside of the source raster
5004 : // retry to transform it alone, so that the exact coordinate
5005 : // transformer is used.
5006 6005840 : if (iTry == 0 && _padfX[_iDstX] > _poWK->nSrcXOff - 1)
5007 41982 : continue;
5008 5963860 : return false;
5009 : }
5010 :
5011 138454000 : if (_padfY[_iDstX] < _poWK->nSrcYOff)
5012 : {
5013 : // If the source coordinate is slightly outside of the source raster
5014 : // retry to transform it alone, so that the exact coordinate
5015 : // transformer is used.
5016 6202900 : if (iTry == 0 && _padfY[_iDstX] > _poWK->nSrcYOff - 1)
5017 64198 : continue;
5018 6138710 : return false;
5019 : }
5020 :
5021 : // Check for potential overflow when casting from float to int, (if
5022 : // operating outside natural projection area, padfX/Y can be a very huge
5023 : // positive number before doing the actual conversion), as such cast is
5024 : // undefined behavior that can trigger exception with some compilers
5025 : // (see #6753)
5026 132252000 : if (_padfX[_iDstX] + 1e-10 > _nSrcXSize + _poWK->nSrcXOff)
5027 : {
5028 : // If the source coordinate is slightly outside of the source raster
5029 : // retry to transform it alone, so that the exact coordinate
5030 : // transformer is used.
5031 3931390 : if (iTry == 0 && _padfX[_iDstX] < _nSrcXSize + _poWK->nSrcXOff + 1)
5032 47248 : continue;
5033 3884140 : return false;
5034 : }
5035 128320000 : if (_padfY[_iDstX] + 1e-10 > _nSrcYSize + _poWK->nSrcYOff)
5036 : {
5037 : // If the source coordinate is slightly outside of the source raster
5038 : // retry to transform it alone, so that the exact coordinate
5039 : // transformer is used.
5040 4487970 : if (iTry == 0 && _padfY[_iDstX] < _nSrcYSize + _poWK->nSrcYOff + 1)
5041 51064 : continue;
5042 4436910 : return false;
5043 : }
5044 :
5045 123832000 : break;
5046 : }
5047 :
// Truncate to integer pixel indices relative to the source window origin,
// using the same 1e-10 nudge as the upper-bound checks above. A coordinate
// exactly on the far edge yields index == size and is pulled back by one.
5048 123497000 : int iSrcX = static_cast<int>(_padfX[_iDstX] + 1.0e-10) - _poWK->nSrcXOff;
5049 123497000 : int iSrcY = static_cast<int>(_padfY[_iDstX] + 1.0e-10) - _poWK->nSrcYOff;
5050 123497000 : if (iSrcX == _nSrcXSize)
5051 0 : iSrcX--;
5052 123497000 : if (iSrcY == _nSrcYSize)
5053 0 : iSrcY--;
5054 :
5055 : // Those checks should normally be OK given the previous ones.
5056 123497000 : CPLAssert(iSrcX >= 0);
5057 123497000 : CPLAssert(iSrcY >= 0);
5058 123497000 : CPLAssert(iSrcX < _nSrcXSize);
5059 123497000 : CPLAssert(iSrcY < _nSrcYSize);
5060 :
5061 123497000 : iSrcOffset = iSrcX + static_cast<GPtrDiff_t>(iSrcY) * _nSrcXSize;
5062 :
5063 123497000 : return true;
5064 : }
5065 :
5066 : /************************************************************************/
5067 : /* GWKOneSourceCornerFailsToReproject() */
5068 : /************************************************************************/
5069 :
// Returns true as soon as one of the 4 corners of the source window,
// (nSrcXOff + {0, nSrcXSize}, nSrcYOff + {0, nSrcYSize}), is reported as
// failed by a single-point call to poWK->pfnTransformer; returns false when
// all 4 succeed.
// NOTE(review): the FALSE argument is presumably the bDstToSrc flag of
// GDALTransformerFunc, i.e. a source-to-destination transform - confirm.
5070 818 : static bool GWKOneSourceCornerFailsToReproject(GWKJobStruct *psJob)
5071 : {
5072 818 : GDALWarpKernel *poWK = psJob->poWK;
5073 2444 : for (int iY = 0; iY <= 1; ++iY)
5074 : {
5075 4884 : for (int iX = 0; iX <= 1; ++iX)
5076 : {
// Corner coordinates; the transformer works on local copies only.
5077 3258 : double dfXTmp = poWK->nSrcXOff + iX * poWK->nSrcXSize;
5078 3258 : double dfYTmp = poWK->nSrcYOff + iY * poWK->nSrcYSize;
5079 3258 : double dfZTmp = 0;
5080 3258 : int nSuccess = FALSE;
5081 3258 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp,
5082 : &dfYTmp, &dfZTmp, &nSuccess);
5083 3258 : if (!nSuccess)
5084 6 : return true;
5085 : }
5086 : }
5087 812 : return false;
5088 : }
5089 :
5090 : /************************************************************************/
5091 : /* GWKAdjustSrcOffsetOnEdge() */
5092 : /************************************************************************/
5093 :
// Tries to move iSrcOffset to a neighbouring valid source pixel when the
// current one sits on the edge of the area that can be reprojected.
//
// With x = iSrcOffset % nSrcXSize and y = iSrcOffset / nSrcXSize, the points
// (x, y), (x, y + 1) and (x + 1, y), offset by nSrcXOff/nSrcYOff, are
// transformed in that order, stopping at the first failure. If all three
// succeed, nothing is changed and false is returned. Otherwise the
// neighbours right, below, left and above (in that order, each only if
// inside the raster) are tested with CPLMaskGet() on
// poWK->panUnifiedSrcValid; iSrcOffset is moved to the first one that
// passes and true is returned. Returns false when none passes.
//
// NOTE(review): poWK->panUnifiedSrcValid is used without a null check, so
// the caller presumably guarantees it is set - confirm.
5094 9714 : static bool GWKAdjustSrcOffsetOnEdge(GWKJobStruct *psJob,
5095 : GPtrDiff_t &iSrcOffset)
5096 : {
5097 9714 : GDALWarpKernel *poWK = psJob->poWK;
5098 9714 : const int nSrcXSize = poWK->nSrcXSize;
5099 9714 : const int nSrcYSize = poWK->nSrcYSize;
5100 :
5101 : // Check if the computed source position slightly altered
5102 : // fails to reproject. If so, then we are at the edge of
5103 : // the validity area, and it is worth checking neighbour
5104 : // source pixels for validity.
5105 9714 : int nSuccess = FALSE;
// Probe 1: the pixel itself, (x, y).
5106 : {
5107 9714 : double dfXTmp =
5108 9714 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5109 9714 : double dfYTmp =
5110 9714 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5111 9714 : double dfZTmp = 0;
5112 9714 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5113 : &dfZTmp, &nSuccess);
5114 : }
// Probe 2 (only if probe 1 succeeded): one row down, (x, y + 1).
5115 9714 : if (nSuccess)
5116 : {
5117 6996 : double dfXTmp =
5118 6996 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5119 6996 : double dfYTmp =
5120 6996 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5121 6996 : double dfZTmp = 0;
5122 6996 : nSuccess = FALSE;
5123 6996 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5124 : &dfZTmp, &nSuccess);
5125 : }
// Probe 3 (only if probes 1 and 2 succeeded): one column right, (x + 1, y).
5126 9714 : if (nSuccess)
5127 : {
5128 5624 : double dfXTmp =
5129 5624 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5130 5624 : double dfYTmp =
5131 5624 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5132 5624 : double dfZTmp = 0;
5133 5624 : nSuccess = FALSE;
5134 5624 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5135 : &dfZTmp, &nSuccess);
5136 : }
5137 :
// From here on nSuccess is non-zero only if all three probes succeeded, in
// which case every branch below is skipped. Neighbour order: right, below,
// left, above.
5138 14166 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5139 4452 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + 1))
5140 : {
5141 1860 : iSrcOffset++;
5142 1860 : return true;
5143 : }
5144 10290 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5145 2436 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset + nSrcXSize))
5146 : {
5147 1334 : iSrcOffset += nSrcXSize;
5148 1334 : return true;
5149 : }
5150 7838 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5151 1318 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - 1))
5152 : {
5153 956 : iSrcOffset--;
5154 956 : return true;
5155 : }
5156 5924 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5157 360 : CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset - nSrcXSize))
5158 : {
5159 340 : iSrcOffset -= nSrcXSize;
5160 340 : return true;
5161 : }
5162 :
5163 5224 : return false;
5164 : }
5165 :
5166 : /************************************************************************/
5167 : /* GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity() */
5168 : /************************************************************************/
5169 :
5170 0 : static bool GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(GWKJobStruct *psJob,
5171 : GPtrDiff_t &iSrcOffset)
5172 : {
5173 0 : GDALWarpKernel *poWK = psJob->poWK;
5174 0 : const int nSrcXSize = poWK->nSrcXSize;
5175 0 : const int nSrcYSize = poWK->nSrcYSize;
5176 :
5177 : // Check if the computed source position slightly altered
5178 : // fails to reproject. If so, then we are at the edge of
5179 : // the validity area, and it is worth checking neighbour
5180 : // source pixels for validity.
5181 0 : int nSuccess = FALSE;
5182 : {
5183 0 : double dfXTmp =
5184 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5185 0 : double dfYTmp =
5186 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5187 0 : double dfZTmp = 0;
5188 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5189 : &dfZTmp, &nSuccess);
5190 : }
5191 0 : if (nSuccess)
5192 : {
5193 0 : double dfXTmp =
5194 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize);
5195 0 : double dfYTmp =
5196 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize) + 1;
5197 0 : double dfZTmp = 0;
5198 0 : nSuccess = FALSE;
5199 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5200 : &dfZTmp, &nSuccess);
5201 : }
5202 0 : if (nSuccess)
5203 : {
5204 0 : double dfXTmp =
5205 0 : poWK->nSrcXOff + static_cast<int>(iSrcOffset % nSrcXSize) + 1;
5206 0 : double dfYTmp =
5207 0 : poWK->nSrcYOff + static_cast<int>(iSrcOffset / nSrcXSize);
5208 0 : double dfZTmp = 0;
5209 0 : nSuccess = FALSE;
5210 0 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1, &dfXTmp, &dfYTmp,
5211 : &dfZTmp, &nSuccess);
5212 : }
5213 :
5214 0 : if (!nSuccess && (iSrcOffset % nSrcXSize) + 1 < nSrcXSize &&
5215 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + 1] >=
5216 : SRC_DENSITY_THRESHOLD_FLOAT)
5217 : {
5218 0 : iSrcOffset++;
5219 0 : return true;
5220 : }
5221 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) + 1 < nSrcYSize &&
5222 0 : poWK->pafUnifiedSrcDensity[iSrcOffset + nSrcXSize] >=
5223 : SRC_DENSITY_THRESHOLD_FLOAT)
5224 : {
5225 0 : iSrcOffset += nSrcXSize;
5226 0 : return true;
5227 : }
5228 0 : else if (!nSuccess && (iSrcOffset % nSrcXSize) > 0 &&
5229 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - 1] >=
5230 : SRC_DENSITY_THRESHOLD_FLOAT)
5231 : {
5232 0 : iSrcOffset--;
5233 0 : return true;
5234 : }
5235 0 : else if (!nSuccess && (iSrcOffset / nSrcXSize) > 0 &&
5236 0 : poWK->pafUnifiedSrcDensity[iSrcOffset - nSrcXSize] >=
5237 : SRC_DENSITY_THRESHOLD_FLOAT)
5238 : {
5239 0 : iSrcOffset -= nSrcXSize;
5240 0 : return true;
5241 : }
5242 :
5243 0 : return false;
5244 : }
5245 :
5246 : /************************************************************************/
5247 : /* GWKGeneralCase() */
5248 : /* */
5249 : /* This is the most general case. It attempts to handle all */
5250 : /* possible features with relatively little concern for */
5251 : /* efficiency. */
5252 : /************************************************************************/
5253 :
// Worker routine handed to GWKRun() by GWKGeneralCase(). pData is the
// GWKJobStruct of the job; destination lines [iYMin, iYMax) are processed.
// For every destination pixel the pixel centre is transformed to source
// coordinates, pixels rejected by the unified source density/validity masks
// are skipped (or moved to a neighbour on the reprojection edge), every band
// is resampled (real and imaginary parts) and written to the destination,
// and the destination density/validity masks are updated.
5254 239 : static void GWKGeneralCaseThread(void *pData)
5255 : {
5256 239 : GWKJobStruct *psJob = reinterpret_cast<GWKJobStruct *>(pData);
5257 239 : GDALWarpKernel *poWK = psJob->poWK;
5258 239 : const int iYMin = psJob->iYMin;
5259 239 : const int iYMax = psJob->iYMax;
// The warp option is only looked up when a vertical shift is applied; it
// defaults to "1.0" when absent.
5260 : const double dfMultFactorVerticalShiftPipeline =
5261 239 : poWK->bApplyVerticalShift
5262 239 : ? CPLAtof(CSLFetchNameValueDef(
5263 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5264 : "1.0"))
5265 239 : : 0.0;
5266 :
5267 239 : int nDstXSize = poWK->nDstXSize;
5268 239 : int nSrcXSize = poWK->nSrcXSize;
5269 239 : int nSrcYSize = poWK->nSrcYSize;
5270 :
5271 : /* -------------------------------------------------------------------- */
5272 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5273 : /* scanlines worth of positions. */
5274 : /* -------------------------------------------------------------------- */
5275 : // For x, 2 *, because we cache the precomputed values at the end.
5276 : double *padfX =
5277 239 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5278 : double *padfY =
5279 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5280 : double *padfZ =
5281 239 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5282 239 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5283 :
// The dedicated 4-sample bilinear/cubic routines are only selected when both
// scale factors are at least 0.95.
5284 239 : const bool bUse4SamplesFormula =
5285 239 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5286 :
// The work structure is only dereferenced in the generic pfnGWKResample
// branch below, which nearest neighbour never reaches.
5287 239 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5288 239 : if (poWK->eResample != GRA_NearestNeighbour)
5289 : {
5290 220 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5291 : }
5292 239 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5293 239 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5294 239 : const double dfErrorThreshold = CPLAtof(
5295 239 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5296 :
// When true, pixels rejected by the source masks get a second chance through
// the GWKAdjustSrcOffsetOnEdge*() helpers instead of being skipped outright.
5297 : const bool bOneSourceCornerFailsToReproject =
5298 239 : GWKOneSourceCornerFailsToReproject(psJob);
5299 :
5300 : // Precompute values.
5301 6469 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5302 6230 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5303 :
5304 : /* ==================================================================== */
5305 : /* Loop over output lines. */
5306 : /* ==================================================================== */
5307 6469 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5308 : {
5309 : /* --------------------------------------------------------------------
5310 : */
5311 : /* Setup points to transform to source image space. */
5312 : /* --------------------------------------------------------------------
5313 : */
// The transformer works in place on padfX, so the destination X coordinates
// are restored from the copy cached in the second half of the array.
5314 6230 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5315 6230 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5316 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5317 236160 : padfY[iDstX] = dfY;
5318 6230 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5319 :
5320 : /* --------------------------------------------------------------------
5321 : */
5322 : /* Transform the points from destination pixel/line coordinates */
5323 : /* to source pixel/line coordinates. */
5324 : /* --------------------------------------------------------------------
5325 : */
5326 6230 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5327 : padfY, padfZ, pabSuccess);
// Optional rounding of the source coordinates, enabled by a strictly
// positive SRC_COORD_PRECISION warp option.
5328 6230 : if (dfSrcCoordPrecision > 0.0)
5329 : {
5330 0 : GWKRoundSourceCoordinates(
5331 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5332 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5333 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5334 : }
5335 :
5336 : /* ====================================================================
5337 : */
5338 : /* Loop over pixels in output scanline. */
5339 : /* ====================================================================
5340 : */
5341 242390 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5342 : {
5343 236160 : GPtrDiff_t iSrcOffset = 0;
5344 236160 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5345 : padfX, padfY, nSrcXSize,
5346 : nSrcYSize, iSrcOffset))
5347 0 : continue;
5348 :
5349 : /* --------------------------------------------------------------------
5350 : */
5351 : /* Do not try to apply transparent/invalid source pixels to the
5352 : */
5353 : /* destination. This currently ignores the multi-pixel input
5354 : */
5355 : /* of bilinear and cubic resamples. */
5356 : /* --------------------------------------------------------------------
5357 : */
5358 236160 : double dfDensity = 1.0;
5359 :
5360 236160 : if (poWK->pafUnifiedSrcDensity != nullptr)
5361 : {
5362 1200 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5363 1200 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5364 : {
5365 0 : if (!bOneSourceCornerFailsToReproject)
5366 : {
5367 0 : continue;
5368 : }
5369 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5370 : psJob, iSrcOffset))
5371 : {
// iSrcOffset now designates the neighbour: reload its density.
5372 0 : dfDensity =
5373 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5374 : }
5375 : else
5376 : {
5377 0 : continue;
5378 : }
5379 : }
5380 : }
5381 :
5382 236160 : if (poWK->panUnifiedSrcValid != nullptr &&
5383 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5384 : {
5385 0 : if (!bOneSourceCornerFailsToReproject)
5386 : {
5387 0 : continue;
5388 : }
5389 0 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5390 : {
5391 0 : continue;
5392 : }
5393 : }
5394 :
5395 : /* ====================================================================
5396 : */
5397 : /* Loop processing each band. */
5398 : /* ====================================================================
5399 : */
5400 236160 : bool bHasFoundDensity = false;
5401 :
5402 236160 : const GPtrDiff_t iDstOffset =
5403 236160 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5404 472320 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5405 : {
5406 236160 : double dfBandDensity = 0.0;
5407 236160 : double dfValueReal = 0.0;
5408 236160 : double dfValueImag = 0.0;
5409 :
5410 : /* --------------------------------------------------------------------
5411 : */
5412 : /* Collect the source value. */
5413 : /* --------------------------------------------------------------------
5414 : */
// A single pixel fetch is used for nearest neighbour, and also whenever the
// source chunk is only one pixel wide or one pixel high.
5415 236160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5416 : nSrcYSize == 1)
5417 : {
5418 : // FALSE is returned if dfBandDensity == 0, which is
5419 : // checked below.
5420 568 : CPL_IGNORE_RET_VAL(GWKGetPixelValue(
5421 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal,
5422 : &dfValueImag));
5423 : }
5424 235592 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5425 : {
5426 248 : GWKBilinearResample4Sample(
5427 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5428 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5429 : &dfValueReal, &dfValueImag);
5430 : }
5431 235344 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5432 : {
5433 248 : GWKCubicResample4Sample(
5434 248 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5435 248 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5436 : &dfValueReal, &dfValueImag);
5437 : }
5438 : else
5439 : #ifdef DEBUG
5440 : // Only useful for clang static analyzer.
5441 235096 : if (psWrkStruct != nullptr)
5442 : #endif
5443 : {
5444 235096 : psWrkStruct->pfnGWKResample(
5445 235096 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5446 235096 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5447 : &dfValueReal, &dfValueImag, psWrkStruct);
5448 : }
5449 :
5450 : // If we didn't find any valid inputs skip to next band.
5451 236160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5452 0 : continue;
5453 :
5454 236160 : if (poWK->bApplyVerticalShift)
5455 : {
// A non-finite Z from the transformer means no usable shift: skip the band.
5456 0 : if (!std::isfinite(padfZ[iDstX]))
5457 0 : continue;
5458 : // Subtract padfZ[] since the coordinate transformation is
5459 : // from target to source
5460 0 : dfValueReal =
5461 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5462 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5463 : }
5464 :
5465 236160 : bHasFoundDensity = true;
5466 :
5467 : /* --------------------------------------------------------------------
5468 : */
5469 : /* We have a computed value from the source. Now apply it
5470 : * to */
5471 : /* the destination pixel. */
5472 : /* --------------------------------------------------------------------
5473 : */
5474 236160 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
5475 : dfValueReal, dfValueImag);
5476 : }
5477 :
// No band produced a usable value: leave the destination masks untouched.
5478 236160 : if (!bHasFoundDensity)
5479 0 : continue;
5480 :
5481 : /* --------------------------------------------------------------------
5482 : */
5483 : /* Update destination density/validity masks. */
5484 : /* --------------------------------------------------------------------
5485 : */
5486 236160 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5487 :
5488 236160 : if (poWK->panDstValid != nullptr)
5489 : {
5490 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5491 : }
5492 : } /* Next iDstX */
5493 :
5494 : /* --------------------------------------------------------------------
5495 : */
5496 : /* Report progress to the user, and optionally cancel out. */
5497 : /* --------------------------------------------------------------------
5498 : */
// A non-zero return from the progress callback stops the line loop; cleanup
// below still runs.
5499 6230 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5500 0 : break;
5501 : }
5502 :
5503 : /* -------------------------------------------------------------------- */
5504 : /* Cleanup and return. */
5505 : /* -------------------------------------------------------------------- */
5506 239 : CPLFree(padfX);
5507 239 : CPLFree(padfY);
5508 239 : CPLFree(padfZ);
5509 239 : CPLFree(pabSuccess);
5510 239 : if (psWrkStruct)
5511 220 : GWKResampleDeleteWrkStruct(psWrkStruct);
5512 239 : }
5513 :
5514 239 : static CPLErr GWKGeneralCase(GDALWarpKernel *poWK)
5515 : {
5516 239 : return GWKRun(poWK, "GWKGeneralCase", GWKGeneralCaseThread);
5517 : }
5518 :
5519 : /************************************************************************/
5520 : /* GWKRealCase() */
5521 : /* */
5522 : /* General case for non-complex data types. */
5523 : /************************************************************************/
5524 :
// Worker routine handed to GWKRun() by GWKRealCase(). pData is the
// GWKJobStruct of the job; destination lines [iYMin, iYMax) are processed.
// Same overall flow as the general case, but only a real value is carried
// per band (GWKGetPixelValueReal / GWKSetPixelValueReal), and cubic
// resampling has dedicated routines when the only source mask is the
// unified density.
5525 158 : static void GWKRealCaseThread(void *pData)
5526 :
5527 : {
5528 158 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5529 158 : GDALWarpKernel *poWK = psJob->poWK;
5530 158 : const int iYMin = psJob->iYMin;
5531 158 : const int iYMax = psJob->iYMax;
5532 :
5533 158 : const int nDstXSize = poWK->nDstXSize;
5534 158 : const int nSrcXSize = poWK->nSrcXSize;
5535 158 : const int nSrcYSize = poWK->nSrcYSize;
// The warp option is only looked up when a vertical shift is applied; it
// defaults to "1.0" when absent.
5536 : const double dfMultFactorVerticalShiftPipeline =
5537 158 : poWK->bApplyVerticalShift
5538 158 : ? CPLAtof(CSLFetchNameValueDef(
5539 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5540 : "1.0"))
5541 158 : : 0.0;
5542 :
5543 : /* -------------------------------------------------------------------- */
5544 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5545 : /* scanlines worth of positions. */
5546 : /* -------------------------------------------------------------------- */
5547 :
5548 : // For x, 2 *, because we cache the precomputed values at the end.
5549 : double *padfX =
5550 158 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5551 : double *padfY =
5552 158 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5553 : double *padfZ =
5554 158 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5555 158 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5556 :
// The dedicated 4-sample bilinear/cubic routines are only selected when both
// scale factors are at least 0.95.
5557 158 : const bool bUse4SamplesFormula =
5558 158 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
5559 :
// The work structure is only dereferenced in the generic pfnGWKResample
// branch below, which nearest neighbour never reaches.
5560 158 : GWKResampleWrkStruct *psWrkStruct = nullptr;
5561 158 : if (poWK->eResample != GRA_NearestNeighbour)
5562 : {
5563 137 : psWrkStruct = GWKResampleCreateWrkStruct(poWK);
5564 : }
5565 158 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
5566 158 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
5567 158 : const double dfErrorThreshold = CPLAtof(
5568 158 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
5569 :
// True when the unified density array is the only source mask present (no
// unified validity mask, no per-band validity masks). It selects the
// GWKCubicResampleSrcMaskIsDensity4SampleReal*() routines in the cubic branch.
5570 458 : const bool bSrcMaskIsDensity = poWK->panUnifiedSrcValid == nullptr &&
5571 300 : poWK->papanBandSrcValid == nullptr &&
5572 142 : poWK->pafUnifiedSrcDensity != nullptr;
5573 :
// When true, pixels rejected by the source masks get a second chance through
// the GWKAdjustSrcOffsetOnEdge*() helpers instead of being skipped outright.
5574 : const bool bOneSourceCornerFailsToReproject =
5575 158 : GWKOneSourceCornerFailsToReproject(psJob);
5576 :
5577 : // Precompute values.
5578 22201 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5579 22043 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
5580 :
5581 : /* ==================================================================== */
5582 : /* Loop over output lines. */
5583 : /* ==================================================================== */
5584 24952 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
5585 : {
5586 : /* --------------------------------------------------------------------
5587 : */
5588 : /* Setup points to transform to source image space. */
5589 : /* --------------------------------------------------------------------
5590 : */
// The transformer works in place on padfX, so the destination X coordinates
// are restored from the copy cached in the second half of the array.
5591 24794 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
5592 24794 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
5593 44315700 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5594 44290900 : padfY[iDstX] = dfY;
5595 24794 : memset(padfZ, 0, sizeof(double) * nDstXSize);
5596 :
5597 : /* --------------------------------------------------------------------
5598 : */
5599 : /* Transform the points from destination pixel/line coordinates */
5600 : /* to source pixel/line coordinates. */
5601 : /* --------------------------------------------------------------------
5602 : */
5603 24794 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
5604 : padfY, padfZ, pabSuccess);
// Optional rounding of the source coordinates, enabled by a strictly
// positive SRC_COORD_PRECISION warp option.
5605 24794 : if (dfSrcCoordPrecision > 0.0)
5606 : {
5607 0 : GWKRoundSourceCoordinates(
5608 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
5609 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
5610 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
5611 : }
5612 :
5613 : /* ====================================================================
5614 : */
5615 : /* Loop over pixels in output scanline. */
5616 : /* ====================================================================
5617 : */
5618 44315700 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
5619 : {
5620 44290900 : GPtrDiff_t iSrcOffset = 0;
5621 44290900 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
5622 : padfX, padfY, nSrcXSize,
5623 : nSrcYSize, iSrcOffset))
5624 43566800 : continue;
5625 :
5626 : /* --------------------------------------------------------------------
5627 : */
5628 : /* Do not try to apply transparent/invalid source pixels to the
5629 : */
5630 : /* destination. This currently ignores the multi-pixel input
5631 : */
5632 : /* of bilinear and cubic resamples. */
5633 : /* --------------------------------------------------------------------
5634 : */
5635 31778200 : double dfDensity = 1.0;
5636 :
5637 31778200 : if (poWK->pafUnifiedSrcDensity != nullptr)
5638 : {
5639 1656100 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5640 1656100 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
5641 : {
5642 1525010 : if (!bOneSourceCornerFailsToReproject)
5643 : {
5644 1525010 : continue;
5645 : }
5646 0 : else if (GWKAdjustSrcOffsetOnEdgeUnifiedSrcDensity(
5647 : psJob, iSrcOffset))
5648 : {
// iSrcOffset now designates the neighbour: reload its density.
5649 0 : dfDensity =
5650 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
5651 : }
5652 : else
5653 : {
5654 0 : continue;
5655 : }
5656 : }
5657 : }
5658 :
5659 59882200 : if (poWK->panUnifiedSrcValid != nullptr &&
5660 29628900 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
5661 : {
5662 29531300 : if (!bOneSourceCornerFailsToReproject)
5663 : {
5664 29529100 : continue;
5665 : }
5666 2229 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
5667 : {
5668 0 : continue;
5669 : }
5670 : }
5671 :
5672 : /* ====================================================================
5673 : */
5674 : /* Loop processing each band. */
5675 : /* ====================================================================
5676 : */
5677 724104 : bool bHasFoundDensity = false;
5678 :
5679 724104 : const GPtrDiff_t iDstOffset =
5680 724104 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
5681 2039260 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5682 : {
5683 1315160 : double dfBandDensity = 0.0;
5684 1315160 : double dfValueReal = 0.0;
5685 :
5686 : /* --------------------------------------------------------------------
5687 : */
5688 : /* Collect the source value. */
5689 : /* --------------------------------------------------------------------
5690 : */
// A single pixel fetch is used for nearest neighbour, and also whenever the
// source chunk is only one pixel wide or one pixel high.
5691 1315160 : if (poWK->eResample == GRA_NearestNeighbour || nSrcXSize == 1 ||
5692 : nSrcYSize == 1)
5693 : {
5694 : // FALSE is returned if dfBandDensity == 0, which is
5695 : // checked below.
5696 1092 : CPL_IGNORE_RET_VAL(GWKGetPixelValueReal(
5697 : poWK, iBand, iSrcOffset, &dfBandDensity, &dfValueReal));
5698 : }
5699 1314070 : else if (poWK->eResample == GRA_Bilinear && bUse4SamplesFormula)
5700 : {
// This resampler also outputs an imaginary part; it is discarded here.
5701 1086 : double dfValueImagIgnored = 0.0;
5702 1086 : GWKBilinearResample4Sample(
5703 1086 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5704 1086 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5705 1086 : &dfValueReal, &dfValueImagIgnored);
5706 : }
5707 1312980 : else if (poWK->eResample == GRA_Cubic && bUse4SamplesFormula)
5708 : {
5709 691552 : if (bSrcMaskIsDensity)
5710 : {
// Typed template variants for GDT_Byte and GDT_UInt16 working data types,
// untyped routine for everything else.
5711 389755 : if (poWK->eWorkingDataType == GDT_Byte)
5712 : {
5713 389755 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<GByte>(
5714 389755 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5715 389755 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5716 : &dfValueReal);
5717 : }
5718 0 : else if (poWK->eWorkingDataType == GDT_UInt16)
5719 : {
5720 : GWKCubicResampleSrcMaskIsDensity4SampleRealT<
5721 0 : GUInt16>(poWK, iBand,
5722 0 : padfX[iDstX] - poWK->nSrcXOff,
5723 0 : padfY[iDstX] - poWK->nSrcYOff,
5724 : &dfBandDensity, &dfValueReal);
5725 : }
5726 : else
5727 : {
5728 0 : GWKCubicResampleSrcMaskIsDensity4SampleReal(
5729 0 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5730 0 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5731 : &dfValueReal);
5732 : }
5733 : }
5734 : else
5735 : {
5736 301797 : double dfValueImagIgnored = 0.0;
5737 301797 : GWKCubicResample4Sample(
5738 301797 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5739 301797 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5740 : &dfValueReal, &dfValueImagIgnored);
5741 691552 : }
5742 : }
5743 : else
5744 : #ifdef DEBUG
5745 : // Only useful for clang static analyzer.
5746 621431 : if (psWrkStruct != nullptr)
5747 : #endif
5748 : {
5749 621431 : double dfValueImagIgnored = 0.0;
5750 621431 : psWrkStruct->pfnGWKResample(
5751 621431 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
5752 621431 : padfY[iDstX] - poWK->nSrcYOff, &dfBandDensity,
5753 : &dfValueReal, &dfValueImagIgnored, psWrkStruct);
5754 : }
5755 :
5756 : // If we didn't find any valid inputs skip to next band.
5757 1315160 : if (dfBandDensity < BAND_DENSITY_THRESHOLD)
5758 0 : continue;
5759 :
5760 1315160 : if (poWK->bApplyVerticalShift)
5761 : {
// A non-finite Z from the transformer means no usable shift: skip the band.
5762 0 : if (!std::isfinite(padfZ[iDstX]))
5763 0 : continue;
5764 : // Subtract padfZ[] since the coordinate transformation is
5765 : // from target to source
5766 0 : dfValueReal =
5767 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
5768 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
5769 : }
5770 :
5771 1315160 : bHasFoundDensity = true;
5772 :
5773 : /* --------------------------------------------------------------------
5774 : */
5775 : /* We have a computed value from the source. Now apply it
5776 : * to */
5777 : /* the destination pixel. */
5778 : /* --------------------------------------------------------------------
5779 : */
5780 1315160 : GWKSetPixelValueReal(poWK, iBand, iDstOffset, dfBandDensity,
5781 : dfValueReal);
5782 : }
5783 :
// No band produced a usable value: leave the destination masks untouched.
5784 724104 : if (!bHasFoundDensity)
5785 0 : continue;
5786 :
5787 : /* --------------------------------------------------------------------
5788 : */
5789 : /* Update destination density/validity masks. */
5790 : /* --------------------------------------------------------------------
5791 : */
5792 724104 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
5793 :
5794 724104 : if (poWK->panDstValid != nullptr)
5795 : {
5796 103866 : CPLMaskSet(poWK->panDstValid, iDstOffset);
5797 : }
5798 : } // Next iDstX.
5799 :
5800 : /* --------------------------------------------------------------------
5801 : */
5802 : /* Report progress to the user, and optionally cancel out. */
5803 : /* --------------------------------------------------------------------
5804 : */
// A non-zero return from the progress callback stops the line loop; cleanup
// below still runs.
5805 24794 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
5806 0 : break;
5807 : }
5808 :
5809 : /* -------------------------------------------------------------------- */
5810 : /* Cleanup and return. */
5811 : /* -------------------------------------------------------------------- */
5812 158 : CPLFree(padfX);
5813 158 : CPLFree(padfY);
5814 158 : CPLFree(padfZ);
5815 158 : CPLFree(pabSuccess);
5816 158 : if (psWrkStruct)
5817 137 : GWKResampleDeleteWrkStruct(psWrkStruct);
5818 158 : }
5819 :
5820 158 : static CPLErr GWKRealCase(GDALWarpKernel *poWK)
5821 : {
5822 158 : return GWKRun(poWK, "GWKRealCase", GWKRealCaseThread);
5823 : }
5824 :
5825 : /************************************************************************/
5826 : /* GWKCubicResampleNoMasks4MultiBandT() */
5827 : /************************************************************************/
5828 :
5829 : /* We restrict to 64bit processors because they are guaranteed to have SSE2 */
5830 : /* and enough SSE registers */
5831 : #if defined(USE_SSE2)
5832 :
5833 94342100 : static inline float Convolute4x4(const __m128 row0, const __m128 row1,
5834 : const __m128 row2, const __m128 row3,
5835 : const __m128 weightsXY0,
5836 : const __m128 weightsXY1,
5837 : const __m128 weightsXY2,
5838 : const __m128 weightsXY3)
5839 : {
5840 660394000 : return XMMHorizontalAdd(_mm_add_ps(
5841 : _mm_add_ps(_mm_mul_ps(row0, weightsXY0), _mm_mul_ps(row1, weightsXY1)),
5842 : _mm_add_ps(_mm_mul_ps(row2, weightsXY2),
5843 94922100 : _mm_mul_ps(row3, weightsXY3))));
5844 : }
5845 :
5846 : template <class T>
5847 32790977 : static void GWKCubicResampleNoMasks4MultiBandT(const GDALWarpKernel *poWK,
5848 : double dfSrcX, double dfSrcY,
5849 : const GPtrDiff_t iDstOffset)
5850 : {
5851 32790977 : const double dfSrcXShifted = dfSrcX - 0.5;
5852 32790977 : const int iSrcX = static_cast<int>(dfSrcXShifted);
5853 32790977 : const double dfSrcYShifted = dfSrcY - 0.5;
5854 32790977 : const int iSrcY = static_cast<int>(dfSrcYShifted);
5855 32790977 : const GPtrDiff_t iSrcOffset =
5856 32790977 : iSrcX + static_cast<GPtrDiff_t>(iSrcY) * poWK->nSrcXSize;
5857 :
5858 : // Get the bilinear interpolation at the image borders.
5859 32790977 : if (iSrcX - 1 < 0 || iSrcX + 2 >= poWK->nSrcXSize || iSrcY - 1 < 0 ||
5860 31898077 : iSrcY + 2 >= poWK->nSrcYSize)
5861 : {
5862 3598480 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
5863 : {
5864 : T value;
5865 2702950 : GWKBilinearResampleNoMasks4SampleT(poWK, iBand, dfSrcX, dfSrcY,
5866 : &value);
5867 2739240 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
5868 : value;
5869 895531 : }
5870 : }
5871 : else
5872 : {
5873 31931777 : const float fDeltaX = static_cast<float>(dfSrcXShifted) - iSrcX;
5874 31931777 : const float fDeltaY = static_cast<float>(dfSrcYShifted) - iSrcY;
5875 :
5876 : float afCoeffsX[4];
5877 : float afCoeffsY[4];
5878 31931777 : GWKCubicComputeWeights(fDeltaX, afCoeffsX);
5879 31590977 : GWKCubicComputeWeights(fDeltaY, afCoeffsY);
5880 31918977 : const auto weightsX = _mm_loadu_ps(afCoeffsX);
5881 : const auto weightsXY0 =
5882 63837854 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[0]), weightsX);
5883 : const auto weightsXY1 =
5884 63837854 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[1]), weightsX);
5885 : const auto weightsXY2 =
5886 63837854 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[2]), weightsX);
5887 : const auto weightsXY3 =
5888 31918977 : _mm_mul_ps(_mm_load1_ps(&afCoeffsY[3]), weightsX);
5889 :
5890 31918977 : const GPtrDiff_t iOffset = iSrcOffset - poWK->nSrcXSize - 1;
5891 :
5892 31918977 : int iBand = 0;
5893 : // Process 2 bands at a time
5894 63442954 : for (; iBand + 1 < poWK->nBands; iBand += 2)
5895 : {
5896 31872177 : const T *CPL_RESTRICT pBand0 =
5897 31872177 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5898 31872177 : const auto row0_0 = XMMLoad4Values(pBand0 + iOffset);
5899 : const auto row1_0 =
5900 31797977 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5901 : const auto row2_0 =
5902 31844377 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5903 : const auto row3_0 =
5904 31657877 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5905 :
5906 31823877 : const T *CPL_RESTRICT pBand1 =
5907 31823877 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand + 1]);
5908 31823877 : const auto row0_1 = XMMLoad4Values(pBand1 + iOffset);
5909 : const auto row1_1 =
5910 31836577 : XMMLoad4Values(pBand1 + iOffset + poWK->nSrcXSize);
5911 : const auto row2_1 =
5912 31878477 : XMMLoad4Values(pBand1 + iOffset + 2 * poWK->nSrcXSize);
5913 : const auto row3_1 =
5914 31690177 : XMMLoad4Values(pBand1 + iOffset + 3 * poWK->nSrcXSize);
5915 :
5916 : const float fValue_0 =
5917 31767377 : Convolute4x4(row0_0, row1_0, row2_0, row3_0, weightsXY0,
5918 : weightsXY1, weightsXY2, weightsXY3);
5919 :
5920 : const float fValue_1 =
5921 31833377 : Convolute4x4(row0_1, row1_1, row2_1, row3_1, weightsXY0,
5922 : weightsXY1, weightsXY2, weightsXY3);
5923 :
5924 31852277 : T *CPL_RESTRICT pDstBand0 =
5925 31852277 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5926 31852277 : pDstBand0[iDstOffset] = GWKClampValueT<T>(fValue_0);
5927 :
5928 31559977 : T *CPL_RESTRICT pDstBand1 =
5929 31559977 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand + 1]);
5930 31559977 : pDstBand1[iDstOffset] = GWKClampValueT<T>(fValue_1);
5931 : }
5932 31570777 : if (iBand < poWK->nBands)
5933 : {
5934 31656777 : const T *CPL_RESTRICT pBand0 =
5935 31656777 : reinterpret_cast<const T *>(poWK->papabySrcImage[iBand]);
5936 31656777 : const auto row0 = XMMLoad4Values(pBand0 + iOffset);
5937 : const auto row1 =
5938 31773177 : XMMLoad4Values(pBand0 + iOffset + poWK->nSrcXSize);
5939 : const auto row2 =
5940 31831777 : XMMLoad4Values(pBand0 + iOffset + 2 * poWK->nSrcXSize);
5941 : const auto row3 =
5942 31634277 : XMMLoad4Values(pBand0 + iOffset + 3 * poWK->nSrcXSize);
5943 :
5944 : const float fValue =
5945 31768377 : Convolute4x4(row0, row1, row2, row3, weightsXY0, weightsXY1,
5946 : weightsXY2, weightsXY3);
5947 :
5948 31843477 : T *CPL_RESTRICT pDstBand =
5949 31843477 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand]);
5950 31843477 : pDstBand[iDstOffset] = GWKClampValueT<T>(fValue);
5951 : }
5952 : }
5953 :
5954 32558977 : if (poWK->pafDstDensity)
5955 32471236 : poWK->pafDstDensity[iDstOffset] = 1.0f;
5956 32558977 : }
5957 :
5958 : #endif // defined(USE_SSE2)
5959 :
5960 : /************************************************************************/
5961 : /* GWKResampleNoMasksOrDstDensityOnlyThreadInternal() */
5962 : /************************************************************************/
5963 :
5964 : template <class T, GDALResampleAlg eResample, int bUse4SamplesFormula>
5965 1770 : static void GWKResampleNoMasksOrDstDensityOnlyThreadInternal(void *pData)
5966 :
5967 : {
5968 1770 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
5969 1770 : GDALWarpKernel *poWK = psJob->poWK;
5970 1770 : const int iYMin = psJob->iYMin;
5971 1770 : const int iYMax = psJob->iYMax;
5972 1752 : const double dfMultFactorVerticalShiftPipeline =
5973 1770 : poWK->bApplyVerticalShift
5974 18 : ? CPLAtof(CSLFetchNameValueDef(
5975 18 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
5976 : "1.0"))
5977 : : 0.0;
5978 :
5979 1770 : const int nDstXSize = poWK->nDstXSize;
5980 1770 : const int nSrcXSize = poWK->nSrcXSize;
5981 1770 : const int nSrcYSize = poWK->nSrcYSize;
5982 :
5983 : /* -------------------------------------------------------------------- */
5984 : /* Allocate x,y,z coordinate arrays for transformation ... one */
5985 : /* scanlines worth of positions. */
5986 : /* -------------------------------------------------------------------- */
5987 :
5988 : // For x, 2 *, because we cache the precomputed values at the end.
5989 : double *padfX =
5990 1770 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
5991 : double *padfY =
5992 1769 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5993 : double *padfZ =
5994 1771 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
5995 1770 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
5996 :
5997 1771 : const int nXRadius = poWK->nXRadius;
5998 : double *padfWeightsX =
5999 1771 : static_cast<double *>(CPLCalloc(1 + nXRadius * 2, sizeof(double)));
6000 : double *padfWeightsY = static_cast<double *>(
6001 1771 : CPLCalloc(1 + poWK->nYRadius * 2, sizeof(double)));
6002 1771 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6003 1771 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6004 1770 : const double dfErrorThreshold = CPLAtof(
6005 1770 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6006 :
6007 : // Precompute values.
6008 401749 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6009 399978 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6010 :
6011 : /* ==================================================================== */
6012 : /* Loop over output lines. */
6013 : /* ==================================================================== */
6014 276842 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6015 : {
6016 : /* --------------------------------------------------------------------
6017 : */
6018 : /* Setup points to transform to source image space. */
6019 : /* --------------------------------------------------------------------
6020 : */
6021 275069 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6022 275069 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6023 94308759 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6024 94033620 : padfY[iDstX] = dfY;
6025 275069 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6026 :
6027 : /* --------------------------------------------------------------------
6028 : */
6029 : /* Transform the points from destination pixel/line coordinates */
6030 : /* to source pixel/line coordinates. */
6031 : /* --------------------------------------------------------------------
6032 : */
6033 275069 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6034 : padfY, padfZ, pabSuccess);
6035 275074 : if (dfSrcCoordPrecision > 0.0)
6036 : {
6037 1000 : GWKRoundSourceCoordinates(
6038 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6039 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6040 1000 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6041 : }
6042 :
6043 : /* ====================================================================
6044 : */
6045 : /* Loop over pixels in output scanline. */
6046 : /* ====================================================================
6047 : */
6048 94028719 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6049 : {
6050 93753680 : GPtrDiff_t iSrcOffset = 0;
6051 93753680 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6052 : padfX, padfY, nSrcXSize,
6053 : nSrcYSize, iSrcOffset))
6054 42799239 : continue;
6055 :
6056 : /* ====================================================================
6057 : */
6058 : /* Loop processing each band. */
6059 : /* ====================================================================
6060 : */
6061 83701273 : const GPtrDiff_t iDstOffset =
6062 83701273 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6063 :
6064 : #if defined(USE_SSE2)
6065 : if constexpr (bUse4SamplesFormula && eResample == GRA_Cubic &&
6066 : (std::is_same<T, GByte>::value ||
6067 : std::is_same<T, GUInt16>::value))
6068 : {
6069 33889776 : if (poWK->nBands > 1 && !poWK->bApplyVerticalShift)
6070 : {
6071 32838177 : GWKCubicResampleNoMasks4MultiBandT<T>(
6072 32838177 : poWK, padfX[iDstX] - poWK->nSrcXOff,
6073 32838177 : padfY[iDstX] - poWK->nSrcYOff, iDstOffset);
6074 :
6075 32579677 : continue;
6076 : }
6077 : }
6078 : #endif // defined(USE_SSE2)
6079 :
6080 50863024 : [[maybe_unused]] double dfInvWeights = 0;
6081 144580088 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6082 : {
6083 93625804 : T value = 0;
6084 : if constexpr (eResample == GRA_NearestNeighbour)
6085 : {
6086 78467930 : value = reinterpret_cast<T *>(
6087 78467930 : poWK->papabySrcImage[iBand])[iSrcOffset];
6088 : }
6089 : else if constexpr (bUse4SamplesFormula)
6090 : {
6091 : if constexpr (eResample == GRA_Bilinear)
6092 3363189 : GWKBilinearResampleNoMasks4SampleT(
6093 3363189 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6094 3363189 : padfY[iDstX] - poWK->nSrcYOff, &value);
6095 : else
6096 2231485 : GWKCubicResampleNoMasks4SampleT(
6097 2231485 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6098 2231485 : padfY[iDstX] - poWK->nSrcYOff, &value);
6099 : }
6100 : else
6101 : {
6102 9563200 : GWKResampleNoMasksT(
6103 9563200 : poWK, iBand, padfX[iDstX] - poWK->nSrcXOff,
6104 9563200 : padfY[iDstX] - poWK->nSrcYOff, &value, padfWeightsX,
6105 : padfWeightsY, dfInvWeights);
6106 : }
6107 :
6108 93610814 : if (poWK->bApplyVerticalShift)
6109 : {
6110 818 : if (!std::isfinite(padfZ[iDstX]))
6111 0 : continue;
6112 : // Subtract padfZ[] since the coordinate transformation is
6113 : // from target to source
6114 107166 : value = GWKClampValueT<T>(
6115 818 : double(value) * poWK->dfMultFactorVerticalShift -
6116 818 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6117 : }
6118 :
6119 93717214 : if (poWK->pafDstDensity)
6120 14049274 : poWK->pafDstDensity[iDstOffset] = 1.0f;
6121 :
6122 93717214 : reinterpret_cast<T *>(poWK->papabyDstImage[iBand])[iDstOffset] =
6123 : value;
6124 : }
6125 : }
6126 :
6127 : /* --------------------------------------------------------------------
6128 : */
6129 : /* Report progress to the user, and optionally cancel out. */
6130 : /* --------------------------------------------------------------------
6131 : */
6132 275070 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6133 1 : break;
6134 : }
6135 :
6136 : /* -------------------------------------------------------------------- */
6137 : /* Cleanup and return. */
6138 : /* -------------------------------------------------------------------- */
6139 1774 : CPLFree(padfX);
6140 1771 : CPLFree(padfY);
6141 1771 : CPLFree(padfZ);
6142 1771 : CPLFree(pabSuccess);
6143 1771 : CPLFree(padfWeightsX);
6144 1771 : CPLFree(padfWeightsY);
6145 1771 : }
6146 :
6147 : template <class T, GDALResampleAlg eResample>
6148 987 : static void GWKResampleNoMasksOrDstDensityOnlyThread(void *pData)
6149 : {
6150 987 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6151 : pData);
6152 987 : }
6153 :
6154 : template <class T, GDALResampleAlg eResample>
6155 784 : static void GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread(void *pData)
6156 :
6157 : {
6158 784 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6159 784 : GDALWarpKernel *poWK = psJob->poWK;
6160 : static_assert(eResample == GRA_Bilinear || eResample == GRA_Cubic);
6161 784 : const bool bUse4SamplesFormula =
6162 784 : poWK->dfXScale >= 0.95 && poWK->dfYScale >= 0.95;
6163 784 : if (bUse4SamplesFormula)
6164 681 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, TRUE>(
6165 : pData);
6166 : else
6167 103 : GWKResampleNoMasksOrDstDensityOnlyThreadInternal<T, eResample, FALSE>(
6168 : pData);
6169 784 : }
6170 :
6171 936 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6172 : {
6173 936 : return GWKRun(
6174 : poWK, "GWKNearestNoMasksOrDstDensityOnlyByte",
6175 936 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_NearestNeighbour>);
6176 : }
6177 :
6178 126 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6179 : {
6180 126 : return GWKRun(
6181 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyByte",
6182 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte,
6183 126 : GRA_Bilinear>);
6184 : }
6185 :
6186 612 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6187 : {
6188 612 : return GWKRun(
6189 : poWK, "GWKCubicNoMasksOrDstDensityOnlyByte",
6190 612 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GByte, GRA_Cubic>);
6191 : }
6192 :
6193 9 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6194 : {
6195 9 : return GWKRun(
6196 : poWK, "GWKCubicNoMasksOrDstDensityOnlyFloat",
6197 9 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float, GRA_Cubic>);
6198 : }
6199 :
6200 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6201 :
6202 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6203 : {
6204 : return GWKRun(
6205 : poWK, "GWKCubicNoMasksOrDstDensityOnlyDouble",
6206 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double, GRA_Cubic>);
6207 : }
6208 : #endif
6209 :
6210 12 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyByte(GDALWarpKernel *poWK)
6211 : {
6212 12 : return GWKRun(
6213 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyByte",
6214 12 : GWKResampleNoMasksOrDstDensityOnlyThread<GByte, GRA_CubicSpline>);
6215 : }
6216 :
6217 : /************************************************************************/
6218 : /* GWKNearestByte() */
6219 : /* */
6220 : /* Case for 8bit input data with nearest neighbour resampling */
6221 : /* using valid flags. Should be as fast as possible for this */
6222 : /* particular transformation type. */
6223 : /************************************************************************/
6224 :
6225 421 : template <class T> static void GWKNearestThread(void *pData)
6226 :
6227 : {
6228 421 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
6229 421 : GDALWarpKernel *poWK = psJob->poWK;
6230 421 : const int iYMin = psJob->iYMin;
6231 421 : const int iYMax = psJob->iYMax;
6232 421 : const double dfMultFactorVerticalShiftPipeline =
6233 421 : poWK->bApplyVerticalShift
6234 0 : ? CPLAtof(CSLFetchNameValueDef(
6235 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6236 : "1.0"))
6237 : : 0.0;
6238 :
6239 421 : const int nDstXSize = poWK->nDstXSize;
6240 421 : const int nSrcXSize = poWK->nSrcXSize;
6241 421 : const int nSrcYSize = poWK->nSrcYSize;
6242 :
6243 : /* -------------------------------------------------------------------- */
6244 : /* Allocate x,y,z coordinate arrays for transformation ... one */
6245 : /* scanlines worth of positions. */
6246 : /* -------------------------------------------------------------------- */
6247 :
6248 : // For x, 2 *, because we cache the precomputed values at the end.
6249 : double *padfX =
6250 421 : static_cast<double *>(CPLMalloc(2 * sizeof(double) * nDstXSize));
6251 : double *padfY =
6252 421 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6253 : double *padfZ =
6254 421 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6255 421 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6256 :
6257 421 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6258 421 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6259 421 : const double dfErrorThreshold = CPLAtof(
6260 421 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6261 :
6262 : const bool bOneSourceCornerFailsToReproject =
6263 421 : GWKOneSourceCornerFailsToReproject(psJob);
6264 :
6265 : // Precompute values.
6266 61068 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6267 60647 : padfX[nDstXSize + iDstX] = iDstX + 0.5 + poWK->nDstXOff;
6268 :
6269 : /* ==================================================================== */
6270 : /* Loop over output lines. */
6271 : /* ==================================================================== */
6272 46302 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6273 : {
6274 :
6275 : /* --------------------------------------------------------------------
6276 : */
6277 : /* Setup points to transform to source image space. */
6278 : /* --------------------------------------------------------------------
6279 : */
6280 45881 : memcpy(padfX, padfX + nDstXSize, sizeof(double) * nDstXSize);
6281 45881 : const double dfY = iDstY + 0.5 + poWK->nDstYOff;
6282 9626833 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6283 9580956 : padfY[iDstX] = dfY;
6284 45881 : memset(padfZ, 0, sizeof(double) * nDstXSize);
6285 :
6286 : /* --------------------------------------------------------------------
6287 : */
6288 : /* Transform the points from destination pixel/line coordinates */
6289 : /* to source pixel/line coordinates. */
6290 : /* --------------------------------------------------------------------
6291 : */
6292 45881 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX,
6293 : padfY, padfZ, pabSuccess);
6294 45881 : if (dfSrcCoordPrecision > 0.0)
6295 : {
6296 0 : GWKRoundSourceCoordinates(
6297 : nDstXSize, padfX, padfY, padfZ, pabSuccess, dfSrcCoordPrecision,
6298 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6299 0 : 0.5 + poWK->nDstXOff, iDstY + 0.5 + poWK->nDstYOff);
6300 : }
6301 : /* ====================================================================
6302 : */
6303 : /* Loop over pixels in output scanline. */
6304 : /* ====================================================================
6305 : */
6306 9626833 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6307 : {
6308 9580956 : GPtrDiff_t iSrcOffset = 0;
6309 9580956 : if (!GWKCheckAndComputeSrcOffsets(psJob, pabSuccess, iDstX, iDstY,
6310 : padfX, padfY, nSrcXSize,
6311 : nSrcYSize, iSrcOffset))
6312 2357899 : continue;
6313 :
6314 : /* --------------------------------------------------------------------
6315 : */
6316 : /* Do not try to apply invalid source pixels to the dest. */
6317 : /* --------------------------------------------------------------------
6318 : */
6319 9402154 : if (poWK->panUnifiedSrcValid != nullptr &&
6320 1127315 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6321 : {
6322 48962 : if (!bOneSourceCornerFailsToReproject)
6323 : {
6324 41477 : continue;
6325 : }
6326 7485 : else if (!GWKAdjustSrcOffsetOnEdge(psJob, iSrcOffset))
6327 : {
6328 5224 : continue;
6329 : }
6330 : }
6331 :
6332 : /* --------------------------------------------------------------------
6333 : */
6334 : /* Do not try to apply transparent source pixels to the
6335 : * destination.*/
6336 : /* --------------------------------------------------------------------
6337 : */
6338 8228129 : double dfDensity = 1.0;
6339 :
6340 8228129 : if (poWK->pafUnifiedSrcDensity != nullptr)
6341 : {
6342 1557335 : dfDensity = double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
6343 1557335 : if (dfDensity < SRC_DENSITY_THRESHOLD_DOUBLE)
6344 1005075 : continue;
6345 : }
6346 :
6347 : /* ====================================================================
6348 : */
6349 : /* Loop processing each band. */
6350 : /* ====================================================================
6351 : */
6352 :
6353 7223057 : const GPtrDiff_t iDstOffset =
6354 7223057 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6355 :
6356 17008004 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6357 : {
6358 9784947 : T value = 0;
6359 9784947 : double dfBandDensity = 0.0;
6360 :
6361 : /* --------------------------------------------------------------------
6362 : */
6363 : /* Collect the source value. */
6364 : /* --------------------------------------------------------------------
6365 : */
6366 9784947 : if (GWKGetPixelT(poWK, iBand, iSrcOffset, &dfBandDensity,
6367 : &value))
6368 : {
6369 :
6370 9784937 : if (poWK->bApplyVerticalShift)
6371 : {
6372 0 : if (!std::isfinite(padfZ[iDstX]))
6373 0 : continue;
6374 : // Subtract padfZ[] since the coordinate transformation
6375 : // is from target to source
6376 0 : value = GWKClampValueT<T>(
6377 0 : double(value) * poWK->dfMultFactorVerticalShift -
6378 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline);
6379 : }
6380 :
6381 9784937 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6382 : dfBandDensity, value);
6383 : }
6384 : }
6385 :
6386 : /* --------------------------------------------------------------------
6387 : */
6388 : /* Mark this pixel valid/opaque in the output. */
6389 : /* --------------------------------------------------------------------
6390 : */
6391 7223057 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6392 :
6393 7223057 : if (poWK->panDstValid != nullptr)
6394 : {
6395 5953054 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6396 : }
6397 : } /* Next iDstX */
6398 :
6399 : /* --------------------------------------------------------------------
6400 : */
6401 : /* Report progress to the user, and optionally cancel out. */
6402 : /* --------------------------------------------------------------------
6403 : */
6404 45881 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6405 0 : break;
6406 : }
6407 :
6408 : /* -------------------------------------------------------------------- */
6409 : /* Cleanup and return. */
6410 : /* -------------------------------------------------------------------- */
6411 421 : CPLFree(padfX);
6412 421 : CPLFree(padfY);
6413 421 : CPLFree(padfZ);
6414 421 : CPLFree(pabSuccess);
6415 421 : }
6416 :
6417 341 : static CPLErr GWKNearestByte(GDALWarpKernel *poWK)
6418 : {
6419 341 : return GWKRun(poWK, "GWKNearestByte", GWKNearestThread<GByte>);
6420 : }
6421 :
6422 14 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6423 : {
6424 14 : return GWKRun(
6425 : poWK, "GWKNearestNoMasksOrDstDensityOnlyShort",
6426 14 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_NearestNeighbour>);
6427 : }
6428 :
6429 5 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6430 : {
6431 5 : return GWKRun(
6432 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyShort",
6433 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16,
6434 5 : GRA_Bilinear>);
6435 : }
6436 :
6437 6 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6438 : {
6439 6 : return GWKRun(
6440 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyUShort",
6441 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16,
6442 6 : GRA_Bilinear>);
6443 : }
6444 :
6445 4 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6446 : {
6447 4 : return GWKRun(
6448 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyFloat",
6449 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<float,
6450 4 : GRA_Bilinear>);
6451 : }
6452 :
6453 : #ifdef INSTANTIATE_FLOAT64_SSE2_IMPL
6454 :
6455 : static CPLErr GWKBilinearNoMasksOrDstDensityOnlyDouble(GDALWarpKernel *poWK)
6456 : {
6457 : return GWKRun(
6458 : poWK, "GWKBilinearNoMasksOrDstDensityOnlyDouble",
6459 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<double,
6460 : GRA_Bilinear>);
6461 : }
6462 : #endif
6463 :
6464 5 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6465 : {
6466 5 : return GWKRun(
6467 : poWK, "GWKCubicNoMasksOrDstDensityOnlyShort",
6468 5 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GInt16, GRA_Cubic>);
6469 : }
6470 :
6471 14 : static CPLErr GWKCubicNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6472 : {
6473 14 : return GWKRun(
6474 : poWK, "GWKCubicNoMasksOrDstDensityOnlyUShort",
6475 14 : GWKResampleNoMasksOrDstDensityOnlyHas4SampleThread<GUInt16, GRA_Cubic>);
6476 : }
6477 :
6478 6 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyShort(GDALWarpKernel *poWK)
6479 : {
6480 6 : return GWKRun(
6481 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyShort",
6482 6 : GWKResampleNoMasksOrDstDensityOnlyThread<GInt16, GRA_CubicSpline>);
6483 : }
6484 :
6485 5 : static CPLErr GWKCubicSplineNoMasksOrDstDensityOnlyUShort(GDALWarpKernel *poWK)
6486 : {
6487 5 : return GWKRun(
6488 : poWK, "GWKCubicSplineNoMasksOrDstDensityOnlyUShort",
6489 5 : GWKResampleNoMasksOrDstDensityOnlyThread<GUInt16, GRA_CubicSpline>);
6490 : }
6491 :
6492 26 : static CPLErr GWKNearestShort(GDALWarpKernel *poWK)
6493 : {
6494 26 : return GWKRun(poWK, "GWKNearestShort", GWKNearestThread<GInt16>);
6495 : }
6496 :
6497 6 : static CPLErr GWKNearestUnsignedShort(GDALWarpKernel *poWK)
6498 : {
6499 6 : return GWKRun(poWK, "GWKNearestUnsignedShort", GWKNearestThread<GUInt16>);
6500 : }
6501 :
6502 11 : static CPLErr GWKNearestNoMasksOrDstDensityOnlyFloat(GDALWarpKernel *poWK)
6503 : {
6504 11 : return GWKRun(
6505 : poWK, "GWKNearestNoMasksOrDstDensityOnlyFloat",
6506 11 : GWKResampleNoMasksOrDstDensityOnlyThread<float, GRA_NearestNeighbour>);
6507 : }
6508 :
6509 44 : static CPLErr GWKNearestFloat(GDALWarpKernel *poWK)
6510 : {
6511 44 : return GWKRun(poWK, "GWKNearestFloat", GWKNearestThread<float>);
6512 : }
6513 :
6514 : /************************************************************************/
6515 : /* GWKAverageOrMode() */
6516 : /* */
6517 : /************************************************************************/
6518 :
/* Vertical weight of source row iSrcY within the window
 * [iSrcYMin, iSrcYMax) covering the real interval [dfYMin, dfYMax].
 *
 * Relies on iSrcYMin, iSrcYMax, dfYMin and dfYMax being in scope at the
 * expansion site.
 *  - First row: 1.0 if the window is a single row, otherwise the fraction of
 *    that row at or after dfYMin, i.e. 1 - (dfYMin - iSrcYMin).
 *  - Last row: the fraction of that row before dfYMax, i.e.
 *    1 - (iSrcYMax - dfYMax).
 *  - Any other row: 1.0.
 *
 * The iSrcY argument is parenthesized so that expressions with low-precedence
 * operators (e.g. a conditional expression) expand correctly. It may be
 * evaluated twice, so it must not have side effects.
 */
#define COMPUTE_WEIGHT_Y(iSrcY)                                                \
    (((iSrcY) == iSrcYMin)                                                     \
         ? ((iSrcYMin + 1 == iSrcYMax) ? 1.0 : 1 - (dfYMin - iSrcYMin))        \
     : ((iSrcY) + 1 == iSrcYMax) ? 1 - (iSrcYMax - dfYMax)                     \
                                 : 1.0)
6524 :
/* Weight of source column iSrcX within the window [iSrcXMin, iSrcXMax)
 * covering the real interval [dfXMin, dfXMax], scaled by the row weight
 * dfWeightY.
 *
 * Relies on iSrcXMin, iSrcXMax, dfXMin and dfXMax being in scope at the
 * expansion site.
 *  - First column: dfWeightY if the window is a single column, otherwise
 *    dfWeightY * (1 - (dfXMin - iSrcXMin)).
 *  - Last column: dfWeightY * (1 - (iSrcXMax - dfXMax)).
 *  - Any other column: dfWeightY.
 *
 * Both arguments are parenthesized so that compound expressions (e.g.
 * "a + b" passed as dfWeightY) are multiplied as a whole. iSrcX may be
 * evaluated twice, so it must not have side effects.
 */
#define COMPUTE_WEIGHT(iSrcX, dfWeightY)                                       \
    (((iSrcX) == iSrcXMin) ? ((iSrcXMin + 1 == iSrcXMax)                       \
                                  ? (dfWeightY)                                \
                                  : (dfWeightY) * (1 - (dfXMin - iSrcXMin)))   \
     : ((iSrcX) + 1 == iSrcXMax) ? (dfWeightY) * (1 - (iSrcXMax - dfXMax))     \
                                 : (dfWeightY))
6531 :
6532 : static void GWKAverageOrModeThread(void *pData);
6533 :
6534 163 : static CPLErr GWKAverageOrMode(GDALWarpKernel *poWK)
6535 : {
6536 163 : return GWKRun(poWK, "GWKAverageOrMode", GWKAverageOrModeThread);
6537 : }
6538 :
6539 : /************************************************************************/
6540 : /* GWKAverageOrModeComputeLineCoords() */
6541 : /************************************************************************/
6542 :
6543 8183 : static void GWKAverageOrModeComputeLineCoords(
6544 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6545 : double *padfY2, double *padfZ, double *padfZ2, int *pabSuccess,
6546 : int *pabSuccess2, int iDstY, double dfSrcCoordPrecision,
6547 : double dfErrorThreshold)
6548 : {
6549 8183 : const GDALWarpKernel *poWK = psJob->poWK;
6550 8183 : const int nDstXSize = poWK->nDstXSize;
6551 :
6552 : // Setup points to transform to source image space.
6553 2097530 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6554 : {
6555 2089340 : padfX[iDstX] = iDstX + poWK->nDstXOff;
6556 2089340 : padfY[iDstX] = iDstY + poWK->nDstYOff;
6557 2089340 : padfZ[iDstX] = 0.0;
6558 2089340 : padfX2[iDstX] = iDstX + 1.0 + poWK->nDstXOff;
6559 2089340 : padfY2[iDstX] = iDstY + 1.0 + poWK->nDstYOff;
6560 2089340 : padfZ2[iDstX] = 0.0;
6561 : }
6562 :
6563 : /* ----------------------------------------------------------------- */
6564 : /* Transform the points from destination pixel/line coordinates */
6565 : /* to source pixel/line coordinates. */
6566 : /* ----------------------------------------------------------------- */
6567 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX, padfY,
6568 : padfZ, pabSuccess);
6569 8183 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, nDstXSize, padfX2,
6570 : padfY2, padfZ2, pabSuccess2);
6571 :
6572 8183 : if (dfSrcCoordPrecision > 0.0)
6573 : {
6574 0 : GWKRoundSourceCoordinates(nDstXSize, padfX, padfY, padfZ, pabSuccess,
6575 : dfSrcCoordPrecision, dfErrorThreshold,
6576 0 : poWK->pfnTransformer, psJob->pTransformerArg,
6577 0 : poWK->nDstXOff, iDstY + poWK->nDstYOff);
6578 0 : GWKRoundSourceCoordinates(
6579 : nDstXSize, padfX2, padfY2, padfZ2, pabSuccess2, dfSrcCoordPrecision,
6580 0 : dfErrorThreshold, poWK->pfnTransformer, psJob->pTransformerArg,
6581 0 : 1.0 + poWK->nDstXOff, iDstY + 1.0 + poWK->nDstYOff);
6582 : }
6583 8183 : }
6584 :
6585 : /************************************************************************/
6586 : /* GWKAverageOrModeComputeSourceCoords() */
6587 : /************************************************************************/
6588 :
6589 2089340 : static bool GWKAverageOrModeComputeSourceCoords(
6590 : const GWKJobStruct *psJob, double *padfX, double *padfX2, double *padfY,
6591 : double *padfY2, int iDstX, int iDstY, int nXMargin, int nYMargin,
6592 : // Output:
6593 : bool &bWrapOverX, double &dfXMin, double &dfYMin, double &dfXMax,
6594 : double &dfYMax, int &iSrcXMin, int &iSrcYMin, int &iSrcXMax, int &iSrcYMax)
6595 : {
6596 2089340 : const GDALWarpKernel *poWK = psJob->poWK;
6597 2089340 : const int nSrcXSize = poWK->nSrcXSize;
6598 2089340 : const int nSrcYSize = poWK->nSrcYSize;
6599 :
6600 : // Add some checks so that padfX[iDstX] - poWK->nSrcXOff is in
6601 : // reasonable range (https://github.com/OSGeo/gdal/issues/2365)
6602 2089340 : if (!(padfX[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6603 1991690 : padfX2[iDstX] - poWK->nSrcXOff >= -nXMargin &&
6604 1991690 : padfY[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6605 1965300 : padfY2[iDstX] - poWK->nSrcYOff >= -nYMargin &&
6606 1965300 : padfX[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6607 1911930 : padfX2[iDstX] - poWK->nSrcXOff - nSrcXSize <= nXMargin &&
6608 1911430 : padfY[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin &&
6609 1910040 : padfY2[iDstX] - poWK->nSrcYOff - nSrcYSize <= nYMargin))
6610 : {
6611 179362 : return false;
6612 : }
6613 :
6614 : // Compute corners in source crs.
6615 :
6616 : // The transformation might not have preserved ordering of
6617 : // coordinates so do the necessary swapping (#5433).
6618 : // NOTE: this is really an approximative fix. To do something
6619 : // more precise we would for example need to compute the
6620 : // transformation of coordinates in the
6621 : // [iDstX,iDstY]x[iDstX+1,iDstY+1] square back to source
6622 : // coordinates, and take the bounding box of the got source
6623 : // coordinates.
6624 :
6625 1909980 : if (padfX[iDstX] > padfX2[iDstX])
6626 268744 : std::swap(padfX[iDstX], padfX2[iDstX]);
6627 :
6628 : // Detect situations where the target pixel is close to the
6629 : // antimeridian and when padfX[iDstX] and padfX2[iDstX] are very
6630 : // close to the left-most and right-most columns of the source
6631 : // raster. The 2 value below was experimentally determined to
6632 : // avoid false-positives and false-negatives.
6633 : // Addresses https://github.com/OSGeo/gdal/issues/6478
6634 1909980 : bWrapOverX = false;
6635 1909980 : const int nThresholdWrapOverX = std::min(2, nSrcXSize / 10);
6636 1909980 : if (poWK->nSrcXOff == 0 &&
6637 1909980 : padfX[iDstX] * poWK->dfXScale < nThresholdWrapOverX &&
6638 16499 : (nSrcXSize - padfX2[iDstX]) * poWK->dfXScale < nThresholdWrapOverX)
6639 : {
6640 : // Check there is a discontinuity by checking at mid-pixel.
6641 : // NOTE: all this remains fragile. To confidently
6642 : // detect antimeridian warping we should probably try to access
6643 : // georeferenced coordinates, and not rely only on tests on
6644 : // image space coordinates. But accessing georeferenced
6645 : // coordinates from here is not trivial, and we would for example
6646 : // have to handle both geographic, Mercator, etc.
6647 : // Let's hope this heuristics is good enough for now.
6648 1041 : double x = iDstX + 0.5 + poWK->nDstXOff;
6649 1041 : double y = iDstY + poWK->nDstYOff;
6650 1041 : double z = 0;
6651 1041 : int bSuccess = FALSE;
6652 1041 : poWK->pfnTransformer(psJob->pTransformerArg, TRUE, 1, &x, &y, &z,
6653 : &bSuccess);
6654 1041 : if (bSuccess && x < padfX[iDstX])
6655 : {
6656 1008 : bWrapOverX = true;
6657 1008 : std::swap(padfX[iDstX], padfX2[iDstX]);
6658 1008 : padfX2[iDstX] += nSrcXSize;
6659 : }
6660 : }
6661 :
6662 1909980 : dfXMin = padfX[iDstX] - poWK->nSrcXOff;
6663 1909980 : dfXMax = padfX2[iDstX] - poWK->nSrcXOff;
6664 1909980 : constexpr double EPSILON = 1e-10;
6665 : // Check that [dfXMin, dfXMax] intersect with [0,nSrcXSize] with a tolerance
6666 1909980 : if (!(dfXMax > -EPSILON && dfXMin < nSrcXSize + EPSILON))
6667 156 : return false;
6668 1909830 : iSrcXMin = static_cast<int>(std::max(floor(dfXMin + EPSILON), 0.0));
6669 1909830 : iSrcXMax = static_cast<int>(
6670 1909830 : std::min(ceil(dfXMax - EPSILON), static_cast<double>(INT_MAX)));
6671 1909830 : if (!bWrapOverX)
6672 1908820 : iSrcXMax = std::min(iSrcXMax, nSrcXSize);
6673 1909830 : if (iSrcXMin == iSrcXMax && iSrcXMax < nSrcXSize)
6674 472 : iSrcXMax++;
6675 :
6676 1909830 : if (padfY[iDstX] > padfY2[iDstX])
6677 270117 : std::swap(padfY[iDstX], padfY2[iDstX]);
6678 1909830 : dfYMin = padfY[iDstX] - poWK->nSrcYOff;
6679 1909830 : dfYMax = padfY2[iDstX] - poWK->nSrcYOff;
6680 : // Check that [dfYMin, dfYMax] intersect with [0,nSrcYSize] with a tolerance
6681 1909830 : if (!(dfYMax > -EPSILON && dfYMin < nSrcYSize + EPSILON))
6682 78 : return false;
6683 1909750 : iSrcYMin = static_cast<int>(std::max(floor(dfYMin + EPSILON), 0.0));
6684 1909750 : iSrcYMax = std::min(static_cast<int>(ceil(dfYMax - EPSILON)), nSrcYSize);
6685 1909750 : if (iSrcYMin == iSrcYMax && iSrcYMax < nSrcYSize)
6686 0 : iSrcYMax++;
6687 :
6688 1909750 : return true;
6689 : }
6690 :
6691 : /************************************************************************/
6692 : /* GWKModeRealType() */
6693 : /************************************************************************/
6694 :
/**
 * Generic equality test used when comparing sample values: returns the
 * result of a == b. Floating-point types have dedicated specializations.
 */
template <class T> static inline bool IsSame(T a, T b)
{
    const bool bEqual = (a == b);
    return bEqual;
}
6699 :
6700 0 : template <> bool IsSame<GFloat16>(GFloat16 a, GFloat16 b)
6701 : {
6702 0 : return a == b || (CPLIsNan(a) && CPLIsNan(b));
6703 : }
6704 :
6705 18 : template <> bool IsSame<float>(float a, float b)
6706 : {
6707 18 : return a == b || (std::isnan(a) && std::isnan(b));
6708 : }
6709 :
6710 56 : template <> bool IsSame<double>(double a, double b)
6711 : {
6712 56 : return a == b || (std::isnan(a) && std::isnan(b));
6713 : }
6714 :
6715 19 : template <class T> static void GWKModeRealType(GWKJobStruct *psJob)
6716 : {
6717 19 : const GDALWarpKernel *poWK = psJob->poWK;
6718 19 : const int iYMin = psJob->iYMin;
6719 19 : const int iYMax = psJob->iYMax;
6720 19 : const int nDstXSize = poWK->nDstXSize;
6721 19 : const int nSrcXSize = poWK->nSrcXSize;
6722 19 : const int nSrcYSize = poWK->nSrcYSize;
6723 19 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
6724 :
6725 19 : T *pVals = nullptr;
6726 19 : float *pafCounts = nullptr;
6727 :
6728 19 : if (nSrcXSize > 0 && nSrcYSize > 0)
6729 : {
6730 : pVals = static_cast<T *>(
6731 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(T)));
6732 : pafCounts = static_cast<float *>(
6733 19 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6734 19 : if (pVals == nullptr || pafCounts == nullptr)
6735 : {
6736 0 : VSIFree(pVals);
6737 0 : VSIFree(pafCounts);
6738 0 : return;
6739 : }
6740 : }
6741 :
6742 : /* -------------------------------------------------------------------- */
6743 : /* Allocate x,y,z coordinate arrays for transformation ... two */
6744 : /* scanlines worth of positions. */
6745 : /* -------------------------------------------------------------------- */
6746 :
6747 : double *padfX =
6748 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6749 : double *padfY =
6750 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6751 : double *padfZ =
6752 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6753 : double *padfX2 =
6754 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6755 : double *padfY2 =
6756 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6757 : double *padfZ2 =
6758 19 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
6759 19 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6760 19 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
6761 :
6762 19 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
6763 19 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
6764 19 : const double dfErrorThreshold = CPLAtof(
6765 19 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
6766 :
6767 19 : const int nXMargin =
6768 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
6769 19 : const int nYMargin =
6770 19 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
6771 :
6772 : /* ==================================================================== */
6773 : /* Loop over output lines. */
6774 : /* ==================================================================== */
6775 116 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
6776 : {
6777 97 : GWKAverageOrModeComputeLineCoords(
6778 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
6779 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
6780 :
6781 : // Loop over pixels in output scanline.
6782 3514 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
6783 : {
6784 3417 : GPtrDiff_t iSrcOffset = 0;
6785 3417 : double dfDensity = 1.0;
6786 3417 : bool bHasFoundDensity = false;
6787 :
6788 3417 : bool bWrapOverX = false;
6789 3417 : double dfXMin = 0;
6790 3417 : double dfYMin = 0;
6791 3417 : double dfXMax = 0;
6792 3417 : double dfYMax = 0;
6793 3417 : int iSrcXMin = 0;
6794 3417 : int iSrcYMin = 0;
6795 3417 : int iSrcXMax = 0;
6796 3417 : int iSrcYMax = 0;
6797 3417 : if (!GWKAverageOrModeComputeSourceCoords(
6798 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
6799 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
6800 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
6801 : {
6802 0 : continue;
6803 : }
6804 :
6805 3417 : const GPtrDiff_t iDstOffset =
6806 3417 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
6807 :
6808 : // Loop processing each band.
6809 6834 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
6810 : {
6811 3417 : double dfBandDensity = 0.0;
6812 :
6813 3417 : int nBins = 0;
6814 3417 : int iModeIndex = -1;
6815 3417 : T nVal{};
6816 :
6817 10248 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
6818 : {
6819 6831 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
6820 6831 : iSrcOffset =
6821 6831 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
6822 20530 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
6823 : iSrcX++, iSrcOffset++)
6824 : {
6825 13699 : if (bWrapOverX)
6826 0 : iSrcOffset =
6827 0 : (iSrcX % nSrcXSize) +
6828 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
6829 :
6830 13699 : if (poWK->panUnifiedSrcValid != nullptr &&
6831 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
6832 0 : continue;
6833 :
6834 13699 : if (GWKGetPixelT(poWK, iBand, iSrcOffset,
6835 27398 : &dfBandDensity, &nVal) &&
6836 13699 : dfBandDensity > BAND_DENSITY_THRESHOLD)
6837 : {
6838 13699 : const double dfWeight =
6839 13699 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
6840 :
6841 : // Check array for existing entry.
6842 13699 : int i = 0;
6843 29194 : for (i = 0; i < nBins; ++i)
6844 : {
6845 17807 : if (IsSame(pVals[i], nVal))
6846 : {
6847 :
6848 2312 : pafCounts[i] +=
6849 2312 : static_cast<float>(dfWeight);
6850 2312 : bool bValIsMaxCount =
6851 2312 : (pafCounts[i] > pafCounts[iModeIndex]);
6852 :
6853 2312 : if (!bValIsMaxCount &&
6854 1498 : pafCounts[i] == pafCounts[iModeIndex])
6855 : {
6856 1490 : switch (eTieStrategy)
6857 : {
6858 1477 : case GWKTS_First:
6859 1477 : break;
6860 6 : case GWKTS_Min:
6861 6 : bValIsMaxCount =
6862 6 : nVal < pVals[iModeIndex];
6863 6 : break;
6864 7 : case GWKTS_Max:
6865 7 : bValIsMaxCount =
6866 7 : nVal > pVals[iModeIndex];
6867 7 : break;
6868 : }
6869 : }
6870 :
6871 2312 : if (bValIsMaxCount)
6872 : {
6873 817 : iModeIndex = i;
6874 : }
6875 :
6876 2312 : break;
6877 : }
6878 : }
6879 :
6880 : // Add to arr if entry not already there.
6881 13699 : if (i == nBins)
6882 : {
6883 11387 : pVals[i] = nVal;
6884 11387 : pafCounts[i] = static_cast<float>(dfWeight);
6885 :
6886 11387 : if (iModeIndex < 0)
6887 3417 : iModeIndex = i;
6888 :
6889 11387 : ++nBins;
6890 : }
6891 : }
6892 : }
6893 : }
6894 :
6895 3417 : if (iModeIndex != -1)
6896 : {
6897 3417 : nVal = pVals[iModeIndex];
6898 3417 : dfBandDensity = 1;
6899 3417 : bHasFoundDensity = true;
6900 : }
6901 :
6902 : // We have a computed value from the source. Now apply it
6903 : // to the destination pixel
6904 3417 : if (bHasFoundDensity)
6905 : {
6906 3417 : GWKSetPixelValueRealT(poWK, iBand, iDstOffset,
6907 : dfBandDensity, nVal);
6908 : }
6909 : }
6910 :
6911 3417 : if (!bHasFoundDensity)
6912 0 : continue;
6913 :
6914 : /* --------------------------------------------------------------------
6915 : */
6916 : /* Update destination density/validity masks. */
6917 : /* --------------------------------------------------------------------
6918 : */
6919 3417 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
6920 :
6921 3417 : if (poWK->panDstValid != nullptr)
6922 : {
6923 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
6924 : }
6925 : } /* Next iDstX */
6926 :
6927 : /* --------------------------------------------------------------------
6928 : */
6929 : /* Report progress to the user, and optionally cancel out. */
6930 : /* --------------------------------------------------------------------
6931 : */
6932 97 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
6933 0 : break;
6934 : }
6935 :
6936 : /* -------------------------------------------------------------------- */
6937 : /* Cleanup and return. */
6938 : /* -------------------------------------------------------------------- */
6939 19 : CPLFree(padfX);
6940 19 : CPLFree(padfY);
6941 19 : CPLFree(padfZ);
6942 19 : CPLFree(padfX2);
6943 19 : CPLFree(padfY2);
6944 19 : CPLFree(padfZ2);
6945 19 : CPLFree(pabSuccess);
6946 19 : CPLFree(pabSuccess2);
6947 19 : VSIFree(pVals);
6948 19 : VSIFree(pafCounts);
6949 : }
6950 :
6951 : /************************************************************************/
6952 : /* GWKModeComplexType() */
6953 : /************************************************************************/
6954 :
6955 8 : static void GWKModeComplexType(GWKJobStruct *psJob)
6956 : {
6957 8 : const GDALWarpKernel *poWK = psJob->poWK;
6958 8 : const int iYMin = psJob->iYMin;
6959 8 : const int iYMax = psJob->iYMax;
6960 8 : const int nDstXSize = poWK->nDstXSize;
6961 8 : const int nSrcXSize = poWK->nSrcXSize;
6962 8 : const int nSrcYSize = poWK->nSrcYSize;
6963 8 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
6964 : const double dfMultFactorVerticalShiftPipeline =
6965 8 : poWK->bApplyVerticalShift
6966 8 : ? CPLAtof(CSLFetchNameValueDef(
6967 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
6968 : "1.0"))
6969 8 : : 0.0;
6970 :
6971 8 : double *padfRealVals = nullptr;
6972 8 : double *padfImagVals = nullptr;
6973 8 : float *pafCounts = nullptr;
6974 :
6975 8 : if (nSrcXSize > 0 && nSrcYSize > 0)
6976 : {
6977 : padfRealVals = static_cast<double *>(
6978 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
6979 : padfImagVals = static_cast<double *>(
6980 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(double)));
6981 : pafCounts = static_cast<float *>(
6982 8 : VSI_MALLOC3_VERBOSE(nSrcXSize, nSrcYSize, sizeof(float)));
6983 8 : if (padfRealVals == nullptr || padfImagVals == nullptr ||
6984 : pafCounts == nullptr)
6985 : {
6986 0 : VSIFree(padfRealVals);
6987 0 : VSIFree(padfImagVals);
6988 0 : VSIFree(pafCounts);
6989 0 : return;
6990 : }
6991 : }
6992 :
6993 : /* -------------------------------------------------------------------- */
6994 : /* Allocate x,y,z coordinate arrays for transformation ... two */
6995 : /* scanlines worth of positions. */
6996 : /* -------------------------------------------------------------------- */
6997 :
6998 : double *padfX =
6999 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7000 : double *padfY =
7001 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7002 : double *padfZ =
7003 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7004 : double *padfX2 =
7005 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7006 : double *padfY2 =
7007 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7008 : double *padfZ2 =
7009 8 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7010 8 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7011 8 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7012 :
7013 8 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7014 8 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7015 8 : const double dfErrorThreshold = CPLAtof(
7016 8 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7017 :
7018 : const int nXMargin =
7019 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7020 : const int nYMargin =
7021 8 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7022 :
7023 : /* ==================================================================== */
7024 : /* Loop over output lines. */
7025 : /* ==================================================================== */
7026 16 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7027 : {
7028 8 : GWKAverageOrModeComputeLineCoords(
7029 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7030 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7031 :
7032 : // Loop over pixels in output scanline.
7033 16 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7034 : {
7035 8 : GPtrDiff_t iSrcOffset = 0;
7036 8 : double dfDensity = 1.0;
7037 8 : bool bHasFoundDensity = false;
7038 :
7039 8 : bool bWrapOverX = false;
7040 8 : double dfXMin = 0;
7041 8 : double dfYMin = 0;
7042 8 : double dfXMax = 0;
7043 8 : double dfYMax = 0;
7044 8 : int iSrcXMin = 0;
7045 8 : int iSrcYMin = 0;
7046 8 : int iSrcXMax = 0;
7047 8 : int iSrcYMax = 0;
7048 8 : if (!GWKAverageOrModeComputeSourceCoords(
7049 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7050 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7051 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7052 : {
7053 0 : continue;
7054 : }
7055 :
7056 8 : const GPtrDiff_t iDstOffset =
7057 8 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7058 :
7059 : // Loop processing each band.
7060 16 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7061 : {
7062 8 : double dfBandDensity = 0.0;
7063 :
7064 8 : int nBins = 0;
7065 8 : int iModeIndex = -1;
7066 8 : double dfValueReal = 0;
7067 8 : double dfValueImag = 0;
7068 :
7069 16 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7070 : {
7071 8 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7072 8 : iSrcOffset =
7073 8 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7074 38 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7075 : iSrcX++, iSrcOffset++)
7076 : {
7077 30 : if (bWrapOverX)
7078 0 : iSrcOffset =
7079 0 : (iSrcX % nSrcXSize) +
7080 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7081 :
7082 30 : if (poWK->panUnifiedSrcValid != nullptr &&
7083 0 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7084 0 : continue;
7085 :
7086 30 : if (GWKGetPixelValue(poWK, iBand, iSrcOffset,
7087 : &dfBandDensity, &dfValueReal,
7088 60 : &dfValueImag) &&
7089 30 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7090 : {
7091 30 : const double dfWeight =
7092 30 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7093 :
7094 : // Check array for existing entry.
7095 30 : int i = 0;
7096 49 : for (i = 0; i < nBins; ++i)
7097 : {
7098 47 : if (IsSame(padfRealVals[i], dfValueReal) &&
7099 14 : IsSame(padfImagVals[i], dfValueImag))
7100 : {
7101 :
7102 14 : pafCounts[i] +=
7103 14 : static_cast<float>(dfWeight);
7104 14 : bool bValIsMaxCount =
7105 14 : (pafCounts[i] > pafCounts[iModeIndex]);
7106 :
7107 14 : if (!bValIsMaxCount &&
7108 6 : pafCounts[i] == pafCounts[iModeIndex])
7109 : {
7110 3 : switch (eTieStrategy)
7111 : {
7112 3 : case GWKTS_First:
7113 3 : break;
7114 0 : case GWKTS_Min:
7115 0 : bValIsMaxCount =
7116 0 : dfValueReal <
7117 0 : padfRealVals[iModeIndex];
7118 0 : break;
7119 0 : case GWKTS_Max:
7120 0 : bValIsMaxCount =
7121 0 : dfValueReal >
7122 0 : padfRealVals[iModeIndex];
7123 0 : break;
7124 : }
7125 : }
7126 :
7127 14 : if (bValIsMaxCount)
7128 : {
7129 8 : iModeIndex = i;
7130 : }
7131 :
7132 14 : break;
7133 : }
7134 : }
7135 :
7136 : // Add to arr if entry not already there.
7137 30 : if (i == nBins)
7138 : {
7139 16 : padfRealVals[i] = dfValueReal;
7140 16 : padfImagVals[i] = dfValueImag;
7141 16 : pafCounts[i] = static_cast<float>(dfWeight);
7142 :
7143 16 : if (iModeIndex < 0)
7144 8 : iModeIndex = i;
7145 :
7146 16 : ++nBins;
7147 : }
7148 : }
7149 : }
7150 : }
7151 :
7152 8 : if (iModeIndex != -1)
7153 : {
7154 8 : dfValueReal = padfRealVals[iModeIndex];
7155 8 : dfValueImag = padfImagVals[iModeIndex];
7156 8 : dfBandDensity = 1;
7157 :
7158 8 : if (poWK->bApplyVerticalShift)
7159 : {
7160 0 : if (!std::isfinite(padfZ[iDstX]))
7161 0 : continue;
7162 : // Subtract padfZ[] since the coordinate
7163 : // transformation is from target to source
7164 0 : dfValueReal =
7165 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7166 0 : padfZ[iDstX] * dfMultFactorVerticalShiftPipeline;
7167 : }
7168 :
7169 8 : bHasFoundDensity = true;
7170 : }
7171 :
7172 : // We have a computed value from the source. Now apply it
7173 : // to the destination pixel
7174 8 : if (bHasFoundDensity)
7175 : {
7176 8 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
7177 : dfValueReal, dfValueImag);
7178 : }
7179 : }
7180 :
7181 8 : if (!bHasFoundDensity)
7182 0 : continue;
7183 :
7184 : /* --------------------------------------------------------------------
7185 : */
7186 : /* Update destination density/validity masks. */
7187 : /* --------------------------------------------------------------------
7188 : */
7189 8 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
7190 :
7191 8 : if (poWK->panDstValid != nullptr)
7192 : {
7193 0 : CPLMaskSet(poWK->panDstValid, iDstOffset);
7194 : }
7195 : } /* Next iDstX */
7196 :
7197 : /* --------------------------------------------------------------------
7198 : */
7199 : /* Report progress to the user, and optionally cancel out. */
7200 : /* --------------------------------------------------------------------
7201 : */
7202 8 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
7203 0 : break;
7204 : }
7205 :
7206 : /* -------------------------------------------------------------------- */
7207 : /* Cleanup and return. */
7208 : /* -------------------------------------------------------------------- */
7209 8 : CPLFree(padfX);
7210 8 : CPLFree(padfY);
7211 8 : CPLFree(padfZ);
7212 8 : CPLFree(padfX2);
7213 8 : CPLFree(padfY2);
7214 8 : CPLFree(padfZ2);
7215 8 : CPLFree(pabSuccess);
7216 8 : CPLFree(pabSuccess2);
7217 8 : VSIFree(padfRealVals);
7218 8 : VSIFree(padfImagVals);
7219 8 : VSIFree(pafCounts);
7220 : }
7221 :
7222 : /************************************************************************/
7223 : /* GWKAverageOrModeThread() */
7224 : /************************************************************************/
7225 :
7226 : // Overall logic based on GWKGeneralCaseThread().
7227 163 : static void GWKAverageOrModeThread(void *pData)
7228 : {
7229 163 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
7230 163 : const GDALWarpKernel *poWK = psJob->poWK;
7231 163 : const int iYMin = psJob->iYMin;
7232 163 : const int iYMax = psJob->iYMax;
7233 : const double dfMultFactorVerticalShiftPipeline =
7234 163 : poWK->bApplyVerticalShift
7235 163 : ? CPLAtof(CSLFetchNameValueDef(
7236 0 : poWK->papszWarpOptions, "MULT_FACTOR_VERTICAL_SHIFT_PIPELINE",
7237 : "1.0"))
7238 163 : : 0.0;
7239 :
7240 163 : const int nDstXSize = poWK->nDstXSize;
7241 163 : const int nSrcXSize = poWK->nSrcXSize;
7242 :
7243 : /* -------------------------------------------------------------------- */
7244 : /* Find out which algorithm to use (small optim.) */
7245 : /* -------------------------------------------------------------------- */
7246 :
7247 : // Only used for GRA_Mode
7248 163 : float *pafCounts = nullptr;
7249 163 : int nBins = 0;
7250 163 : int nBinsOffset = 0;
7251 163 : const GWKTieStrategy eTieStrategy = poWK->eTieStrategy;
7252 :
7253 : // Only used with Q1, Med and Q3
7254 163 : float quant = 0.0f;
7255 :
7256 : // To control array allocation only when data type is complex
7257 163 : const bool bIsComplex = GDALDataTypeIsComplex(poWK->eWorkingDataType) != 0;
7258 :
7259 163 : if (poWK->eResample == GRA_Mode)
7260 : {
7261 45 : if (poWK->bApplyVerticalShift)
7262 : {
7263 0 : return GWKModeComplexType(psJob);
7264 : }
7265 :
7266 45 : switch (poWK->eWorkingDataType)
7267 : {
7268 7 : case GDT_Byte:
7269 7 : nBins = 256;
7270 7 : break;
7271 :
7272 0 : case GDT_Int8:
7273 0 : nBins = 256;
7274 0 : nBinsOffset = nBins / 2;
7275 0 : break;
7276 :
7277 1 : case GDT_UInt16:
7278 1 : nBins = 65536;
7279 1 : break;
7280 :
7281 10 : case GDT_Int16:
7282 10 : nBins = 65536;
7283 10 : nBinsOffset = nBins / 2;
7284 10 : break;
7285 :
7286 10 : case GDT_Int32:
7287 10 : return GWKModeRealType<int32_t>(psJob);
7288 :
7289 1 : case GDT_UInt32:
7290 1 : return GWKModeRealType<uint32_t>(psJob);
7291 :
7292 1 : case GDT_Int64:
7293 1 : return GWKModeRealType<int64_t>(psJob);
7294 :
7295 1 : case GDT_UInt64:
7296 1 : return GWKModeRealType<uint64_t>(psJob);
7297 :
7298 0 : case GDT_Float16:
7299 0 : return GWKModeRealType<GFloat16>(psJob);
7300 :
7301 4 : case GDT_Float32:
7302 4 : return GWKModeRealType<float>(psJob);
7303 :
7304 2 : case GDT_Float64:
7305 2 : return GWKModeRealType<double>(psJob);
7306 :
7307 8 : case GDT_CInt16:
7308 : case GDT_CInt32:
7309 : case GDT_CFloat16:
7310 : case GDT_CFloat32:
7311 : case GDT_CFloat64:
7312 8 : return GWKModeComplexType(psJob);
7313 :
7314 0 : case GDT_Unknown:
7315 : case GDT_TypeCount:
7316 0 : CPLAssert(false);
7317 : return;
7318 : }
7319 :
7320 18 : if (nBins)
7321 : {
7322 : pafCounts =
7323 18 : static_cast<float *>(VSI_MALLOC_VERBOSE(nBins * sizeof(float)));
7324 18 : if (pafCounts == nullptr)
7325 0 : return;
7326 : }
7327 : }
7328 118 : else if (poWK->eResample == GRA_Med)
7329 : {
7330 6 : quant = 0.5f;
7331 : }
7332 112 : else if (poWK->eResample == GRA_Q1)
7333 : {
7334 10 : quant = 0.25f;
7335 : }
7336 102 : else if (poWK->eResample == GRA_Q3)
7337 : {
7338 5 : quant = 0.75f;
7339 : }
7340 97 : else if (poWK->eResample != GRA_Average && poWK->eResample != GRA_RMS &&
7341 11 : poWK->eResample != GRA_Min && poWK->eResample != GRA_Max)
7342 : {
7343 : // Other resample algorithms not permitted here.
7344 0 : CPLError(CE_Fatal, CPLE_AppDefined,
7345 : "GDALWarpKernel():GWKAverageOrModeThread() ERROR, "
7346 : "illegal resample");
7347 : }
7348 :
7349 136 : CPLDebug("GDAL", "GDALWarpKernel():GWKAverageOrModeThread()");
7350 :
7351 : /* -------------------------------------------------------------------- */
7352 : /* Allocate x,y,z coordinate arrays for transformation ... two */
7353 : /* scanlines worth of positions. */
7354 : /* -------------------------------------------------------------------- */
7355 :
7356 : double *padfX =
7357 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7358 : double *padfY =
7359 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7360 : double *padfZ =
7361 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7362 : double *padfX2 =
7363 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7364 : double *padfY2 =
7365 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7366 : double *padfZ2 =
7367 136 : static_cast<double *>(CPLMalloc(sizeof(double) * nDstXSize));
7368 136 : int *pabSuccess = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7369 136 : int *pabSuccess2 = static_cast<int *>(CPLMalloc(sizeof(int) * nDstXSize));
7370 :
7371 136 : const double dfSrcCoordPrecision = CPLAtof(CSLFetchNameValueDef(
7372 136 : poWK->papszWarpOptions, "SRC_COORD_PRECISION", "0"));
7373 136 : const double dfErrorThreshold = CPLAtof(
7374 136 : CSLFetchNameValueDef(poWK->papszWarpOptions, "ERROR_THRESHOLD", "0"));
7375 :
7376 : const double dfExcludedValuesThreshold =
7377 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7378 : "EXCLUDED_VALUES_PCT_THRESHOLD", "50")) /
7379 136 : 100.0;
7380 : const double dfNodataValuesThreshold =
7381 136 : CPLAtof(CSLFetchNameValueDef(poWK->papszWarpOptions,
7382 : "NODATA_VALUES_PCT_THRESHOLD", "100")) /
7383 136 : 100.0;
7384 :
7385 : const int nXMargin =
7386 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfXScale)));
7387 : const int nYMargin =
7388 136 : 2 * std::max(1, static_cast<int>(std::ceil(1. / poWK->dfYScale)));
7389 :
7390 : /* ==================================================================== */
7391 : /* Loop over output lines. */
7392 : /* ==================================================================== */
7393 8214 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
7394 : {
7395 8078 : GWKAverageOrModeComputeLineCoords(
7396 : psJob, padfX, padfX2, padfY, padfY2, padfZ, padfZ2, pabSuccess,
7397 : pabSuccess2, iDstY, dfSrcCoordPrecision, dfErrorThreshold);
7398 :
7399 : /* ====================================================================
7400 : */
7401 : /* Loop over pixels in output scanline. */
7402 : /* ====================================================================
7403 : */
7404 2094000 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
7405 : {
7406 2085920 : GPtrDiff_t iSrcOffset = 0;
7407 2085920 : double dfDensity = 1.0;
7408 2085920 : bool bHasFoundDensity = false;
7409 :
7410 2085920 : bool bWrapOverX = false;
7411 2085920 : double dfXMin = 0;
7412 2085920 : double dfYMin = 0;
7413 2085920 : double dfXMax = 0;
7414 2085920 : double dfYMax = 0;
7415 2085920 : int iSrcXMin = 0;
7416 2085920 : int iSrcYMin = 0;
7417 2085920 : int iSrcXMax = 0;
7418 2085920 : int iSrcYMax = 0;
7419 2085920 : if (!GWKAverageOrModeComputeSourceCoords(
7420 : psJob, padfX, padfX2, padfY, padfY2, iDstX, iDstY, nXMargin,
7421 : nYMargin, bWrapOverX, dfXMin, dfYMin, dfXMax, dfYMax,
7422 : iSrcXMin, iSrcYMin, iSrcXMax, iSrcYMax))
7423 : {
7424 687183 : continue;
7425 : }
7426 :
7427 1906320 : const GPtrDiff_t iDstOffset =
7428 1906320 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
7429 :
7430 1906320 : bool bDone = false;
7431 :
7432 : // Special Average mode where we process all bands together,
7433 : // to avoid averaging tuples that match an entry of m_aadfExcludedValues
7434 1906320 : constexpr double EPSILON = 1e-10;
7435 4613330 : if (poWK->eResample == GRA_Average &&
7436 800681 : (!poWK->m_aadfExcludedValues.empty() ||
7437 589832 : dfNodataValuesThreshold < 1 - EPSILON) &&
7438 2707000 : !poWK->bApplyVerticalShift && !bIsComplex)
7439 : {
7440 589832 : double dfTotalWeightInvalid = 0.0;
7441 589832 : double dfTotalWeightExcluded = 0.0;
7442 589832 : double dfTotalWeightRegular = 0.0;
7443 1179660 : std::vector<double> adfValueReal(poWK->nBands, 0);
7444 1179660 : std::vector<double> adfValueAveraged(poWK->nBands, 0);
7445 : std::vector<int> anCountExcludedValues(
7446 589832 : poWK->m_aadfExcludedValues.size(), 0);
7447 :
7448 2162710 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7449 : {
7450 1572880 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7451 1572880 : iSrcOffset =
7452 1572880 : iSrcXMin + static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7453 6291500 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7454 : iSrcX++, iSrcOffset++)
7455 : {
7456 4718620 : if (bWrapOverX)
7457 0 : iSrcOffset =
7458 0 : (iSrcX % nSrcXSize) +
7459 0 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7460 :
7461 4718620 : const double dfWeight =
7462 4718620 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7463 4718620 : if (dfWeight <= 0)
7464 0 : continue;
7465 :
7466 4718640 : if (poWK->panUnifiedSrcValid != nullptr &&
7467 12 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
7468 : {
7469 3 : dfTotalWeightInvalid += dfWeight;
7470 3 : continue;
7471 : }
7472 :
7473 4718620 : bool bAllValid = true;
7474 8651150 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7475 : {
7476 7340300 : double dfBandDensity = 0;
7477 7340300 : double dfValueImagTmp = 0;
7478 11272800 : if (!(GWKGetPixelValue(
7479 : poWK, iBand, iSrcOffset, &dfBandDensity,
7480 7340300 : &adfValueReal[iBand], &dfValueImagTmp) &&
7481 3932530 : dfBandDensity > BAND_DENSITY_THRESHOLD))
7482 : {
7483 3407770 : bAllValid = false;
7484 3407770 : break;
7485 : }
7486 : }
7487 :
7488 4718620 : if (!bAllValid)
7489 : {
7490 3407770 : dfTotalWeightInvalid += dfWeight;
7491 3407770 : continue;
7492 : }
7493 :
7494 1310850 : bool bExcludedValueFound = false;
7495 2490500 : for (size_t i = 0;
7496 2490500 : i < poWK->m_aadfExcludedValues.size(); ++i)
7497 : {
7498 1179670 : if (poWK->m_aadfExcludedValues[i] == adfValueReal)
7499 : {
7500 22 : bExcludedValueFound = true;
7501 22 : ++anCountExcludedValues[i];
7502 22 : dfTotalWeightExcluded += dfWeight;
7503 22 : break;
7504 : }
7505 : }
7506 1310850 : if (!bExcludedValueFound)
7507 : {
7508 : // Weighted incremental algorithm mean
7509 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7510 1310830 : dfTotalWeightRegular += dfWeight;
7511 5243290 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7512 : {
7513 3932460 : adfValueAveraged[iBand] +=
7514 7864930 : (dfWeight / dfTotalWeightRegular) *
7515 7864930 : (adfValueReal[iBand] -
7516 3932460 : adfValueAveraged[iBand]);
7517 : }
7518 : }
7519 : }
7520 : }
7521 :
7522 589832 : const double dfTotalWeight = dfTotalWeightInvalid +
7523 : dfTotalWeightExcluded +
7524 : dfTotalWeightRegular;
7525 589832 : if (dfTotalWeightInvalid > 0 &&
7526 : dfTotalWeightInvalid >=
7527 458751 : dfNodataValuesThreshold * dfTotalWeight)
7528 : {
7529 : // Do nothing. Let bHasFoundDensity to false.
7530 : }
7531 131085 : else if (dfTotalWeightExcluded > 0 &&
7532 : dfTotalWeightExcluded >=
7533 7 : dfExcludedValuesThreshold * dfTotalWeight)
7534 : {
7535 : // Find the most represented excluded value tuple
7536 3 : size_t iExcludedValue = 0;
7537 3 : int nExcludedValueCount = 0;
7538 6 : for (size_t i = 0; i < poWK->m_aadfExcludedValues.size();
7539 : ++i)
7540 : {
7541 3 : if (anCountExcludedValues[i] > nExcludedValueCount)
7542 : {
7543 3 : iExcludedValue = i;
7544 3 : nExcludedValueCount = anCountExcludedValues[i];
7545 : }
7546 : }
7547 :
7548 3 : bHasFoundDensity = true;
7549 :
7550 12 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7551 : {
7552 9 : GWKSetPixelValue(
7553 : poWK, iBand, iDstOffset, /* dfBandDensity = */ 1.0,
7554 9 : poWK->m_aadfExcludedValues[iExcludedValue][iBand],
7555 : 0);
7556 3 : }
7557 : }
7558 131082 : else if (dfTotalWeightRegular > 0)
7559 : {
7560 131082 : bHasFoundDensity = true;
7561 :
7562 524324 : for (int iBand = 0; iBand < poWK->nBands; iBand++)
7563 : {
7564 393242 : GWKSetPixelValue(poWK, iBand, iDstOffset,
7565 : /* dfBandDensity = */ 1.0,
7566 393242 : adfValueAveraged[iBand], 0);
7567 : }
7568 : }
7569 :
7570 : // Skip below loop on bands
7571 589832 : bDone = true;
7572 : }
7573 :
7574 : /* ====================================================================
7575 : */
7576 : /* Loop processing each band. */
7577 : /* ====================================================================
7578 : */
7579 :
7580 4729250 : for (int iBand = 0; !bDone && iBand < poWK->nBands; iBand++)
7581 : {
7582 2822920 : double dfBandDensity = 0.0;
7583 2822920 : double dfValueReal = 0.0;
7584 2822920 : double dfValueImag = 0.0;
7585 2822920 : double dfValueRealTmp = 0.0;
7586 2822920 : double dfValueImagTmp = 0.0;
7587 :
7588 : /* --------------------------------------------------------------------
7589 : */
7590 : /* Collect the source value. */
7591 : /* --------------------------------------------------------------------
7592 : */
7593 :
7594 : // Loop over source lines and pixels - 3 possible algorithms.
7595 :
7596 2822920 : if (poWK->eResample == GRA_Average)
7597 : {
7598 300849 : double dfTotalWeight = 0.0;
7599 :
7600 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7601 : // in gcore/overview.cpp.
7602 631308 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7603 : {
7604 330459 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7605 330459 : iSrcOffset = iSrcXMin +
7606 330459 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7607 803200 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7608 : iSrcX++, iSrcOffset++)
7609 : {
7610 472741 : if (bWrapOverX)
7611 630 : iSrcOffset =
7612 630 : (iSrcX % nSrcXSize) +
7613 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7614 :
7615 472745 : if (poWK->panUnifiedSrcValid != nullptr &&
7616 4 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7617 : iSrcOffset))
7618 : {
7619 1 : continue;
7620 : }
7621 :
7622 472740 : if (GWKGetPixelValue(
7623 : poWK, iBand, iSrcOffset, &dfBandDensity,
7624 945480 : &dfValueRealTmp, &dfValueImagTmp) &&
7625 472740 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7626 : {
7627 472740 : const double dfWeight =
7628 472740 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7629 472740 : if (dfWeight > 0)
7630 : {
7631 : // Weighted incremental algorithm mean
7632 : // Cf https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Weighted_incremental_algorithm
7633 472740 : dfTotalWeight += dfWeight;
7634 472740 : dfValueReal +=
7635 472740 : (dfWeight / dfTotalWeight) *
7636 472740 : (dfValueRealTmp - dfValueReal);
7637 472740 : if (bIsComplex)
7638 : {
7639 252 : dfValueImag +=
7640 252 : (dfWeight / dfTotalWeight) *
7641 252 : (dfValueImagTmp - dfValueImag);
7642 : }
7643 : }
7644 : }
7645 : }
7646 : }
7647 :
7648 300849 : if (dfTotalWeight > 0)
7649 : {
7650 300849 : if (poWK->bApplyVerticalShift)
7651 : {
7652 0 : if (!std::isfinite(padfZ[iDstX]))
7653 0 : continue;
7654 : // Subtract padfZ[] since the coordinate
7655 : // transformation is from target to source
7656 0 : dfValueReal =
7657 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7658 0 : padfZ[iDstX] *
7659 : dfMultFactorVerticalShiftPipeline;
7660 : }
7661 :
7662 300849 : dfBandDensity = 1;
7663 300849 : bHasFoundDensity = true;
7664 : }
7665 : } // GRA_Average.
7666 :
7667 2522070 : else if (poWK->eResample == GRA_RMS)
7668 : {
7669 300416 : double dfTotalReal = 0.0;
7670 300416 : double dfTotalImag = 0.0;
7671 300416 : double dfTotalWeight = 0.0;
7672 : // This code adapted from GDALDownsampleChunk32R_AverageT()
7673 : // in gcore/overview.cpp.
7674 630578 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7675 : {
7676 330162 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7677 330162 : iSrcOffset = iSrcXMin +
7678 330162 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7679 802723 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7680 : iSrcX++, iSrcOffset++)
7681 : {
7682 472561 : if (bWrapOverX)
7683 630 : iSrcOffset =
7684 630 : (iSrcX % nSrcXSize) +
7685 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7686 :
7687 472561 : if (poWK->panUnifiedSrcValid != nullptr &&
7688 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7689 : iSrcOffset))
7690 : {
7691 0 : continue;
7692 : }
7693 :
7694 472561 : if (GWKGetPixelValue(
7695 : poWK, iBand, iSrcOffset, &dfBandDensity,
7696 945122 : &dfValueRealTmp, &dfValueImagTmp) &&
7697 472561 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7698 : {
7699 472561 : const double dfWeight =
7700 472561 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7701 472561 : dfTotalWeight += dfWeight;
7702 472561 : dfTotalReal +=
7703 472561 : dfValueRealTmp * dfValueRealTmp * dfWeight;
7704 472561 : if (bIsComplex)
7705 48 : dfTotalImag += dfValueImagTmp *
7706 48 : dfValueImagTmp * dfWeight;
7707 : }
7708 : }
7709 : }
7710 :
7711 300416 : if (dfTotalWeight > 0)
7712 : {
7713 300416 : dfValueReal = sqrt(dfTotalReal / dfTotalWeight);
7714 :
7715 300416 : if (poWK->bApplyVerticalShift)
7716 : {
7717 0 : if (!std::isfinite(padfZ[iDstX]))
7718 0 : continue;
7719 : // Subtract padfZ[] since the coordinate
7720 : // transformation is from target to source
7721 0 : dfValueReal =
7722 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7723 0 : padfZ[iDstX] *
7724 : dfMultFactorVerticalShiftPipeline;
7725 : }
7726 :
7727 300416 : if (bIsComplex)
7728 12 : dfValueImag = sqrt(dfTotalImag / dfTotalWeight);
7729 :
7730 300416 : dfBandDensity = 1;
7731 300416 : bHasFoundDensity = true;
7732 : }
7733 : } // GRA_RMS.
7734 :
7735 2221660 : else if (poWK->eResample == GRA_Mode)
7736 : {
7737 496623 : float fMaxCount = 0.0f;
7738 496623 : int nMode = -1;
7739 496623 : bool bHasSourceValues = false;
7740 :
7741 496623 : memset(pafCounts, 0, nBins * sizeof(float));
7742 :
7743 1612560 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7744 : {
7745 1115940 : const double dfWeightY = COMPUTE_WEIGHT_Y(iSrcY);
7746 1115940 : iSrcOffset = iSrcXMin +
7747 1115940 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7748 4733160 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7749 : iSrcX++, iSrcOffset++)
7750 : {
7751 3617230 : if (bWrapOverX)
7752 630 : iSrcOffset =
7753 630 : (iSrcX % nSrcXSize) +
7754 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7755 :
7756 3617230 : if (poWK->panUnifiedSrcValid != nullptr &&
7757 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7758 : iSrcOffset))
7759 0 : continue;
7760 :
7761 3617230 : if (GWKGetPixelValue(
7762 : poWK, iBand, iSrcOffset, &dfBandDensity,
7763 7234450 : &dfValueRealTmp, &dfValueImagTmp) &&
7764 3617230 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7765 : {
7766 3617230 : bHasSourceValues = true;
7767 3617230 : const int nVal =
7768 3617230 : static_cast<int>(dfValueRealTmp);
7769 3617230 : const int iBin = nVal + nBinsOffset;
7770 3617230 : const double dfWeight =
7771 3617230 : COMPUTE_WEIGHT(iSrcX, dfWeightY);
7772 :
7773 : // Sum the density.
7774 3617230 : pafCounts[iBin] += static_cast<float>(dfWeight);
7775 : // Is it the most common value so far?
7776 3617230 : bool bUpdateMode = pafCounts[iBin] > fMaxCount;
7777 3617230 : if (!bUpdateMode &&
7778 778316 : pafCounts[iBin] == fMaxCount)
7779 : {
7780 218628 : switch (eTieStrategy)
7781 : {
7782 218620 : case GWKTS_First:
7783 218620 : break;
7784 4 : case GWKTS_Min:
7785 4 : bUpdateMode = nVal < nMode;
7786 4 : break;
7787 4 : case GWKTS_Max:
7788 4 : bUpdateMode = nVal > nMode;
7789 4 : break;
7790 : }
7791 : }
7792 3617230 : if (bUpdateMode)
7793 : {
7794 2838920 : nMode = nVal;
7795 2838920 : fMaxCount = pafCounts[iBin];
7796 : }
7797 : }
7798 : }
7799 : }
7800 :
7801 496623 : if (bHasSourceValues)
7802 : {
7803 496623 : dfValueReal = nMode;
7804 496623 : dfBandDensity = 1;
7805 496623 : bHasFoundDensity = true;
7806 : }
7807 : } // GRA_Mode.
7808 :
7809 1725040 : else if (poWK->eResample == GRA_Max)
7810 : {
7811 335037 : bool bFoundValid = false;
7812 335037 : double dfTotalReal = cpl::NumericLimits<double>::lowest();
7813 : // This code adapted from nAlgo 1 method, GRA_Average.
7814 1288010 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7815 : {
7816 952975 : iSrcOffset = iSrcXMin +
7817 952975 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7818 4406540 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7819 : iSrcX++, iSrcOffset++)
7820 : {
7821 3453560 : if (bWrapOverX)
7822 630 : iSrcOffset =
7823 630 : (iSrcX % nSrcXSize) +
7824 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7825 :
7826 3456370 : if (poWK->panUnifiedSrcValid != nullptr &&
7827 2809 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7828 : iSrcOffset))
7829 : {
7830 2446 : continue;
7831 : }
7832 :
7833 : // Returns pixel value if it is not no data.
7834 3451120 : if (GWKGetPixelValue(
7835 : poWK, iBand, iSrcOffset, &dfBandDensity,
7836 6902230 : &dfValueRealTmp, &dfValueImagTmp) &&
7837 3451120 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7838 : {
7839 3451120 : bFoundValid = true;
7840 3451120 : if (dfTotalReal < dfValueRealTmp)
7841 : {
7842 442642 : dfTotalReal = dfValueRealTmp;
7843 : }
7844 : }
7845 : }
7846 : }
7847 :
7848 335037 : if (bFoundValid)
7849 : {
7850 335037 : dfValueReal = dfTotalReal;
7851 :
7852 335037 : if (poWK->bApplyVerticalShift)
7853 : {
7854 0 : if (!std::isfinite(padfZ[iDstX]))
7855 0 : continue;
7856 : // Subtract padfZ[] since the coordinate
7857 : // transformation is from target to source
7858 0 : dfValueReal =
7859 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7860 0 : padfZ[iDstX] *
7861 : dfMultFactorVerticalShiftPipeline;
7862 : }
7863 :
7864 335037 : dfBandDensity = 1;
7865 335037 : bHasFoundDensity = true;
7866 : }
7867 : }
7868 :
7869 1390000 : else if (poWK->eResample == GRA_Min)
7870 : {
7871 335012 : bool bFoundValid = false;
7872 335012 : double dfTotalReal = cpl::NumericLimits<double>::max();
7873 : // This code adapted from nAlgo 1 method, GRA_Average.
7874 1287720 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7875 : {
7876 952710 : iSrcOffset = iSrcXMin +
7877 952710 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7878 4403460 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7879 : iSrcX++, iSrcOffset++)
7880 : {
7881 3450750 : if (bWrapOverX)
7882 630 : iSrcOffset =
7883 630 : (iSrcX % nSrcXSize) +
7884 630 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7885 :
7886 3450750 : if (poWK->panUnifiedSrcValid != nullptr &&
7887 0 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7888 : iSrcOffset))
7889 : {
7890 0 : continue;
7891 : }
7892 :
7893 : // Returns pixel value if it is not no data.
7894 3450750 : if (GWKGetPixelValue(
7895 : poWK, iBand, iSrcOffset, &dfBandDensity,
7896 6901500 : &dfValueRealTmp, &dfValueImagTmp) &&
7897 3450750 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7898 : {
7899 3450750 : bFoundValid = true;
7900 3450750 : if (dfTotalReal > dfValueRealTmp)
7901 : {
7902 443069 : dfTotalReal = dfValueRealTmp;
7903 : }
7904 : }
7905 : }
7906 : }
7907 :
7908 335012 : if (bFoundValid)
7909 : {
7910 335012 : dfValueReal = dfTotalReal;
7911 :
7912 335012 : if (poWK->bApplyVerticalShift)
7913 : {
7914 0 : if (!std::isfinite(padfZ[iDstX]))
7915 0 : continue;
7916 : // Subtract padfZ[] since the coordinate
7917 : // transformation is from target to source
7918 0 : dfValueReal =
7919 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7920 0 : padfZ[iDstX] *
7921 : dfMultFactorVerticalShiftPipeline;
7922 : }
7923 :
7924 335012 : dfBandDensity = 1;
7925 335012 : bHasFoundDensity = true;
7926 : }
7927 : } // GRA_Min.
7928 :
7929 : else
7930 : // poWK->eResample == GRA_Med | GRA_Q1 | GRA_Q3.
7931 : {
7932 1054990 : CPLAssert(quant > 0.0f);
7933 :
7934 1054990 : bool bFoundValid = false;
7935 1054990 : std::vector<double> dfRealValuesTmp;
7936 :
7937 : // This code adapted from nAlgo 1 method, GRA_Average.
7938 4012980 : for (int iSrcY = iSrcYMin; iSrcY < iSrcYMax; iSrcY++)
7939 : {
7940 2957990 : iSrcOffset = iSrcXMin +
7941 2957990 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7942 13509900 : for (int iSrcX = iSrcXMin; iSrcX < iSrcXMax;
7943 : iSrcX++, iSrcOffset++)
7944 : {
7945 10551900 : if (bWrapOverX)
7946 1890 : iSrcOffset =
7947 1890 : (iSrcX % nSrcXSize) +
7948 1890 : static_cast<GPtrDiff_t>(iSrcY) * nSrcXSize;
7949 :
7950 10748500 : if (poWK->panUnifiedSrcValid != nullptr &&
7951 196608 : !CPLMaskGet(poWK->panUnifiedSrcValid,
7952 : iSrcOffset))
7953 : {
7954 195449 : continue;
7955 : }
7956 :
7957 : // Returns pixel value if it is not no data.
7958 10356400 : if (GWKGetPixelValue(
7959 : poWK, iBand, iSrcOffset, &dfBandDensity,
7960 20712900 : &dfValueRealTmp, &dfValueImagTmp) &&
7961 10356400 : dfBandDensity > BAND_DENSITY_THRESHOLD)
7962 : {
7963 10356400 : bFoundValid = true;
7964 10356400 : dfRealValuesTmp.push_back(dfValueRealTmp);
7965 : }
7966 : }
7967 : }
7968 :
7969 1054990 : if (bFoundValid)
7970 : {
7971 1006150 : std::sort(dfRealValuesTmp.begin(),
7972 : dfRealValuesTmp.end());
7973 : int quantIdx = static_cast<int>(
7974 1006150 : std::ceil(quant * dfRealValuesTmp.size() - 1));
7975 1006150 : dfValueReal = dfRealValuesTmp[quantIdx];
7976 :
7977 1006150 : if (poWK->bApplyVerticalShift)
7978 : {
7979 0 : if (!std::isfinite(padfZ[iDstX]))
7980 0 : continue;
7981 : // Subtract padfZ[] since the coordinate
7982 : // transformation is from target to source
7983 0 : dfValueReal =
7984 0 : dfValueReal * poWK->dfMultFactorVerticalShift -
7985 0 : padfZ[iDstX] *
7986 : dfMultFactorVerticalShiftPipeline;
7987 : }
7988 :
7989 1006150 : dfBandDensity = 1;
7990 1006150 : bHasFoundDensity = true;
7991 1006150 : dfRealValuesTmp.clear();
7992 : }
7993 : } // Quantile.
7994 :
7995 : /* --------------------------------------------------------------------
7996 : */
7997 : /* We have a computed value from the source. Now apply it
7998 : * to */
7999 : /* the destination pixel. */
8000 : /* --------------------------------------------------------------------
8001 : */
8002 2822920 : if (bHasFoundDensity)
8003 : {
8004 : // TODO: Should we compute dfBandDensity in fct of
8005 : // nCount/nCount2, or use as a threshold to set the dest
8006 : // value?
8007 : // dfBandDensity = (float) nCount / nCount2;
8008 : // if( (float) nCount / nCount2 > 0.1 )
8009 : // or fix gdalwarp crop_to_cutline to crop partially
8010 : // overlapping pixels.
8011 2774080 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8012 : dfValueReal, dfValueImag);
8013 : }
8014 : }
8015 :
8016 1906320 : if (!bHasFoundDensity)
8017 507587 : continue;
8018 :
8019 : /* --------------------------------------------------------------------
8020 : */
8021 : /* Update destination density/validity masks. */
8022 : /* --------------------------------------------------------------------
8023 : */
8024 1398740 : GWKOverlayDensity(poWK, iDstOffset, dfDensity);
8025 :
8026 1398740 : if (poWK->panDstValid != nullptr)
8027 : {
8028 1184 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8029 : }
8030 : } /* Next iDstX */
8031 :
8032 : /* --------------------------------------------------------------------
8033 : */
8034 : /* Report progress to the user, and optionally cancel out. */
8035 : /* --------------------------------------------------------------------
8036 : */
8037 8078 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8038 0 : break;
8039 : }
8040 :
8041 : /* -------------------------------------------------------------------- */
8042 : /* Cleanup and return. */
8043 : /* -------------------------------------------------------------------- */
8044 136 : CPLFree(padfX);
8045 136 : CPLFree(padfY);
8046 136 : CPLFree(padfZ);
8047 136 : CPLFree(padfX2);
8048 136 : CPLFree(padfY2);
8049 136 : CPLFree(padfZ2);
8050 136 : CPLFree(pabSuccess);
8051 136 : CPLFree(pabSuccess2);
8052 136 : VSIFree(pafCounts);
8053 : }
8054 :
8055 : /************************************************************************/
8056 : /* getOrientation() */
8057 : /************************************************************************/
8058 :
typedef std::pair<double, double> XYPair;

// Returns the orientation of the turn made at p2 when walking p1 -> p2 -> p3:
//   1 if (p1,p2,p3) is clockwise oriented (for a Y axis pointing up),
//  -1 if it is counter-clockwise oriented,
//   0 if the points are colinear (absolute cross product below 1e-20).
static int getOrientation(const XYPair &p1, const XYPair &p2, const XYPair &p3)
{
    const double p1x = p1.first;
    const double p1y = p1.second;
    const double p2x = p2.first;
    const double p2y = p2.second;
    const double p3x = p3.first;
    const double p3y = p3.second;
    // Opposite of the Z component of (p2 - p1) x (p3 - p2).
    const double val = (p2y - p1y) * (p3x - p2x) - (p2x - p1x) * (p3y - p2y);
    if (std::abs(val) < 1e-20)
        return 0;
    else if (val > 0)
        return 1;
    else
        return -1;
}

/************************************************************************/
/*                             isConvex()                               */
/************************************************************************/

typedef std::vector<XYPair> XYPoly;

// Returns whether poly is convex, i.e. whether all its non-colinear turns
// share the same orientation. Colinear triplets are ignored.
//
// poly must be closed (poly.back() == poly.front()). A ring with fewer than
// 3 points cannot be evaluated and is reported as not convex.
static bool isConvex(const XYPoly &poly)
{
    const size_t n = poly.size();
    if (n < 3)
        return false;

    // Orientation of the first non-colinear turn met so far (0 = none yet).
    int last_orientation = 0;
    const auto isConsistentTurn =
        [&last_orientation](const XYPair &a, const XYPair &b, const XYPair &c)
    {
        const int orientation = getOrientation(a, b, c);
        if (orientation == 0)
            return true;
        if (last_orientation == 0)
            last_orientation = orientation;
        return orientation == last_orientation;
    };

    // Turns at the interior vertices poly[1] ... poly[n - 2].
    for (size_t i = 0; i + 2 < n; ++i)
    {
        if (!isConsistentTurn(poly[i], poly[i + 1], poly[i + 2]))
            return false;
    }

    // Turn at the closing vertex. As poly[n - 1] duplicates poly[0], the
    // triplets above never have poly[0] as their middle point: without this
    // check a reflex angle located at poly[0] would go unnoticed.
    return isConsistentTurn(poly[n - 2], poly[0], poly[1]);
}
8108 :
8109 : /************************************************************************/
8110 : /* pointIntersectsConvexPoly() */
8111 : /************************************************************************/
8112 :
8113 : // Returns whether xy intersects poly, that must be closed and convex.
8114 6049100 : static bool pointIntersectsConvexPoly(const XYPair &xy, const XYPoly &poly)
8115 : {
8116 6049100 : const size_t n = poly.size();
8117 6049100 : double dx1 = xy.first - poly[0].first;
8118 6049100 : double dy1 = xy.second - poly[0].second;
8119 6049100 : double dx2 = poly[1].first - poly[0].first;
8120 6049100 : double dy2 = poly[1].second - poly[0].second;
8121 6049100 : double prevCrossProduct = dx1 * dy2 - dx2 * dy1;
8122 :
8123 : // Check if the point remains on the same side (left/right) of all edges
8124 14556400 : for (size_t i = 2; i < n; i++)
8125 : {
8126 12793100 : dx1 = xy.first - poly[i - 1].first;
8127 12793100 : dy1 = xy.second - poly[i - 1].second;
8128 :
8129 12793100 : dx2 = poly[i].first - poly[i - 1].first;
8130 12793100 : dy2 = poly[i].second - poly[i - 1].second;
8131 :
8132 12793100 : double crossProduct = dx1 * dy2 - dx2 * dy1;
8133 12793100 : if (std::abs(prevCrossProduct) < 1e-20)
8134 725558 : prevCrossProduct = crossProduct;
8135 12067500 : else if (prevCrossProduct * crossProduct < 0)
8136 4285760 : return false;
8137 : }
8138 :
8139 1763340 : return true;
8140 : }
8141 :
8142 : /************************************************************************/
8143 : /* getIntersection() */
8144 : /************************************************************************/
8145 :
8146 : /* Returns intersection of [p1,p2] with [p3,p4], if
8147 : * it is a single point, and the 2 segments are not colinear.
8148 : */
8149 11811000 : static bool getIntersection(const XYPair &p1, const XYPair &p2,
8150 : const XYPair &p3, const XYPair &p4, XYPair &xy)
8151 : {
8152 11811000 : const double x1 = p1.first;
8153 11811000 : const double y1 = p1.second;
8154 11811000 : const double x2 = p2.first;
8155 11811000 : const double y2 = p2.second;
8156 11811000 : const double x3 = p3.first;
8157 11811000 : const double y3 = p3.second;
8158 11811000 : const double x4 = p4.first;
8159 11811000 : const double y4 = p4.second;
8160 11811000 : const double t_num = (x1 - x3) * (y3 - y4) - (y1 - y3) * (x3 - x4);
8161 11811000 : const double denom = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4);
8162 11811000 : if (t_num * denom < 0 || std::abs(t_num) > std::abs(denom) || denom == 0)
8163 9260780 : return false;
8164 :
8165 2550260 : const double u_num = (x1 - x3) * (y1 - y2) - (y1 - y3) * (x1 - x2);
8166 2550260 : if (u_num * denom < 0 || std::abs(u_num) > std::abs(denom))
8167 973924 : return false;
8168 :
8169 1576340 : const double t = t_num / denom;
8170 1576340 : xy.first = x1 + t * (x2 - x1);
8171 1576340 : xy.second = y1 + t * (y2 - y1);
8172 1576340 : return true;
8173 : }
8174 :
8175 : /************************************************************************/
8176 : /* getConvexPolyIntersection() */
8177 : /************************************************************************/
8178 :
8179 : // poly1 and poly2 must be closed and convex.
8180 : // The returned intersection will not necessary be closed.
8181 785302 : static void getConvexPolyIntersection(const XYPoly &poly1, const XYPoly &poly2,
8182 : XYPoly &intersection)
8183 : {
8184 785302 : intersection.clear();
8185 :
8186 : // Add all points of poly1 inside poly2
8187 3926510 : for (size_t i = 0; i < poly1.size() - 1; ++i)
8188 : {
8189 3141210 : if (pointIntersectsConvexPoly(poly1[i], poly2))
8190 1187430 : intersection.push_back(poly1[i]);
8191 : }
8192 785302 : if (intersection.size() == poly1.size() - 1)
8193 : {
8194 : // poly1 is inside poly2
8195 119100 : return;
8196 : }
8197 :
8198 : // Add all points of poly2 inside poly1
8199 3634860 : for (size_t i = 0; i < poly2.size() - 1; ++i)
8200 : {
8201 2907890 : if (pointIntersectsConvexPoly(poly2[i], poly1))
8202 575904 : intersection.push_back(poly2[i]);
8203 : }
8204 :
8205 : // Compute the intersection of all edges of both polygons
8206 726972 : XYPair xy;
8207 3634860 : for (size_t i1 = 0; i1 < poly1.size() - 1; ++i1)
8208 : {
8209 14539400 : for (size_t i2 = 0; i2 < poly2.size() - 1; ++i2)
8210 : {
8211 11631600 : if (getIntersection(poly1[i1], poly1[i1 + 1], poly2[i2],
8212 11631600 : poly2[i2 + 1], xy))
8213 : {
8214 1576230 : intersection.push_back(xy);
8215 : }
8216 : }
8217 : }
8218 :
8219 726972 : if (intersection.empty())
8220 60770 : return;
8221 :
8222 : // Find lowest-left point in intersection set
8223 666202 : double lowest_x = cpl::NumericLimits<double>::max();
8224 666202 : double lowest_y = cpl::NumericLimits<double>::max();
8225 3772450 : for (const auto &pair : intersection)
8226 : {
8227 3106240 : const double x = pair.first;
8228 3106240 : const double y = pair.second;
8229 3106240 : if (y < lowest_y || (y == lowest_y && x < lowest_x))
8230 : {
8231 1096040 : lowest_x = x;
8232 1096040 : lowest_y = y;
8233 : }
8234 : }
8235 :
8236 5737980 : const auto sortFunc = [&](const XYPair &p1, const XYPair &p2)
8237 : {
8238 5737980 : const double p1x_diff = p1.first - lowest_x;
8239 5737980 : const double p1y_diff = p1.second - lowest_y;
8240 5737980 : const double p2x_diff = p2.first - lowest_x;
8241 5737980 : const double p2y_diff = p2.second - lowest_y;
8242 5737980 : if (p2y_diff == 0.0 && p1y_diff == 0.0)
8243 : {
8244 2655420 : if (p1x_diff >= 0)
8245 : {
8246 2655420 : if (p2x_diff >= 0)
8247 2655420 : return p1.first < p2.first;
8248 0 : return true;
8249 : }
8250 : else
8251 : {
8252 0 : if (p2x_diff >= 0)
8253 0 : return false;
8254 0 : return p1.first < p2.first;
8255 : }
8256 : }
8257 :
8258 3082560 : if (p2x_diff == 0.0 && p1x_diff == 0.0)
8259 1046960 : return p1.second < p2.second;
8260 :
8261 : double tan_p1;
8262 2035600 : if (p1x_diff == 0.0)
8263 464622 : tan_p1 = p1y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8264 : else
8265 1570980 : tan_p1 = p1y_diff / p1x_diff;
8266 :
8267 : double tan_p2;
8268 2035600 : if (p2x_diff == 0.0)
8269 839515 : tan_p2 = p2y_diff == 0.0 ? 0.0 : cpl::NumericLimits<double>::max();
8270 : else
8271 1196080 : tan_p2 = p2y_diff / p2x_diff;
8272 :
8273 2035600 : if (tan_p1 >= 0)
8274 : {
8275 1904790 : if (tan_p2 >= 0)
8276 1881590 : return tan_p1 < tan_p2;
8277 : else
8278 23199 : return true;
8279 : }
8280 : else
8281 : {
8282 130806 : if (tan_p2 >= 0)
8283 103900 : return false;
8284 : else
8285 26906 : return tan_p1 < tan_p2;
8286 : }
8287 666202 : };
8288 :
8289 : // Sort points by increasing atan2(y-lowest_y, x-lowest_x) to form a convex
8290 : // hull
8291 666202 : std::sort(intersection.begin(), intersection.end(), sortFunc);
8292 :
8293 : // Remove duplicated points
8294 666202 : size_t j = 1;
8295 3106240 : for (size_t i = 1; i < intersection.size(); ++i)
8296 : {
8297 2440040 : if (intersection[i] != intersection[i - 1])
8298 : {
8299 1452560 : if (j < i)
8300 545275 : intersection[j] = intersection[i];
8301 1452560 : ++j;
8302 : }
8303 : }
8304 666202 : intersection.resize(j);
8305 : }
8306 :
8307 : /************************************************************************/
8308 : /* getArea() */
8309 : /************************************************************************/
8310 :
8311 : // poly may or may not be closed.
8312 558521 : static double getArea(const XYPoly &poly)
8313 : {
8314 : // CPLAssert(poly.size() >= 2);
8315 558521 : const size_t nPointCount = poly.size();
8316 : double dfAreaSum =
8317 558521 : poly[0].first * (poly[1].second - poly[nPointCount - 1].second);
8318 :
8319 1765140 : for (size_t i = 1; i < nPointCount - 1; i++)
8320 : {
8321 1206610 : dfAreaSum += poly[i].first * (poly[i + 1].second - poly[i - 1].second);
8322 : }
8323 :
8324 558521 : dfAreaSum += poly[nPointCount - 1].first *
8325 558521 : (poly[0].second - poly[nPointCount - 2].second);
8326 :
8327 558521 : return 0.5 * std::fabs(dfAreaSum);
8328 : }
8329 :
8330 : /************************************************************************/
8331 : /* GWKSumPreserving() */
8332 : /************************************************************************/
8333 :
8334 : static void GWKSumPreservingThread(void *pData);
8335 :
8336 18 : static CPLErr GWKSumPreserving(GDALWarpKernel *poWK)
8337 : {
8338 18 : return GWKRun(poWK, "GWKSumPreserving", GWKSumPreservingThread);
8339 : }
8340 :
8341 18 : static void GWKSumPreservingThread(void *pData)
8342 : {
8343 18 : GWKJobStruct *psJob = static_cast<GWKJobStruct *>(pData);
8344 18 : GDALWarpKernel *poWK = psJob->poWK;
8345 18 : const int iYMin = psJob->iYMin;
8346 18 : const int iYMax = psJob->iYMax;
8347 : const bool bIsAffineNoRotation =
8348 18 : GDALTransformIsAffineNoRotation(poWK->pfnTransformer,
8349 26 : poWK->pTransformerArg) &&
8350 : // for debug/testing purposes
8351 8 : CPLTestBool(
8352 18 : CPLGetConfigOption("GDAL_WARP_USE_AFFINE_OPTIMIZATION", "YES"));
8353 :
8354 18 : const int nDstXSize = poWK->nDstXSize;
8355 18 : const int nSrcXSize = poWK->nSrcXSize;
8356 18 : const int nSrcYSize = poWK->nSrcYSize;
8357 :
8358 36 : std::vector<double> adfX0(nSrcXSize + 1);
8359 36 : std::vector<double> adfY0(nSrcXSize + 1);
8360 36 : std::vector<double> adfZ0(nSrcXSize + 1);
8361 36 : std::vector<double> adfX1(nSrcXSize + 1);
8362 36 : std::vector<double> adfY1(nSrcXSize + 1);
8363 36 : std::vector<double> adfZ1(nSrcXSize + 1);
8364 36 : std::vector<int> abSuccess0(nSrcXSize + 1);
8365 36 : std::vector<int> abSuccess1(nSrcXSize + 1);
8366 :
8367 : CPLRectObj sGlobalBounds;
8368 18 : sGlobalBounds.minx = -2 * poWK->dfXScale;
8369 18 : sGlobalBounds.miny = iYMin - 2 * poWK->dfYScale;
8370 18 : sGlobalBounds.maxx = nDstXSize + 2 * poWK->dfXScale;
8371 18 : sGlobalBounds.maxy = iYMax + 2 * poWK->dfYScale;
8372 18 : CPLQuadTree *hQuadTree = CPLQuadTreeCreate(&sGlobalBounds, nullptr);
8373 :
8374 : struct SourcePixel
8375 : {
8376 : int iSrcX;
8377 : int iSrcY;
8378 :
8379 : // Coordinates of source pixel in target pixel coordinates
8380 : double dfDstX0;
8381 : double dfDstY0;
8382 : double dfDstX1;
8383 : double dfDstY1;
8384 : double dfDstX2;
8385 : double dfDstY2;
8386 : double dfDstX3;
8387 : double dfDstY3;
8388 :
8389 : // Source pixel total area (might be larger than the one described
8390 : // by above coordinates, if the pixel was crossing the antimeridian
8391 : // and split)
8392 : double dfArea;
8393 : };
8394 :
8395 36 : std::vector<SourcePixel> sourcePixels;
8396 :
8397 36 : XYPoly discontinuityLeft(5);
8398 36 : XYPoly discontinuityRight(5);
8399 :
8400 : /* ==================================================================== */
8401 : /* First pass: transform the 4 corners of each potential */
8402 : /* contributing source pixel to target pixel coordinates. */
8403 : /* ==================================================================== */
8404 :
8405 : // Special case for top line
8406 : {
8407 18 : int iY = 0;
8408 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8409 : {
8410 1112 : adfX1[iX] = iX + poWK->nSrcXOff;
8411 1112 : adfY1[iX] = iY + poWK->nSrcYOff;
8412 1112 : adfZ1[iX] = 0;
8413 : }
8414 :
8415 18 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8416 : adfX1.data(), adfY1.data(), adfZ1.data(),
8417 : abSuccess1.data());
8418 :
8419 1130 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8420 : {
8421 1112 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8422 0 : abSuccess1[iX] = FALSE;
8423 : else
8424 : {
8425 1112 : adfX1[iX] -= poWK->nDstXOff;
8426 1112 : adfY1[iX] -= poWK->nDstYOff;
8427 : }
8428 : }
8429 : }
8430 :
8431 413412 : const auto getInsideXSign = [poWK, nDstXSize](double dfX)
8432 : {
8433 413412 : return dfX - poWK->nDstXOff >= -2 * poWK->dfXScale &&
8434 205344 : dfX - poWK->nDstXOff <= nDstXSize + 2 * poWK->dfXScale
8435 413412 : ? 1
8436 208068 : : -1;
8437 18 : };
8438 :
8439 : const auto FindDiscontinuity =
8440 80 : [poWK, psJob, getInsideXSign](
8441 : double dfXLeft, double dfXRight, double dfY,
8442 : int XLeftReprojectedInsideSign, double &dfXMidReprojectedLeft,
8443 800 : double &dfXMidReprojectedRight, double &dfYMidReprojected)
8444 : {
8445 880 : for (int i = 0; i < 10 && dfXRight - dfXLeft > 1e-8; ++i)
8446 : {
8447 800 : double dfXMid = (dfXLeft + dfXRight) / 2;
8448 800 : double dfXMidReprojected = dfXMid;
8449 800 : dfYMidReprojected = dfY;
8450 800 : double dfZ = 0;
8451 800 : int nSuccess = 0;
8452 800 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, 1,
8453 : &dfXMidReprojected, &dfYMidReprojected, &dfZ,
8454 : &nSuccess);
8455 800 : if (XLeftReprojectedInsideSign != getInsideXSign(dfXMidReprojected))
8456 : {
8457 456 : dfXRight = dfXMid;
8458 456 : dfXMidReprojectedRight = dfXMidReprojected;
8459 : }
8460 : else
8461 : {
8462 344 : dfXLeft = dfXMid;
8463 344 : dfXMidReprojectedLeft = dfXMidReprojected;
8464 : }
8465 : }
8466 80 : };
8467 :
8468 566 : for (int iY = 0; iY < nSrcYSize; ++iY)
8469 : {
8470 548 : std::swap(adfX0, adfX1);
8471 548 : std::swap(adfY0, adfY1);
8472 548 : std::swap(adfZ0, adfZ1);
8473 548 : std::swap(abSuccess0, abSuccess1);
8474 :
8475 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8476 : {
8477 103964 : adfX1[iX] = iX + poWK->nSrcXOff;
8478 103964 : adfY1[iX] = iY + 1 + poWK->nSrcYOff;
8479 103964 : adfZ1[iX] = 0;
8480 : }
8481 :
8482 548 : poWK->pfnTransformer(psJob->pTransformerArg, FALSE, nSrcXSize + 1,
8483 : adfX1.data(), adfY1.data(), adfZ1.data(),
8484 : abSuccess1.data());
8485 :
8486 104512 : for (int iX = 0; iX <= nSrcXSize; ++iX)
8487 : {
8488 103964 : if (abSuccess1[iX] && !std::isfinite(adfX1[iX]))
8489 0 : abSuccess1[iX] = FALSE;
8490 : else
8491 : {
8492 103964 : adfX1[iX] -= poWK->nDstXOff;
8493 103964 : adfY1[iX] -= poWK->nDstYOff;
8494 : }
8495 : }
8496 :
8497 103964 : for (int iX = 0; iX < nSrcXSize; ++iX)
8498 : {
8499 206832 : if (abSuccess0[iX] && abSuccess0[iX + 1] && abSuccess1[iX] &&
8500 103416 : abSuccess1[iX + 1])
8501 : {
8502 : /* --------------------------------------------------------------------
8503 : */
8504 : /* Do not try to apply transparent source pixels to the
8505 : * destination.*/
8506 : /* --------------------------------------------------------------------
8507 : */
8508 103416 : const auto iSrcOffset =
8509 103416 : iX + static_cast<GPtrDiff_t>(iY) * nSrcXSize;
8510 105816 : if (poWK->panUnifiedSrcValid != nullptr &&
8511 2400 : !CPLMaskGet(poWK->panUnifiedSrcValid, iSrcOffset))
8512 : {
8513 10971 : continue;
8514 : }
8515 :
8516 103410 : if (poWK->pafUnifiedSrcDensity != nullptr)
8517 : {
8518 0 : if (poWK->pafUnifiedSrcDensity[iSrcOffset] <
8519 : SRC_DENSITY_THRESHOLD_FLOAT)
8520 0 : continue;
8521 : }
8522 :
8523 : SourcePixel sp;
8524 103410 : sp.dfArea = 0;
8525 103410 : sp.dfDstX0 = adfX0[iX];
8526 103410 : sp.dfDstY0 = adfY0[iX];
8527 103410 : sp.dfDstX1 = adfX0[iX + 1];
8528 103410 : sp.dfDstY1 = adfY0[iX + 1];
8529 103410 : sp.dfDstX2 = adfX1[iX + 1];
8530 103410 : sp.dfDstY2 = adfY1[iX + 1];
8531 103410 : sp.dfDstX3 = adfX1[iX];
8532 103410 : sp.dfDstY3 = adfY1[iX];
8533 :
8534 : // Detect pixel that likely cross the anti-meridian and
8535 : // introduce a discontinuity when reprojected.
8536 :
8537 103410 : if (getInsideXSign(adfX0[iX]) !=
8538 103506 : getInsideXSign(adfX0[iX + 1]) &&
8539 164 : getInsideXSign(adfX0[iX]) == getInsideXSign(adfX1[iX]) &&
8540 68 : getInsideXSign(adfX0[iX + 1]) ==
8541 103574 : getInsideXSign(adfX1[iX + 1]) &&
8542 40 : (adfY1[iX] - adfY0[iX]) * (adfY1[iX + 1] - adfY0[iX + 1]) >
8543 : 0)
8544 : {
8545 40 : double dfXMidReprojectedLeftTop = 0;
8546 40 : double dfXMidReprojectedRightTop = 0;
8547 40 : double dfYMidReprojectedTop = 0;
8548 40 : FindDiscontinuity(
8549 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8550 80 : iY + poWK->nSrcYOff, getInsideXSign(adfX0[iX]),
8551 : dfXMidReprojectedLeftTop, dfXMidReprojectedRightTop,
8552 : dfYMidReprojectedTop);
8553 40 : double dfXMidReprojectedLeftBottom = 0;
8554 40 : double dfXMidReprojectedRightBottom = 0;
8555 40 : double dfYMidReprojectedBottom = 0;
8556 40 : FindDiscontinuity(
8557 40 : iX + poWK->nSrcXOff, iX + poWK->nSrcXOff + 1,
8558 80 : iY + poWK->nSrcYOff + 1, getInsideXSign(adfX1[iX]),
8559 : dfXMidReprojectedLeftBottom,
8560 : dfXMidReprojectedRightBottom, dfYMidReprojectedBottom);
8561 :
8562 40 : discontinuityLeft[0] = XYPair(adfX0[iX], adfY0[iX]);
8563 40 : discontinuityLeft[1] =
8564 80 : XYPair(dfXMidReprojectedLeftTop, dfYMidReprojectedTop);
8565 40 : discontinuityLeft[2] = XYPair(dfXMidReprojectedLeftBottom,
8566 40 : dfYMidReprojectedBottom);
8567 40 : discontinuityLeft[3] = XYPair(adfX1[iX], adfY1[iX]);
8568 40 : discontinuityLeft[4] = XYPair(adfX0[iX], adfY0[iX]);
8569 :
8570 40 : discontinuityRight[0] =
8571 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8572 40 : discontinuityRight[1] =
8573 80 : XYPair(dfXMidReprojectedRightTop, dfYMidReprojectedTop);
8574 40 : discontinuityRight[2] = XYPair(dfXMidReprojectedRightBottom,
8575 40 : dfYMidReprojectedBottom);
8576 40 : discontinuityRight[3] =
8577 80 : XYPair(adfX1[iX + 1], adfY1[iX + 1]);
8578 40 : discontinuityRight[4] =
8579 80 : XYPair(adfX0[iX + 1], adfY0[iX + 1]);
8580 :
8581 40 : sp.dfArea = getArea(discontinuityLeft) +
8582 40 : getArea(discontinuityRight);
8583 40 : if (getInsideXSign(adfX0[iX]) >= 1)
8584 : {
8585 20 : sp.dfDstX1 = dfXMidReprojectedLeftTop;
8586 20 : sp.dfDstY1 = dfYMidReprojectedTop;
8587 20 : sp.dfDstX2 = dfXMidReprojectedLeftBottom;
8588 20 : sp.dfDstY2 = dfYMidReprojectedBottom;
8589 : }
8590 : else
8591 : {
8592 20 : sp.dfDstX0 = dfXMidReprojectedRightTop;
8593 20 : sp.dfDstY0 = dfYMidReprojectedTop;
8594 20 : sp.dfDstX3 = dfXMidReprojectedRightBottom;
8595 20 : sp.dfDstY3 = dfYMidReprojectedBottom;
8596 : }
8597 : }
8598 :
8599 : // Bounding box of source pixel (expressed in target pixel
8600 : // coordinates)
8601 : CPLRectObj sRect;
8602 103410 : sRect.minx = std::min(std::min(sp.dfDstX0, sp.dfDstX1),
8603 103410 : std::min(sp.dfDstX2, sp.dfDstX3));
8604 103410 : sRect.miny = std::min(std::min(sp.dfDstY0, sp.dfDstY1),
8605 103410 : std::min(sp.dfDstY2, sp.dfDstY3));
8606 103410 : sRect.maxx = std::max(std::max(sp.dfDstX0, sp.dfDstX1),
8607 103410 : std::max(sp.dfDstX2, sp.dfDstX3));
8608 103410 : sRect.maxy = std::max(std::max(sp.dfDstY0, sp.dfDstY1),
8609 103410 : std::max(sp.dfDstY2, sp.dfDstY3));
8610 103410 : if (!(sRect.minx < nDstXSize && sRect.maxx > 0 &&
8611 101350 : sRect.miny < iYMax && sRect.maxy > iYMin))
8612 : {
8613 10852 : continue;
8614 : }
8615 :
8616 92558 : sp.iSrcX = iX;
8617 92558 : sp.iSrcY = iY;
8618 :
8619 92558 : if (!bIsAffineNoRotation)
8620 : {
8621 : // Check polygon validity (no self-crossing)
8622 89745 : XYPair xy;
8623 89745 : if (getIntersection(XYPair(sp.dfDstX0, sp.dfDstY0),
8624 89745 : XYPair(sp.dfDstX1, sp.dfDstY1),
8625 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8626 269235 : XYPair(sp.dfDstX3, sp.dfDstY3), xy) ||
8627 89745 : getIntersection(XYPair(sp.dfDstX1, sp.dfDstY1),
8628 89745 : XYPair(sp.dfDstX2, sp.dfDstY2),
8629 89745 : XYPair(sp.dfDstX0, sp.dfDstY0),
8630 179490 : XYPair(sp.dfDstX3, sp.dfDstY3), xy))
8631 : {
8632 113 : continue;
8633 : }
8634 : }
8635 :
8636 92445 : CPLQuadTreeInsertWithBounds(
8637 : hQuadTree,
8638 : reinterpret_cast<void *>(
8639 92445 : static_cast<uintptr_t>(sourcePixels.size())),
8640 : &sRect);
8641 :
8642 92445 : sourcePixels.push_back(sp);
8643 : }
8644 : }
8645 : }
8646 :
8647 36 : std::vector<double> adfRealValue(poWK->nBands);
8648 36 : std::vector<double> adfImagValue(poWK->nBands);
8649 36 : std::vector<double> adfBandDensity(poWK->nBands);
8650 36 : std::vector<double> adfWeight(poWK->nBands);
8651 :
8652 : #ifdef CHECK_SUM_WITH_GEOS
8653 : auto hGEOSContext = OGRGeometry::createGEOSContext();
8654 : auto seq1 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8655 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 0, 0.0, 0.0);
8656 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 1, 1.0, 0.0);
8657 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 2, 1.0, 1.0);
8658 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 3, 0.0, 1.0);
8659 : GEOSCoordSeq_setXY_r(hGEOSContext, seq1, 4, 0.0, 0.0);
8660 : auto hLR1 = GEOSGeom_createLinearRing_r(hGEOSContext, seq1);
8661 : auto hP1 = GEOSGeom_createPolygon_r(hGEOSContext, hLR1, nullptr, 0);
8662 :
8663 : auto seq2 = GEOSCoordSeq_create_r(hGEOSContext, 5, 2);
8664 : auto hLR2 = GEOSGeom_createLinearRing_r(hGEOSContext, seq2);
8665 : auto hP2 = GEOSGeom_createPolygon_r(hGEOSContext, hLR2, nullptr, 0);
8666 : #endif
8667 :
8668 : const XYPoly xy1{
8669 36 : {0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}};
8670 36 : XYPoly xy2(5);
8671 36 : XYPoly xy2_triangle(4);
8672 36 : XYPoly intersection;
8673 :
8674 : /* ==================================================================== */
8675 : /* Loop over output lines. */
8676 : /* ==================================================================== */
8677 891 : for (int iDstY = iYMin; iDstY < iYMax; iDstY++)
8678 : {
8679 : CPLRectObj sRect;
8680 873 : sRect.miny = iDstY;
8681 873 : sRect.maxy = iDstY + 1;
8682 :
8683 : /* ====================================================================
8684 : */
8685 : /* Loop over pixels in output scanline. */
8686 : /* ====================================================================
8687 : */
8688 221042 : for (int iDstX = 0; iDstX < nDstXSize; iDstX++)
8689 : {
8690 220169 : sRect.minx = iDstX;
8691 220169 : sRect.maxx = iDstX + 1;
8692 220169 : int nSourcePixels = 0;
8693 : void **pahSourcePixel =
8694 220169 : CPLQuadTreeSearch(hQuadTree, &sRect, &nSourcePixels);
8695 220169 : if (nSourcePixels == 0)
8696 : {
8697 1258 : CPLFree(pahSourcePixel);
8698 1262 : continue;
8699 : }
8700 :
8701 218911 : std::fill(adfRealValue.begin(), adfRealValue.end(), 0);
8702 218911 : std::fill(adfImagValue.begin(), adfImagValue.end(), 0);
8703 218911 : std::fill(adfBandDensity.begin(), adfBandDensity.end(), 0);
8704 218911 : std::fill(adfWeight.begin(), adfWeight.end(), 0);
8705 218911 : double dfDensity = 0;
8706 : // Just above zero to please Coveriy Scan
8707 218911 : double dfTotalWeight = std::numeric_limits<double>::min();
8708 :
8709 : /* ====================================================================
8710 : */
8711 : /* Iterate over each contributing source pixel to add its
8712 : */
8713 : /* value weighed by the ratio of the area of its
8714 : * intersection */
8715 : /* with the target pixel divided by the area of the source
8716 : */
8717 : /* pixel. */
8718 : /* ====================================================================
8719 : */
8720 1020520 : for (int i = 0; i < nSourcePixels; ++i)
8721 : {
8722 801614 : const int iSourcePixel = static_cast<int>(
8723 801614 : reinterpret_cast<uintptr_t>(pahSourcePixel[i]));
8724 801614 : auto &sp = sourcePixels[iSourcePixel];
8725 :
8726 801614 : double dfWeight = 0.0;
8727 801614 : if (bIsAffineNoRotation)
8728 : {
8729 : // Optimization since the source pixel is a rectangle in
8730 : // target pixel coordinates
8731 16312 : double dfSrcMinX = std::min(sp.dfDstX0, sp.dfDstX2);
8732 16312 : double dfSrcMaxX = std::max(sp.dfDstX0, sp.dfDstX2);
8733 16312 : double dfSrcMinY = std::min(sp.dfDstY0, sp.dfDstY2);
8734 16312 : double dfSrcMaxY = std::max(sp.dfDstY0, sp.dfDstY2);
8735 16312 : double dfIntersMinX = std::max<double>(dfSrcMinX, iDstX);
8736 16312 : double dfIntersMaxX = std::min(dfSrcMaxX, iDstX + 1.0);
8737 16312 : double dfIntersMinY = std::max<double>(dfSrcMinY, iDstY);
8738 16312 : double dfIntersMaxY = std::min(dfSrcMaxY, iDstY + 1.0);
8739 16312 : dfWeight =
8740 16312 : ((dfIntersMaxX - dfIntersMinX) *
8741 16312 : (dfIntersMaxY - dfIntersMinY)) /
8742 16312 : ((dfSrcMaxX - dfSrcMinX) * (dfSrcMaxY - dfSrcMinY));
8743 : }
8744 : else
8745 : {
8746 : // Compute the polygon of the source pixel in target pixel
8747 : // coordinates, and shifted to the target pixel (unit square
8748 : // coordinates)
8749 :
8750 785302 : xy2[0] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8751 785302 : xy2[1] = {sp.dfDstX1 - iDstX, sp.dfDstY1 - iDstY};
8752 785302 : xy2[2] = {sp.dfDstX2 - iDstX, sp.dfDstY2 - iDstY};
8753 785302 : xy2[3] = {sp.dfDstX3 - iDstX, sp.dfDstY3 - iDstY};
8754 785302 : xy2[4] = {sp.dfDstX0 - iDstX, sp.dfDstY0 - iDstY};
8755 :
8756 785302 : if (isConvex(xy2))
8757 : {
8758 785302 : getConvexPolyIntersection(xy1, xy2, intersection);
8759 785302 : if (intersection.size() >= 3)
8760 : {
8761 468849 : dfWeight = getArea(intersection);
8762 : }
8763 : }
8764 : else
8765 : {
8766 : // Split xy2 into 2 triangles.
8767 0 : xy2_triangle[0] = xy2[0];
8768 0 : xy2_triangle[1] = xy2[1];
8769 0 : xy2_triangle[2] = xy2[2];
8770 0 : xy2_triangle[3] = xy2[0];
8771 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8772 : intersection);
8773 0 : if (intersection.size() >= 3)
8774 : {
8775 0 : dfWeight = getArea(intersection);
8776 : }
8777 :
8778 0 : xy2_triangle[1] = xy2[2];
8779 0 : xy2_triangle[2] = xy2[3];
8780 0 : getConvexPolyIntersection(xy1, xy2_triangle,
8781 : intersection);
8782 0 : if (intersection.size() >= 3)
8783 : {
8784 0 : dfWeight += getArea(intersection);
8785 : }
8786 : }
8787 785302 : if (dfWeight > 0.0)
8788 : {
8789 468828 : if (sp.dfArea == 0)
8790 89592 : sp.dfArea = getArea(xy2);
8791 468828 : dfWeight /= sp.dfArea;
8792 : }
8793 :
8794 : #ifdef CHECK_SUM_WITH_GEOS
8795 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 0,
8796 : sp.dfDstX0 - iDstX,
8797 : sp.dfDstY0 - iDstY);
8798 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 1,
8799 : sp.dfDstX1 - iDstX,
8800 : sp.dfDstY1 - iDstY);
8801 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 2,
8802 : sp.dfDstX2 - iDstX,
8803 : sp.dfDstY2 - iDstY);
8804 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 3,
8805 : sp.dfDstX3 - iDstX,
8806 : sp.dfDstY3 - iDstY);
8807 : GEOSCoordSeq_setXY_r(hGEOSContext, seq2, 4,
8808 : sp.dfDstX0 - iDstX,
8809 : sp.dfDstY0 - iDstY);
8810 :
8811 : double dfWeightGEOS = 0.0;
8812 : auto hIntersection =
8813 : GEOSIntersection_r(hGEOSContext, hP1, hP2);
8814 : if (hIntersection)
8815 : {
8816 : double dfIntersArea = 0.0;
8817 : if (GEOSArea_r(hGEOSContext, hIntersection,
8818 : &dfIntersArea) &&
8819 : dfIntersArea > 0)
8820 : {
8821 : double dfSourceArea = 0.0;
8822 : if (GEOSArea_r(hGEOSContext, hP2, &dfSourceArea))
8823 : {
8824 : dfWeightGEOS = dfIntersArea / dfSourceArea;
8825 : }
8826 : }
8827 : GEOSGeom_destroy_r(hGEOSContext, hIntersection);
8828 : }
8829 : if (fabs(dfWeight - dfWeightGEOS) > 1e-5 * dfWeightGEOS)
8830 : {
8831 : /* ok */ printf("dfWeight=%f dfWeightGEOS=%f\n",
8832 : dfWeight, dfWeightGEOS);
8833 : printf("xy2: "); // ok
8834 : for (const auto &xy : xy2)
8835 : printf("[%f, %f], ", xy.first, xy.second); // ok
8836 : printf("\n"); // ok
8837 : printf("intersection: "); // ok
8838 : for (const auto &xy : intersection)
8839 : printf("[%f, %f], ", xy.first, xy.second); // ok
8840 : printf("\n"); // ok
8841 : }
8842 : #endif
8843 : }
8844 801614 : if (dfWeight > 0.0)
8845 : {
8846 474099 : const GPtrDiff_t iSrcOffset =
8847 474099 : sp.iSrcX +
8848 474099 : static_cast<GPtrDiff_t>(sp.iSrcY) * nSrcXSize;
8849 474099 : dfTotalWeight += dfWeight;
8850 :
8851 474099 : if (poWK->pafUnifiedSrcDensity != nullptr)
8852 : {
8853 0 : dfDensity +=
8854 0 : dfWeight *
8855 0 : double(poWK->pafUnifiedSrcDensity[iSrcOffset]);
8856 : }
8857 : else
8858 : {
8859 474099 : dfDensity += dfWeight;
8860 : }
8861 :
8862 1818720 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8863 : {
8864 : // Returns pixel value if it is not no data.
8865 : double dfBandDensity;
8866 : double dfRealValue;
8867 : double dfImagValue;
8868 2689240 : if (!(GWKGetPixelValue(poWK, iBand, iSrcOffset,
8869 : &dfBandDensity, &dfRealValue,
8870 : &dfImagValue) &&
8871 1344620 : dfBandDensity > BAND_DENSITY_THRESHOLD))
8872 : {
8873 0 : continue;
8874 : }
8875 :
8876 1344620 : adfRealValue[iBand] += dfRealValue * dfWeight;
8877 1344620 : adfImagValue[iBand] += dfImagValue * dfWeight;
8878 1344620 : adfBandDensity[iBand] += dfBandDensity * dfWeight;
8879 1344620 : adfWeight[iBand] += dfWeight;
8880 : }
8881 : }
8882 : }
8883 :
8884 218911 : CPLFree(pahSourcePixel);
8885 :
8886 : /* --------------------------------------------------------------------
8887 : */
8888 : /* Update destination pixel value. */
8889 : /* --------------------------------------------------------------------
8890 : */
8891 218911 : bool bHasFoundDensity = false;
8892 218911 : const GPtrDiff_t iDstOffset =
8893 218911 : iDstX + static_cast<GPtrDiff_t>(iDstY) * nDstXSize;
8894 827822 : for (int iBand = 0; iBand < poWK->nBands; ++iBand)
8895 : {
8896 608911 : if (adfWeight[iBand] > 0)
8897 : {
8898 : const double dfBandDensity =
8899 608907 : adfBandDensity[iBand] / adfWeight[iBand];
8900 608907 : if (dfBandDensity > BAND_DENSITY_THRESHOLD)
8901 : {
8902 608907 : bHasFoundDensity = true;
8903 608907 : GWKSetPixelValue(poWK, iBand, iDstOffset, dfBandDensity,
8904 608907 : adfRealValue[iBand],
8905 608907 : adfImagValue[iBand]);
8906 : }
8907 : }
8908 : }
8909 :
8910 218911 : if (!bHasFoundDensity)
8911 4 : continue;
8912 :
8913 : /* --------------------------------------------------------------------
8914 : */
8915 : /* Update destination density/validity masks. */
8916 : /* --------------------------------------------------------------------
8917 : */
8918 218907 : GWKOverlayDensity(poWK, iDstOffset, dfDensity / dfTotalWeight);
8919 :
8920 218907 : if (poWK->panDstValid != nullptr)
8921 : {
8922 11750 : CPLMaskSet(poWK->panDstValid, iDstOffset);
8923 : }
8924 : }
8925 :
8926 : /* --------------------------------------------------------------------
8927 : */
8928 : /* Report progress to the user, and optionally cancel out. */
8929 : /* --------------------------------------------------------------------
8930 : */
8931 873 : if (psJob->pfnProgress && psJob->pfnProgress(psJob))
8932 0 : break;
8933 : }
8934 :
8935 : #ifdef CHECK_SUM_WITH_GEOS
8936 : GEOSGeom_destroy_r(hGEOSContext, hP1);
8937 : GEOSGeom_destroy_r(hGEOSContext, hP2);
8938 : OGRGeometry::freeGEOSContext(hGEOSContext);
8939 : #endif
8940 18 : CPLQuadTreeDestroy(hQuadTree);
8941 18 : }
|