Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Contains default implementation of GDALRasterBand::IRasterIO()
5 : * and supporting functions of broader utility.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1998, Frank Warmerdam
10 : * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11 : *
12 : * SPDX-License-Identifier: MIT
13 : ****************************************************************************/
14 :
15 : #include "cpl_port.h"
16 : #include "gdal.h"
17 : #include "gdal_priv.h"
18 :
19 : #include <cassert>
20 : #include <climits>
21 : #include <cmath>
22 : #include <cstddef>
23 : #include <cstdio>
24 : #include <cstdlib>
25 : #include <cstring>
26 :
27 : #include <algorithm>
28 : #include <limits>
29 : #include <stdexcept>
30 : #include <type_traits>
31 :
32 : #include "cpl_conv.h"
33 : #include "cpl_cpu_features.h"
34 : #include "cpl_error.h"
35 : #include "cpl_progress.h"
36 : #include "cpl_string.h"
37 : #include "cpl_vsi.h"
38 : #include "gdal_priv_templates.hpp"
39 : #include "gdal_vrt.h"
40 : #include "gdalwarper.h"
41 : #include "memdataset.h"
42 : #include "vrtdataset.h"
43 :
44 : #if defined(__x86_64) || defined(_M_X64)
45 : #include <emmintrin.h>
46 : #define HAVE_SSE2
47 : #elif defined(USE_NEON_OPTIMIZATIONS)
48 : #include "include_sse2neon.h"
49 : #define HAVE_SSE2
50 : #endif
51 :
52 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
53 : #include "rasterio_ssse3.h"
54 : #ifdef __SSSE3__
55 : #include <tmmintrin.h>
56 : #endif
57 : #endif
58 :
// Forward declaration: the definition is not visible in this part of the file.
// DownsamplingIntegerXFactor() calls it on the DATA_TYPE_SIZE == 1 path with
// a source pointer, a source stride, a destination pointer, a destination
// stride and a count.
// NOTE(review): presumably copies nWordCount single bytes, stepping the source
// by nSrcPixelStride and the destination by nDstPixelStride -- confirm against
// the definition.
59 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
60 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
61 : int nDstPixelStride, GPtrDiff_t nWordCount);
62 :
63 : /************************************************************************/
64 : /* DownsamplingIntegerXFactor() */
65 : /************************************************************************/
66 :
// Fills one output line of nBufXSize pixels by picking one source pixel every
// nSrcXInc source columns, starting at column iSrcX, from the band's blocks.
//
// Template parameters:
//   bSameDataType   when true, pixels are copied verbatim (memcpy, or
//                   GDALFastCopyByte for 1-byte pixels); when false they are
//                   converted from eDataType to eBufType by GDALCopyWords64.
//   DATA_TYPE_SIZE  pixel size in bytes, used only when bSameDataType is true.
//
// Parameters:
//   poBand         band used to fetch blocks (GetLockedBlockRef) and to get
//                  the raster width (GetXSize) that bounds the last column.
//   iSrcX          source column of the first output pixel.
//   nSrcXInc       source column step between consecutive output pixels.
//   iSrcOffsetCst  offset, in pixels, added to the column position inside the
//                  block before scaling by the pixel size.
//   pabyDstData    address of the first output pixel.
//   nPixelSpace    byte step between consecutive output pixels.
//   nBufXSize      number of output pixels to produce.
//   eDataType      source data type (also gives the pixel size when
//                  bSameDataType is false).
//   eBufType       destination data type, used for the conversion path.
//   nStartBlockX   [in,out] first source column of the block held in poBlock;
//                  rewritten each time another block is loaded.
//   nBlockXSize    block width in pixels.
//   poBlock        [in,out] currently locked block. It is reused as long as
//                  iSrcX is below nStartBlockX + nBlockXSize (only that upper
//                  bound is tested here); otherwise it is DropLock()'ed and
//                  replaced. This function does not drop the lock of the last
//                  block it used before returning true.
//   nLBlockY       block row index passed to GetLockedBlockRef.
//
// Returns false when GetLockedBlockRef() returns nullptr (poBlock is then
// nullptr), true otherwise.
67 : template <bool bSameDataType, int DATA_TYPE_SIZE>
68 413236 : static bool DownsamplingIntegerXFactor(
69 : GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
70 : GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
71 : GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
72 : int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
73 : {
74 413236 : const int nBandDataSize =
75 : bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
// Pixels still to be produced by the main loop; the last output pixel is
// excluded here and handled by the dedicated code after the loop.
76 413236 : int nOuterLoopIters = nBufXSize - 1;
// Byte distance between two consecutive sampled source pixels.
77 413236 : const int nIncSrcOffset = nSrcXInc * nBandDataSize;
78 : const GByte *CPL_RESTRICT pabySrcData;
79 413236 : int nEndBlockX = nBlockXSize + nStartBlockX;
80 :
// First pixel: jump straight into the middle of the loop body, skipping the
// loop condition and the per-iteration advance at the top of the body.
81 413236 : if (iSrcX < nEndBlockX)
82 : {
83 226134 : CPLAssert(poBlock);
84 226134 : goto no_reload_block;
85 : }
86 187102 : goto reload_block;
87 :
88 : // Don't do the last iteration in the loop, as iSrcX might go beyond
89 : // nRasterXSize - 1
90 932852 : while (--nOuterLoopIters >= 1)
91 : {
// Step from the last pixel copied to the next one to copy.
92 189034 : iSrcX += nSrcXInc;
93 189034 : pabySrcData += nIncSrcOffset;
94 189034 : pabyDstData += nPixelSpace;
95 :
96 : /* --------------------------------------------------------------------
97 : */
98 : /* Ensure we have the appropriate block loaded. */
99 : /* --------------------------------------------------------------------
100 : */
101 189034 : if (iSrcX >= nEndBlockX)
102 : {
103 189034 : reload_block:
104 : {
105 388726 : const int nLBlockX = iSrcX / nBlockXSize;
106 388726 : nStartBlockX = nLBlockX * nBlockXSize;
107 388726 : nEndBlockX = nStartBlockX + nBlockXSize;
108 :
// Release the previous block before locking the one containing iSrcX.
109 388726 : if (poBlock != nullptr)
110 316739 : poBlock->DropLock();
111 :
112 388726 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
113 388726 : if (poBlock == nullptr)
114 : {
115 1 : return false;
116 : }
117 : }
118 :
// Entry point when poBlock already covers iSrcX: recompute the source
// pointer from the block data and the position of iSrcX inside the block.
119 388725 : no_reload_block:
120 : const GByte *pabySrcBlock =
121 932852 : static_cast<const GByte *>(poBlock->GetDataRef());
122 932852 : GPtrDiff_t iSrcOffset =
123 932852 : (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
124 932852 : pabySrcData = pabySrcBlock + iSrcOffset;
125 : }
126 :
127 : /* --------------------------------------------------------------------
128 : */
129 : /* Copy the maximum run of pixels. */
130 : /* --------------------------------------------------------------------
131 : */
132 :
// Number of samples, starting at iSrcX and stepping by nSrcXInc, located
// before nEndBlockX (rounded-up division), capped by the pixels left for the
// loop. It is <= 0 when nOuterLoopIters is 0 or negative (last output pixel,
// or nBufXSize == 1); exactly one pixel is still copied in that case.
133 932852 : const int nIters = std::min(
134 932852 : (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
135 : if (bSameDataType)
136 : {
// Always copy the pixel at the current position.
137 932447 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
138 932447 : if (nIters > 1)
139 : {
140 : if (DATA_TYPE_SIZE == 1)
141 : {
// Copy the nIters - 1 remaining pixels of the run in a single call. The
// pointers were moved one stride before the call, so advancing them by
// nIters - 2 more strides leaves them on the last pixel copied.
142 276287 : pabySrcData += nIncSrcOffset;
143 276287 : pabyDstData += nPixelSpace;
144 276287 : GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
145 276287 : nPixelSpace, nIters - 1);
146 276287 : pabySrcData +=
147 276287 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
148 276287 : pabyDstData +=
149 276287 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
150 : }
151 : else
152 : {
// Same run, one memcpy per pixel; pointers end on the last pixel copied.
153 4443828 : for (int i = 0; i < nIters - 1; i++)
154 : {
155 4245254 : pabySrcData += nIncSrcOffset;
156 4245254 : pabyDstData += nPixelSpace;
157 4245254 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
158 : }
159 : }
// Account for the nIters - 1 extra pixels copied beyond the current one.
160 474861 : iSrcX += nSrcXInc * (nIters - 1);
161 474861 : nOuterLoopIters -= nIters - 1;
162 : }
163 : }
164 : else
165 : {
166 : // Type to type conversion ...
// std::max(1, nIters): nIters may be <= 0 (see above) but the current pixel
// must still be converted.
167 405 : GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
168 405 : eBufType, nPixelSpace, std::max(1, nIters));
169 405 : if (nIters > 1)
170 : {
// Leave pointers, iSrcX and the counter on the last pixel converted.
171 198 : pabySrcData +=
172 198 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
173 198 : pabyDstData +=
174 198 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
175 198 : iSrcX += nSrcXInc * (nIters - 1);
176 198 : nOuterLoopIters -= nIters - 1;
177 : }
178 : }
179 : }
180 :
181 : // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
// The counter is exactly 0 the first time the loop condition fails with the
// last output pixel still pending. After that pixel has been copied, the loop
// condition is evaluated again and decrements the counter below 0, so this
// branch is not taken a second time. The counter is also below 0 here when
// nBufXSize == 1.
182 743818 : if (nOuterLoopIters == 0)
183 : {
184 330583 : const int nRasterXSize = poBand->GetXSize();
// The sum is computed in 64 bits and clamped to the last raster column.
185 330583 : iSrcX =
186 661166 : static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
187 330583 : static_cast<GInt64>(nRasterXSize - 1)));
188 330583 : pabyDstData += nPixelSpace;
// Re-enter the loop body; pabySrcData is recomputed from iSrcX there.
189 330583 : if (iSrcX < nEndBlockX)
190 : {
191 317993 : goto no_reload_block;
192 : }
193 12590 : goto reload_block;
194 : }
195 413235 : return true;
196 : }
197 :
198 : /************************************************************************/
199 : /* IRasterIO() */
200 : /* */
201 : /* Default internal implementation of RasterIO() ... utilizes */
202 : /* the Block access methods to satisfy the request. This would */
203 : /* normally only be overridden by formats with overviews. */
204 : /************************************************************************/
205 :
206 5706710 : CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
207 : int nXSize, int nYSize, void *pData,
208 : int nBufXSize, int nBufYSize,
209 : GDALDataType eBufType, GSpacing nPixelSpace,
210 : GSpacing nLineSpace,
211 : GDALRasterIOExtraArg *psExtraArg)
212 :
213 : {
214 5706710 : if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
215 : {
216 0 : CPLError(eFlushBlockErr, CPLE_AppDefined,
217 : "An error occurred while writing a dirty block "
218 : "from GDALRasterBand::IRasterIO");
219 0 : CPLErr eErr = eFlushBlockErr;
220 0 : eFlushBlockErr = CE_None;
221 0 : return eErr;
222 : }
223 5706710 : if (nBlockXSize <= 0 || nBlockYSize <= 0)
224 : {
225 86 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
226 0 : return CE_Failure;
227 : }
228 :
229 5706620 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
230 5706580 : const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
231 5706550 : GByte dummyBlock[2] = {0, 0};
232 5706550 : GByte *pabySrcBlock =
233 : dummyBlock; /* to avoid Coverity warning about nullptr dereference */
234 5706550 : GDALRasterBlock *poBlock = nullptr;
235 5706550 : const bool bUseIntegerRequestCoords =
236 5745890 : (!psExtraArg->bFloatingPointWindowValidity ||
237 39339 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
238 15979 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
239 :
240 : /* ==================================================================== */
241 : /* A common case is the data requested with the destination */
242 : /* is packed, and the block width is the raster width. */
243 : /* ==================================================================== */
244 5624630 : if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
245 2945030 : nBlockXSize == GetXSize() && nBufXSize == nXSize &&
246 11331200 : nBufYSize == nYSize && bUseIntegerRequestCoords)
247 : {
248 2812690 : CPLErr eErr = CE_None;
249 2812690 : int nLBlockY = -1;
250 :
251 8202380 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
252 : {
253 5390360 : const int iSrcY = iBufYOff + nYOff;
254 :
255 5390360 : if (iSrcY < nLBlockY * nBlockYSize ||
256 5390410 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
257 : {
258 3054820 : nLBlockY = iSrcY / nBlockYSize;
259 3054820 : bool bJustInitialize =
260 97703 : eRWFlag == GF_Write && nXOff == 0 &&
261 3203980 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
262 51455 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
263 :
264 : // Is this a partial tile at right and/or bottom edges of
265 : // the raster, and that is going to be completely written?
266 : // If so, do not load it from storage, but zero it so that
267 : // the content outside of the validity area is initialized.
268 3054820 : bool bMemZeroBuffer = false;
269 97703 : if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
270 21961 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
271 3152610 : nYOff + nYSize == GetYSize() &&
272 89 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
273 : {
274 89 : bJustInitialize = true;
275 89 : bMemZeroBuffer = true;
276 : }
277 :
278 3054820 : if (poBlock)
279 242083 : poBlock->DropLock();
280 :
281 3054820 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
282 3054780 : poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
283 3054980 : if (poBlock == nullptr)
284 : {
285 1067 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
286 : nullptr)
287 : {
288 0 : CPLError(CE_Failure, CPLE_AppDefined,
289 : "GetBlockRef failed at X block offset %d, "
290 : "Y block offset %d%s",
291 : 0, nLBlockY,
292 0 : (nErrorCounter != CPLGetErrorCounter())
293 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
294 : : "");
295 : }
296 1067 : eErr = CE_Failure;
297 1067 : break;
298 : }
299 :
300 3053910 : if (eRWFlag == GF_Write)
301 97703 : poBlock->MarkDirty();
302 :
303 3053910 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
304 3053910 : if (bMemZeroBuffer)
305 : {
306 89 : memset(pabySrcBlock, 0,
307 89 : static_cast<GPtrDiff_t>(nBandDataSize) *
308 89 : nBlockXSize * nBlockYSize);
309 : }
310 : }
311 :
312 5389450 : const auto nSrcByteOffset =
313 5389450 : (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
314 5389450 : nBlockXSize +
315 5389450 : nXOff) *
316 5389450 : nBandDataSize;
317 :
318 5389450 : if (eDataType == eBufType)
319 : {
320 1740010 : if (eRWFlag == GF_Read)
321 1498150 : memcpy(static_cast<GByte *>(pData) +
322 1498150 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
323 1498150 : pabySrcBlock + nSrcByteOffset,
324 : static_cast<size_t>(nLineSpace));
325 : else
326 241865 : memcpy(pabySrcBlock + nSrcByteOffset,
327 241865 : static_cast<GByte *>(pData) +
328 241865 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
329 : static_cast<size_t>(nLineSpace));
330 : }
331 : else
332 : {
333 : // Type to type conversion.
334 :
335 3649430 : if (eRWFlag == GF_Read)
336 3628880 : GDALCopyWords64(
337 3628880 : pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
338 : static_cast<GByte *>(pData) +
339 3628880 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
340 : eBufType, static_cast<int>(nPixelSpace), nBufXSize);
341 : else
342 20557 : GDALCopyWords64(static_cast<GByte *>(pData) +
343 20557 : static_cast<GPtrDiff_t>(iBufYOff) *
344 : nLineSpace,
345 : eBufType, static_cast<int>(nPixelSpace),
346 20557 : pabySrcBlock + nSrcByteOffset, eDataType,
347 : nBandDataSize, nBufXSize);
348 : }
349 :
350 5449590 : if (psExtraArg->pfnProgress != nullptr &&
351 59892 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
352 : psExtraArg->pProgressData))
353 : {
354 5 : eErr = CE_Failure;
355 5 : break;
356 : }
357 : }
358 :
359 2813100 : if (poBlock)
360 2811780 : poBlock->DropLock();
361 :
362 2812850 : return eErr;
363 : }
364 :
365 : /* ==================================================================== */
366 : /* Do we have overviews that would be appropriate to satisfy */
367 : /* this request? */
368 : /* ==================================================================== */
369 2893890 : if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
370 : eRWFlag == GF_Read)
371 : {
372 : GDALRasterIOExtraArg sExtraArg;
373 2832 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
374 :
375 : const int nOverview =
376 2832 : GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
377 : nBufXSize, nBufYSize, &sExtraArg);
378 2832 : if (nOverview >= 0)
379 : {
380 2812 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
381 2812 : if (poOverviewBand == nullptr)
382 2812 : return CE_Failure;
383 :
384 2812 : return poOverviewBand->RasterIO(
385 : eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
386 2812 : nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
387 : }
388 : }
389 :
390 702432 : if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
391 0 : nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
392 3593500 : nLineSpace == nPixelSpace * nBufXSize &&
393 0 : CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
394 : {
395 0 : memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
396 0 : return CE_None;
397 : }
398 :
399 : /* ==================================================================== */
400 : /* The second case when we don't need subsample data but likely */
401 : /* need data type conversion. */
402 : /* ==================================================================== */
403 2891070 : if ( // nPixelSpace == nBufDataSize &&
404 2891070 : nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
405 : {
406 : #if DEBUG_VERBOSE
407 : printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
408 : nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
409 : #endif
410 :
411 : /* --------------------------------------------------------------------
412 : */
413 : /* Loop over buffer computing source locations. */
414 : /* --------------------------------------------------------------------
415 : */
416 : // Calculate starting values out of loop
417 2528130 : const int nLBlockXStart = nXOff / nBlockXSize;
418 2528130 : const int nXSpanEnd = nBufXSize + nXOff;
419 :
420 2528130 : int nYInc = 0;
421 5090840 : for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
422 2562710 : iBufYOff += nYInc, iSrcY += nYInc)
423 : {
424 2562600 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
425 : static_cast<GPtrDiff_t>(nLineSpace);
426 2562600 : int nLBlockY = iSrcY / nBlockYSize;
427 2562600 : int nLBlockX = nLBlockXStart;
428 2562600 : int iSrcX = nXOff;
429 5336860 : while (iSrcX < nXSpanEnd)
430 : {
431 2774040 : int nXSpan = nLBlockX * nBlockXSize;
432 2774040 : if (nXSpan < INT_MAX - nBlockXSize)
433 2773940 : nXSpan += nBlockXSize;
434 : else
435 101 : nXSpan = INT_MAX;
436 2774040 : const int nXRight = nXSpan;
437 2774040 : nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
438 2774040 : const size_t nXSpanSize =
439 2774040 : nXSpan * static_cast<size_t>(nPixelSpace);
440 :
441 2774040 : bool bJustInitialize =
442 2042060 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
443 37173 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
444 4841680 : nXOff <= nLBlockX * nBlockXSize &&
445 25572 : nXOff + nXSize >= nXRight;
446 :
447 : // Is this a partial tile at right and/or bottom edges of
448 : // the raster, and that is going to be completely written?
449 : // If so, do not load it from storage, but zero it so that
450 : // the content outside of the validity area is initialized.
451 2774040 : bool bMemZeroBuffer = false;
452 2042060 : if (eRWFlag == GF_Write && !bJustInitialize &&
453 2017740 : nXOff <= nLBlockX * nBlockXSize &&
454 2016120 : nYOff <= nLBlockY * nBlockYSize &&
455 12095 : (nXOff + nXSize >= nXRight ||
456 : // cppcheck-suppress knownConditionTrueFalse
457 4818790 : (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
458 11917 : (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
459 10678 : (nYOff + nYSize == GetYSize() &&
460 1891 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
461 : {
462 3130 : bJustInitialize = true;
463 3130 : bMemZeroBuffer = true;
464 : }
465 :
466 : /* --------------------------------------------------------------------
467 : */
468 : /* Ensure we have the appropriate block loaded. */
469 : /* --------------------------------------------------------------------
470 : */
471 2774040 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
472 2774290 : poBlock =
473 2773940 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
474 2774290 : if (!poBlock)
475 : {
476 74 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
477 : nullptr)
478 : {
479 0 : CPLError(CE_Failure, CPLE_AppDefined,
480 : "GetBlockRef failed at X block offset %d, "
481 : "Y block offset %d%s",
482 : nLBlockX, nLBlockY,
483 0 : (nErrorCounter != CPLGetErrorCounter())
484 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
485 : : "");
486 : }
487 74 : return (CE_Failure);
488 : }
489 :
490 2774220 : if (eRWFlag == GF_Write)
491 2042060 : poBlock->MarkDirty();
492 :
493 2774220 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
494 2774210 : if (bMemZeroBuffer)
495 : {
496 3130 : memset(pabySrcBlock, 0,
497 3130 : static_cast<GPtrDiff_t>(nBandDataSize) *
498 3130 : nBlockXSize * nBlockYSize);
499 : }
500 : /* --------------------------------------------------------------------
501 : */
502 : /* Copy over this chunk of data. */
503 : /* --------------------------------------------------------------------
504 : */
505 2774210 : GPtrDiff_t iSrcOffset =
506 2774210 : (static_cast<GPtrDiff_t>(iSrcX) -
507 2774210 : static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
508 2774210 : (static_cast<GPtrDiff_t>(iSrcY) -
509 2774210 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
510 2774210 : nBlockXSize) *
511 2774210 : nBandDataSize;
512 : // Fill up as many rows as possible for the loaded block.
513 5548390 : const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
514 2774210 : nBufYSize - iBufYOff);
515 58577900 : for (int k = 0; k < kmax; k++)
516 : {
517 55803900 : if (eDataType == eBufType && nPixelSpace == nBufDataSize)
518 : {
519 51775200 : if (eRWFlag == GF_Read)
520 47410200 : memcpy(static_cast<GByte *>(pData) + iBufOffset +
521 47410200 : static_cast<GPtrDiff_t>(k) * nLineSpace,
522 47410200 : pabySrcBlock + iSrcOffset, nXSpanSize);
523 : else
524 4365040 : memcpy(pabySrcBlock + iSrcOffset,
525 4365040 : static_cast<GByte *>(pData) + iBufOffset +
526 4365040 : static_cast<GPtrDiff_t>(k) * nLineSpace,
527 : nXSpanSize);
528 : }
529 : else
530 : {
531 : /* type to type conversion */
532 4028700 : if (eRWFlag == GF_Read)
533 3908520 : GDALCopyWords64(
534 3908520 : pabySrcBlock + iSrcOffset, eDataType,
535 : nBandDataSize,
536 3908520 : static_cast<GByte *>(pData) + iBufOffset +
537 3908520 : static_cast<GPtrDiff_t>(k) * nLineSpace,
538 : eBufType, static_cast<int>(nPixelSpace),
539 : nXSpan);
540 : else
541 120182 : GDALCopyWords64(
542 120182 : static_cast<GByte *>(pData) + iBufOffset +
543 120182 : static_cast<GPtrDiff_t>(k) * nLineSpace,
544 : eBufType, static_cast<int>(nPixelSpace),
545 120182 : pabySrcBlock + iSrcOffset, eDataType,
546 : nBandDataSize, nXSpan);
547 : }
548 :
549 55803800 : iSrcOffset +=
550 55803800 : static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
551 : }
552 :
553 : iBufOffset =
554 2774020 : CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
555 2774030 : nLBlockX++;
556 2774030 : iSrcX += nXSpan;
557 :
558 2774030 : poBlock->DropLock();
559 2774260 : poBlock = nullptr;
560 : }
561 :
562 : /* Compute the increment to go on a block boundary */
563 2562820 : nYInc = nBlockYSize - (iSrcY % nBlockYSize);
564 :
565 2564600 : if (psExtraArg->pfnProgress != nullptr &&
566 1784 : !psExtraArg->pfnProgress(
567 2564600 : 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
568 : psExtraArg->pProgressData))
569 : {
570 100 : return CE_Failure;
571 : }
572 : }
573 :
574 2528240 : return CE_None;
575 : }
576 :
577 : /* ==================================================================== */
578 : /* Loop reading required source blocks to satisfy output */
579 : /* request. This is the most general implementation. */
580 : /* ==================================================================== */
581 :
582 362937 : double dfXOff = nXOff;
583 362937 : double dfYOff = nYOff;
584 362937 : double dfXSize = nXSize;
585 362937 : double dfYSize = nYSize;
586 362937 : if (psExtraArg->bFloatingPointWindowValidity)
587 : {
588 28159 : dfXOff = psExtraArg->dfXOff;
589 28159 : dfYOff = psExtraArg->dfYOff;
590 28159 : dfXSize = psExtraArg->dfXSize;
591 28159 : dfYSize = psExtraArg->dfYSize;
592 : }
593 :
594 : /* -------------------------------------------------------------------- */
595 : /* Compute stepping increment. */
596 : /* -------------------------------------------------------------------- */
597 362937 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
598 362937 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
599 362937 : CPLErr eErr = CE_None;
600 :
601 362937 : if (eRWFlag == GF_Write)
602 : {
603 : /* --------------------------------------------------------------------
604 : */
605 : /* Write case */
606 : /* Loop over raster window computing source locations in the buffer.
607 : */
608 : /* --------------------------------------------------------------------
609 : */
610 166650 : GByte *pabyDstBlock = nullptr;
611 166650 : int nLBlockX = -1;
612 166650 : int nLBlockY = -1;
613 :
614 1259590 : for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
615 : {
616 1092940 : const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
617 :
618 12063600 : for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
619 : {
620 10970600 : const int iBufXOff =
621 10970600 : static_cast<int>((iDstX - nXOff) / dfSrcXInc);
622 10970600 : GPtrDiff_t iBufOffset =
623 10970600 : static_cast<GPtrDiff_t>(iBufYOff) *
624 : static_cast<GPtrDiff_t>(nLineSpace) +
625 10970600 : iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
626 :
627 : // FIXME: this code likely doesn't work if the dirty block gets
628 : // flushed to disk before being completely written.
629 : // In the meantime, bJustInitialize should probably be set to
630 : // FALSE even if it is not ideal performance wise, and for
631 : // lossy compression.
632 :
633 : /* --------------------------------------------------------------------
634 : */
635 : /* Ensure we have the appropriate block loaded. */
636 : /* --------------------------------------------------------------------
637 : */
638 10970600 : if (iDstX < nLBlockX * nBlockXSize ||
639 10721300 : iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
640 10264600 : iDstY < nLBlockY * nBlockYSize ||
641 10264600 : iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
642 : {
643 738642 : nLBlockX = iDstX / nBlockXSize;
644 738642 : nLBlockY = iDstY / nBlockYSize;
645 :
646 738642 : const bool bJustInitialize =
647 1065870 : nYOff <= nLBlockY * nBlockYSize &&
648 327231 : nYOff + nYSize - nBlockYSize >=
649 327231 : nLBlockY * nBlockYSize &&
650 1116140 : nXOff <= nLBlockX * nBlockXSize &&
651 50265 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
652 : /*bool bMemZeroBuffer = FALSE;
653 : if( !bJustInitialize &&
654 : nXOff <= nLBlockX * nBlockXSize &&
655 : nYOff <= nLBlockY * nBlockYSize &&
656 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
657 : (nXOff + nXSize == GetXSize() &&
658 : (nLBlockX+1) * nBlockXSize > GetXSize())) &&
659 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
660 : (nYOff + nYSize == GetYSize() &&
661 : (nLBlockY+1) * nBlockYSize > GetYSize())) )
662 : {
663 : bJustInitialize = TRUE;
664 : bMemZeroBuffer = TRUE;
665 : }*/
666 738642 : if (poBlock != nullptr)
667 571992 : poBlock->DropLock();
668 :
669 738642 : poBlock =
670 738642 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
671 738642 : if (poBlock == nullptr)
672 : {
673 0 : return (CE_Failure);
674 : }
675 :
676 738642 : poBlock->MarkDirty();
677 :
678 738642 : pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
679 : /*if( bMemZeroBuffer )
680 : {
681 : memset(pabyDstBlock, 0,
682 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
683 : * nBlockYSize);
684 : }*/
685 : }
686 :
687 : // To make Coverity happy. Should not happen by design.
688 10970600 : if (pabyDstBlock == nullptr)
689 : {
690 0 : CPLAssert(false);
691 : eErr = CE_Failure;
692 : break;
693 : }
694 :
695 : /* --------------------------------------------------------------------
696 : */
697 : /* Copy over this pixel of data. */
698 : /* --------------------------------------------------------------------
699 : */
700 10970600 : GPtrDiff_t iDstOffset =
701 10970600 : (static_cast<GPtrDiff_t>(iDstX) -
702 10970600 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
703 10970600 : (static_cast<GPtrDiff_t>(iDstY) -
704 10970600 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
705 10970600 : nBlockXSize) *
706 10970600 : nBandDataSize;
707 :
708 10970600 : if (eDataType == eBufType)
709 : {
710 10967500 : memcpy(pabyDstBlock + iDstOffset,
711 10967500 : static_cast<GByte *>(pData) + iBufOffset,
712 : nBandDataSize);
713 : }
714 : else
715 : {
716 : /* type to type conversion ... ouch, this is expensive way
717 : of handling single words */
718 :
719 3096 : GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset,
720 3096 : eBufType, 0, pabyDstBlock + iDstOffset,
721 : eDataType, 0, 1);
722 : }
723 : }
724 :
725 1092940 : if (psExtraArg->pfnProgress != nullptr &&
726 0 : !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
727 : psExtraArg->pProgressData))
728 : {
729 0 : eErr = CE_Failure;
730 0 : break;
731 : }
732 : }
733 : }
734 : else
735 : {
736 196287 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
737 : {
738 7638 : if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
739 2496 : psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
740 2494 : psExtraArg->eResampleAlg == GRIORA_Bilinear ||
741 5147 : psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
742 2465 : GetColorTable() != nullptr)
743 : {
744 0 : CPLError(CE_Warning, CPLE_NotSupported,
745 : "Resampling method not supported on paletted band. "
746 : "Falling back to nearest neighbour");
747 : }
748 2574 : else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
749 3 : GDALDataTypeIsComplex(eDataType))
750 : {
751 0 : CPLError(CE_Warning, CPLE_NotSupported,
752 : "Resampling method not supported on complex data type "
753 : "band. Falling back to nearest neighbour");
754 : }
755 : else
756 : {
757 2571 : return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
758 : pData, nBufXSize, nBufYSize, eBufType,
759 2571 : nPixelSpace, nLineSpace, psExtraArg);
760 : }
761 : }
762 :
763 193625 : int nLimitBlockY = 0;
764 193625 : const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
765 193625 : int nStartBlockX = -nBlockXSize;
766 193625 : const double EPS = 1e-10;
767 193625 : int nLBlockY = -1;
768 193625 : const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
769 193625 : const bool bIntegerXFactor =
770 170978 : bUseIntegerRequestCoords &&
771 265612 : static_cast<int>(dfSrcXInc) == dfSrcXInc &&
772 71987 : static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
773 :
774 : /* --------------------------------------------------------------------
775 : */
776 : /* Read case */
777 : /* Loop over buffer computing source locations. */
778 : /* --------------------------------------------------------------------
779 : */
780 1945910 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
781 : {
782 : // Add small epsilon to avoid some numeric precision issues.
783 1752300 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
784 1752300 : const int iSrcY = static_cast<int>(std::min(
785 1752300 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
786 :
787 1752300 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
788 : static_cast<GPtrDiff_t>(nLineSpace);
789 :
790 1752300 : if (iSrcY >= nLimitBlockY)
791 : {
792 234842 : nLBlockY = iSrcY / nBlockYSize;
793 234842 : nLimitBlockY = nLBlockY * nBlockYSize;
794 234842 : if (nLimitBlockY < INT_MAX - nBlockYSize)
795 234842 : nLimitBlockY += nBlockYSize;
796 : else
797 0 : nLimitBlockY = INT_MAX;
798 : // Make sure a new block is loaded.
799 234842 : nStartBlockX = -nBlockXSize;
800 : }
801 1517450 : else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
802 : {
803 : // Make sure a new block is loaded.
804 429795 : nStartBlockX = -nBlockXSize;
805 : }
806 :
807 1752300 : GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
808 1752300 : static_cast<GPtrDiff_t>(nBlockXSize);
809 :
810 1752300 : if (bIntegerXFactor)
811 : {
812 413236 : int iSrcX = static_cast<int>(dfSrcXStart);
813 413236 : const int nSrcXInc = static_cast<int>(dfSrcXInc);
814 413236 : GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
815 413236 : bool bRet = false;
816 413236 : if (bByteCopy)
817 : {
818 302849 : bRet = DownsamplingIntegerXFactor<true, 1>(
819 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
820 : static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
821 : GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
822 : }
823 110387 : else if (eDataType == eBufType)
824 : {
825 110182 : switch (nBandDataSize)
826 : {
827 110102 : case 2:
828 110102 : bRet = DownsamplingIntegerXFactor<true, 2>(
829 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
830 : pabyDstData, static_cast<int>(nPixelSpace),
831 : nBufXSize, eDataType, eDataType, nStartBlockX,
832 : nBlockXSize, poBlock, nLBlockY);
833 110102 : break;
834 22 : case 4:
835 22 : bRet = DownsamplingIntegerXFactor<true, 4>(
836 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
837 : pabyDstData, static_cast<int>(nPixelSpace),
838 : nBufXSize, eDataType, eDataType, nStartBlockX,
839 : nBlockXSize, poBlock, nLBlockY);
840 22 : break;
841 56 : case 8:
842 56 : bRet = DownsamplingIntegerXFactor<true, 8>(
843 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
844 : pabyDstData, static_cast<int>(nPixelSpace),
845 : nBufXSize, eDataType, eDataType, nStartBlockX,
846 : nBlockXSize, poBlock, nLBlockY);
847 56 : break;
848 2 : case 16:
849 2 : bRet = DownsamplingIntegerXFactor<true, 16>(
850 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
851 : pabyDstData, static_cast<int>(nPixelSpace),
852 : nBufXSize, eDataType, eDataType, nStartBlockX,
853 : nBlockXSize, poBlock, nLBlockY);
854 2 : break;
855 0 : default:
856 0 : CPLAssert(false);
857 : break;
858 : }
859 : }
860 : else
861 : {
862 205 : bRet = DownsamplingIntegerXFactor<false, 0>(
863 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
864 : static_cast<int>(nPixelSpace), nBufXSize, eDataType,
865 : eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
866 : }
867 413236 : if (!bRet)
868 1 : eErr = CE_Failure;
869 : }
870 : else
871 : {
872 1339060 : double dfSrcX = dfSrcXStart;
873 560923000 : for (int iBufXOff = 0; iBufXOff < nBufXSize;
874 559584000 : iBufXOff++, dfSrcX += dfSrcXInc)
875 : {
876 : // TODO?: try to avoid the clamping for most iterations
877 : const int iSrcX = static_cast<int>(
878 1119170000 : std::min(std::max(0.0, dfSrcX),
879 559584000 : static_cast<double>(nRasterXSize - 1)));
880 :
881 : /* --------------------------------------------------------------------
882 : */
883 : /* Ensure we have the appropriate block loaded. */
884 : /* --------------------------------------------------------------------
885 : */
886 559584000 : if (iSrcX >= nBlockXSize + nStartBlockX)
887 : {
888 1705400 : const int nLBlockX = iSrcX / nBlockXSize;
889 1705400 : nStartBlockX = nLBlockX * nBlockXSize;
890 :
891 1705400 : if (poBlock != nullptr)
892 1583760 : poBlock->DropLock();
893 :
894 1705400 : poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
895 1705400 : if (poBlock == nullptr)
896 : {
897 9 : eErr = CE_Failure;
898 9 : break;
899 : }
900 :
901 : pabySrcBlock =
902 1705390 : static_cast<GByte *>(poBlock->GetDataRef());
903 : }
904 559584000 : const GPtrDiff_t nDiffX =
905 559584000 : static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
906 :
907 : /* --------------------------------------------------------------------
908 : */
909 : /* Copy over this pixel of data. */
910 : /* --------------------------------------------------------------------
911 : */
912 :
913 559584000 : if (bByteCopy)
914 : {
915 506145000 : GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
916 506145000 : static_cast<GByte *>(pData)[iBufOffset] =
917 506145000 : pabySrcBlock[iSrcOffset];
918 : }
919 53439100 : else if (eDataType == eBufType)
920 : {
921 48225500 : GPtrDiff_t iSrcOffset =
922 48225500 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
923 48225500 : memcpy(static_cast<GByte *>(pData) + iBufOffset,
924 48225500 : pabySrcBlock + iSrcOffset, nBandDataSize);
925 : }
926 : else
927 : {
928 : // Type to type conversion ...
929 5213610 : GPtrDiff_t iSrcOffset =
930 5213610 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
931 5213610 : GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
932 : static_cast<GByte *>(pData) +
933 5213610 : iBufOffset,
934 : eBufType, 0, 1);
935 : }
936 :
937 559584000 : iBufOffset += static_cast<int>(nPixelSpace);
938 : }
939 : }
940 1752300 : if (eErr == CE_Failure)
941 11 : break;
942 :
943 1963450 : if (psExtraArg->pfnProgress != nullptr &&
944 211166 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
945 : psExtraArg->pProgressData))
946 : {
947 1 : eErr = CE_Failure;
948 1 : break;
949 : }
950 : }
951 : }
952 :
953 360275 : if (poBlock != nullptr)
954 360265 : poBlock->DropLock();
955 :
956 360275 : return eErr;
957 : }
958 :
959 : /************************************************************************/
960 : /* GDALRasterIOTransformer() */
961 : /************************************************************************/
962 :
// Parameters of the per-axis scale + offset mapping applied by
// GDALRasterIOTransformer():
//     src = dst * df?RatioDstToSrc + df?Off      (destination -> source)
//     dst = (src - df?Off) / df?RatioDstToSrc    (source -> destination)
// The default member values describe the identity mapping, so that an
// instance whose fields have not all been assigned never carries
// indeterminate values (and never divides by an uninitialized ratio).
struct GDALRasterIOTransformerStruct
{
    double dfXOff = 0.0;            // offset added along X
    double dfYOff = 0.0;            // offset added along Y
    double dfXRatioDstToSrc = 1.0;  // X scale from destination to source
    double dfYRatioDstToSrc = 1.0;  // Y scale from destination to source
};
970 :
971 6748 : static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
972 : int nPointCount, double *x, double *y,
973 : double * /* z */, int *panSuccess)
974 : {
975 6748 : GDALRasterIOTransformerStruct *psParams =
976 : static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
977 6748 : if (bDstToSrc)
978 : {
979 252996 : for (int i = 0; i < nPointCount; i++)
980 : {
981 246836 : x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
982 246836 : y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
983 246836 : panSuccess[i] = TRUE;
984 : }
985 : }
986 : else
987 : {
988 1176 : for (int i = 0; i < nPointCount; i++)
989 : {
990 588 : x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
991 588 : y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
992 588 : panSuccess[i] = TRUE;
993 : }
994 : }
995 6748 : return TRUE;
996 : }
997 :
998 : /************************************************************************/
999 : /* RasterIOResampled() */
1000 : /************************************************************************/
1001 :
1002 : //! @cond Doxygen_Suppress
1003 2571 : CPLErr GDALRasterBand::RasterIOResampled(
1004 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1005 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1006 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1007 : {
1008 : // Determine if we use warping resampling or overview resampling
1009 : const bool bUseWarp =
1010 2571 : (GDALDataTypeIsComplex(eDataType) &&
1011 2728 : psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
1012 157 : psExtraArg->eResampleAlg != GRIORA_Mode);
1013 :
1014 2571 : double dfXOff = nXOff;
1015 2571 : double dfYOff = nYOff;
1016 2571 : double dfXSize = nXSize;
1017 2571 : double dfYSize = nYSize;
1018 2571 : if (psExtraArg->bFloatingPointWindowValidity)
1019 : {
1020 2114 : dfXOff = psExtraArg->dfXOff;
1021 2114 : dfYOff = psExtraArg->dfYOff;
1022 2114 : dfXSize = psExtraArg->dfXSize;
1023 2114 : dfYSize = psExtraArg->dfYSize;
1024 : }
1025 :
1026 2571 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1027 2571 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1028 :
1029 : // Determine the coordinates in the "virtual" output raster to see
1030 : // if there are not integers, in which case we will use them as a shift
1031 : // so that subwindow extracts give the exact same results as entire raster
1032 : // scaling.
1033 2571 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1034 2571 : bool bHasXOffVirtual = false;
1035 2571 : int nDestXOffVirtual = 0;
1036 2571 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1037 : {
1038 2245 : bHasXOffVirtual = true;
1039 2245 : dfXOff = nXOff;
1040 2245 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1041 : }
1042 :
1043 2571 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1044 2571 : bool bHasYOffVirtual = false;
1045 2571 : int nDestYOffVirtual = 0;
1046 2571 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1047 : {
1048 2239 : bHasYOffVirtual = true;
1049 2239 : dfYOff = nYOff;
1050 2239 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1051 : }
1052 :
1053 : // Create a MEM dataset that wraps the output buffer.
1054 : GDALDataset *poMEMDS;
1055 2571 : void *pTempBuffer = nullptr;
1056 2571 : GSpacing nPSMem = nPixelSpace;
1057 2571 : GSpacing nLSMem = nLineSpace;
1058 2571 : void *pDataMem = pData;
1059 2571 : GDALDataType eDTMem = eBufType;
1060 2571 : if (eBufType != eDataType)
1061 : {
1062 40 : nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1063 40 : nLSMem = nPSMem * nBufXSize;
1064 : pTempBuffer =
1065 40 : VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1066 40 : if (pTempBuffer == nullptr)
1067 0 : return CE_Failure;
1068 40 : pDataMem = pTempBuffer;
1069 40 : eDTMem = eDataType;
1070 : }
1071 :
1072 : poMEMDS =
1073 2571 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1074 : nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1075 2571 : GByte *pabyData = static_cast<GByte *>(pDataMem) -
1076 2571 : nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1077 2571 : GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1078 : poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1079 2571 : poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1080 :
1081 2571 : const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1082 2571 : const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
1083 2571 : if (pszNBITS)
1084 6 : GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
1085 6 : "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1086 :
1087 2571 : CPLErr eErr = CE_None;
1088 :
1089 : // Do the resampling.
1090 2571 : if (bUseWarp)
1091 : {
1092 149 : int bHasNoData = FALSE;
1093 149 : double dfNoDataValue = GetNoDataValue(&bHasNoData);
1094 :
1095 149 : VRTDatasetH hVRTDS = nullptr;
1096 149 : GDALRasterBandH hVRTBand = nullptr;
1097 149 : if (GetDataset() == nullptr)
1098 : {
1099 : /* Create VRT dataset that wraps the whole dataset */
1100 0 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1101 0 : VRTAddBand(hVRTDS, eDataType, nullptr);
1102 0 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1103 0 : VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1104 : 0, 0, nRasterXSize, nRasterYSize, nullptr,
1105 : VRT_NODATA_UNSET);
1106 :
1107 : /* Add a mask band if needed */
1108 0 : if (GetMaskFlags() != GMF_ALL_VALID)
1109 : {
1110 0 : GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1111 : VRTSourcedRasterBand *poVRTMaskBand =
1112 : reinterpret_cast<VRTSourcedRasterBand *>(
1113 : reinterpret_cast<GDALRasterBand *>(hVRTBand)
1114 0 : ->GetMaskBand());
1115 0 : poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1116 0 : nRasterYSize, 0, 0,
1117 0 : nRasterXSize, nRasterYSize);
1118 : }
1119 : }
1120 :
1121 149 : GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1122 149 : switch (psExtraArg->eResampleAlg)
1123 : {
1124 0 : case GRIORA_NearestNeighbour:
1125 0 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1126 0 : break;
1127 147 : case GRIORA_Bilinear:
1128 147 : psWarpOptions->eResampleAlg = GRA_Bilinear;
1129 147 : break;
1130 0 : case GRIORA_Cubic:
1131 0 : psWarpOptions->eResampleAlg = GRA_Cubic;
1132 0 : break;
1133 0 : case GRIORA_CubicSpline:
1134 0 : psWarpOptions->eResampleAlg = GRA_CubicSpline;
1135 0 : break;
1136 0 : case GRIORA_Lanczos:
1137 0 : psWarpOptions->eResampleAlg = GRA_Lanczos;
1138 0 : break;
1139 0 : case GRIORA_Average:
1140 0 : psWarpOptions->eResampleAlg = GRA_Average;
1141 0 : break;
1142 2 : case GRIORA_RMS:
1143 2 : psWarpOptions->eResampleAlg = GRA_RMS;
1144 2 : break;
1145 0 : case GRIORA_Mode:
1146 0 : psWarpOptions->eResampleAlg = GRA_Mode;
1147 0 : break;
1148 0 : default:
1149 0 : CPLAssert(false);
1150 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1151 : break;
1152 : }
1153 149 : psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1154 149 : psWarpOptions->hDstDS = poMEMDS;
1155 149 : psWarpOptions->nBandCount = 1;
1156 149 : int nSrcBandNumber = hVRTDS ? 1 : nBand;
1157 149 : int nDstBandNumber = 1;
1158 149 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1159 149 : psWarpOptions->panDstBands = &nDstBandNumber;
1160 298 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1161 149 : ? psExtraArg->pfnProgress
1162 : : GDALDummyProgress;
1163 149 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1164 149 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1165 149 : if (bHasNoData)
1166 : {
1167 0 : psWarpOptions->papszWarpOptions = CSLSetNameValue(
1168 : psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1169 0 : if (psWarpOptions->padfSrcNoDataReal == nullptr)
1170 : {
1171 0 : psWarpOptions->padfSrcNoDataReal =
1172 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1173 0 : psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1174 : }
1175 :
1176 0 : if (psWarpOptions->padfDstNoDataReal == nullptr)
1177 : {
1178 0 : psWarpOptions->padfDstNoDataReal =
1179 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1180 0 : psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1181 : }
1182 : }
1183 :
1184 : GDALRasterIOTransformerStruct sTransformer;
1185 149 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1186 149 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1187 149 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1188 149 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1189 149 : psWarpOptions->pTransformerArg = &sTransformer;
1190 :
1191 : GDALWarpOperationH hWarpOperation =
1192 149 : GDALCreateWarpOperation(psWarpOptions);
1193 149 : eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1194 : nDestYOffVirtual, nBufXSize, nBufYSize);
1195 149 : GDALDestroyWarpOperation(hWarpOperation);
1196 :
1197 149 : psWarpOptions->panSrcBands = nullptr;
1198 149 : psWarpOptions->panDstBands = nullptr;
1199 149 : GDALDestroyWarpOptions(psWarpOptions);
1200 :
1201 149 : if (hVRTDS)
1202 0 : GDALClose(hVRTDS);
1203 : }
1204 : else
1205 : {
1206 2422 : const char *pszResampling =
1207 2608 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1208 297 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1209 220 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1210 213 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1211 159 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1212 95 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1213 43 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1214 3 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1215 : : "UNKNOWN";
1216 :
1217 2422 : int nKernelRadius = 0;
1218 : GDALResampleFunction pfnResampleFunc =
1219 2422 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1220 2422 : CPLAssert(pfnResampleFunc);
1221 : GDALDataType eWrkDataType =
1222 2422 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1223 2422 : int nHasNoData = 0;
1224 2422 : double dfNoDataValue = GetNoDataValue(&nHasNoData);
1225 2422 : const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1226 2422 : if (!bHasNoData)
1227 2358 : dfNoDataValue = 0.0;
1228 :
1229 2422 : int nDstBlockXSize = nBufXSize;
1230 2422 : int nDstBlockYSize = nBufYSize;
1231 2422 : int nFullResXChunk = 0;
1232 2422 : int nFullResYChunk = 0;
1233 : while (true)
1234 : {
1235 2422 : nFullResXChunk =
1236 2422 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1237 2422 : nFullResYChunk =
1238 2422 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1239 2422 : if (nFullResXChunk > nRasterXSize)
1240 2233 : nFullResXChunk = nRasterXSize;
1241 2422 : if (nFullResYChunk > nRasterYSize)
1242 216 : nFullResYChunk = nRasterYSize;
1243 2422 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1244 2376 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1245 : 1024 * 1024))
1246 : break;
1247 : // When operating on the full width of a raster whose block width is
1248 : // the raster width, prefer doing chunks in height.
1249 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1250 : nDstBlockYSize > 1)
1251 0 : nDstBlockYSize /= 2;
1252 : /* Otherwise cut the maximal dimension */
1253 0 : else if (nDstBlockXSize > 1 &&
1254 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1255 0 : nDstBlockXSize /= 2;
1256 : else
1257 0 : nDstBlockYSize /= 2;
1258 : }
1259 :
1260 2422 : int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1261 2422 : int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1262 2422 : if (nOvrXFactor == 0)
1263 2024 : nOvrXFactor = 1;
1264 2422 : if (nOvrYFactor == 0)
1265 2023 : nOvrYFactor = 1;
1266 2422 : int nFullResXSizeQueried =
1267 2422 : nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1268 2422 : int nFullResYSizeQueried =
1269 2422 : nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1270 :
1271 2422 : if (nFullResXSizeQueried > nRasterXSize)
1272 2135 : nFullResXSizeQueried = nRasterXSize;
1273 2422 : if (nFullResYSizeQueried > nRasterYSize)
1274 129 : nFullResYSizeQueried = nRasterYSize;
1275 :
1276 : void *pChunk =
1277 2422 : VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1278 : nFullResXSizeQueried, nFullResYSizeQueried);
1279 2422 : GByte *pabyChunkNoDataMask = nullptr;
1280 :
1281 2422 : GDALRasterBand *poMaskBand = GetMaskBand();
1282 2422 : int l_nMaskFlags = GetMaskFlags();
1283 :
1284 2422 : bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1285 2422 : if (bUseNoDataMask)
1286 : {
1287 126 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1288 : nFullResXSizeQueried, nFullResYSizeQueried));
1289 : }
1290 2422 : if (pChunk == nullptr ||
1291 126 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1292 : {
1293 0 : GDALClose(poMEMDS);
1294 0 : CPLFree(pChunk);
1295 0 : CPLFree(pabyChunkNoDataMask);
1296 0 : VSIFree(pTempBuffer);
1297 0 : return CE_Failure;
1298 : }
1299 :
1300 2422 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1301 2422 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1302 2422 : int nBlocksDone = 0;
1303 :
1304 : int nDstYOff;
1305 4844 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1306 2422 : nDstYOff += nDstBlockYSize)
1307 : {
1308 : int nDstYCount;
1309 2422 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1310 2422 : nDstYCount = nDstBlockYSize;
1311 : else
1312 0 : nDstYCount = nBufYSize - nDstYOff;
1313 :
1314 2422 : int nChunkYOff =
1315 2422 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1316 2422 : int nChunkYOff2 = nYOff + 1 +
1317 2422 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1318 : dfYRatioDstToSrc));
1319 2422 : if (nChunkYOff2 > nRasterYSize)
1320 323 : nChunkYOff2 = nRasterYSize;
1321 2422 : int nYCount = nChunkYOff2 - nChunkYOff;
1322 2422 : CPLAssert(nYCount <= nFullResYChunk);
1323 :
1324 2422 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1325 2422 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1326 2422 : if (nChunkYOffQueried < 0)
1327 : {
1328 231 : nChunkYSizeQueried += nChunkYOffQueried;
1329 231 : nChunkYOffQueried = 0;
1330 : }
1331 2422 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1332 331 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1333 2422 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1334 :
1335 2422 : int nDstXOff = 0;
1336 4844 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1337 2422 : nDstXOff += nDstBlockXSize)
1338 : {
1339 2422 : int nDstXCount = 0;
1340 2422 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1341 2422 : nDstXCount = nDstBlockXSize;
1342 : else
1343 0 : nDstXCount = nBufXSize - nDstXOff;
1344 :
1345 2422 : int nChunkXOff =
1346 2422 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1347 2422 : int nChunkXOff2 =
1348 2422 : nXOff + 1 +
1349 2422 : static_cast<int>(
1350 2422 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1351 2422 : if (nChunkXOff2 > nRasterXSize)
1352 2234 : nChunkXOff2 = nRasterXSize;
1353 2422 : int nXCount = nChunkXOff2 - nChunkXOff;
1354 2422 : CPLAssert(nXCount <= nFullResXChunk);
1355 :
1356 2422 : int nChunkXOffQueried =
1357 2422 : nChunkXOff - nKernelRadius * nOvrXFactor;
1358 2422 : int nChunkXSizeQueried =
1359 2422 : nXCount + 2 * nKernelRadius * nOvrXFactor;
1360 2422 : if (nChunkXOffQueried < 0)
1361 : {
1362 2148 : nChunkXSizeQueried += nChunkXOffQueried;
1363 2148 : nChunkXOffQueried = 0;
1364 : }
1365 2422 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1366 2134 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1367 2422 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1368 :
1369 : // Read the source buffers.
1370 2422 : eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1371 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1372 : nChunkXSizeQueried, nChunkYSizeQueried,
1373 : eWrkDataType, 0, 0, nullptr);
1374 :
1375 2422 : bool bSkipResample = false;
1376 2422 : bool bNoDataMaskFullyOpaque = false;
1377 2422 : if (eErr == CE_None && bUseNoDataMask)
1378 : {
1379 126 : eErr = poMaskBand->RasterIO(
1380 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1381 : nChunkXSizeQueried, nChunkYSizeQueried,
1382 : pabyChunkNoDataMask, nChunkXSizeQueried,
1383 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1384 :
1385 : /* Optimizations if mask if fully opaque or transparent */
1386 126 : int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1387 126 : GByte bVal = pabyChunkNoDataMask[0];
1388 126 : int i = 1;
1389 241310 : for (; i < nPixels; i++)
1390 : {
1391 241261 : if (pabyChunkNoDataMask[i] != bVal)
1392 77 : break;
1393 : }
1394 126 : if (i == nPixels)
1395 : {
1396 49 : if (bVal == 0)
1397 : {
1398 712 : for (int j = 0; j < nDstYCount; j++)
1399 : {
1400 686 : GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0,
1401 : static_cast<GByte *>(pDataMem) +
1402 686 : nLSMem * (j + nDstYOff) +
1403 686 : nDstXOff * nPSMem,
1404 : eDTMem,
1405 : static_cast<int>(nPSMem),
1406 : nDstXCount);
1407 : }
1408 26 : bSkipResample = true;
1409 : }
1410 : else
1411 : {
1412 23 : bNoDataMaskFullyOpaque = true;
1413 : }
1414 : }
1415 : }
1416 :
1417 2422 : if (!bSkipResample && eErr == CE_None)
1418 : {
1419 2394 : const bool bPropagateNoData = false;
1420 2394 : void *pDstBuffer = nullptr;
1421 2394 : GDALDataType eDstBufferDataType = GDT_Unknown;
1422 : GDALRasterBand *poMEMBand =
1423 2394 : GDALRasterBand::FromHandle(hMEMBand);
1424 2394 : GDALOverviewResampleArgs args;
1425 2394 : args.eSrcDataType = eDataType;
1426 2394 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1427 2394 : args.nOvrXSize = poMEMBand->GetXSize();
1428 2394 : args.nOvrYSize = poMEMBand->GetYSize();
1429 2394 : args.nOvrNBITS = nNBITS;
1430 2394 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1431 2394 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1432 2394 : args.dfSrcXDelta =
1433 2394 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1434 2394 : args.dfSrcYDelta =
1435 2394 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1436 2394 : args.eWrkDataType = eWrkDataType;
1437 2394 : args.pabyChunkNodataMask =
1438 2394 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
1439 2394 : args.nChunkXOff =
1440 2394 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1441 2394 : args.nChunkXSize = nChunkXSizeQueried;
1442 2394 : args.nChunkYOff =
1443 2394 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1444 2394 : args.nChunkYSize = nChunkYSizeQueried;
1445 2394 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1446 2394 : args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
1447 2394 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1448 2394 : args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
1449 2394 : args.pszResampling = pszResampling;
1450 2394 : args.bHasNoData = bHasNoData;
1451 2394 : args.dfNoDataValue = dfNoDataValue;
1452 2394 : args.poColorTable = GetColorTable();
1453 2394 : args.bPropagateNoData = bPropagateNoData;
1454 2394 : eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
1455 : &eDstBufferDataType);
1456 2394 : if (eErr == CE_None)
1457 : {
1458 2394 : eErr = poMEMBand->RasterIO(
1459 : GF_Write, nDstXOff + nDestXOffVirtual,
1460 : nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1461 : pDstBuffer, nDstXCount, nDstYCount,
1462 : eDstBufferDataType, 0, 0, nullptr);
1463 : }
1464 2394 : CPLFree(pDstBuffer);
1465 : }
1466 :
1467 2422 : nBlocksDone++;
1468 2451 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1469 29 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1470 : "", psExtraArg->pProgressData))
1471 : {
1472 1 : eErr = CE_Failure;
1473 : }
1474 : }
1475 : }
1476 :
1477 2422 : CPLFree(pChunk);
1478 2422 : CPLFree(pabyChunkNoDataMask);
1479 : }
1480 :
1481 2571 : if (eBufType != eDataType)
1482 : {
1483 40 : CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1484 : GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1485 : pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1486 : nullptr));
1487 : }
1488 2571 : GDALClose(poMEMDS);
1489 2571 : VSIFree(pTempBuffer);
1490 :
1491 2571 : return eErr;
1492 : }
1493 :
1494 : /************************************************************************/
1495 : /* RasterIOResampled() */
1496 : /************************************************************************/
1497 :
1498 284 : CPLErr GDALDataset::RasterIOResampled(
1499 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1500 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1501 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1502 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1503 :
1504 : {
1505 : #if 0
1506 : // Determine if we use warping resampling or overview resampling
1507 : bool bUseWarp = false;
1508 : if( GDALDataTypeIsComplex( eDataType ) )
1509 : bUseWarp = true;
1510 : #endif
1511 :
1512 284 : double dfXOff = nXOff;
1513 284 : double dfYOff = nYOff;
1514 284 : double dfXSize = nXSize;
1515 284 : double dfYSize = nYSize;
1516 284 : if (psExtraArg->bFloatingPointWindowValidity)
1517 : {
1518 162 : dfXOff = psExtraArg->dfXOff;
1519 162 : dfYOff = psExtraArg->dfYOff;
1520 162 : dfXSize = psExtraArg->dfXSize;
1521 162 : dfYSize = psExtraArg->dfYSize;
1522 : }
1523 :
1524 284 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1525 284 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1526 :
1527 : // Determine the coordinates in the "virtual" output raster to see
1528 : // if there are not integers, in which case we will use them as a shift
1529 : // so that subwindow extracts give the exact same results as entire raster
1530 : // scaling.
1531 284 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1532 284 : bool bHasXOffVirtual = false;
1533 284 : int nDestXOffVirtual = 0;
1534 284 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1535 : {
1536 161 : bHasXOffVirtual = true;
1537 161 : dfXOff = nXOff;
1538 161 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1539 : }
1540 :
1541 284 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1542 284 : bool bHasYOffVirtual = false;
1543 284 : int nDestYOffVirtual = 0;
1544 284 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1545 : {
1546 120 : bHasYOffVirtual = true;
1547 120 : dfYOff = nYOff;
1548 120 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1549 : }
1550 :
1551 : // Create a MEM dataset that wraps the output buffer.
1552 : GDALDataset *poMEMDS =
1553 284 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1554 : nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1555 : GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1556 280 : CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
1557 275 : int nNBITS = 0;
1558 1230 : for (int i = 0; i < nBandCount; i++)
1559 : {
1560 949 : char szBuffer[32] = {'\0'};
1561 1917 : int nRet = CPLPrintPointer(
1562 : szBuffer,
1563 949 : static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
1564 949 : nLineSpace * nDestYOffVirtual + nBandSpace * i,
1565 : sizeof(szBuffer));
1566 968 : szBuffer[nRet] = 0;
1567 :
1568 968 : char szBuffer0[64] = {'\0'};
1569 968 : snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1570 :
1571 968 : char szBuffer1[64] = {'\0'};
1572 968 : snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
1573 : static_cast<GIntBig>(nPixelSpace));
1574 :
1575 968 : char szBuffer2[64] = {'\0'};
1576 968 : snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
1577 : static_cast<GIntBig>(nLineSpace));
1578 :
1579 968 : char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
1580 :
1581 968 : poMEMDS->AddBand(eBufType, apszOptions);
1582 :
1583 965 : GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1584 951 : papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
1585 : const char *pszNBITS =
1586 954 : poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1587 954 : if (pszNBITS)
1588 : {
1589 0 : nNBITS = atoi(pszNBITS);
1590 0 : poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1591 0 : "IMAGE_STRUCTURE");
1592 : }
1593 : }
1594 :
1595 281 : CPLErr eErr = CE_None;
1596 :
1597 : // TODO(schwehr): Why disabled? Why not just delete?
1598 : // Looks like this code was initially added as disable by copying
1599 : // from RasterIO here:
1600 : // https://trac.osgeo.org/gdal/changeset/29572
1601 : #if 0
1602 : // Do the resampling.
1603 : if( bUseWarp )
1604 : {
1605 : VRTDatasetH hVRTDS = nullptr;
1606 : GDALRasterBandH hVRTBand = nullptr;
1607 : if( GetDataset() == nullptr )
1608 : {
1609 : /* Create VRT dataset that wraps the whole dataset */
1610 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1611 : VRTAddBand( hVRTDS, eDataType, nullptr );
1612 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1613 : VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1614 : (GDALRasterBandH)this,
1615 : 0, 0,
1616 : nRasterXSize, nRasterYSize,
1617 : 0, 0,
1618 : nRasterXSize, nRasterYSize,
1619 : nullptr, VRT_NODATA_UNSET );
1620 :
1621 : /* Add a mask band if needed */
1622 : if( GetMaskFlags() != GMF_ALL_VALID )
1623 : {
1624 : ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1625 : VRTSourcedRasterBand* poVRTMaskBand =
1626 : (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1627 : poVRTMaskBand->
1628 : AddMaskBandSource( this,
1629 : 0, 0,
1630 : nRasterXSize, nRasterYSize,
1631 : 0, 0,
1632 : nRasterXSize, nRasterYSize);
1633 : }
1634 : }
1635 :
1636 : GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1637 : psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1638 : psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1639 : psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1640 : psWarpOptions->nBandCount = 1;
1641 : int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1642 : int nDstBandNumber = 1;
1643 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1644 : psWarpOptions->panDstBands = &nDstBandNumber;
1645 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1646 : psExtraArg->pfnProgress : GDALDummyProgress;
1647 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1648 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1649 : GDALRasterIOTransformerStruct sTransformer;
1650 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1651 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1652 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1653 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1654 : psWarpOptions->pTransformerArg = &sTransformer;
1655 :
1656 : GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1657 : eErr = GDALChunkAndWarpImage( hWarpOperation,
1658 : nDestXOffVirtual, nDestYOffVirtual,
1659 : nBufXSize, nBufYSize );
1660 : GDALDestroyWarpOperation( hWarpOperation );
1661 :
1662 : psWarpOptions->panSrcBands = nullptr;
1663 : psWarpOptions->panDstBands = nullptr;
1664 : GDALDestroyWarpOptions( psWarpOptions );
1665 :
1666 : if( hVRTDS )
1667 : GDALClose(hVRTDS);
1668 : }
1669 : else
1670 : #endif
1671 : {
1672 281 : const char *pszResampling =
1673 441 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1674 160 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1675 0 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1676 0 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1677 0 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1678 0 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1679 0 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1680 0 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1681 : : "UNKNOWN";
1682 :
1683 281 : GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1684 278 : GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1685 : int nBlockXSize, nBlockYSize;
1686 273 : poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1687 :
1688 : int nKernelRadius;
1689 : GDALResampleFunction pfnResampleFunc =
1690 273 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1691 273 : CPLAssert(pfnResampleFunc);
1692 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1693 : GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1694 : GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1695 : #endif
1696 : GDALDataType eWrkDataType =
1697 273 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1698 :
1699 271 : int nDstBlockXSize = nBufXSize;
1700 271 : int nDstBlockYSize = nBufYSize;
1701 : int nFullResXChunk, nFullResYChunk;
1702 : while (true)
1703 : {
1704 271 : nFullResXChunk =
1705 271 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1706 271 : nFullResYChunk =
1707 271 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1708 271 : if (nFullResXChunk > nRasterXSize)
1709 151 : nFullResXChunk = nRasterXSize;
1710 271 : if (nFullResYChunk > nRasterYSize)
1711 33 : nFullResYChunk = nRasterYSize;
1712 271 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1713 269 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1714 : 1024 * 1024))
1715 : break;
1716 : // When operating on the full width of a raster whose block width is
1717 : // the raster width, prefer doing chunks in height.
1718 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1719 : nDstBlockYSize > 1)
1720 0 : nDstBlockYSize /= 2;
1721 : /* Otherwise cut the maximal dimension */
1722 0 : else if (nDstBlockXSize > 1 &&
1723 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1724 0 : nDstBlockXSize /= 2;
1725 : else
1726 0 : nDstBlockYSize /= 2;
1727 : }
1728 :
1729 541 : int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1730 271 : static_cast<int>(0.5 + dfYRatioDstToSrc));
1731 270 : if (nOvrFactor == 0)
1732 94 : nOvrFactor = 1;
1733 270 : int nFullResXSizeQueried =
1734 270 : nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1735 270 : int nFullResYSizeQueried =
1736 270 : nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1737 :
1738 270 : if (nFullResXSizeQueried > nRasterXSize)
1739 154 : nFullResXSizeQueried = nRasterXSize;
1740 270 : if (nFullResYSizeQueried > nRasterYSize)
1741 36 : nFullResYSizeQueried = nRasterYSize;
1742 :
1743 270 : void *pChunk = VSI_MALLOC3_VERBOSE(
1744 : cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1745 : nBandCount),
1746 : nFullResXSizeQueried, nFullResYSizeQueried);
1747 284 : GByte *pabyChunkNoDataMask = nullptr;
1748 :
1749 284 : GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1750 280 : int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1751 :
1752 280 : bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1753 280 : if (bUseNoDataMask)
1754 : {
1755 55 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1756 : nFullResXSizeQueried, nFullResYSizeQueried));
1757 : }
1758 280 : if (pChunk == nullptr ||
1759 55 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1760 : {
1761 1 : GDALClose(poMEMDS);
1762 0 : CPLFree(pChunk);
1763 0 : CPLFree(pabyChunkNoDataMask);
1764 0 : CPLFree(papoDstBands);
1765 0 : return CE_Failure;
1766 : }
1767 :
1768 279 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1769 279 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1770 279 : int nBlocksDone = 0;
1771 :
1772 : int nDstYOff;
1773 566 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1774 287 : nDstYOff += nDstBlockYSize)
1775 : {
1776 : int nDstYCount;
1777 272 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1778 273 : nDstYCount = nDstBlockYSize;
1779 : else
1780 0 : nDstYCount = nBufYSize - nDstYOff;
1781 :
1782 272 : int nChunkYOff =
1783 272 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1784 272 : int nChunkYOff2 = nYOff + 1 +
1785 272 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1786 : dfYRatioDstToSrc));
1787 272 : if (nChunkYOff2 > nRasterYSize)
1788 56 : nChunkYOff2 = nRasterYSize;
1789 272 : int nYCount = nChunkYOff2 - nChunkYOff;
1790 272 : CPLAssert(nYCount <= nFullResYChunk);
1791 :
1792 272 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1793 272 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1794 272 : if (nChunkYOffQueried < 0)
1795 : {
1796 56 : nChunkYSizeQueried += nChunkYOffQueried;
1797 56 : nChunkYOffQueried = 0;
1798 : }
1799 272 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1800 66 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1801 272 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1802 :
1803 : int nDstXOff;
1804 556 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1805 284 : nDstXOff += nDstBlockXSize)
1806 : {
1807 : int nDstXCount;
1808 269 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1809 268 : nDstXCount = nDstBlockXSize;
1810 : else
1811 1 : nDstXCount = nBufXSize - nDstXOff;
1812 :
1813 269 : int nChunkXOff =
1814 269 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1815 269 : int nChunkXOff2 =
1816 269 : nXOff + 1 +
1817 269 : static_cast<int>(
1818 269 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1819 269 : if (nChunkXOff2 > nRasterXSize)
1820 144 : nChunkXOff2 = nRasterXSize;
1821 269 : int nXCount = nChunkXOff2 - nChunkXOff;
1822 269 : CPLAssert(nXCount <= nFullResXChunk);
1823 :
1824 269 : int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1825 269 : int nChunkXSizeQueried =
1826 269 : nXCount + 2 * nKernelRadius * nOvrFactor;
1827 269 : if (nChunkXOffQueried < 0)
1828 : {
1829 144 : nChunkXSizeQueried += nChunkXOffQueried;
1830 144 : nChunkXOffQueried = 0;
1831 : }
1832 269 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1833 150 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1834 269 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1835 :
1836 269 : bool bSkipResample = false;
1837 269 : bool bNoDataMaskFullyOpaque = false;
1838 269 : if (eErr == CE_None && bUseNoDataMask)
1839 : {
1840 55 : eErr = poMaskBand->RasterIO(
1841 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1842 : nChunkXSizeQueried, nChunkYSizeQueried,
1843 : pabyChunkNoDataMask, nChunkXSizeQueried,
1844 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1845 :
1846 : /* Optimizations if mask is fully opaque or transparent */
1847 55 : const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1848 55 : const GByte bVal = pabyChunkNoDataMask[0];
1849 55 : int i = 1; // Used after for.
1850 123794 : for (; i < nPixels; i++)
1851 : {
1852 123777 : if (pabyChunkNoDataMask[i] != bVal)
1853 38 : break;
1854 : }
1855 55 : if (i == nPixels)
1856 : {
1857 17 : if (bVal == 0)
1858 : {
1859 16 : GByte abyZero[16] = {0};
1860 64 : for (int iBand = 0; iBand < nBandCount; iBand++)
1861 : {
1862 2016 : for (int j = 0; j < nDstYCount; j++)
1863 : {
1864 1968 : GDALCopyWords64(
1865 : abyZero, GDT_Byte, 0,
1866 : static_cast<GByte *>(pData) +
1867 1968 : iBand * nBandSpace +
1868 1968 : nLineSpace * (j + nDstYOff) +
1869 1968 : nDstXOff * nPixelSpace,
1870 : eBufType, static_cast<int>(nPixelSpace),
1871 : nDstXCount);
1872 : }
1873 : }
1874 16 : bSkipResample = true;
1875 : }
1876 : else
1877 : {
1878 1 : bNoDataMaskFullyOpaque = true;
1879 : }
1880 : }
1881 : }
1882 :
1883 269 : if (!bSkipResample && eErr == CE_None)
1884 : {
1885 : /* Read the source buffers */
1886 253 : eErr = RasterIO(
1887 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1888 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1889 : nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1890 : nBandCount, panBandMap, 0, 0, 0, nullptr);
1891 : }
1892 :
1893 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1894 : if (pfnResampleFuncMultiBands && !bSkipResample &&
1895 : eErr == CE_None)
1896 : {
1897 : eErr = pfnResampleFuncMultiBands(
1898 : dfXRatioDstToSrc, dfYRatioDstToSrc,
1899 : dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1900 : dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1901 : eWrkDataType, (GByte *)pChunk, nBandCount,
1902 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1903 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1904 : nChunkXSizeQueried,
1905 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1906 : nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1907 : nDstXOff + nDestXOffVirtual + nDstXCount,
1908 : nDstYOff + nDestYOffVirtual,
1909 : nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1910 : pszResampling, FALSE /*bHasNoData*/,
1911 : 0.0 /* dfNoDataValue */, nullptr /* color table*/,
1912 : eDataType);
1913 : }
1914 : else
1915 : #endif
1916 : {
1917 : size_t nChunkBandOffset =
1918 281 : static_cast<size_t>(nChunkXSizeQueried) *
1919 281 : nChunkYSizeQueried *
1920 281 : GDALGetDataTypeSizeBytes(eWrkDataType);
1921 1205 : for (int i = 0;
1922 1205 : i < nBandCount && !bSkipResample && eErr == CE_None;
1923 : i++)
1924 : {
1925 921 : const bool bPropagateNoData = false;
1926 921 : void *pDstBuffer = nullptr;
1927 921 : GDALDataType eDstBufferDataType = GDT_Unknown;
1928 : GDALRasterBand *poMEMBand =
1929 921 : poMEMDS->GetRasterBand(i + 1);
1930 922 : GDALOverviewResampleArgs args;
1931 922 : args.eSrcDataType = eDataType;
1932 922 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1933 921 : args.nOvrXSize = poMEMBand->GetXSize();
1934 920 : args.nOvrYSize = poMEMBand->GetYSize();
1935 917 : args.nOvrNBITS = nNBITS;
1936 917 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1937 917 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1938 917 : args.dfSrcXDelta =
1939 917 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1940 917 : args.dfSrcYDelta =
1941 917 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1942 917 : args.eWrkDataType = eWrkDataType;
1943 917 : args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
1944 917 : ? nullptr
1945 : : pabyChunkNoDataMask;
1946 917 : args.nChunkXOff =
1947 917 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1948 917 : args.nChunkXSize = nChunkXSizeQueried;
1949 917 : args.nChunkYOff =
1950 917 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1951 917 : args.nChunkYSize = nChunkYSizeQueried;
1952 917 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1953 917 : args.nDstXOff2 =
1954 917 : nDstXOff + nDestXOffVirtual + nDstXCount;
1955 917 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1956 917 : args.nDstYOff2 =
1957 917 : nDstYOff + nDestYOffVirtual + nDstYCount;
1958 917 : args.pszResampling = pszResampling;
1959 917 : args.bHasNoData = false;
1960 917 : args.dfNoDataValue = 0.0;
1961 917 : args.poColorTable = nullptr;
1962 917 : args.bPropagateNoData = bPropagateNoData;
1963 :
1964 : eErr =
1965 1839 : pfnResampleFunc(args,
1966 917 : reinterpret_cast<GByte *>(pChunk) +
1967 917 : i * nChunkBandOffset,
1968 : &pDstBuffer, &eDstBufferDataType);
1969 922 : if (eErr == CE_None)
1970 : {
1971 922 : eErr = poMEMBand->RasterIO(
1972 : GF_Write, nDstXOff + nDestXOffVirtual,
1973 : nDstYOff + nDestYOffVirtual, nDstXCount,
1974 : nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1975 : eDstBufferDataType, 0, 0, nullptr);
1976 : }
1977 922 : CPLFree(pDstBuffer);
1978 : }
1979 : }
1980 :
1981 284 : nBlocksDone++;
1982 286 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1983 2 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1984 : "", psExtraArg->pProgressData))
1985 : {
1986 0 : eErr = CE_Failure;
1987 : }
1988 : }
1989 : }
1990 :
1991 294 : CPLFree(pChunk);
1992 284 : CPLFree(pabyChunkNoDataMask);
1993 : }
1994 :
1995 284 : CPLFree(papoDstBands);
1996 284 : GDALClose(poMEMDS);
1997 :
1998 284 : return eErr;
1999 : }
2000 :
2001 : //! @endcond
2002 :
2003 : /************************************************************************/
2004 : /* GDALSwapWords() */
2005 : /************************************************************************/
2006 :
2007 : /**
2008 : * Byte swap words in-place.
2009 : *
2010 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2011 : * a memory array. No assumption is made that the words being swapped are
2012 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2013 : * to determine if the current platform is big endian or little endian. Use
2014 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
2015 : * of a function call.
2016 : *
2017 : * @param pData pointer to start of data buffer.
2018 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2019 : * @param nWordCount the number of words to be swapped in this call.
2020 : * @param nWordSkip the byte offset from the start of one word to the start of
2021 : * the next. For packed buffers this is the same as nWordSize.
2022 : */
2023 :
2024 438669 : void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
2025 : int nWordSkip)
2026 :
2027 : {
2028 438669 : if (nWordCount > 0)
2029 438669 : VALIDATE_POINTER0(pData, "GDALSwapWords");
2030 :
2031 438669 : GByte *pabyData = static_cast<GByte *>(pData);
2032 :
2033 438669 : switch (nWordSize)
2034 : {
2035 7234 : case 1:
2036 7234 : break;
2037 :
2038 418175 : case 2:
2039 418175 : CPLAssert(nWordSkip >= 2 || nWordCount == 1);
2040 289160000 : for (int i = 0; i < nWordCount; i++)
2041 : {
2042 288742000 : CPL_SWAP16PTR(pabyData);
2043 288742000 : pabyData += nWordSkip;
2044 : }
2045 418175 : break;
2046 :
2047 10689 : case 4:
2048 10689 : CPLAssert(nWordSkip >= 4 || nWordCount == 1);
2049 10689 : if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
2050 : {
2051 29148800 : for (int i = 0; i < nWordCount; i++)
2052 : {
2053 29138100 : *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
2054 : *reinterpret_cast<const GUInt32 *>(pabyData));
2055 29138100 : pabyData += nWordSkip;
2056 10686 : }
2057 : }
2058 : else
2059 : {
2060 9 : for (int i = 0; i < nWordCount; i++)
2061 : {
2062 6 : CPL_SWAP32PTR(pabyData);
2063 6 : pabyData += nWordSkip;
2064 : }
2065 : }
2066 10689 : break;
2067 :
2068 2571 : case 8:
2069 2571 : CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2070 2571 : if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2071 : {
2072 3359870 : for (int i = 0; i < nWordCount; i++)
2073 : {
2074 3357300 : *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
2075 : *reinterpret_cast<const GUInt64 *>(pabyData));
2076 3357300 : pabyData += nWordSkip;
2077 2570 : }
2078 : }
2079 : else
2080 : {
2081 3 : for (int i = 0; i < nWordCount; i++)
2082 : {
2083 2 : CPL_SWAP64PTR(pabyData);
2084 2 : pabyData += nWordSkip;
2085 : }
2086 : }
2087 2571 : break;
2088 :
2089 0 : default:
2090 0 : CPLAssert(false);
2091 : }
2092 : }
2093 :
2094 : /************************************************************************/
2095 : /* GDALSwapWordsEx() */
2096 : /************************************************************************/
2097 :
2098 : /**
2099 : * Byte swap words in-place.
2100 : *
2101 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2102 : * a memory array. No assumption is made that the words being swapped are
2103 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2104 : * to determine if the current platform is big endian or little endian. Use
2105 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
2106 : * of a function call.
2107 : *
2108 : * @param pData pointer to start of data buffer.
2109 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2110 : * @param nWordCount the number of words to be swapped in this call.
2111 : * @param nWordSkip the byte offset from the start of one word to the start of
2112 : * the next. For packed buffers this is the same as nWordSize.
2113 : * @since GDAL 2.1
2114 : */
2115 6378 : void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
2116 : int nWordSkip)
2117 : {
2118 6378 : GByte *pabyData = static_cast<GByte *>(pData);
2119 12756 : while (nWordCount)
2120 : {
2121 : // Pick-up a multiple of 8 as max chunk size.
2122 6378 : const int nWordCountSmall =
2123 6378 : (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2124 6378 : GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2125 6378 : pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2126 6378 : nWordCount -= nWordCountSmall;
2127 : }
2128 6378 : }
2129 :
2130 : // Place the new GDALCopyWords helpers in an anonymous namespace
2131 : namespace
2132 : {
2133 :
2134 : /************************************************************************/
2135 : /* GDALCopyWordsT() */
2136 : /************************************************************************/
2137 : /**
2138 : * Template function, used to copy data from pSrcData into buffer
2139 : * pDstData, with stride nSrcPixelStride in the source data and
2140 : * stride nDstPixelStride in the destination data. This template can
2141 : * deal with the case where the input data type is real or complex and
2142 : * the output is real.
2143 : *
2144 : * @param pSrcData the source data buffer
2145 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2146 : * of interest.
2147 : * @param pDstData the destination buffer.
2148 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2149 : * interest.
2150 : * @param nWordCount the total number of pixel words to copy
2151 : *
2152 : * @code
2153 : * // Assume an input buffer of type GUInt16 named pBufferIn
2154 : * GByte *pBufferOut = new GByte[numBytesOut];
2155 : * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut);
2156 : * @endcode
2157 : * @note
2158 : * This is a private function, and should not be exposed outside of
2159 : * rasterio.cpp. External users should call the GDALCopyWords driver function.
2160 : */
2161 :
2162 : template <class Tin, class Tout>
2163 46725913 : static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2164 : int nSrcPixelStride,
2165 : Tout *const CPL_RESTRICT pDstData,
2166 : int nDstPixelStride,
2167 : GPtrDiff_t nWordCount)
2168 : {
2169 46725913 : decltype(nWordCount) nDstOffset = 0;
2170 :
2171 46725913 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2172 46725913 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2173 601037742 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2174 : {
2175 554311580 : const Tin tValue =
2176 554311580 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2177 554311580 : Tout *const pOutPixel =
2178 554311580 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2179 :
2180 554311580 : GDALCopyWord(tValue, *pOutPixel);
2181 :
2182 554311680 : nDstOffset += nDstPixelStride;
2183 : }
2184 46726031 : }
2185 :
2186 : template <class Tin, class Tout>
2187 38279445 : static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2188 : int nSrcPixelStride,
2189 : Tout *const CPL_RESTRICT pDstData,
2190 : int nDstPixelStride, GPtrDiff_t nWordCount)
2191 : {
2192 38279445 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2193 : nWordCount);
2194 38279493 : }
2195 :
2196 : template <class Tin, class Tout>
2197 194358 : static void inline GDALCopyWordsT_8atatime(
2198 : const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2199 : Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2200 : GPtrDiff_t nWordCount)
2201 : {
2202 194358 : decltype(nWordCount) nDstOffset = 0;
2203 :
2204 194358 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2205 194358 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2206 194358 : decltype(nWordCount) n = 0;
2207 194358 : if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2208 : nDstPixelStride == static_cast<int>(sizeof(Tout)))
2209 : {
2210 22734780 : for (; n < nWordCount - 7; n += 8)
2211 : {
2212 22545358 : const Tin *pInValues = reinterpret_cast<const Tin *>(
2213 22545358 : pSrcDataPtr + (n * nSrcPixelStride));
2214 22545358 : Tout *const pOutPixels =
2215 22545358 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2216 :
2217 22545358 : GDALCopy8Words(pInValues, pOutPixels);
2218 :
2219 22541328 : nDstOffset += 8 * nDstPixelStride;
2220 : }
2221 : }
2222 687692 : for (; n < nWordCount; n++)
2223 : {
2224 493331 : const Tin tValue =
2225 493331 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2226 493331 : Tout *const pOutPixel =
2227 493331 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2228 :
2229 493331 : GDALCopyWord(tValue, *pOutPixel);
2230 :
2231 497417 : nDstOffset += nDstPixelStride;
2232 : }
2233 194361 : }
2234 :
2235 : #ifdef HAVE_SSE2
2236 :
2237 : template <class Tout>
2238 39381 : void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2239 : int nSrcPixelStride,
2240 : Tout *const CPL_RESTRICT pDstData,
2241 : int nDstPixelStride, GPtrDiff_t nWordCount)
2242 : {
2243 : static_assert(std::is_integral<Tout>::value &&
2244 : sizeof(Tout) == sizeof(uint16_t),
2245 : "Bad Tout");
2246 39381 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2247 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2248 : {
2249 33330 : decltype(nWordCount) n = 0;
2250 33330 : const __m128i xmm_zero = _mm_setzero_si128();
2251 33330 : GByte *CPL_RESTRICT pabyDstDataPtr =
2252 : reinterpret_cast<GByte *>(pDstData);
2253 1501757 : for (; n < nWordCount - 15; n += 16)
2254 : {
2255 1468427 : __m128i xmm = _mm_loadu_si128(
2256 1468427 : reinterpret_cast<const __m128i *>(pSrcData + n));
2257 1468427 : __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2258 1468427 : __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2259 : _mm_storeu_si128(
2260 1468427 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2261 : _mm_storeu_si128(
2262 1468427 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2263 : }
2264 108789 : for (; n < nWordCount; n++)
2265 : {
2266 75459 : pDstData[n] = pSrcData[n];
2267 33330 : }
2268 : }
2269 : else
2270 : {
2271 6051 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2272 : nDstPixelStride, nWordCount);
2273 : }
2274 39381 : }
2275 :
2276 : template <>
2277 25764 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2278 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2279 : int nDstPixelStride, GPtrDiff_t nWordCount)
2280 : {
2281 25764 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2282 : nDstPixelStride, nWordCount);
2283 25764 : }
2284 :
2285 : template <>
2286 13617 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2287 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2288 : int nDstPixelStride, GPtrDiff_t nWordCount)
2289 : {
2290 13617 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2291 : nDstPixelStride, nWordCount);
2292 13617 : }
2293 :
2294 : template <class Tout>
2295 12270938 : void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2296 : int nSrcPixelStride,
2297 : Tout *const CPL_RESTRICT pDstData,
2298 : int nDstPixelStride, GPtrDiff_t nWordCount)
2299 : {
2300 : static_assert(std::is_integral<Tout>::value &&
2301 : sizeof(Tout) == sizeof(uint32_t),
2302 : "Bad Tout");
2303 12270938 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2304 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2305 : {
2306 6210628 : decltype(nWordCount) n = 0;
2307 6210628 : const __m128i xmm_zero = _mm_setzero_si128();
2308 6210628 : GByte *CPL_RESTRICT pabyDstDataPtr =
2309 : reinterpret_cast<GByte *>(pDstData);
2310 68847252 : for (; n < nWordCount - 15; n += 16)
2311 : {
2312 62822524 : __m128i xmm = _mm_loadu_si128(
2313 62822524 : reinterpret_cast<const __m128i *>(pSrcData + n));
2314 62784524 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2315 62795624 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2316 62783724 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2317 62591024 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2318 62505924 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2319 62636624 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2320 : _mm_storeu_si128(
2321 62636624 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2322 : _mm_storeu_si128(
2323 62636624 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2324 : _mm_storeu_si128(
2325 62636624 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2326 : _mm_storeu_si128(
2327 62636624 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2328 : }
2329 14087639 : for (; n < nWordCount; n++)
2330 : {
2331 8062831 : pDstData[n] = pSrcData[n];
2332 6024788 : }
2333 : }
2334 : else
2335 : {
2336 6060320 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2337 : nDstPixelStride, nWordCount);
2338 : }
2339 12081138 : }
2340 :
2341 : template <>
2342 438 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2343 : int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2344 : int nDstPixelStride, GPtrDiff_t nWordCount)
2345 : {
2346 438 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2347 : nDstPixelStride, nWordCount);
2348 438 : }
2349 :
2350 : template <>
2351 12272200 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2352 : int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2353 : int nDstPixelStride, GPtrDiff_t nWordCount)
2354 : {
2355 12272200 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2356 : nDstPixelStride, nWordCount);
2357 12282000 : }
2358 :
2359 : template <>
2360 2470670 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2361 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2362 : int nDstPixelStride, GPtrDiff_t nWordCount)
2363 : {
2364 2470670 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2365 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2366 : {
2367 111225 : decltype(nWordCount) n = 0;
2368 111225 : const __m128i xmm_zero = _mm_setzero_si128();
2369 111225 : GByte *CPL_RESTRICT pabyDstDataPtr =
2370 : reinterpret_cast<GByte *>(pDstData);
2371 3273060 : for (; n < nWordCount - 15; n += 16)
2372 : {
2373 3161840 : __m128i xmm = _mm_loadu_si128(
2374 3161840 : reinterpret_cast<const __m128i *>(pSrcData + n));
2375 3161840 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2376 3161840 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2377 3161840 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2378 3161840 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2379 3161840 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2380 3161840 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2381 3161840 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2382 3161840 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2383 3161840 : __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2384 3161840 : __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2385 3161840 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2386 : xmm0_f);
2387 : _mm_storeu_ps(
2388 3161840 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2389 : _mm_storeu_ps(
2390 3161840 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2391 : _mm_storeu_ps(
2392 3161840 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2393 : }
2394 472813 : for (; n < nWordCount; n++)
2395 : {
2396 361588 : pDstData[n] = pSrcData[n];
2397 111225 : }
2398 : }
2399 : else
2400 : {
2401 2359440 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2402 : nDstPixelStride, nWordCount);
2403 : }
2404 2470670 : }
2405 :
2406 : template <>
2407 146702 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2408 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2409 : int nDstPixelStride, GPtrDiff_t nWordCount)
2410 : {
2411 146702 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2412 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2413 : {
2414 123720 : decltype(nWordCount) n = 0;
2415 123720 : const __m128i xmm_zero = _mm_setzero_si128();
2416 123720 : GByte *CPL_RESTRICT pabyDstDataPtr =
2417 : reinterpret_cast<GByte *>(pDstData);
2418 1421860 : for (; n < nWordCount - 15; n += 16)
2419 : {
2420 1298140 : __m128i xmm = _mm_loadu_si128(
2421 1298140 : reinterpret_cast<const __m128i *>(pSrcData + n));
2422 1298140 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2423 1298140 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2424 1298140 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2425 1298140 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2426 1298140 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2427 1298140 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2428 :
2429 1298140 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2430 1298140 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2431 1298140 : __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2432 1298140 : __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2433 1298140 : xmm0 = _mm_srli_si128(xmm0, 8);
2434 1298140 : xmm1 = _mm_srli_si128(xmm1, 8);
2435 1298140 : xmm2 = _mm_srli_si128(xmm2, 8);
2436 1298140 : xmm3 = _mm_srli_si128(xmm3, 8);
2437 1298140 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2438 1298140 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2439 1298140 : __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2440 1298140 : __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2441 :
2442 1298140 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2443 : xmm0_low_d);
2444 : _mm_storeu_pd(
2445 1298140 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2446 : xmm0_high_d);
2447 : _mm_storeu_pd(
2448 1298140 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2449 : xmm1_low_d);
2450 : _mm_storeu_pd(
2451 1298140 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2452 : xmm1_high_d);
2453 : _mm_storeu_pd(
2454 1298140 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2455 : xmm2_low_d);
2456 : _mm_storeu_pd(
2457 1298140 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2458 : xmm2_high_d);
2459 : _mm_storeu_pd(
2460 1298140 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2461 : xmm3_low_d);
2462 : _mm_storeu_pd(
2463 1298140 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2464 : xmm3_high_d);
2465 : }
2466 234770 : for (; n < nWordCount; n++)
2467 : {
2468 111050 : pDstData[n] = pSrcData[n];
2469 123720 : }
2470 : }
2471 : else
2472 : {
2473 22982 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2474 : nDstPixelStride, nWordCount);
2475 : }
2476 146702 : }
2477 :
2478 : template <>
2479 6006 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2480 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2481 : int nDstPixelStride, GPtrDiff_t nWordCount)
2482 : {
2483 6006 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2484 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2485 : {
2486 5031 : decltype(nWordCount) n = 0;
2487 : // In SSE2, min_epu16 does not exist, so shift from
2488 : // UInt16 to SInt16 to be able to use min_epi16
2489 5031 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2490 5031 : const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2491 138471 : for (; n < nWordCount - 7; n += 8)
2492 : {
2493 133440 : __m128i xmm = _mm_loadu_si128(
2494 133440 : reinterpret_cast<const __m128i *>(pSrcData + n));
2495 133440 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2496 133440 : xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2497 133440 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2498 133440 : xmm = _mm_packus_epi16(xmm, xmm);
2499 133440 : GDALCopyXMMToInt64(xmm,
2500 133440 : reinterpret_cast<GPtrDiff_t *>(pDstData + n));
2501 : }
2502 16005 : for (; n < nWordCount; n++)
2503 : {
2504 10974 : pDstData[n] =
2505 10974 : pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2506 5031 : }
2507 : }
2508 : else
2509 : {
2510 975 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2511 : nDstPixelStride, nWordCount);
2512 : }
2513 6006 : }
2514 :
2515 : template <>
2516 21 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2517 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2518 : int nDstPixelStride, GPtrDiff_t nWordCount)
2519 : {
2520 21 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2521 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2522 : {
2523 15 : decltype(nWordCount) n = 0;
2524 : // In SSE2, min_epu16 does not exist, so shift from
2525 : // UInt16 to SInt16 to be able to use min_epi16
2526 15 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2527 15 : const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2528 31 : for (; n < nWordCount - 7; n += 8)
2529 : {
2530 16 : __m128i xmm = _mm_loadu_si128(
2531 16 : reinterpret_cast<const __m128i *>(pSrcData + n));
2532 16 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2533 16 : xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2534 16 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2535 16 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm);
2536 : }
2537 55 : for (; n < nWordCount; n++)
2538 : {
2539 40 : pDstData[n] =
2540 40 : pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2541 15 : }
2542 : }
2543 : else
2544 : {
2545 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2546 : nDstPixelStride, nWordCount);
2547 : }
2548 21 : }
2549 :
2550 : template <>
2551 412 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2552 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2553 : int nDstPixelStride, GPtrDiff_t nWordCount)
2554 : {
2555 412 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2556 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2557 : {
2558 406 : decltype(nWordCount) n = 0;
2559 406 : const __m128i xmm_zero = _mm_setzero_si128();
2560 406 : GByte *CPL_RESTRICT pabyDstDataPtr =
2561 : reinterpret_cast<GByte *>(pDstData);
2562 1500 : for (; n < nWordCount - 7; n += 8)
2563 : {
2564 1094 : __m128i xmm = _mm_loadu_si128(
2565 1094 : reinterpret_cast<const __m128i *>(pSrcData + n));
2566 1094 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2567 1094 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2568 1094 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2569 1094 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2570 1094 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2571 : xmm0_f);
2572 : _mm_storeu_ps(
2573 1094 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2574 : }
2575 1483 : for (; n < nWordCount; n++)
2576 : {
2577 1077 : pDstData[n] = pSrcData[n];
2578 406 : }
2579 : }
2580 : else
2581 : {
2582 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2583 : nDstPixelStride, nWordCount);
2584 : }
2585 412 : }
2586 :
2587 : template <>
2588 279 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2589 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2590 : int nDstPixelStride, GPtrDiff_t nWordCount)
2591 : {
2592 279 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2593 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2594 : {
2595 171 : decltype(nWordCount) n = 0;
2596 171 : const __m128i xmm_zero = _mm_setzero_si128();
2597 171 : GByte *CPL_RESTRICT pabyDstDataPtr =
2598 : reinterpret_cast<GByte *>(pDstData);
2599 219 : for (; n < nWordCount - 7; n += 8)
2600 : {
2601 48 : __m128i xmm = _mm_loadu_si128(
2602 48 : reinterpret_cast<const __m128i *>(pSrcData + n));
2603 48 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2604 48 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2605 :
2606 48 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2607 48 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2608 48 : xmm0 = _mm_srli_si128(xmm0, 8);
2609 48 : xmm1 = _mm_srli_si128(xmm1, 8);
2610 48 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2611 48 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2612 :
2613 48 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2614 : xmm0_low_d);
2615 : _mm_storeu_pd(
2616 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2617 : xmm0_high_d);
2618 : _mm_storeu_pd(
2619 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2620 : xmm1_low_d);
2621 : _mm_storeu_pd(
2622 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2623 : xmm1_high_d);
2624 : }
2625 429 : for (; n < nWordCount; n++)
2626 : {
2627 258 : pDstData[n] = pSrcData[n];
2628 171 : }
2629 : }
2630 : else
2631 : {
2632 108 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2633 : nDstPixelStride, nWordCount);
2634 : }
2635 279 : }
2636 :
// Specialization of GDALCopyWordsT() for double -> GUInt16, located in the
// HAVE_SSE2 section of the file. All arguments are forwarded unchanged to
// GDALCopyWordsT_8atatime(), which is defined elsewhere (presumably it
// processes eight words per iteration, as its name suggests - confirm
// against its definition).
template <>
void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
                    int nDstPixelStride, GPtrDiff_t nWordCount)
{
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
                            nDstPixelStride, nWordCount);
}
2645 :
2646 : #endif // HAVE_SSE2
2647 :
// Specialization of GDALCopyWordsT() for float -> GByte. All arguments are
// forwarded unchanged to GDALCopyWordsT_8atatime(), which is defined
// elsewhere (presumably it processes eight words per iteration, as its name
// suggests - confirm against its definition).
template <>
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
                    int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
                    int nDstPixelStride, GPtrDiff_t nWordCount)
{
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
                            nDstPixelStride, nWordCount);
}
2656 :
// Specialization of GDALCopyWordsT() for float -> GInt16. All arguments are
// forwarded unchanged to GDALCopyWordsT_8atatime(), which is defined
// elsewhere (presumably it processes eight words per iteration, as its name
// suggests - confirm against its definition).
template <>
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
                    int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
                    int nDstPixelStride, GPtrDiff_t nWordCount)
{
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
                            nDstPixelStride, nWordCount);
}
2665 :
// Specialization of GDALCopyWordsT() for float -> GUInt16. All arguments are
// forwarded unchanged to GDALCopyWordsT_8atatime(), which is defined
// elsewhere (presumably it processes eight words per iteration, as its name
// suggests - confirm against its definition).
template <>
void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
                    int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
                    int nDstPixelStride, GPtrDiff_t nWordCount)
{
    GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
                            nDstPixelStride, nWordCount);
}
2674 :
2675 : /************************************************************************/
2676 : /* GDALCopyWordsComplexT() */
2677 : /************************************************************************/
2678 : /**
2679 : * Template function, used to copy data from pSrcData into buffer
2680 : * pDstData, with stride nSrcPixelStride in the source data and
2681 : * stride nDstPixelStride in the destination data. Deals with the
2682 : * complex case, where input is complex and output is complex.
2683 : *
2684 : * @param pSrcData the source data buffer
2685 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2686 : * of interest.
2687 : * @param pDstData the destination buffer.
2688 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2689 : * interest.
2690 : * @param nWordCount the total number of pixel words to copy
2691 : *
2692 : */
2693 : template <class Tin, class Tout>
2694 125172 : inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
2695 : int nSrcPixelStride,
2696 : Tout *const CPL_RESTRICT pDstData,
2697 : int nDstPixelStride, GPtrDiff_t nWordCount)
2698 : {
2699 125172 : decltype(nWordCount) nDstOffset = 0;
2700 125172 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2701 125172 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2702 :
2703 7337873 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2704 : {
2705 7212696 : const Tin *const pPixelIn =
2706 7212696 : reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2707 7212696 : Tout *const pPixelOut =
2708 7212696 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2709 :
2710 7212696 : GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2711 7212696 : GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2712 :
2713 7212696 : nDstOffset += nDstPixelStride;
2714 : }
2715 125172 : }
2716 :
2717 : /************************************************************************/
2718 : /* GDALCopyWordsComplexOutT() */
2719 : /************************************************************************/
2720 : /**
2721 : * Template function, used to copy data from pSrcData into buffer
2722 : * pDstData, with stride nSrcPixelStride in the source data and
2723 : * stride nDstPixelStride in the destination data. Deals with the
2724 : * case where the value is real coming in, but complex going out.
2725 : *
2726 : * @param pSrcData the source data buffer
2727 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2728 : * of interest, in bytes.
2729 : * @param pDstData the destination buffer.
2730 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2731 : * interest, in bytes.
2732 : * @param nWordCount the total number of pixel words to copy
2733 : *
2734 : */
2735 : template <class Tin, class Tout>
2736 3168 : inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
2737 : int nSrcPixelStride,
2738 : Tout *const CPL_RESTRICT pDstData,
2739 : int nDstPixelStride, GPtrDiff_t nWordCount)
2740 : {
2741 3168 : decltype(nWordCount) nDstOffset = 0;
2742 :
2743 3168 : const Tout tOutZero = static_cast<Tout>(0);
2744 :
2745 3168 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2746 3168 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2747 :
2748 1112479 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2749 : {
2750 1109311 : const Tin tValue =
2751 1109311 : *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2752 1109311 : Tout *const pPixelOut =
2753 1109311 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2754 1109311 : GDALCopyWord(tValue, *pPixelOut);
2755 :
2756 1109311 : pPixelOut[1] = tOutZero;
2757 :
2758 1109311 : nDstOffset += nDstPixelStride;
2759 : }
2760 3168 : }
2761 :
2762 : /************************************************************************/
2763 : /* GDALCopyWordsFromT() */
2764 : /************************************************************************/
2765 : /**
2766 : * Template driver function. Given the input type T, call the appropriate
2767 : * GDALCopyWordsT function template for the desired output type. You should
2768 : * never call this function directly (call GDALCopyWords instead).
2769 : *
2770 : * @param pSrcData source data buffer
2771 : * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2772 : * @param bInComplex input is complex
2773 : * @param pDstData destination data buffer
2774 : * @param eDstType destination data type
2775 : * @param nDstPixelStride pixel stride in output buffer, in pixel words
2776 : * @param nWordCount number of pixel words to be copied
2777 : */
2778 : template <class T>
2779 53542097 : inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
2780 : int nSrcPixelStride, bool bInComplex,
2781 : void *CPL_RESTRICT pDstData,
2782 : GDALDataType eDstType, int nDstPixelStride,
2783 : GPtrDiff_t nWordCount)
2784 : {
2785 53542097 : switch (eDstType)
2786 : {
2787 4557959 : case GDT_Byte:
2788 4557959 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2789 : static_cast<unsigned char *>(pDstData),
2790 : nDstPixelStride, nWordCount);
2791 4558149 : break;
2792 458 : case GDT_Int8:
2793 458 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2794 : static_cast<signed char *>(pDstData),
2795 : nDstPixelStride, nWordCount);
2796 458 : break;
2797 101145 : case GDT_UInt16:
2798 101145 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2799 : static_cast<unsigned short *>(pDstData),
2800 : nDstPixelStride, nWordCount);
2801 101140 : break;
2802 4126377 : case GDT_Int16:
2803 4126377 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2804 : static_cast<short *>(pDstData), nDstPixelStride,
2805 : nWordCount);
2806 4126377 : break;
2807 4180 : case GDT_UInt32:
2808 4180 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2809 : static_cast<unsigned int *>(pDstData),
2810 : nDstPixelStride, nWordCount);
2811 4180 : break;
2812 25496917 : case GDT_Int32:
2813 25496917 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2814 : static_cast<int *>(pDstData), nDstPixelStride,
2815 : nWordCount);
2816 25509715 : break;
2817 593 : case GDT_UInt64:
2818 593 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2819 : static_cast<std::uint64_t *>(pDstData),
2820 : nDstPixelStride, nWordCount);
2821 593 : break;
2822 4158 : case GDT_Int64:
2823 4158 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2824 : static_cast<std::int64_t *>(pDstData),
2825 : nDstPixelStride, nWordCount);
2826 4158 : break;
2827 3869493 : case GDT_Float32:
2828 3869493 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2829 : static_cast<float *>(pDstData), nDstPixelStride,
2830 : nWordCount);
2831 3869493 : break;
2832 15244911 : case GDT_Float64:
2833 15244911 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2834 : static_cast<double *>(pDstData), nDstPixelStride,
2835 : nWordCount);
2836 15244901 : break;
2837 122401 : case GDT_CInt16:
2838 122401 : if (bInComplex)
2839 : {
2840 121390 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2841 : static_cast<short *>(pDstData),
2842 : nDstPixelStride, nWordCount);
2843 : }
2844 : else // input is not complex, so we need to promote to a complex
2845 : // buffer
2846 : {
2847 1011 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2848 : static_cast<short *>(pDstData),
2849 : nDstPixelStride, nWordCount);
2850 : }
2851 122401 : break;
2852 800 : case GDT_CInt32:
2853 800 : if (bInComplex)
2854 : {
2855 411 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2856 : static_cast<int *>(pDstData),
2857 : nDstPixelStride, nWordCount);
2858 : }
2859 : else // input is not complex, so we need to promote to a complex
2860 : // buffer
2861 : {
2862 389 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2863 : static_cast<int *>(pDstData),
2864 : nDstPixelStride, nWordCount);
2865 : }
2866 800 : break;
2867 3171 : case GDT_CFloat32:
2868 3171 : if (bInComplex)
2869 : {
2870 2589 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2871 : static_cast<float *>(pDstData),
2872 : nDstPixelStride, nWordCount);
2873 : }
2874 : else // input is not complex, so we need to promote to a complex
2875 : // buffer
2876 : {
2877 582 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2878 : static_cast<float *>(pDstData),
2879 : nDstPixelStride, nWordCount);
2880 : }
2881 3171 : break;
2882 1968 : case GDT_CFloat64:
2883 1968 : if (bInComplex)
2884 : {
2885 782 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2886 : static_cast<double *>(pDstData),
2887 : nDstPixelStride, nWordCount);
2888 : }
2889 : else // input is not complex, so we need to promote to a complex
2890 : // buffer
2891 : {
2892 1186 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2893 : static_cast<double *>(pDstData),
2894 : nDstPixelStride, nWordCount);
2895 : }
2896 1968 : break;
2897 0 : case GDT_Unknown:
2898 : case GDT_TypeCount:
2899 0 : CPLAssert(false);
2900 : }
2901 53555081 : }
2902 :
2903 : } // end anonymous namespace
2904 :
2905 : /************************************************************************/
2906 : /* GDALReplicateWord() */
2907 : /************************************************************************/
2908 :
2909 : template <class T>
2910 528127 : inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
2911 : GPtrDiff_t nWordCount)
2912 : {
2913 528127 : const T valSet = *static_cast<const T *>(pDstData);
2914 528127 : if (nDstPixelStride == static_cast<int>(sizeof(T)))
2915 : {
2916 499722 : T *pDstPtr = static_cast<T *>(pDstData) + 1;
2917 20687937 : while (nWordCount >= 4)
2918 : {
2919 20188224 : nWordCount -= 4;
2920 20188224 : pDstPtr[0] = valSet;
2921 20188224 : pDstPtr[1] = valSet;
2922 20188224 : pDstPtr[2] = valSet;
2923 20188224 : pDstPtr[3] = valSet;
2924 20188224 : pDstPtr += 4;
2925 : }
2926 1267522 : while (nWordCount > 0)
2927 : {
2928 767800 : --nWordCount;
2929 767800 : *pDstPtr = valSet;
2930 767800 : pDstPtr++;
2931 : }
2932 : }
2933 : else
2934 : {
2935 28407 : GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2936 954322 : while (nWordCount > 0)
2937 : {
2938 925915 : --nWordCount;
2939 925915 : *reinterpret_cast<T *>(pabyDstPtr) = valSet;
2940 925915 : pabyDstPtr += nDstPixelStride;
2941 : }
2942 : }
2943 528127 : }
2944 :
2945 906316 : static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
2946 : GDALDataType eSrcType,
2947 : void *CPL_RESTRICT pDstData,
2948 : GDALDataType eDstType, int nDstPixelStride,
2949 : GPtrDiff_t nWordCount)
2950 : {
2951 : /* -----------------------------------------------------------------------
2952 : */
2953 : /* Special case when the source data is always the same value */
2954 : /* (for VRTSourcedRasterBand::IRasterIO and
2955 : * VRTDerivedRasterBand::IRasterIO*/
2956 : /* for example) */
2957 : /* -----------------------------------------------------------------------
2958 : */
2959 : // Let the general translation case do the necessary conversions
2960 : // on the first destination element.
2961 906316 : GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
2962 :
2963 : // Now copy the first element to the nWordCount - 1 following destination
2964 : // elements.
2965 906206 : nWordCount--;
2966 906206 : GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
2967 :
2968 906206 : switch (eDstType)
2969 : {
2970 377978 : case GDT_Byte:
2971 : case GDT_Int8:
2972 : {
2973 377978 : if (nDstPixelStride == 1)
2974 : {
2975 344226 : if (nWordCount > 0)
2976 344226 : memset(pabyDstWord,
2977 344226 : *reinterpret_cast<const GByte *>(pDstData),
2978 : nWordCount);
2979 : }
2980 : else
2981 : {
2982 33752 : GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
2983 5438530 : while (nWordCount > 0)
2984 : {
2985 5404780 : --nWordCount;
2986 5404780 : *pabyDstWord = valSet;
2987 5404780 : pabyDstWord += nDstPixelStride;
2988 : }
2989 : }
2990 377978 : break;
2991 : }
2992 :
2993 : #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
2994 : case enum_type: \
2995 : { \
2996 : GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
2997 : break; \
2998 : }
2999 :
3000 354 : CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3001 169653 : CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3002 56 : CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3003 300130 : CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3004 21 : CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3005 662 : CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3006 52216 : CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3007 5049 : CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3008 :
3009 : #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
3010 : case enum_type: \
3011 : { \
3012 : c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \
3013 : c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \
3014 : while (nWordCount > 0) \
3015 : { \
3016 : --nWordCount; \
3017 : reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \
3018 : reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \
3019 : pabyDstWord += nDstPixelStride; \
3020 : } \
3021 : break; \
3022 : }
3023 :
3024 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3025 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3026 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3027 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3028 :
3029 0 : case GDT_Unknown:
3030 : case GDT_TypeCount:
3031 0 : CPLAssert(false);
3032 : }
3033 906300 : }
3034 :
3035 : /************************************************************************/
3036 : /* GDALUnrolledCopy() */
3037 : /************************************************************************/
3038 :
3039 : template <class T, int srcStride, int dstStride>
3040 5329411 : static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3041 : const T *CPL_RESTRICT pSrc,
3042 : GPtrDiff_t nIters)
3043 : {
3044 5329411 : if (nIters >= 16)
3045 : {
3046 138285268 : for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3047 : {
3048 133085684 : pDest[0 * dstStride] = pSrc[0 * srcStride];
3049 133085684 : pDest[1 * dstStride] = pSrc[1 * srcStride];
3050 133085684 : pDest[2 * dstStride] = pSrc[2 * srcStride];
3051 133085684 : pDest[3 * dstStride] = pSrc[3 * srcStride];
3052 133085684 : pDest[4 * dstStride] = pSrc[4 * srcStride];
3053 133085684 : pDest[5 * dstStride] = pSrc[5 * srcStride];
3054 133085684 : pDest[6 * dstStride] = pSrc[6 * srcStride];
3055 133085684 : pDest[7 * dstStride] = pSrc[7 * srcStride];
3056 133085684 : pDest[8 * dstStride] = pSrc[8 * srcStride];
3057 133085684 : pDest[9 * dstStride] = pSrc[9 * srcStride];
3058 133085684 : pDest[10 * dstStride] = pSrc[10 * srcStride];
3059 133085684 : pDest[11 * dstStride] = pSrc[11 * srcStride];
3060 133085684 : pDest[12 * dstStride] = pSrc[12 * srcStride];
3061 133085684 : pDest[13 * dstStride] = pSrc[13 * srcStride];
3062 133085684 : pDest[14 * dstStride] = pSrc[14 * srcStride];
3063 133085684 : pDest[15 * dstStride] = pSrc[15 * srcStride];
3064 133085684 : pDest += 16 * dstStride;
3065 133085684 : pSrc += 16 * srcStride;
3066 : }
3067 5199632 : nIters = nIters % 16;
3068 : }
3069 7591706 : for (GPtrDiff_t i = 0; i < nIters; i++)
3070 : {
3071 2262300 : pDest[i * dstStride] = *pSrc;
3072 2262300 : pSrc += srcStride;
3073 : }
3074 5329411 : }
3075 :
// Primary template of GDALUnrolledCopy(): simply forwards to
// GDALUnrolledCopyGeneric() with the same template arguments. Explicit
// specializations for particular (T, srcStride, dstStride) combinations
// provide dedicated implementations instead.
template <class T, int srcStride, int dstStride>
static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
                                    const T *CPL_RESTRICT pSrc,
                                    GPtrDiff_t nIters)
{
    GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
}
3083 :
3084 : #ifdef HAVE_SSE2
3085 :
3086 : template <>
3087 303985 : void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3088 : const GByte *CPL_RESTRICT pSrc,
3089 : GPtrDiff_t nIters)
3090 : {
3091 303985 : decltype(nIters) i = 0;
3092 303985 : if (nIters > 16)
3093 : {
3094 145815 : const __m128i xmm_mask = _mm_set1_epi16(0xff);
3095 : // If we were sure that there would always be 1 trailing byte, we could
3096 : // check against nIters - 15
3097 2544120 : for (; i < nIters - 16; i += 16)
3098 : {
3099 : __m128i xmm0 =
3100 2398300 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3101 : __m128i xmm1 =
3102 4796610 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3103 : // Set higher 8bit of each int16 packed word to 0
3104 2398300 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3105 2398300 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3106 : // Pack int16 to uint8 and merge back both vector
3107 2398300 : xmm0 = _mm_packus_epi16(xmm0, xmm1);
3108 :
3109 : // Store result
3110 2398300 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3111 :
3112 2398300 : pSrc += 2 * 16;
3113 : }
3114 : }
3115 3875160 : for (; i < nIters; i++)
3116 : {
3117 3571180 : pDest[i] = *pSrc;
3118 3571180 : pSrc += 2;
3119 : }
3120 303985 : }
3121 :
3122 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3123 :
3124 : template <>
3125 184630 : void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3126 : const GByte *CPL_RESTRICT pSrc,
3127 : GPtrDiff_t nIters)
3128 : {
3129 184630 : if (nIters > 16 && CPLHaveRuntimeSSSE3())
3130 : {
3131 179430 : GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3132 : }
3133 : else
3134 : {
3135 5200 : GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
3136 : }
3137 184630 : }
3138 :
3139 : #endif
3140 :
3141 : template <>
3142 105200 : void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3143 : const GByte *CPL_RESTRICT pSrc,
3144 : GPtrDiff_t nIters)
3145 : {
3146 105200 : decltype(nIters) i = 0;
3147 105200 : if (nIters > 16)
3148 : {
3149 99907 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
3150 : // If we were sure that there would always be 3 trailing bytes, we could
3151 : // check against nIters - 15
3152 8826220 : for (; i < nIters - 16; i += 16)
3153 : {
3154 : __m128i xmm0 =
3155 8725980 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3156 : __m128i xmm1 =
3157 8725980 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3158 : __m128i xmm2 =
3159 8725980 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3160 : __m128i xmm3 =
3161 17452000 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3162 : // Set higher 24bit of each int32 packed word to 0
3163 8725980 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3164 8725980 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3165 8725980 : xmm2 = _mm_and_si128(xmm2, xmm_mask);
3166 8725980 : xmm3 = _mm_and_si128(xmm3, xmm_mask);
3167 : // Pack int32 to int16
3168 8726510 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
3169 8726450 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
3170 : // Pack int16 to uint8
3171 8726310 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
3172 :
3173 : // Store result
3174 8726310 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3175 :
3176 8726310 : pSrc += 4 * 16;
3177 : }
3178 : }
3179 1118790 : for (; i < nIters; i++)
3180 : {
3181 1013250 : pDest[i] = *pSrc;
3182 1013250 : pSrc += 4;
3183 : }
3184 105535 : }
3185 : #endif // HAVE_SSE2
3186 :
3187 : /************************************************************************/
3188 : /* GDALFastCopy() */
3189 : /************************************************************************/
3190 :
3191 : template <class T>
3192 39811900 : static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3193 : const T *CPL_RESTRICT pSrc, int nSrcStride,
3194 : GPtrDiff_t nIters)
3195 : {
3196 39811900 : constexpr int sizeofT = static_cast<int>(sizeof(T));
3197 39811900 : if (nIters == 1)
3198 : {
3199 22302140 : *pDest = *pSrc;
3200 : }
3201 17509816 : else if (nDestStride == sizeofT)
3202 : {
3203 12248223 : if (nSrcStride == sizeofT)
3204 : {
3205 11512790 : memcpy(pDest, pSrc, nIters * sizeof(T));
3206 : }
3207 735430 : else if (nSrcStride == 2 * sizeofT)
3208 : {
3209 306938 : GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3210 : }
3211 428492 : else if (nSrcStride == 3 * sizeofT)
3212 : {
3213 289938 : GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3214 : }
3215 138554 : else if (nSrcStride == 4 * sizeofT)
3216 : {
3217 134068 : GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3218 : }
3219 : else
3220 : {
3221 12978040 : while (nIters-- > 0)
3222 : {
3223 12973530 : *pDest = *pSrc;
3224 12973530 : pSrc += nSrcStride / sizeofT;
3225 12973530 : pDest++;
3226 : }
3227 : }
3228 : }
3229 5261553 : else if (nSrcStride == sizeofT)
3230 : {
3231 5246505 : if (nDestStride == 2 * sizeofT)
3232 : {
3233 129155 : GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3234 : }
3235 5117350 : else if (nDestStride == 3 * sizeofT)
3236 : {
3237 4410211 : GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3238 : }
3239 707143 : else if (nDestStride == 4 * sizeofT)
3240 : {
3241 647716 : GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3242 : }
3243 : else
3244 : {
3245 12650500 : while (nIters-- > 0)
3246 : {
3247 12591080 : *pDest = *pSrc;
3248 12591080 : pSrc++;
3249 12591080 : pDest += nDestStride / sizeofT;
3250 : }
3251 : }
3252 : }
3253 : else
3254 : {
3255 1113938 : while (nIters-- > 0)
3256 : {
3257 1098888 : *pDest = *pSrc;
3258 1098888 : pSrc += nSrcStride / sizeofT;
3259 1098888 : pDest += nDestStride / sizeofT;
3260 : }
3261 : }
3262 39811900 : }
3263 :
3264 : /************************************************************************/
3265 : /* GDALFastCopyByte() */
3266 : /************************************************************************/
3267 :
// Non-template entry point for byte copies (forward-declared near the top of
// the file). Forwards to GDALFastCopy() for GByte; note that GDALFastCopy()
// takes the destination pointer and stride first, then the source ones.
static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
                             int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
                             int nDstPixelStride, GPtrDiff_t nWordCount)
{
    GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
                 nWordCount);
}
3275 :
3276 : /************************************************************************/
3277 : /* GDALCopyWords() */
3278 : /************************************************************************/
3279 :
/**
 * Copy pixel words from buffer to buffer.
 *
 * Variant of GDALCopyWords64() taking the word count as an int; all
 * arguments are forwarded unchanged to GDALCopyWords64().
 *
 * @see GDALCopyWords64()
 */
void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
                               GDALDataType eSrcType, int nSrcPixelStride,
                               void *CPL_RESTRICT pDstData,
                               GDALDataType eDstType, int nDstPixelStride,
                               int nWordCount)
{
    GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
                    nDstPixelStride, nWordCount);
}
3294 :
3295 : /************************************************************************/
3296 : /* GDALCopyWords64() */
3297 : /************************************************************************/
3298 :
3299 : /**
3300 : * Copy pixel words from buffer to buffer.
3301 : *
3302 : * This function is used to copy pixel word values from one memory buffer
3303 : * to another, with support for conversion between data types, and differing
3304 : * step factors. The data type conversion is done using the following
3305 : * rules:
3306 : * <ul>
3307 : * <li>Values assigned to a lower range integer type are clipped. For
3308 : * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3309 : * less than 0 to be set to 0, and values larger than 255 to be set to 255.
3310 : * </li>
3311 : * <li>
3312 : * Assignment from floating point to integer rounds to closest integer.
3313 : * +Infinity is mapped to the largest integer. -Infinity is mapped to the
3314 : * smallest integer. NaN is mapped to 0.
3315 : * </li>
3316 : * <li>
3317 : * Assignment from non-complex to complex will result in the imaginary part
3318 : * being set to zero on output.
3319 : * </li>
3320 : * <li> Assignment from complex to
3321 : * non-complex will result in the complex portion being lost and the real
3322 : * component being preserved (<i>not magnitude!</i>).
3323 : * </li>
3324 : * </ul>
3325 : *
3326 : * No assumptions are made about the source or destination words occurring
3327 : * on word boundaries. It is assumed that all values are in native machine
3328 : * byte order.
3329 : *
3330 : * @param pSrcData Pointer to source data to be converted.
3331 : * @param eSrcType the source data type (see GDALDataType enum)
3332 : * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3333 : * in bytes
3334 : * @param pDstData Pointer to buffer where destination data should go
3335 : * @param eDstType the destination data type (see GDALDataType enum)
3336 : * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3337 : * words), in bytes
3338 : * @param nWordCount number of words to be copied
3339 : *
3340 : * @note
3341 : * When adding a new data type to GDAL, you must do the following to
3342 : * support it properly within the GDALCopyWords function:
3343 : * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3344 : * This should invoke the appropriate GDALCopyWordsFromT wrapper.
3345 : * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3346 : * This should call the appropriate GDALCopyWordsT template.
3347 : * 3. If appropriate, overload the appropriate CopyWord template in the
3348 : * above namespace. This will ensure that any conversion issues are
3349 : * handled (cases like the float -> int32 case, where the min/max)
3350 : * values are subject to roundoff error.
3351 : */
3352 :
3353 108301000 : void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
3354 : GDALDataType eSrcType, int nSrcPixelStride,
3355 : void *CPL_RESTRICT pDstData,
3356 : GDALDataType eDstType, int nDstPixelStride,
3357 : GPtrDiff_t nWordCount)
3358 :
3359 : {
3360 : // On platforms where alignment matters, be careful
3361 108301000 : const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
3362 108285000 : const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
3363 108286000 : if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0))
3364 : {
3365 2 : CPLError(CE_Failure, CPLE_NotSupported,
3366 : "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
3367 : "argument");
3368 2 : return;
3369 : }
3370 108286000 : if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
3371 60236000 : ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
3372 60234200 : (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 ||
3373 60226200 : (nSrcPixelStride % nSrcDataTypeSize) != 0 ||
3374 60222900 : (nDstPixelStride % nDstDataTypeSize) != 0))
3375 : {
3376 905 : if (eSrcType == eDstType)
3377 : {
3378 34800 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3379 : {
3380 34000 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3381 : static_cast<const GByte *>(pSrcData) +
3382 34000 : nSrcPixelStride * i,
3383 : nDstDataTypeSize);
3384 : }
3385 : }
3386 : else
3387 : {
3388 210 : const auto getAlignedPtr = [](GByte *ptr, int align)
3389 : {
3390 : return ptr +
3391 210 : ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) %
3392 210 : align);
3393 : };
3394 :
3395 : // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3396 : // be sure to get correctly aligned pointer.
3397 105 : constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
3398 : GByte abySrcBuffer[2 * SIZEOF_CFLOAT64];
3399 : GByte abyDstBuffer[2 * SIZEOF_CFLOAT64];
3400 : GByte *pabySrcBuffer =
3401 105 : getAlignedPtr(abySrcBuffer, nSrcDataTypeSize);
3402 : GByte *pabyDstBuffer =
3403 105 : getAlignedPtr(abyDstBuffer, nDstDataTypeSize);
3404 3360 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3405 : {
3406 3255 : memcpy(pabySrcBuffer,
3407 : static_cast<const GByte *>(pSrcData) +
3408 3255 : nSrcPixelStride * i,
3409 : nSrcDataTypeSize);
3410 3255 : GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer,
3411 : eDstType, 0, 1);
3412 3255 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3413 : pabyDstBuffer, nDstDataTypeSize);
3414 : }
3415 : }
3416 905 : return;
3417 : }
3418 :
3419 : // Deal with the case where we're replicating a single word into the
3420 : // provided buffer
3421 108285000 : if (nSrcPixelStride == 0 && nWordCount > 1)
3422 : {
3423 906311 : GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
3424 : nDstPixelStride, nWordCount);
3425 906328 : return;
3426 : }
3427 :
3428 107379000 : if (eSrcType == eDstType)
3429 : {
3430 54005300 : if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
3431 : {
3432 18570200 : GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride,
3433 : static_cast<const GByte *>(pSrcData), nSrcPixelStride,
3434 : nWordCount);
3435 18569600 : return;
3436 : }
3437 :
3438 35435100 : if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 &&
3439 20965500 : (nDstPixelStride % 2) == 0)
3440 : {
3441 20965500 : GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
3442 : static_cast<const short *>(pSrcData), nSrcPixelStride,
3443 : nWordCount);
3444 20965100 : return;
3445 : }
3446 :
3447 14469600 : if (nWordCount == 1)
3448 : {
3449 : #if defined(CSA_BUILD) || defined(__COVERITY__)
3450 : // Avoid false positives...
3451 : memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3452 : #else
3453 14056600 : if (nSrcDataTypeSize == 2)
3454 0 : memcpy(pDstData, pSrcData, 2);
3455 14056600 : else if (nSrcDataTypeSize == 4)
3456 14014000 : memcpy(pDstData, pSrcData, 4);
3457 42617 : else if (nSrcDataTypeSize == 8)
3458 26100 : memcpy(pDstData, pSrcData, 8);
3459 : else /* if( eSrcType == GDT_CFloat64 ) */
3460 16517 : memcpy(pDstData, pSrcData, 16);
3461 : #endif
3462 14056600 : return;
3463 : }
3464 :
3465 : // Let memcpy() handle the case where we're copying a packed buffer
3466 : // of pixels.
3467 412965 : if (nSrcPixelStride == nDstPixelStride)
3468 : {
3469 259264 : if (nSrcPixelStride == nSrcDataTypeSize)
3470 : {
3471 257094 : memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
3472 257094 : return;
3473 : }
3474 : }
3475 : }
3476 :
3477 : // Handle the more general case -- deals with conversion of data types
3478 : // directly.
3479 53529500 : switch (eSrcType)
3480 : {
3481 14923700 : case GDT_Byte:
3482 14923700 : GDALCopyWordsFromT<unsigned char>(
3483 : static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
3484 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3485 14941200 : break;
3486 976 : case GDT_Int8:
3487 976 : GDALCopyWordsFromT<signed char>(
3488 : static_cast<const signed char *>(pSrcData), nSrcPixelStride,
3489 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3490 976 : break;
3491 53125 : case GDT_UInt16:
3492 53125 : GDALCopyWordsFromT<unsigned short>(
3493 : static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
3494 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3495 53125 : break;
3496 4543330 : case GDT_Int16:
3497 4543330 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3498 : nSrcPixelStride, false, pDstData,
3499 : eDstType, nDstPixelStride, nWordCount);
3500 4543320 : break;
3501 6747 : case GDT_UInt32:
3502 6747 : GDALCopyWordsFromT<unsigned int>(
3503 : static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
3504 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3505 6747 : break;
3506 12254600 : case GDT_Int32:
3507 12254600 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3508 : nSrcPixelStride, false, pDstData, eDstType,
3509 : nDstPixelStride, nWordCount);
3510 12254600 : break;
3511 1430 : case GDT_UInt64:
3512 1430 : GDALCopyWordsFromT<std::uint64_t>(
3513 : static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
3514 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3515 1430 : break;
3516 7280 : case GDT_Int64:
3517 7280 : GDALCopyWordsFromT<std::int64_t>(
3518 : static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
3519 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3520 7280 : break;
3521 318785 : case GDT_Float32:
3522 318785 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3523 : nSrcPixelStride, false, pDstData,
3524 : eDstType, nDstPixelStride, nWordCount);
3525 318779 : break;
3526 20678100 : case GDT_Float64:
3527 20678100 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3528 : nSrcPixelStride, false, pDstData,
3529 : eDstType, nDstPixelStride, nWordCount);
3530 20678200 : break;
3531 566961 : case GDT_CInt16:
3532 566961 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3533 : nSrcPixelStride, true, pDstData, eDstType,
3534 : nDstPixelStride, nWordCount);
3535 566961 : break;
3536 397 : case GDT_CInt32:
3537 397 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3538 : nSrcPixelStride, true, pDstData, eDstType,
3539 : nDstPixelStride, nWordCount);
3540 397 : break;
3541 1357 : case GDT_CFloat32:
3542 1357 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3543 : nSrcPixelStride, true, pDstData, eDstType,
3544 : nDstPixelStride, nWordCount);
3545 1357 : break;
3546 172534 : case GDT_CFloat64:
3547 172534 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3548 : nSrcPixelStride, true, pDstData,
3549 : eDstType, nDstPixelStride, nWordCount);
3550 172534 : break;
3551 0 : case GDT_Unknown:
3552 : case GDT_TypeCount:
3553 0 : CPLAssert(false);
3554 : }
3555 : }
3556 :
3557 : /************************************************************************/
3558 : /* GDALCopyBits() */
3559 : /************************************************************************/
3560 :
3561 : /**
3562 : * Bitwise word copying.
3563 : *
3564 : * A function for moving sets of partial bytes around. Loosely
3565 : * speaking this is a bitwise analog to GDALCopyWords().
3566 : *
3567 : * It copies nStepCount "words" where each word is nBitCount bits long.
3568 : * The nSrcStep and nDstStep are the number of bits from the start of one
3569 : * word to the next (same as nBitCount if they are packed). The nSrcOffset
3570 : * and nDstOffset are the offset into the source and destination buffers
3571 : * to start at, also measured in bits.
3572 : *
3573 : * All bit offsets are assumed to start from the high order bit in a byte
3574 : * (i.e. most significant bit first). Currently this function is not very
3575 : * optimized, but it may be improved for some common cases in the future
3576 : * as needed.
3577 : *
3578 : * @param pabySrcData the source data buffer.
3579 : * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3580 : * first word to copy.
3581 : * @param nSrcStep the offset in bits from the start of one source word to the
3582 : * start of the next.
3583 : * @param pabyDstData the destination data buffer.
3584 : * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3585 : * first word to copy over.
3586 : * @param nDstStep the offset in bits from the start of one word to the
3587 : * start of the next.
3588 : * @param nBitCount the number of bits in a word to be copied.
3589 : * @param nStepCount the number of words to copy.
3590 : */
3591 :
3592 0 : void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3593 : GByte *pabyDstData, int nDstOffset, int nDstStep,
3594 : int nBitCount, int nStepCount)
3595 :
3596 : {
3597 0 : VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
3598 :
3599 0 : for (int iStep = 0; iStep < nStepCount; iStep++)
3600 : {
3601 0 : for (int iBit = 0; iBit < nBitCount; iBit++)
3602 : {
3603 0 : if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
3604 0 : pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
3605 : else
3606 0 : pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
3607 :
3608 0 : nSrcOffset++;
3609 0 : nDstOffset++;
3610 : }
3611 :
3612 0 : nSrcOffset += (nSrcStep - nBitCount);
3613 0 : nDstOffset += (nDstStep - nBitCount);
3614 : }
3615 : }
3616 :
3617 : /************************************************************************/
3618 : /* GDALBandGetBestOverviewLevel() */
3619 : /* */
3620 : /* Returns the best overview level to satisfy the query or -1 if none */
3621 : /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */
3622 : /* returning a valid overview level */
3623 : /************************************************************************/
3624 :
3625 0 : int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
3626 : int &nXSize, int &nYSize, int nBufXSize,
3627 : int nBufYSize)
3628 : {
3629 0 : return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
3630 0 : nBufXSize, nBufYSize, nullptr);
3631 : }
3632 :
3633 322828 : int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
3634 : int &nYOff, int &nXSize, int &nYSize,
3635 : int nBufXSize, int nBufYSize,
3636 : GDALRasterIOExtraArg *psExtraArg)
3637 : {
3638 : /* -------------------------------------------------------------------- */
3639 : /* Compute the desired downsampling factor. It is */
3640 : /* based on the least reduced axis, and represents the number */
3641 : /* of source pixels to one destination pixel. */
3642 : /* -------------------------------------------------------------------- */
3643 322828 : const double dfDesiredDownsamplingFactor =
3644 322828 : ((nXSize / static_cast<double>(nBufXSize)) <
3645 160491 : (nYSize / static_cast<double>(nBufYSize)) ||
3646 : nBufYSize == 1)
3647 354204 : ? nXSize / static_cast<double>(nBufXSize)
3648 129115 : : nYSize / static_cast<double>(nBufYSize);
3649 :
3650 : /* -------------------------------------------------------------------- */
3651 : /* Find the overview level that largest downsampling factor (most */
3652 : /* downsampled) that is still less than (or only a little more) */
3653 : /* downsampled than the request. */
3654 : /* -------------------------------------------------------------------- */
3655 322828 : const int nOverviewCount = poBand->GetOverviewCount();
3656 322828 : GDALRasterBand *poBestOverview = nullptr;
3657 322828 : double dfBestDownsamplingFactor = 0;
3658 322828 : int nBestOverviewLevel = -1;
3659 :
3660 : const char *pszOversampligThreshold =
3661 322828 : CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
3662 :
3663 : // Note: keep this logic for overview selection in sync between
3664 : // gdalwarp_lib.cpp and rasterio.cpp
3665 : // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
3666 : const double dfOversamplingThreshold =
3667 645647 : pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
3668 322819 : : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
3669 645638 : ? 1.0
3670 322828 : : 1.2;
3671 325519 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3672 : {
3673 5529 : GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
3674 11058 : if (poOverview == nullptr ||
3675 11057 : poOverview->GetXSize() > poBand->GetXSize() ||
3676 5528 : poOverview->GetYSize() > poBand->GetYSize())
3677 : {
3678 1 : continue;
3679 : }
3680 :
3681 : // Compute downsampling factor of this overview
3682 : const double dfDownsamplingFactor = std::min(
3683 5528 : poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
3684 11056 : poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
3685 :
3686 : // Is it nearly the requested factor and better (lower) than
3687 : // the current best factor?
3688 : // Use an epsilon because of numerical instability.
3689 5528 : constexpr double EPSILON = 1e-1;
3690 5636 : if (dfDownsamplingFactor >=
3691 5528 : dfDesiredDownsamplingFactor * dfOversamplingThreshold +
3692 5420 : EPSILON ||
3693 : dfDownsamplingFactor <= dfBestDownsamplingFactor)
3694 : {
3695 108 : continue;
3696 : }
3697 :
3698 : // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3699 5420 : const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
3700 :
3701 5420 : if (pszResampling != nullptr &&
3702 71 : STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
3703 16 : continue;
3704 :
3705 : // OK, this is our new best overview.
3706 5404 : poBestOverview = poOverview;
3707 5404 : nBestOverviewLevel = iOverview;
3708 5404 : dfBestDownsamplingFactor = dfDownsamplingFactor;
3709 :
3710 5404 : if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
3711 : EPSILON)
3712 : {
3713 2838 : break;
3714 : }
3715 : }
3716 :
3717 : /* -------------------------------------------------------------------- */
3718 : /* If we didn't find an overview that helps us, just return */
3719 : /* indicating failure and the full resolution image will be used. */
3720 : /* -------------------------------------------------------------------- */
3721 322828 : if (nBestOverviewLevel < 0)
3722 319922 : return -1;
3723 :
3724 : /* -------------------------------------------------------------------- */
3725 : /* Recompute the source window in terms of the selected */
3726 : /* overview. */
3727 : /* -------------------------------------------------------------------- */
3728 : const double dfXFactor =
3729 2906 : poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
3730 : const double dfYFactor =
3731 2906 : poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
3732 2906 : CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
3733 : poBestOverview->GetYSize());
3734 :
3735 8718 : const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
3736 2906 : static_cast<int>(nXOff / dfXFactor + 0.5));
3737 8718 : const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
3738 2906 : static_cast<int>(nYOff / dfYFactor + 0.5));
3739 2906 : int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
3740 2906 : int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
3741 2906 : if (nOXOff + nOXSize > poBestOverview->GetXSize())
3742 0 : nOXSize = poBestOverview->GetXSize() - nOXOff;
3743 2906 : if (nOYOff + nOYSize > poBestOverview->GetYSize())
3744 2 : nOYSize = poBestOverview->GetYSize() - nOYOff;
3745 :
3746 2906 : if (psExtraArg)
3747 : {
3748 2906 : if (psExtraArg->bFloatingPointWindowValidity)
3749 : {
3750 45 : psExtraArg->dfXOff /= dfXFactor;
3751 45 : psExtraArg->dfXSize /= dfXFactor;
3752 45 : psExtraArg->dfYOff /= dfYFactor;
3753 45 : psExtraArg->dfYSize /= dfYFactor;
3754 : }
3755 2861 : else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
3756 : {
3757 16 : psExtraArg->bFloatingPointWindowValidity = true;
3758 16 : psExtraArg->dfXOff = nXOff / dfXFactor;
3759 16 : psExtraArg->dfXSize = nXSize / dfXFactor;
3760 16 : psExtraArg->dfYOff = nYOff / dfYFactor;
3761 16 : psExtraArg->dfYSize = nYSize / dfYFactor;
3762 : }
3763 : }
3764 :
3765 2906 : nXOff = nOXOff;
3766 2906 : nYOff = nOYOff;
3767 2906 : nXSize = nOXSize;
3768 2906 : nYSize = nOYSize;
3769 :
3770 2906 : return nBestOverviewLevel;
3771 : }
3772 :
3773 : /************************************************************************/
3774 : /* OverviewRasterIO() */
3775 : /* */
3776 : /* Special work function to utilize available overviews to */
3777 : /* more efficiently satisfy downsampled requests. It will */
3778 : /* return CE_Failure if there are no appropriate overviews */
3779 : /* available but it doesn't emit any error messages. */
3780 : /************************************************************************/
3781 :
3782 : //! @cond Doxygen_Suppress
3783 2 : CPLErr GDALRasterBand::OverviewRasterIO(
3784 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3785 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3786 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
3787 :
3788 : {
3789 : GDALRasterIOExtraArg sExtraArg;
3790 2 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3791 :
3792 2 : const int nOverview = GDALBandGetBestOverviewLevel2(
3793 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg);
3794 2 : if (nOverview < 0)
3795 1 : return CE_Failure;
3796 :
3797 : /* -------------------------------------------------------------------- */
3798 : /* Recast the call in terms of the new raster layer. */
3799 : /* -------------------------------------------------------------------- */
3800 1 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
3801 1 : if (poOverviewBand == nullptr)
3802 0 : return CE_Failure;
3803 :
3804 1 : return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
3805 : pData, nBufXSize, nBufYSize, eBufType,
3806 1 : nPixelSpace, nLineSpace, &sExtraArg);
3807 : }
3808 :
3809 : /************************************************************************/
3810 : /* TryOverviewRasterIO() */
3811 : /************************************************************************/
3812 :
3813 161946 : CPLErr GDALRasterBand::TryOverviewRasterIO(
3814 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3815 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3816 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
3817 : int *pbTried)
3818 : {
3819 161946 : int nXOffMod = nXOff;
3820 161946 : int nYOffMod = nYOff;
3821 161946 : int nXSizeMod = nXSize;
3822 161946 : int nYSizeMod = nYSize;
3823 : GDALRasterIOExtraArg sExtraArg;
3824 :
3825 161946 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3826 :
3827 161946 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3828 : this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize,
3829 : &sExtraArg);
3830 :
3831 161946 : if (iOvrLevel >= 0)
3832 : {
3833 49 : GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel);
3834 49 : if (poOverviewBand)
3835 : {
3836 49 : *pbTried = TRUE;
3837 49 : return poOverviewBand->RasterIO(
3838 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData,
3839 : nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
3840 49 : &sExtraArg);
3841 : }
3842 : }
3843 :
3844 161897 : *pbTried = FALSE;
3845 161897 : return CE_None;
3846 : }
3847 :
3848 : /************************************************************************/
3849 : /* TryOverviewRasterIO() */
3850 : /************************************************************************/
3851 :
3852 158041 : CPLErr GDALDataset::TryOverviewRasterIO(
3853 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3854 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3855 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
3856 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
3857 : int *pbTried)
3858 : {
3859 158041 : int nXOffMod = nXOff;
3860 158041 : int nYOffMod = nYOff;
3861 158041 : int nXSizeMod = nXSize;
3862 158041 : int nYSizeMod = nYSize;
3863 : GDALRasterIOExtraArg sExtraArg;
3864 158041 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3865 :
3866 316082 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3867 158041 : papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
3868 : nBufYSize, &sExtraArg);
3869 :
3870 158080 : if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr &&
3871 39 : papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr)
3872 : {
3873 39 : *pbTried = TRUE;
3874 39 : return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO(
3875 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize,
3876 : nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace,
3877 39 : nLineSpace, nBandSpace, &sExtraArg);
3878 : }
3879 : else
3880 : {
3881 158002 : *pbTried = FALSE;
3882 158002 : return CE_None;
3883 : }
3884 : }
3885 :
3886 : /************************************************************************/
3887 : /* GetBestOverviewLevel() */
3888 : /* */
3889 : /* Returns the best overview level to satisfy the query or -1 if none */
3890 : /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */
3891 : /* overview level */
3892 : /************************************************************************/
3893 :
3894 4 : static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
3895 : int &nYOff, int &nXSize, int &nYSize,
3896 : int nBufXSize, int nBufYSize,
3897 : int nBandCount,
3898 : const int *panBandMap,
3899 : GDALRasterIOExtraArg *psExtraArg)
3900 : {
3901 4 : int nOverviewCount = 0;
3902 4 : GDALRasterBand *poFirstBand = nullptr;
3903 :
3904 : /* -------------------------------------------------------------------- */
3905 : /* Check that all bands have the same number of overviews and */
3906 : /* that they have all the same size and block dimensions */
3907 : /* -------------------------------------------------------------------- */
3908 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
3909 : {
3910 8 : GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
3911 8 : if (poBand == nullptr)
3912 0 : return -1;
3913 8 : if (iBand == 0)
3914 : {
3915 4 : poFirstBand = poBand;
3916 4 : nOverviewCount = poBand->GetOverviewCount();
3917 : }
3918 4 : else if (nOverviewCount != poBand->GetOverviewCount())
3919 : {
3920 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3921 : "mismatched overview count, use std method.");
3922 0 : return -1;
3923 : }
3924 : else
3925 : {
3926 4 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3927 : {
3928 0 : GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
3929 : GDALRasterBand *poOvrFirstBand =
3930 0 : poFirstBand->GetOverview(iOverview);
3931 0 : if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
3932 0 : continue;
3933 :
3934 0 : if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3935 0 : poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
3936 : {
3937 0 : CPLDebug("GDAL",
3938 : "GDALDataset::GetBestOverviewLevel() ... "
3939 : "mismatched overview sizes, use std method.");
3940 0 : return -1;
3941 : }
3942 0 : int nBlockXSizeFirst = 0;
3943 0 : int nBlockYSizeFirst = 0;
3944 0 : poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
3945 : &nBlockYSizeFirst);
3946 :
3947 0 : int nBlockXSizeCurrent = 0;
3948 0 : int nBlockYSizeCurrent = 0;
3949 0 : poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
3950 : &nBlockYSizeCurrent);
3951 :
3952 0 : if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3953 0 : nBlockYSizeFirst != nBlockYSizeCurrent)
3954 : {
3955 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3956 : "mismatched block sizes, use std method.");
3957 0 : return -1;
3958 : }
3959 : }
3960 : }
3961 : }
3962 4 : if (poFirstBand == nullptr)
3963 0 : return -1;
3964 :
3965 4 : return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
3966 : nYSize, nBufXSize, nBufYSize,
3967 4 : psExtraArg);
3968 : }
3969 :
3970 : /************************************************************************/
3971 : /* BlockBasedRasterIO() */
3972 : /* */
3973 : /* This convenience function implements a dataset level */
3974 : /* RasterIO() interface based on calling down to fetch blocks, */
3975 : /* much like the GDALRasterBand::IRasterIO(), but it handles */
3976 : /* all bands at once, so that a format driver that handles a */
3977 : /* request for different bands of the same block efficiently */
3978 : /* (i.e. without re-reading interleaved data) will do so efficiently. */
3979 : /* */
3980 : /* This method is intended to be called by an overridden */
3981 : /* IRasterIO() method in the driver specific GDALDataset */
3982 : /* derived class. */
3983 : /* */
3984 : /* Default internal implementation of RasterIO() ... utilizes */
3985 : /* the Block access methods to satisfy the request. This would */
3986 : /* normally only be overridden by formats with overviews. */
3987 : /* */
3988 : /* To keep things relatively simple, this method does not */
3989 : /* currently take advantage of some special cases addressed in */
3990 : /* GDALRasterBand::IRasterIO(), so it is likely best to only */
3991 : /* call it when you know it will help. That is in cases where */
3992 : /* data is at 1:1 to the buffer, and you know the driver is */
3993 : /* implementing interleaved IO efficiently on a block by block */
3994 : /* basis. Overviews will be used when possible. */
3995 : /************************************************************************/
3996 :
3997 63624 : CPLErr GDALDataset::BlockBasedRasterIO(
3998 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3999 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
4000 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
4001 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
4002 :
4003 : {
4004 63624 : CPLAssert(nullptr != pData);
4005 :
4006 63624 : GByte **papabySrcBlock = nullptr;
4007 63624 : GDALRasterBlock *poBlock = nullptr;
4008 63624 : GDALRasterBlock **papoBlocks = nullptr;
4009 63624 : int nLBlockX = -1;
4010 63624 : int nLBlockY = -1;
4011 : int iBufYOff;
4012 : int iBufXOff;
4013 63624 : int nBlockXSize = 1;
4014 63624 : int nBlockYSize = 1;
4015 63624 : CPLErr eErr = CE_None;
4016 63624 : GDALDataType eDataType = GDT_Byte;
4017 :
4018 63624 : const bool bUseIntegerRequestCoords =
4019 64057 : (!psExtraArg->bFloatingPointWindowValidity ||
4020 433 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
4021 431 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
4022 :
4023 : /* -------------------------------------------------------------------- */
4024 : /* Ensure that all bands share a common block size and data type. */
4025 : /* -------------------------------------------------------------------- */
4026 301197 : for (int iBand = 0; iBand < nBandCount; iBand++)
4027 : {
4028 237574 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4029 :
4030 237573 : if (iBand == 0)
4031 : {
4032 63622 : poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4033 63622 : eDataType = poBand->GetRasterDataType();
4034 : }
4035 : else
4036 : {
4037 173951 : int nThisBlockXSize = 0;
4038 173951 : int nThisBlockYSize = 0;
4039 173951 : poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
4040 173951 : if (nThisBlockXSize != nBlockXSize ||
4041 173951 : nThisBlockYSize != nBlockYSize)
4042 : {
4043 1 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4044 : "mismatched block sizes, use std method.");
4045 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4046 : pData, nBufXSize, nBufYSize, eBufType,
4047 : nBandCount, panBandMap, nPixelSpace,
4048 0 : nLineSpace, nBandSpace, psExtraArg);
4049 : }
4050 :
4051 173950 : if (eDataType != poBand->GetRasterDataType() &&
4052 0 : (nXSize != nBufXSize || nYSize != nBufYSize))
4053 : {
4054 1 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4055 : "mismatched band data types, use std method.");
4056 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4057 : pData, nBufXSize, nBufYSize, eBufType,
4058 : nBandCount, panBandMap, nPixelSpace,
4059 0 : nLineSpace, nBandSpace, psExtraArg);
4060 : }
4061 : }
4062 : }
4063 :
4064 : /* ==================================================================== */
4065 : /* In this special case at full resolution we step through in */
4066 : /* blocks, turning the request over to the per-band */
4067 : /* IRasterIO(), but ensuring that all bands of one block are */
4068 : /* called before proceeding to the next. */
4069 : /* ==================================================================== */
4070 :
4071 63623 : if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
4072 : {
4073 : GDALRasterIOExtraArg sDummyExtraArg;
4074 63619 : INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4075 :
4076 63619 : int nChunkYSize = 0;
4077 63619 : int nChunkXSize = 0;
4078 :
4079 220077 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
4080 : {
4081 157487 : const int nChunkYOff = iBufYOff + nYOff;
4082 157487 : nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4083 157487 : if (nChunkYOff + nChunkYSize > nYOff + nYSize)
4084 58874 : nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4085 :
4086 838808 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
4087 : {
4088 682347 : const int nChunkXOff = iBufXOff + nXOff;
4089 682347 : nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4090 682347 : if (nChunkXOff + nChunkXSize > nXOff + nXSize)
4091 74878 : nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4092 :
4093 682347 : GByte *pabyChunkData =
4094 682347 : static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
4095 682347 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4096 :
4097 3315810 : for (int iBand = 0; iBand < nBandCount; iBand++)
4098 : {
4099 2634490 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4100 :
4101 5268930 : eErr = poBand->IRasterIO(
4102 : eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
4103 : nChunkYSize,
4104 2634440 : pabyChunkData +
4105 2634440 : static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4106 : nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
4107 2634440 : nLineSpace, &sDummyExtraArg);
4108 2634490 : if (eErr != CE_None)
4109 1025 : return eErr;
4110 : }
4111 : }
4112 :
4113 176708 : if (psExtraArg->pfnProgress != nullptr &&
4114 20247 : !psExtraArg->pfnProgress(
4115 176708 : 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
4116 : nBufYSize,
4117 : "", psExtraArg->pProgressData))
4118 : {
4119 11 : return CE_Failure;
4120 : }
4121 : }
4122 :
4123 62590 : return CE_None;
4124 : }
4125 :
4126 : /* Below code is not compatible with that case. It would need a complete */
4127 : /* separate code like done in GDALRasterBand::IRasterIO. */
4128 4 : if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4129 : {
4130 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4131 : nBufXSize, nBufYSize, eBufType, nBandCount,
4132 : panBandMap, nPixelSpace, nLineSpace,
4133 0 : nBandSpace, psExtraArg);
4134 : }
4135 :
4136 : /* We could have a smarter implementation, but that will do for now */
4137 4 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4138 0 : (nBufXSize != nXSize || nBufYSize != nYSize))
4139 : {
4140 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4141 : nBufXSize, nBufYSize, eBufType, nBandCount,
4142 : panBandMap, nPixelSpace, nLineSpace,
4143 0 : nBandSpace, psExtraArg);
4144 : }
4145 :
4146 : /* ==================================================================== */
4147 : /* Loop reading required source blocks to satisfy output */
4148 : /* request. This is the most general implementation. */
4149 : /* ==================================================================== */
4150 :
4151 4 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
4152 :
4153 : papabySrcBlock =
4154 4 : static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
4155 : papoBlocks =
4156 4 : static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));
4157 :
4158 : /* -------------------------------------------------------------------- */
4159 : /* Select an overview level if appropriate. */
4160 : /* -------------------------------------------------------------------- */
4161 :
4162 : GDALRasterIOExtraArg sExtraArg;
4163 4 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4164 4 : const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
4165 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
4166 : panBandMap, &sExtraArg);
4167 4 : if (nOverviewLevel >= 0)
4168 : {
4169 2 : GetRasterBand(panBandMap[0])
4170 2 : ->GetOverview(nOverviewLevel)
4171 2 : ->GetBlockSize(&nBlockXSize, &nBlockYSize);
4172 : }
4173 :
4174 4 : double dfXOff = nXOff;
4175 4 : double dfYOff = nYOff;
4176 4 : double dfXSize = nXSize;
4177 4 : double dfYSize = nYSize;
4178 4 : if (sExtraArg.bFloatingPointWindowValidity)
4179 : {
4180 2 : dfXOff = sExtraArg.dfXOff;
4181 2 : dfYOff = sExtraArg.dfYOff;
4182 2 : dfXSize = sExtraArg.dfXSize;
4183 2 : dfYSize = sExtraArg.dfYSize;
4184 : }
4185 :
4186 : /* -------------------------------------------------------------------- */
4187 : /* Compute stepping increment. */
4188 : /* -------------------------------------------------------------------- */
4189 4 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
4190 4 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
4191 :
4192 4 : constexpr double EPS = 1e-10;
4193 : /* -------------------------------------------------------------------- */
4194 : /* Loop over buffer computing source locations. */
4195 : /* -------------------------------------------------------------------- */
4196 36 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
4197 : {
4198 : GPtrDiff_t iSrcOffset;
4199 :
4200 : // Add small epsilon to avoid some numeric precision issues.
4201 32 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4202 32 : const int iSrcY = static_cast<int>(std::min(
4203 32 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
4204 :
4205 32 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
4206 : static_cast<GPtrDiff_t>(nLineSpace);
4207 :
4208 302 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
4209 : {
4210 270 : const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4211 270 : const int iSrcX = static_cast<int>(std::min(
4212 270 : std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1)));
4213 :
4214 : // FIXME: this code likely doesn't work if the dirty block gets
4215 : // flushed to disk before being completely written. In the meantime,
4216 : // bJustInitialize should probably be set to FALSE even if it is not
4217 : // ideal performance wise, and for lossy compression
4218 :
4219 : /* --------------------------------------------------------------------
4220 : */
4221 : /* Ensure we have the appropriate block loaded. */
4222 : /* --------------------------------------------------------------------
4223 : */
4224 270 : if (iSrcX < nLBlockX * nBlockXSize ||
4225 270 : iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
4226 266 : iSrcY < nLBlockY * nBlockYSize ||
4227 266 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
4228 : {
4229 4 : nLBlockX = iSrcX / nBlockXSize;
4230 4 : nLBlockY = iSrcY / nBlockYSize;
4231 :
4232 4 : const bool bJustInitialize =
4233 0 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
4234 0 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
4235 4 : nXOff <= nLBlockX * nBlockXSize &&
4236 0 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4237 : /*bool bMemZeroBuffer = FALSE;
4238 : if( eRWFlag == GF_Write && !bJustInitialize &&
4239 : nXOff <= nLBlockX * nBlockXSize &&
4240 : nYOff <= nLBlockY * nBlockYSize &&
4241 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4242 : (nXOff + nXSize == GetRasterXSize() &&
4243 : (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4244 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4245 : (nYOff + nYSize == GetRasterYSize() &&
4246 : (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4247 : {
4248 : bJustInitialize = TRUE;
4249 : bMemZeroBuffer = TRUE;
4250 : }*/
4251 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4252 : {
4253 8 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4254 8 : if (nOverviewLevel >= 0)
4255 2 : poBand = poBand->GetOverview(nOverviewLevel);
4256 16 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
4257 8 : bJustInitialize);
4258 8 : if (poBlock == nullptr)
4259 : {
4260 0 : eErr = CE_Failure;
4261 0 : goto CleanupAndReturn;
4262 : }
4263 :
4264 8 : if (eRWFlag == GF_Write)
4265 0 : poBlock->MarkDirty();
4266 :
4267 8 : if (papoBlocks[iBand] != nullptr)
4268 0 : papoBlocks[iBand]->DropLock();
4269 :
4270 8 : papoBlocks[iBand] = poBlock;
4271 :
4272 8 : papabySrcBlock[iBand] =
4273 8 : static_cast<GByte *>(poBlock->GetDataRef());
4274 : /*if( bMemZeroBuffer )
4275 : {
4276 : memset(papabySrcBlock[iBand], 0,
4277 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
4278 : * nBlockYSize);
4279 : }*/
4280 : }
4281 : }
4282 :
4283 : /* --------------------------------------------------------------------
4284 : */
4285 : /* Copy over this pixel of data. */
4286 : /* --------------------------------------------------------------------
4287 : */
4288 270 : iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
4289 270 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
4290 270 : (static_cast<GPtrDiff_t>(iSrcY) -
4291 270 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
4292 270 : nBlockXSize) *
4293 270 : nBandDataSize;
4294 :
4295 980 : for (int iBand = 0; iBand < nBandCount; iBand++)
4296 : {
4297 710 : GByte *pabySrcBlock = papabySrcBlock[iBand];
4298 710 : GPtrDiff_t iBandBufOffset =
4299 710 : iBufOffset + static_cast<GPtrDiff_t>(iBand) *
4300 : static_cast<GPtrDiff_t>(nBandSpace);
4301 :
4302 710 : if (eDataType == eBufType)
4303 : {
4304 710 : if (eRWFlag == GF_Read)
4305 710 : memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
4306 710 : pabySrcBlock + iSrcOffset, nBandDataSize);
4307 : else
4308 0 : memcpy(pabySrcBlock + iSrcOffset,
4309 : static_cast<const GByte *>(pData) +
4310 0 : iBandBufOffset,
4311 : nBandDataSize);
4312 : }
4313 : else
4314 : {
4315 : /* type to type conversion ... ouch, this is expensive way
4316 : of handling single words */
4317 :
4318 0 : if (eRWFlag == GF_Read)
4319 0 : GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
4320 : static_cast<GByte *>(pData) +
4321 0 : iBandBufOffset,
4322 : eBufType, 0, 1);
4323 : else
4324 0 : GDALCopyWords64(static_cast<const GByte *>(pData) +
4325 0 : iBandBufOffset,
4326 0 : eBufType, 0, pabySrcBlock + iSrcOffset,
4327 : eDataType, 0, 1);
4328 : }
4329 : }
4330 :
4331 270 : iBufOffset += static_cast<int>(nPixelSpace);
4332 : }
4333 : }
4334 :
4335 : /* -------------------------------------------------------------------- */
4336 : /* CleanupAndReturn. */
4337 : /* -------------------------------------------------------------------- */
4338 4 : CleanupAndReturn:
4339 4 : CPLFree(papabySrcBlock);
4340 4 : if (papoBlocks != nullptr)
4341 : {
4342 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4343 : {
4344 8 : if (papoBlocks[iBand] != nullptr)
4345 8 : papoBlocks[iBand]->DropLock();
4346 : }
4347 4 : CPLFree(papoBlocks);
4348 : }
4349 :
4350 4 : return eErr;
4351 : }
4352 :
4353 : //! @endcond
4354 :
4355 : /************************************************************************/
4356 : /* GDALCopyWholeRasterGetSwathSize() */
4357 : /************************************************************************/
4358 :
4359 2895 : static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
4360 : GDALRasterBand *poDstPrototypeBand,
4361 : int nBandCount,
4362 : int bDstIsCompressed,
4363 : int bInterleave, int *pnSwathCols,
4364 : int *pnSwathLines)
4365 : {
4366 2895 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4367 2895 : int nSrcBlockXSize = 0;
4368 2895 : int nSrcBlockYSize = 0;
4369 2895 : int nBlockXSize = 0;
4370 2895 : int nBlockYSize = 0;
4371 :
4372 2895 : int nXSize = poSrcPrototypeBand->GetXSize();
4373 2895 : int nYSize = poSrcPrototypeBand->GetYSize();
4374 :
4375 2895 : poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
4376 2895 : poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4377 :
4378 2895 : const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4379 2895 : const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4380 :
4381 2895 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4382 2895 : if (bInterleave)
4383 1340 : nPixelSize *= nBandCount;
4384 :
4385 : // aim for one row of blocks. Do not settle for less.
4386 2895 : int nSwathCols = nXSize;
4387 2895 : int nSwathLines = nMaxBlockYSize;
4388 :
4389 : const char *pszSrcCompression =
4390 2895 : poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4391 2895 : if (pszSrcCompression == nullptr)
4392 : {
4393 2869 : auto poSrcDS = poSrcPrototypeBand->GetDataset();
4394 2869 : if (poSrcDS)
4395 : pszSrcCompression =
4396 2863 : poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4397 : }
4398 :
4399 : /* -------------------------------------------------------------------- */
4400 : /* What will our swath size be? */
4401 : /* -------------------------------------------------------------------- */
4402 : // When writing interleaved data in a compressed format, we want to be sure
4403 : // that each block will only be written once, so the swath size must not be
4404 : // greater than the block cache.
4405 2895 : const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4406 : int nTargetSwathSize;
4407 2895 : if (pszSwathSize != nullptr)
4408 0 : nTargetSwathSize = static_cast<int>(
4409 0 : std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4410 : else
4411 : {
4412 : // As a default, take one 1/4 of the cache size.
4413 2895 : nTargetSwathSize = static_cast<int>(
4414 2895 : std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4415 :
4416 : // but if the minimum idal swath buf size is less, then go for it to
4417 : // avoid unnecessarily abusing RAM usage.
4418 : // but try to use 10 MB at least.
4419 2895 : GIntBig nIdealSwathBufSize =
4420 2895 : static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4421 2895 : int nMinTargetSwathSize = 10 * 1000 * 1000;
4422 :
4423 2895 : if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
4424 2895 : GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
4425 : {
4426 2 : nMinTargetSwathSize = nTargetSwathSize;
4427 : }
4428 :
4429 2895 : if (nIdealSwathBufSize < nTargetSwathSize &&
4430 2885 : nIdealSwathBufSize < nMinTargetSwathSize)
4431 : {
4432 2882 : nIdealSwathBufSize = nMinTargetSwathSize;
4433 : }
4434 :
4435 2895 : if (pszSrcCompression != nullptr &&
4436 156 : EQUAL(pszSrcCompression, "JPEG2000") &&
4437 0 : (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4438 0 : (nSrcBlockYSize % nBlockYSize) == 0)))
4439 : {
4440 2 : nIdealSwathBufSize =
4441 4 : std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4442 2 : nSrcBlockYSize * nPixelSize);
4443 : }
4444 2895 : if (nTargetSwathSize > nIdealSwathBufSize)
4445 2881 : nTargetSwathSize = static_cast<int>(
4446 2881 : std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4447 : }
4448 :
4449 2895 : if (nTargetSwathSize < 1000000)
4450 8 : nTargetSwathSize = 1000000;
4451 :
4452 : /* But let's check that */
4453 3103 : if (bDstIsCompressed && bInterleave &&
4454 208 : nTargetSwathSize > GDALGetCacheMax64())
4455 : {
4456 0 : CPLError(CE_Warning, CPLE_AppDefined,
4457 : "When translating into a compressed interleave format, "
4458 : "the block cache size (" CPL_FRMT_GIB ") "
4459 : "should be at least the size of the swath (%d) "
4460 : "(GDAL_SWATH_SIZE config. option)",
4461 : GDALGetCacheMax64(), nTargetSwathSize);
4462 : }
4463 :
4464 : #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4465 : #define ROUND_TO(x, y) (((x) / (y)) * (y))
4466 :
4467 : // if both input and output datasets are tiled, that the tile dimensions
4468 : // are "compatible", try to stick to a swath dimension that is a multiple
4469 : // of input and output block dimensions.
4470 2895 : if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4471 34 : IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4472 34 : IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4473 34 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4474 34 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4475 : {
4476 34 : if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
4477 34 : nPixelSize <=
4478 34 : static_cast<GIntBig>(nTargetSwathSize))
4479 : {
4480 34 : nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4481 34 : nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4482 34 : if (nSwathCols == 0)
4483 0 : nSwathCols = nMaxBlockXSize;
4484 34 : if (nSwathCols > nXSize)
4485 32 : nSwathCols = nXSize;
4486 34 : nSwathLines = nMaxBlockYSize;
4487 :
4488 34 : if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4489 34 : static_cast<GIntBig>(nTargetSwathSize))
4490 : {
4491 0 : nSwathCols = nXSize;
4492 0 : nSwathLines = nBlockYSize;
4493 : }
4494 : }
4495 : }
4496 :
4497 2895 : const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4498 2895 : const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4499 2895 : if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
4500 : {
4501 1 : nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4502 1 : if (nSwathLines == 0)
4503 1 : nSwathLines = 1;
4504 :
4505 1 : CPLDebug(
4506 : "GDAL",
4507 : "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4508 : "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4509 : "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4510 1 : nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
4511 : }
4512 : // If we are processing single scans, try to handle several at once.
4513 : // If we are handling swaths already, only grow the swath if a row
4514 : // of blocks is substantially less than our target buffer size.
4515 2894 : else if (nSwathLines == 1 ||
4516 2393 : nMemoryPerCol * nSwathLines <
4517 2393 : static_cast<GIntBig>(nTargetSwathSize) / 10)
4518 : {
4519 2867 : nSwathLines = std::min(
4520 : nYSize,
4521 2867 : std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4522 :
4523 : /* If possible try to align to source and target block height */
4524 2867 : if ((nSwathLines % nMaxBlockYSize) != 0 &&
4525 979 : nSwathLines > nMaxBlockYSize &&
4526 979 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4527 951 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4528 169 : nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4529 : }
4530 :
4531 2895 : if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4532 0 : (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4533 0 : IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
4534 : {
4535 : // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4536 2 : if (nSwathLines < nSrcBlockYSize)
4537 : {
4538 0 : nSwathLines = nSrcBlockYSize;
4539 :
4540 : // Number of pixels that can be read/write simultaneously.
4541 0 : nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4542 0 : nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4543 0 : if (nSwathCols == 0)
4544 0 : nSwathCols = nSrcBlockXSize;
4545 0 : if (nSwathCols > nXSize)
4546 0 : nSwathCols = nXSize;
4547 :
4548 0 : CPLDebug(
4549 : "GDAL",
4550 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4551 : "too high block, "
4552 : "use partial width at one time");
4553 : }
4554 2 : else if ((nSwathLines % nSrcBlockYSize) != 0)
4555 : {
4556 : /* Round on a multiple of nSrcBlockYSize */
4557 0 : nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4558 0 : CPLDebug(
4559 : "GDAL",
4560 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4561 : "round nSwathLines to block height : %d",
4562 : nSwathLines);
4563 : }
4564 : }
4565 2893 : else if (bDstIsCompressed)
4566 : {
4567 366 : if (nSwathLines < nBlockYSize)
4568 : {
4569 142 : nSwathLines = nBlockYSize;
4570 :
4571 : // Number of pixels that can be read/write simultaneously.
4572 142 : nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4573 142 : nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4574 142 : if (nSwathCols == 0)
4575 0 : nSwathCols = nBlockXSize;
4576 142 : if (nSwathCols > nXSize)
4577 142 : nSwathCols = nXSize;
4578 :
4579 142 : CPLDebug(
4580 : "GDAL",
4581 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4582 : "too high block, "
4583 : "use partial width at one time");
4584 : }
4585 224 : else if ((nSwathLines % nBlockYSize) != 0)
4586 : {
4587 : // Round on a multiple of nBlockYSize.
4588 9 : nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4589 9 : CPLDebug(
4590 : "GDAL",
4591 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4592 : "round nSwathLines to block height : %d",
4593 : nSwathLines);
4594 : }
4595 : }
4596 :
4597 2895 : *pnSwathCols = nSwathCols;
4598 2895 : *pnSwathLines = nSwathLines;
4599 2895 : }
4600 :
4601 : /************************************************************************/
4602 : /* GDALDatasetCopyWholeRaster() */
4603 : /************************************************************************/
4604 :
4605 : /**
4606 : * \brief Copy all dataset raster data.
4607 : *
4608 : * This function copies the complete raster contents of one dataset to
4609 : * another similarly configured dataset. The source and destination
4610 : * dataset must have the same number of bands, and the same width
4611 : * and height. The bands do not have to have the same data type.
4612 : *
4613 : * This function is primarily intended to support implementation of
4614 : * driver specific CreateCopy() functions. It implements efficient copying,
4615 : * in particular "chunking" the copy in substantial blocks and, if appropriate,
4616 : * performing the transfer in a pixel interleaved fashion.
4617 : *
4618 : * Currently the only papszOptions value supported are :
4619 : * <ul>
4620 : * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
4621 : * write access pattern (this does not modify the layout of the destination
4622 : * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
4623 : * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
4624 : * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
4625 : * (GDAL >= 2.2)</li>
4626 : * </ul>
4627 : * More options may be supported in the future.
4628 : *
4629 : * @param hSrcDS the source dataset
4630 : * @param hDstDS the destination dataset
4631 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4632 : * @param pfnProgress progress reporting function.
4633 : * @param pProgressData callback data for progress function.
4634 : *
4635 : * @return CE_None on success, or CE_Failure on failure.
4636 : */
4637 :
4638 2867 : CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
4639 : GDALDatasetH hDstDS,
4640 : CSLConstList papszOptions,
4641 : GDALProgressFunc pfnProgress,
4642 : void *pProgressData)
4643 :
4644 : {
4645 2867 : VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4646 2867 : VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4647 :
4648 2867 : GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4649 2867 : GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4650 :
4651 2867 : if (pfnProgress == nullptr)
4652 3 : pfnProgress = GDALDummyProgress;
4653 :
4654 : /* -------------------------------------------------------------------- */
4655 : /* Confirm the datasets match in size and band counts. */
4656 : /* -------------------------------------------------------------------- */
4657 2867 : const int nXSize = poDstDS->GetRasterXSize();
4658 2867 : const int nYSize = poDstDS->GetRasterYSize();
4659 2867 : const int nBandCount = poDstDS->GetRasterCount();
4660 :
4661 2867 : if (poSrcDS->GetRasterXSize() != nXSize ||
4662 5734 : poSrcDS->GetRasterYSize() != nYSize ||
4663 2867 : poSrcDS->GetRasterCount() != nBandCount)
4664 : {
4665 0 : CPLError(CE_Failure, CPLE_AppDefined,
4666 : "Input and output dataset sizes or band counts do not\n"
4667 : "match in GDALDatasetCopyWholeRaster()");
4668 0 : return CE_Failure;
4669 : }
4670 :
4671 : /* -------------------------------------------------------------------- */
4672 : /* Report preliminary (0) progress. */
4673 : /* -------------------------------------------------------------------- */
4674 2867 : if (!pfnProgress(0.0, nullptr, pProgressData))
4675 : {
4676 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4677 : "User terminated CreateCopy()");
4678 1 : return CE_Failure;
4679 : }
4680 :
4681 : /* -------------------------------------------------------------------- */
4682 : /* Get our prototype band, and assume the others are similarly */
4683 : /* configured. */
4684 : /* -------------------------------------------------------------------- */
4685 2866 : if (nBandCount == 0)
4686 0 : return CE_None;
4687 :
4688 2866 : GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4689 2866 : GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4690 2866 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4691 :
4692 : /* -------------------------------------------------------------------- */
4693 : /* Do we want to try and do the operation in a pixel */
4694 : /* interleaved fashion? */
4695 : /* -------------------------------------------------------------------- */
4696 2866 : bool bInterleave = false;
4697 : const char *pszInterleave =
4698 2866 : poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4699 2866 : if (pszInterleave != nullptr &&
4700 1129 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4701 170 : bInterleave = true;
4702 :
4703 2866 : pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4704 2866 : if (pszInterleave != nullptr &&
4705 2062 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4706 1293 : bInterleave = true;
4707 :
4708 2866 : pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
4709 2866 : if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
4710 5 : bInterleave = true;
4711 2861 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
4712 7 : bInterleave = false;
4713 : // attributes is specific to the TileDB driver
4714 2854 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
4715 4 : bInterleave = true;
4716 2850 : else if (pszInterleave != nullptr)
4717 : {
4718 0 : CPLError(CE_Warning, CPLE_NotSupported,
4719 : "Unsupported value for option INTERLEAVE");
4720 : }
4721 :
4722 : // If the destination is compressed, we must try to write blocks just once,
4723 : // to save disk space (GTiff case for example), and to avoid data loss
4724 : // (JPEG compression for example).
4725 2866 : bool bDstIsCompressed = false;
4726 : const char *pszDstCompressed =
4727 2866 : CSLFetchNameValue(papszOptions, "COMPRESSED");
4728 2866 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4729 349 : bDstIsCompressed = true;
4730 :
4731 : /* -------------------------------------------------------------------- */
4732 : /* What will our swath size be? */
4733 : /* -------------------------------------------------------------------- */
4734 :
4735 2866 : int nSwathCols = 0;
4736 2866 : int nSwathLines = 0;
4737 2866 : GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
4738 : nBandCount, bDstIsCompressed, bInterleave,
4739 : &nSwathCols, &nSwathLines);
4740 :
4741 2866 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4742 2866 : if (bInterleave)
4743 1340 : nPixelSize *= nBandCount;
4744 :
4745 2866 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
4746 2866 : if (pSwathBuf == nullptr)
4747 : {
4748 0 : return CE_Failure;
4749 : }
4750 :
4751 2866 : CPLDebug("GDAL",
4752 : "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4753 : nSwathCols, nSwathLines, static_cast<int>(bInterleave));
4754 :
4755 : // Advise the source raster that we are going to read it completely
4756 : // Note: this might already have been done by GDALCreateCopy() in the
4757 : // likely case this function is indirectly called by it
4758 2866 : poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
4759 2866 : nullptr, nullptr);
4760 :
4761 : /* ==================================================================== */
4762 : /* Band oriented (uninterleaved) case. */
4763 : /* ==================================================================== */
4764 2866 : CPLErr eErr = CE_None;
4765 : const bool bCheckHoles =
4766 2866 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
4767 :
4768 2866 : if (!bInterleave)
4769 : {
4770 : GDALRasterIOExtraArg sExtraArg;
4771 1526 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4772 1526 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4773 :
4774 4578 : const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
4775 1526 : DIV_ROUND_UP(nYSize, nSwathLines) *
4776 1526 : DIV_ROUND_UP(nXSize, nSwathCols);
4777 1526 : GIntBig nBlocksDone = 0;
4778 :
4779 3756 : for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
4780 : {
4781 2230 : int nBand = iBand + 1;
4782 :
4783 4618 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4784 : {
4785 2388 : int nThisLines = nSwathLines;
4786 :
4787 2388 : if (iY + nThisLines > nYSize)
4788 274 : nThisLines = nYSize - iY;
4789 :
4790 4776 : for (int iX = 0; iX < nXSize && eErr == CE_None;
4791 2388 : iX += nSwathCols)
4792 : {
4793 2388 : int nThisCols = nSwathCols;
4794 :
4795 2388 : if (iX + nThisCols > nXSize)
4796 0 : nThisCols = nXSize - iX;
4797 :
4798 2388 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4799 2388 : if (bCheckHoles)
4800 : {
4801 : nStatus = poSrcDS->GetRasterBand(nBand)
4802 960 : ->GetDataCoverageStatus(
4803 : iX, iY, nThisCols, nThisLines,
4804 : GDAL_DATA_COVERAGE_STATUS_DATA);
4805 : }
4806 2388 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4807 : {
4808 2384 : sExtraArg.pfnProgress = GDALScaledProgress;
4809 4768 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4810 2384 : nBlocksDone / static_cast<double>(nTotalBlocks),
4811 2384 : (nBlocksDone + 0.5) /
4812 2384 : static_cast<double>(nTotalBlocks),
4813 : pfnProgress, pProgressData);
4814 2384 : if (sExtraArg.pProgressData == nullptr)
4815 1413 : sExtraArg.pfnProgress = nullptr;
4816 :
4817 2384 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4818 : nThisLines, pSwathBuf,
4819 : nThisCols, nThisLines, eDT, 1,
4820 : &nBand, 0, 0, 0, &sExtraArg);
4821 :
4822 2384 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4823 :
4824 2384 : if (eErr == CE_None)
4825 2380 : eErr = poDstDS->RasterIO(
4826 : GF_Write, iX, iY, nThisCols, nThisLines,
4827 : pSwathBuf, nThisCols, nThisLines, eDT, 1,
4828 : &nBand, 0, 0, 0, nullptr);
4829 : }
4830 :
4831 2388 : nBlocksDone++;
4832 4737 : if (eErr == CE_None &&
4833 2349 : !pfnProgress(nBlocksDone /
4834 2349 : static_cast<double>(nTotalBlocks),
4835 : nullptr, pProgressData))
4836 : {
4837 3 : eErr = CE_Failure;
4838 3 : CPLError(CE_Failure, CPLE_UserInterrupt,
4839 : "User terminated CreateCopy()");
4840 : }
4841 : }
4842 : }
4843 : }
4844 : }
4845 :
4846 : /* ==================================================================== */
4847 : /* Pixel interleaved case. */
4848 : /* ==================================================================== */
4849 : else /* if( bInterleave ) */
4850 : {
4851 : GDALRasterIOExtraArg sExtraArg;
4852 1340 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4853 1340 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4854 :
4855 1340 : const GIntBig nTotalBlocks =
4856 1340 : static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4857 1340 : DIV_ROUND_UP(nXSize, nSwathCols);
4858 1340 : GIntBig nBlocksDone = 0;
4859 :
4860 2906 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4861 : {
4862 1566 : int nThisLines = nSwathLines;
4863 :
4864 1566 : if (iY + nThisLines > nYSize)
4865 204 : nThisLines = nYSize - iY;
4866 :
4867 3137 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
4868 : {
4869 1571 : int nThisCols = nSwathCols;
4870 :
4871 1571 : if (iX + nThisCols > nXSize)
4872 3 : nThisCols = nXSize - iX;
4873 :
4874 1571 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4875 1571 : if (bCheckHoles)
4876 : {
4877 1344 : nStatus = 0;
4878 1397 : for (int iBand = 0; iBand < nBandCount; iBand++)
4879 : {
4880 1378 : nStatus |= poSrcDS->GetRasterBand(iBand + 1)
4881 1378 : ->GetDataCoverageStatus(
4882 : iX, iY, nThisCols, nThisLines,
4883 : GDAL_DATA_COVERAGE_STATUS_DATA);
4884 1378 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4885 1325 : break;
4886 : }
4887 : }
4888 1571 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4889 : {
4890 1552 : sExtraArg.pfnProgress = GDALScaledProgress;
4891 3104 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4892 1552 : nBlocksDone / static_cast<double>(nTotalBlocks),
4893 1552 : (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4894 : pfnProgress, pProgressData);
4895 1552 : if (sExtraArg.pProgressData == nullptr)
4896 331 : sExtraArg.pfnProgress = nullptr;
4897 :
4898 1552 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4899 : nThisLines, pSwathBuf, nThisCols,
4900 : nThisLines, eDT, nBandCount,
4901 : nullptr, 0, 0, 0, &sExtraArg);
4902 :
4903 1552 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4904 :
4905 1552 : if (eErr == CE_None)
4906 1551 : eErr = poDstDS->RasterIO(
4907 : GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
4908 : nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
4909 : 0, 0, nullptr);
4910 : }
4911 :
4912 1571 : nBlocksDone++;
4913 3138 : if (eErr == CE_None &&
4914 1567 : !pfnProgress(nBlocksDone /
4915 1567 : static_cast<double>(nTotalBlocks),
4916 : nullptr, pProgressData))
4917 : {
4918 1 : eErr = CE_Failure;
4919 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4920 : "User terminated CreateCopy()");
4921 : }
4922 : }
4923 : }
4924 : }
4925 :
4926 : /* -------------------------------------------------------------------- */
4927 : /* Cleanup */
4928 : /* -------------------------------------------------------------------- */
4929 2866 : CPLFree(pSwathBuf);
4930 :
4931 2866 : return eErr;
4932 : }
4933 :
4934 : /************************************************************************/
4935 : /* GDALRasterBandCopyWholeRaster() */
4936 : /************************************************************************/
4937 :
4938 : /**
4939 : * \brief Copy a whole raster band
4940 : *
4941 : * This function copies the complete raster contents of one band to
4942 : * another similarly configured band. The source and destination
4943 : * bands must have the same width and height. The bands do not have
4944 : * to have the same data type.
4945 : *
4946 : * It implements efficient copying, in particular "chunking" the copy in
4947 : * substantial blocks.
4948 : *
4949 : * Currently the only papszOptions value supported are :
4950 : * <ul>
4951 : * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4952 : * achieve best compression.</li>
4953 : * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4954 : * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li>
4955 : * </ul>
4956 : *
4957 : * @param hSrcBand the source band
4958 : * @param hDstBand the destination band
4959 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4960 : * @param pfnProgress progress reporting function.
4961 : * @param pProgressData callback data for progress function.
4962 : *
4963 : * @return CE_None on success, or CE_Failure on failure.
4964 : */
4965 :
4966 29 : CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
4967 : GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
4968 : const char *const *const papszOptions, GDALProgressFunc pfnProgress,
4969 : void *pProgressData)
4970 :
4971 : {
4972 29 : VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
4973 29 : VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
4974 :
4975 29 : GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
4976 29 : GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
4977 29 : CPLErr eErr = CE_None;
4978 :
4979 29 : if (pfnProgress == nullptr)
4980 11 : pfnProgress = GDALDummyProgress;
4981 :
4982 : /* -------------------------------------------------------------------- */
4983 : /* Confirm the datasets match in size and band counts. */
4984 : /* -------------------------------------------------------------------- */
4985 29 : int nXSize = poSrcBand->GetXSize();
4986 29 : int nYSize = poSrcBand->GetYSize();
4987 :
4988 29 : if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
4989 : {
4990 0 : CPLError(CE_Failure, CPLE_AppDefined,
4991 : "Input and output band sizes do not\n"
4992 : "match in GDALRasterBandCopyWholeRaster()");
4993 0 : return CE_Failure;
4994 : }
4995 :
4996 : /* -------------------------------------------------------------------- */
4997 : /* Report preliminary (0) progress. */
4998 : /* -------------------------------------------------------------------- */
4999 29 : if (!pfnProgress(0.0, nullptr, pProgressData))
5000 : {
5001 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5002 : "User terminated CreateCopy()");
5003 0 : return CE_Failure;
5004 : }
5005 :
5006 29 : GDALDataType eDT = poDstBand->GetRasterDataType();
5007 :
5008 : // If the destination is compressed, we must try to write blocks just once,
5009 : // to save disk space (GTiff case for example), and to avoid data loss
5010 : // (JPEG compression for example).
5011 29 : bool bDstIsCompressed = false;
5012 : const char *pszDstCompressed =
5013 29 : CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
5014 29 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5015 17 : bDstIsCompressed = true;
5016 :
5017 : /* -------------------------------------------------------------------- */
5018 : /* What will our swath size be? */
5019 : /* -------------------------------------------------------------------- */
5020 :
5021 29 : int nSwathCols = 0;
5022 29 : int nSwathLines = 0;
5023 29 : GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
5024 : FALSE, &nSwathCols, &nSwathLines);
5025 :
5026 29 : const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5027 :
5028 29 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5029 29 : if (pSwathBuf == nullptr)
5030 : {
5031 0 : return CE_Failure;
5032 : }
5033 :
5034 29 : CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5035 : nSwathCols, nSwathLines);
5036 :
5037 : const bool bCheckHoles =
5038 29 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5039 :
5040 : // Advise the source raster that we are going to read it completely
5041 29 : poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
5042 :
5043 : /* ==================================================================== */
5044 : /* Band oriented (uninterleaved) case. */
5045 : /* ==================================================================== */
5046 :
5047 67 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5048 : {
5049 38 : int nThisLines = nSwathLines;
5050 :
5051 38 : if (iY + nThisLines > nYSize)
5052 3 : nThisLines = nYSize - iY;
5053 :
5054 76 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5055 : {
5056 38 : int nThisCols = nSwathCols;
5057 :
5058 38 : if (iX + nThisCols > nXSize)
5059 0 : nThisCols = nXSize - iX;
5060 :
5061 38 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5062 38 : if (bCheckHoles)
5063 : {
5064 0 : nStatus = poSrcBand->GetDataCoverageStatus(
5065 : iX, iY, nThisCols, nThisLines,
5066 : GDAL_DATA_COVERAGE_STATUS_DATA);
5067 : }
5068 38 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5069 : {
5070 38 : eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5071 : nThisLines, pSwathBuf, nThisCols,
5072 : nThisLines, eDT, 0, 0, nullptr);
5073 :
5074 38 : if (eErr == CE_None)
5075 38 : eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5076 : nThisLines, pSwathBuf, nThisCols,
5077 : nThisLines, eDT, 0, 0, nullptr);
5078 : }
5079 :
5080 76 : if (eErr == CE_None &&
5081 38 : !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
5082 : nullptr, pProgressData))
5083 : {
5084 0 : eErr = CE_Failure;
5085 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5086 : "User terminated CreateCopy()");
5087 : }
5088 : }
5089 : }
5090 :
5091 : /* -------------------------------------------------------------------- */
5092 : /* Cleanup */
5093 : /* -------------------------------------------------------------------- */
5094 29 : CPLFree(pSwathBuf);
5095 :
5096 29 : return eErr;
5097 : }
5098 :
5099 : /************************************************************************/
5100 : /* GDALCopyRasterIOExtraArg () */
5101 : /************************************************************************/
5102 :
5103 323330 : void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5104 : GDALRasterIOExtraArg *psSrcArg)
5105 : {
5106 323330 : INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5107 323330 : if (psSrcArg)
5108 : {
5109 323330 : psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5110 323330 : psDestArg->pfnProgress = psSrcArg->pfnProgress;
5111 323330 : psDestArg->pProgressData = psSrcArg->pProgressData;
5112 323330 : psDestArg->bFloatingPointWindowValidity =
5113 323330 : psSrcArg->bFloatingPointWindowValidity;
5114 323330 : if (psSrcArg->bFloatingPointWindowValidity)
5115 : {
5116 3116 : psDestArg->dfXOff = psSrcArg->dfXOff;
5117 3116 : psDestArg->dfYOff = psSrcArg->dfYOff;
5118 3116 : psDestArg->dfXSize = psSrcArg->dfXSize;
5119 3116 : psDestArg->dfYSize = psSrcArg->dfYSize;
5120 : }
5121 : }
5122 323330 : }
5123 :
5124 : /************************************************************************/
5125 : /* HasOnlyNoData() */
5126 : /************************************************************************/
5127 :
/** Return whether a sample is equal to the nodata value (generic case:
 * plain equality). */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    return value == noDataValue;
}

/** Specialization for float: a NaN nodata value matches any NaN sample,
 * which plain equality would never do. */
template <> bool IsEqualToNoData<float>(float value, float noDataValue)
{
    return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
}

/** Specialization for double: a NaN nodata value matches any NaN sample,
 * which plain equality would never do. */
template <> bool IsEqualToNoData<double>(double value, double noDataValue)
{
    return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
}

/** Check whether all samples of a pixel-interleaved window are nodata.
 *
 * @param pBuffer first sample of the window.
 * @param noDataValue nodata value, already converted to the sample type.
 * @param nWidth width of the window, in pixels.
 * @param nHeight height of the window, in lines.
 * @param nLineStride distance between the start of two consecutive lines,
 * in pixels (each pixel being made of nComponents samples).
 * @param nComponents number of interleaved samples per pixel.
 *
 * @return true if every sample of the window is equal to the nodata value,
 * in the sense of IsEqualToNoData(). An empty window (zero width, height or
 * component count) returns true without reading pBuffer.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window has no sample that could differ from nodata. This must
    // be tested first: the corner probes below compute nWidth - 1 and
    // nHeight - 1 on unsigned values, which would wrap around and read far
    // outside of the buffer.
    if (nWidth == 0 || nHeight == 0 || nComponents == 0)
        return true;

    // Fast test: check the 4 corners and the middle pixel.
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        if (!(IsEqualToNoData(pBuffer[iBand], noDataValue) &&
              IsEqualToNoData(pBuffer[(nWidth - 1) * nComponents + iBand],
                              noDataValue) &&
              IsEqualToNoData(
                  pBuffer[((nHeight - 1) / 2 * nLineStride + (nWidth - 1) / 2) *
                              nComponents +
                          iBand],
                  noDataValue) &&
              IsEqualToNoData(
                  pBuffer[(nHeight - 1) * nLineStride * nComponents + iBand],
                  noDataValue) &&
              IsEqualToNoData(
                  pBuffer[((nHeight - 1) * nLineStride + nWidth - 1) *
                              nComponents +
                          iBand],
                  noDataValue)))
        {
            return false;
        }
    }

    // Test all pixels.
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nWidth * nComponents; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
5186 :
5187 : /************************************************************************/
5188 : /* GDALBufferHasOnlyNoData() */
5189 : /************************************************************************/
5190 :
5191 35354 : bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
5192 : size_t nWidth, size_t nHeight, size_t nLineStride,
5193 : size_t nComponents, int nBitsPerSample,
5194 : GDALBufferSampleFormat nSampleFormat)
5195 : {
5196 : // In the case where the nodata is 0, we can compare several bytes at
5197 : // once. Select the largest natural integer type for the architecture.
5198 : #if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5199 : // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5200 : typedef std::uint64_t WordType;
5201 : #else
5202 : typedef std::uint32_t WordType;
5203 : #endif
5204 35354 : if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
5205 : // Do not use this optimized code path for floating point numbers,
5206 : // as it can't detect negative zero.
5207 : nSampleFormat != GSF_FLOATING_POINT)
5208 : {
5209 23258 : const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
5210 23258 : const size_t nSize =
5211 23258 : (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
5212 23258 : size_t i = 0;
5213 : const size_t nInitialIters =
5214 46516 : std::min(sizeof(WordType) -
5215 23258 : static_cast<size_t>(
5216 : reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5217 : sizeof(WordType)),
5218 23258 : nSize);
5219 194736 : for (; i < nInitialIters; i++)
5220 : {
5221 174773 : if (pabyBuffer[i])
5222 3295 : return false;
5223 : }
5224 16095400 : for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
5225 : {
5226 16080900 : if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
5227 5428 : return false;
5228 : }
5229 51884 : for (; i < nSize; i++)
5230 : {
5231 37354 : if (pabyBuffer[i])
5232 5 : return false;
5233 : }
5234 14530 : return true;
5235 : }
5236 :
5237 12096 : if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
5238 : {
5239 22234 : return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
5240 11117 : HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
5241 11117 : static_cast<uint8_t>(dfNoDataValue), nWidth,
5242 11117 : nHeight, nLineStride, nComponents);
5243 : }
5244 979 : if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
5245 : {
5246 : // Use unsigned implementation by converting the nodatavalue to
5247 : // unsigned
5248 63 : return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
5249 31 : HasOnlyNoDataT(
5250 : static_cast<const uint8_t *>(pBuffer),
5251 31 : static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
5252 32 : nWidth, nHeight, nLineStride, nComponents);
5253 : }
5254 947 : if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
5255 : {
5256 21 : return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
5257 10 : HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
5258 10 : static_cast<uint16_t>(dfNoDataValue), nWidth,
5259 11 : nHeight, nLineStride, nComponents);
5260 : }
5261 936 : if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
5262 : {
5263 : // Use unsigned implementation by converting the nodatavalue to
5264 : // unsigned
5265 109 : return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
5266 54 : HasOnlyNoDataT(
5267 : static_cast<const uint16_t *>(pBuffer),
5268 54 : static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
5269 55 : nWidth, nHeight, nLineStride, nComponents);
5270 : }
5271 881 : if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
5272 : {
5273 73 : return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
5274 36 : HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
5275 : static_cast<uint32_t>(dfNoDataValue), nWidth,
5276 37 : nHeight, nLineStride, nComponents);
5277 : }
5278 844 : if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
5279 : {
5280 : // Use unsigned implementation by converting the nodatavalue to
5281 : // unsigned
5282 19 : return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
5283 9 : HasOnlyNoDataT(
5284 : static_cast<const uint32_t *>(pBuffer),
5285 9 : static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
5286 10 : nWidth, nHeight, nLineStride, nComponents);
5287 : }
5288 834 : if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
5289 : {
5290 56 : return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
5291 28 : HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
5292 : static_cast<uint64_t>(dfNoDataValue), nWidth,
5293 28 : nHeight, nLineStride, nComponents);
5294 : }
5295 806 : if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
5296 : {
5297 : // Use unsigned implementation by converting the nodatavalue to
5298 : // unsigned
5299 0 : return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
5300 0 : HasOnlyNoDataT(
5301 : static_cast<const uint64_t *>(pBuffer),
5302 0 : static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
5303 0 : nWidth, nHeight, nLineStride, nComponents);
5304 : }
5305 806 : if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
5306 : {
5307 1341 : return (std::isnan(dfNoDataValue) ||
5308 1370 : GDALIsValueInRange<float>(dfNoDataValue)) &&
5309 684 : HasOnlyNoDataT(static_cast<const float *>(pBuffer),
5310 : static_cast<float>(dfNoDataValue), nWidth,
5311 685 : nHeight, nLineStride, nComponents);
5312 : }
5313 121 : if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
5314 : {
5315 121 : return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
5316 : dfNoDataValue, nWidth, nHeight, nLineStride,
5317 121 : nComponents);
5318 : }
5319 0 : return false;
5320 : }
5321 :
5322 : #ifdef HAVE_SSE2
5323 :
5324 : /************************************************************************/
5325 : /* GDALDeinterleave3Byte() */
5326 : /************************************************************************/
5327 :
5328 : #if defined(__GNUC__) && !defined(__clang__)
5329 : __attribute__((optimize("no-tree-vectorize")))
5330 : #endif
5331 : static void
5332 69765 : GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5333 : GByte *CPL_RESTRICT pabyDest0,
5334 : GByte *CPL_RESTRICT pabyDest1,
5335 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5336 : #ifdef USE_NEON_OPTIMIZATIONS
5337 : {
5338 : return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5339 : nIters);
5340 : }
5341 : #else
5342 : {
5343 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5344 69765 : if (CPLHaveRuntimeSSSE3())
5345 : {
5346 69804 : return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5347 69795 : pabyDest2, nIters);
5348 : }
5349 : #endif
5350 :
5351 2 : size_t i = 0;
5352 2 : if (((reinterpret_cast<uintptr_t>(pabySrc) |
5353 2 : reinterpret_cast<uintptr_t>(pabyDest0) |
5354 2 : reinterpret_cast<uintptr_t>(pabyDest1) |
5355 2 : reinterpret_cast<uintptr_t>(pabyDest2)) %
5356 : sizeof(unsigned int)) == 0)
5357 : {
5358 : // Slightly better than GCC autovectorizer
5359 17 : for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
5360 : {
5361 15 : unsigned int word0 =
5362 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
5363 15 : unsigned int word1 =
5364 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
5365 15 : unsigned int word2 =
5366 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
5367 15 : reinterpret_cast<unsigned int *>(pabyDest0)[j] =
5368 15 : (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
5369 15 : ((word2 >> 8) << 24);
5370 15 : reinterpret_cast<unsigned int *>(pabyDest1)[j] =
5371 15 : ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
5372 15 : (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
5373 15 : pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
5374 15 : pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
5375 15 : pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
5376 15 : pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
5377 : }
5378 : }
5379 : #if defined(__clang__)
5380 : #pragma clang loop vectorize(disable)
5381 : #endif
5382 3 : for (; i < nIters; ++i)
5383 : {
5384 1 : pabyDest0[i] = pabySrc[3 * i + 0];
5385 1 : pabyDest1[i] = pabySrc[3 * i + 1];
5386 1 : pabyDest2[i] = pabySrc[3 * i + 2];
5387 : }
5388 : }
5389 : #endif
5390 :
5391 : /************************************************************************/
5392 : /* GDALDeinterleave4Byte() */
5393 : /************************************************************************/
5394 :
5395 : #if !defined(__GNUC__) || defined(__clang__)
5396 :
5397 : /************************************************************************/
5398 : /* deinterleave() */
5399 : /************************************************************************/
5400 :
5401 : template <bool SHIFT, bool MASK>
5402 : inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
5403 : __m128i &xmm2_ori, __m128i &xmm3_ori)
5404 : {
5405 : // Set higher 24bit of each int32 packed word to 0
5406 : if (SHIFT)
5407 : {
5408 : xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
5409 : xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
5410 : xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
5411 : xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
5412 : }
5413 : __m128i xmm0;
5414 : __m128i xmm1;
5415 : __m128i xmm2;
5416 : __m128i xmm3;
5417 : if (MASK)
5418 : {
5419 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
5420 : xmm0 = _mm_and_si128(xmm0_ori, xmm_mask);
5421 : xmm1 = _mm_and_si128(xmm1_ori, xmm_mask);
5422 : xmm2 = _mm_and_si128(xmm2_ori, xmm_mask);
5423 : xmm3 = _mm_and_si128(xmm3_ori, xmm_mask);
5424 : }
5425 : else
5426 : {
5427 : xmm0 = xmm0_ori;
5428 : xmm1 = xmm1_ori;
5429 : xmm2 = xmm2_ori;
5430 : xmm3 = xmm3_ori;
5431 : }
5432 : // Pack int32 to int16
5433 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
5434 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
5435 : // Pack int16 to uint8
5436 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
5437 : return xmm0;
5438 : }
5439 :
5440 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5441 : GByte *CPL_RESTRICT pabyDest0,
5442 : GByte *CPL_RESTRICT pabyDest1,
5443 : GByte *CPL_RESTRICT pabyDest2,
5444 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5445 : #ifdef USE_NEON_OPTIMIZATIONS
5446 : {
5447 : return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5448 : pabyDest3, nIters);
5449 : }
5450 : #else
5451 : {
5452 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5453 : if (CPLHaveRuntimeSSSE3())
5454 : {
5455 : return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5456 : pabyDest2, pabyDest3, nIters);
5457 : }
5458 : #endif
5459 :
5460 : // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
5461 : // do something slightly better.
5462 : size_t i = 0;
5463 : for (; i + 15 < nIters; i += 16)
5464 : {
5465 : __m128i xmm0_ori = _mm_loadu_si128(
5466 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
5467 : __m128i xmm1_ori = _mm_loadu_si128(
5468 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
5469 : __m128i xmm2_ori = _mm_loadu_si128(
5470 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
5471 : __m128i xmm3_ori = _mm_loadu_si128(
5472 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
5473 :
5474 : _mm_storeu_si128(
5475 : reinterpret_cast<__m128i *>(pabyDest0 + i),
5476 : deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5477 : _mm_storeu_si128(
5478 : reinterpret_cast<__m128i *>(pabyDest1 + i),
5479 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5480 : _mm_storeu_si128(
5481 : reinterpret_cast<__m128i *>(pabyDest2 + i),
5482 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5483 : _mm_storeu_si128(
5484 : reinterpret_cast<__m128i *>(pabyDest3 + i),
5485 : deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5486 : }
5487 :
5488 : #if defined(__clang__)
5489 : #pragma clang loop vectorize(disable)
5490 : #endif
5491 : for (; i < nIters; ++i)
5492 : {
5493 : pabyDest0[i] = pabySrc[4 * i + 0];
5494 : pabyDest1[i] = pabySrc[4 * i + 1];
5495 : pabyDest2[i] = pabySrc[4 * i + 2];
5496 : pabyDest3[i] = pabySrc[4 * i + 3];
5497 : }
5498 : }
5499 : #endif
5500 : #else
5501 : // GCC autovectorizer does an excellent job
5502 53069 : __attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
5503 : const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
5504 : GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
5505 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5506 : {
5507 527000000 : for (size_t i = 0; i < nIters; ++i)
5508 : {
5509 526947000 : pabyDest0[i] = pabySrc[4 * i + 0];
5510 526947000 : pabyDest1[i] = pabySrc[4 * i + 1];
5511 526947000 : pabyDest2[i] = pabySrc[4 * i + 2];
5512 526947000 : pabyDest3[i] = pabySrc[4 * i + 3];
5513 : }
5514 53069 : }
5515 : #endif
5516 :
5517 : #else
5518 :
5519 : /************************************************************************/
5520 : /* GDALDeinterleave3Byte() */
5521 : /************************************************************************/
5522 :
5523 : // TODO: Enabling below could help on non-Intel architectures where GCC knows
5524 : // how to auto-vectorize
5525 : // #if defined(__GNUC__)
5526 : //__attribute__((optimize("tree-vectorize")))
5527 : // #endif
5528 : static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5529 : GByte *CPL_RESTRICT pabyDest0,
5530 : GByte *CPL_RESTRICT pabyDest1,
5531 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5532 : {
5533 : for (size_t i = 0; i < nIters; ++i)
5534 : {
5535 : pabyDest0[i] = pabySrc[3 * i + 0];
5536 : pabyDest1[i] = pabySrc[3 * i + 1];
5537 : pabyDest2[i] = pabySrc[3 * i + 2];
5538 : }
5539 : }
5540 :
5541 : /************************************************************************/
5542 : /* GDALDeinterleave4Byte() */
5543 : /************************************************************************/
5544 :
5545 : // TODO: Enabling below could help on non-Intel architectures where gcc knows
5546 : // how to auto-vectorize
5547 : // #if defined(__GNUC__)
5548 : //__attribute__((optimize("tree-vectorize")))
5549 : // #endif
5550 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5551 : GByte *CPL_RESTRICT pabyDest0,
5552 : GByte *CPL_RESTRICT pabyDest1,
5553 : GByte *CPL_RESTRICT pabyDest2,
5554 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5555 : {
5556 : for (size_t i = 0; i < nIters; ++i)
5557 : {
5558 : pabyDest0[i] = pabySrc[4 * i + 0];
5559 : pabyDest1[i] = pabySrc[4 * i + 1];
5560 : pabyDest2[i] = pabySrc[4 * i + 2];
5561 : pabyDest3[i] = pabySrc[4 * i + 3];
5562 : }
5563 : }
5564 :
5565 : #endif
5566 :
5567 : /************************************************************************/
5568 : /* GDALDeinterleave() */
5569 : /************************************************************************/
5570 :
5571 : /*! Copy values from a pixel-interleave buffer to multiple per-component
5572 : buffers.
5573 :
5574 : In pseudo-code
5575 : \verbatim
5576 : for(size_t i = 0; i < nIters; ++i)
5577 : for(int iComp = 0; iComp < nComponents; iComp++ )
5578 : ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
5579 : \endverbatim
5580 :
5581 : The implementation is optimized for a few cases, like de-interleaving
5582 : of 3 or 4-components Byte buffers.
5583 :
5584 : \since GDAL 3.6
5585 : */
5586 123620 : void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
5587 : int nComponents, void **ppDestBuffer,
5588 : GDALDataType eDestDT, size_t nIters)
5589 : {
5590 123620 : if (eSourceDT == eDestDT)
5591 : {
5592 123597 : if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8)
5593 : {
5594 122864 : if (nComponents == 3)
5595 : {
5596 69769 : const GByte *CPL_RESTRICT pabySrc =
5597 : static_cast<const GByte *>(pSourceBuffer);
5598 69769 : GByte *CPL_RESTRICT pabyDest0 =
5599 : static_cast<GByte *>(ppDestBuffer[0]);
5600 69769 : GByte *CPL_RESTRICT pabyDest1 =
5601 : static_cast<GByte *>(ppDestBuffer[1]);
5602 69769 : GByte *CPL_RESTRICT pabyDest2 =
5603 : static_cast<GByte *>(ppDestBuffer[2]);
5604 69769 : GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5605 : nIters);
5606 69800 : return;
5607 : }
5608 53095 : else if (nComponents == 4)
5609 : {
5610 53069 : const GByte *CPL_RESTRICT pabySrc =
5611 : static_cast<const GByte *>(pSourceBuffer);
5612 53069 : GByte *CPL_RESTRICT pabyDest0 =
5613 : static_cast<GByte *>(ppDestBuffer[0]);
5614 53069 : GByte *CPL_RESTRICT pabyDest1 =
5615 : static_cast<GByte *>(ppDestBuffer[1]);
5616 53069 : GByte *CPL_RESTRICT pabyDest2 =
5617 : static_cast<GByte *>(ppDestBuffer[2]);
5618 53069 : GByte *CPL_RESTRICT pabyDest3 =
5619 : static_cast<GByte *>(ppDestBuffer[3]);
5620 53069 : GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5621 : pabyDest3, nIters);
5622 53069 : return;
5623 26 : }
5624 : }
5625 : #if ((defined(__GNUC__) && !defined(__clang__)) || \
5626 : defined(__INTEL_CLANG_COMPILER)) && \
5627 : defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
5628 1466 : else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
5629 733 : CPLHaveRuntimeSSSE3())
5630 : {
5631 733 : if (nComponents == 3)
5632 : {
5633 239 : const GUInt16 *CPL_RESTRICT panSrc =
5634 : static_cast<const GUInt16 *>(pSourceBuffer);
5635 239 : GUInt16 *CPL_RESTRICT panDest0 =
5636 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5637 239 : GUInt16 *CPL_RESTRICT panDest1 =
5638 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5639 239 : GUInt16 *CPL_RESTRICT panDest2 =
5640 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5641 239 : GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
5642 : panDest2, nIters);
5643 239 : return;
5644 : }
5645 : #if !defined(__INTEL_CLANG_COMPILER)
5646 : // ICC autovectorizer doesn't do a good job, at least with icx
5647 : // 2022.1.0.20220316
5648 494 : else if (nComponents == 4)
5649 : {
5650 494 : const GUInt16 *CPL_RESTRICT panSrc =
5651 : static_cast<const GUInt16 *>(pSourceBuffer);
5652 494 : GUInt16 *CPL_RESTRICT panDest0 =
5653 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5654 494 : GUInt16 *CPL_RESTRICT panDest1 =
5655 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5656 494 : GUInt16 *CPL_RESTRICT panDest2 =
5657 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5658 494 : GUInt16 *CPL_RESTRICT panDest3 =
5659 : static_cast<GUInt16 *>(ppDestBuffer[3]);
5660 494 : GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
5661 : panDest2, panDest3, nIters);
5662 494 : return;
5663 : }
5664 : #endif
5665 : }
5666 : #endif
5667 : }
5668 :
5669 49 : const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
5670 22 : const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
5671 87 : for (int iComp = 0; iComp < nComponents; iComp++)
5672 : {
5673 65 : GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
5674 65 : iComp * nSourceDTSize,
5675 : eSourceDT, nComponents * nSourceDTSize,
5676 65 : ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
5677 : }
5678 : }
5679 :
5680 : /************************************************************************/
5681 : /* GDALTranspose2DSingleToSingle() */
5682 : /************************************************************************/
5683 : /**
5684 : * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way.
5685 : *
5686 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5687 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5688 : * @param nSrcWidth Width of pSrc array.
5689 : * @param nSrcHeight Height of pSrc array.
5690 : */
5691 :
5692 : template <class DST, class SRC>
5693 124 : void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc,
5694 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5695 : size_t nSrcHeight)
5696 : {
5697 124 : constexpr size_t blocksize = 32;
5698 273 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5699 : {
5700 149 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5701 348 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5702 : {
5703 : // transpose the block beginning at [i,j]
5704 199 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5705 2446 : for (size_t k = i; k < max_k; ++k)
5706 : {
5707 40849 : for (size_t l = j; l < max_l; ++l)
5708 : {
5709 38602 : GDALCopyWord(pSrc[l + k * nSrcWidth],
5710 38602 : pDst[k + l * nSrcHeight]);
5711 : }
5712 : }
5713 : }
5714 : }
5715 124 : }
5716 :
5717 : /************************************************************************/
5718 : /* GDALTranspose2DComplexToComplex() */
5719 : /************************************************************************/
5720 : /**
5721 : * Transpose a 2D array of complex values into an array of complex values,
5722 : * in a efficient (cache-oblivious) way.
5723 : *
5724 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5725 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5726 : * @param nSrcWidth Width of pSrc array.
5727 : * @param nSrcHeight Height of pSrc array.
5728 : */
5729 : template <class DST, class SRC>
5730 16 : void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc,
5731 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5732 : size_t nSrcHeight)
5733 : {
5734 16 : constexpr size_t blocksize = 32;
5735 32 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5736 : {
5737 16 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5738 32 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5739 : {
5740 : // transpose the block beginning at [i,j]
5741 16 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5742 48 : for (size_t k = i; k < max_k; ++k)
5743 : {
5744 128 : for (size_t l = j; l < max_l; ++l)
5745 : {
5746 96 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5747 96 : pDst[2 * (k + l * nSrcHeight) + 0]);
5748 96 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1],
5749 96 : pDst[2 * (k + l * nSrcHeight) + 1]);
5750 : }
5751 : }
5752 : }
5753 : }
5754 16 : }
5755 :
5756 : /************************************************************************/
5757 : /* GDALTranspose2DComplexToSingle() */
5758 : /************************************************************************/
5759 : /**
5760 : * Transpose a 2D array of complex values into an array of non-complex values,
5761 : * in a efficient (cache-oblivious) way.
5762 : *
5763 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5764 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5765 : * @param nSrcWidth Width of pSrc array.
5766 : * @param nSrcHeight Height of pSrc array.
5767 : */
5768 : template <class DST, class SRC>
5769 40 : void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc,
5770 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5771 : size_t nSrcHeight)
5772 : {
5773 40 : constexpr size_t blocksize = 32;
5774 80 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5775 : {
5776 40 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5777 80 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5778 : {
5779 : // transpose the block beginning at [i,j]
5780 40 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5781 120 : for (size_t k = i; k < max_k; ++k)
5782 : {
5783 320 : for (size_t l = j; l < max_l; ++l)
5784 : {
5785 240 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5786 240 : pDst[k + l * nSrcHeight]);
5787 : }
5788 : }
5789 : }
5790 : }
5791 40 : }
5792 :
5793 : /************************************************************************/
5794 : /* GDALTranspose2DSingleToComplex() */
5795 : /************************************************************************/
5796 : /**
5797 : * Transpose a 2D array of non-complex values into an array of complex values,
5798 : * in a efficient (cache-oblivious) way.
5799 : *
5800 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5801 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5802 : * @param nSrcWidth Width of pSrc array.
5803 : * @param nSrcHeight Height of pSrc array.
5804 : */
5805 : template <class DST, class SRC>
5806 40 : void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc,
5807 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5808 : size_t nSrcHeight)
5809 : {
5810 40 : constexpr size_t blocksize = 32;
5811 80 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5812 : {
5813 40 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5814 80 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5815 : {
5816 : // transpose the block beginning at [i,j]
5817 40 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5818 120 : for (size_t k = i; k < max_k; ++k)
5819 : {
5820 320 : for (size_t l = j; l < max_l; ++l)
5821 : {
5822 240 : GDALCopyWord(pSrc[l + k * nSrcWidth],
5823 240 : pDst[2 * (k + l * nSrcHeight) + 0]);
5824 240 : pDst[2 * (k + l * nSrcHeight) + 1] = 0;
5825 : }
5826 : }
5827 : }
5828 : }
5829 40 : }
5830 :
5831 : /************************************************************************/
5832 : /* GDALTranspose2D() */
5833 : /************************************************************************/
5834 :
5835 : template <class DST, bool DST_IS_COMPLEX>
5836 220 : static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst,
5837 : size_t nSrcWidth, size_t nSrcHeight)
5838 : {
5839 : #define CALL_GDALTranspose2D_internal(SRC_TYPE) \
5840 : do \
5841 : { \
5842 : if constexpr (DST_IS_COMPLEX) \
5843 : { \
5844 : GDALTranspose2DSingleToComplex( \
5845 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5846 : nSrcHeight); \
5847 : } \
5848 : else \
5849 : { \
5850 : GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \
5851 : pDst, nSrcWidth, nSrcHeight); \
5852 : } \
5853 : } while (0)
5854 :
5855 : #define CALL_GDALTranspose2DComplex_internal(SRC_TYPE) \
5856 : do \
5857 : { \
5858 : if constexpr (DST_IS_COMPLEX) \
5859 : { \
5860 : GDALTranspose2DComplexToComplex( \
5861 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5862 : nSrcHeight); \
5863 : } \
5864 : else \
5865 : { \
5866 : GDALTranspose2DComplexToSingle( \
5867 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5868 : nSrcHeight); \
5869 : } \
5870 : } while (0)
5871 :
5872 : // clang-format off
5873 220 : switch (eSrcType)
5874 : {
5875 14 : case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t); break;
5876 13 : case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t); break;
5877 22 : case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t); break;
5878 14 : case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t); break;
5879 22 : case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t); break;
5880 14 : case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t); break;
5881 14 : case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t); break;
5882 14 : case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t); break;
5883 15 : case GDT_Float32: CALL_GDALTranspose2D_internal(float); break;
5884 22 : case GDT_Float64: CALL_GDALTranspose2D_internal(double); break;
5885 14 : case GDT_CInt16: CALL_GDALTranspose2DComplex_internal(int16_t); break;
5886 14 : case GDT_CInt32: CALL_GDALTranspose2DComplex_internal(int32_t); break;
5887 14 : case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break;
5888 14 : case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break;
5889 0 : case GDT_Unknown:
5890 : case GDT_TypeCount:
5891 0 : break;
5892 : }
5893 : // clang-format on
5894 :
5895 : #undef CALL_GDALTranspose2D_internal
5896 : #undef CALL_GDALTranspose2DComplex_internal
5897 220 : }
5898 :
5899 : /************************************************************************/
5900 : /* GDALInterleave2Byte() */
5901 : /************************************************************************/
5902 :
5903 : #if defined(HAVE_SSE2) && \
5904 : (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
5905 :
5906 : // ICC autovectorizer doesn't do a good job at generating good SSE code,
5907 : // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
5908 : #if defined(__GNUC__)
5909 : __attribute__((noinline))
5910 : #endif
5911 : static void
5912 : GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
5913 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
5914 : {
5915 : size_t i = 0;
5916 : constexpr size_t VALS_PER_ITER = 16;
5917 : for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
5918 : {
5919 : __m128i xmm0 =
5920 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i));
5921 : __m128i xmm1 = _mm_loadu_si128(
5922 : reinterpret_cast<__m128i const *>(pSrc + i + nIters));
5923 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
5924 : _mm_unpacklo_epi8(xmm0, xmm1));
5925 : _mm_storeu_si128(
5926 : reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
5927 : _mm_unpackhi_epi8(xmm0, xmm1));
5928 : }
5929 : #if defined(__clang__)
5930 : #pragma clang loop vectorize(disable)
5931 : #endif
5932 : for (; i < nIters; ++i)
5933 : {
5934 : pDst[2 * i + 0] = pSrc[i + 0 * nIters];
5935 : pDst[2 * i + 1] = pSrc[i + 1 * nIters];
5936 : }
5937 : }
5938 :
5939 : #else
5940 :
5941 : #if defined(__GNUC__) && !defined(__clang__)
5942 : __attribute__((optimize("tree-vectorize")))
5943 : #endif
5944 : #if defined(__GNUC__)
5945 : __attribute__((noinline))
5946 : #endif
5947 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
5948 : // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
5949 : #pragma clang diagnostic push
5950 : #pragma clang diagnostic ignored "-Wpass-failed"
5951 : #endif
5952 : static void
5953 4 : GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
5954 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
5955 : {
5956 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
5957 : #pragma clang loop vectorize(enable)
5958 : #endif
5959 44 : for (size_t i = 0; i < nIters; ++i)
5960 : {
5961 40 : pDst[2 * i + 0] = pSrc[i + 0 * nIters];
5962 40 : pDst[2 * i + 1] = pSrc[i + 1 * nIters];
5963 : }
5964 4 : }
5965 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
5966 : #pragma clang diagnostic pop
5967 : #endif
5968 :
5969 : #endif
5970 :
5971 : /************************************************************************/
5972 : /* GDALInterleave4Byte() */
5973 : /************************************************************************/
5974 :
5975 : #if defined(HAVE_SSE2) && \
5976 : (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
5977 :
5978 : // ICC autovectorizer doesn't do a good job at generating good SSE code,
5979 : // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
5980 : #if defined(__GNUC__)
5981 : __attribute__((noinline))
5982 : #endif
5983 : static void
5984 : GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
5985 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
5986 : {
5987 : size_t i = 0;
5988 : constexpr size_t VALS_PER_ITER = 16;
5989 : for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
5990 : {
5991 : __m128i xmm0 = _mm_loadu_si128(
5992 : reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
5993 : __m128i xmm1 = _mm_loadu_si128(
5994 : reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
5995 : __m128i xmm2 = _mm_loadu_si128(
5996 : reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
5997 : __m128i xmm3 = _mm_loadu_si128(
5998 : reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
5999 : auto tmp0 = _mm_unpacklo_epi8(
6000 : xmm0,
6001 : xmm1); // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
6002 : auto tmp1 = _mm_unpackhi_epi8(
6003 : xmm0,
6004 : xmm1); // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
6005 : auto tmp2 = _mm_unpacklo_epi8(
6006 : xmm2,
6007 : xmm3); // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
6008 : auto tmp3 = _mm_unpackhi_epi8(
6009 : xmm2,
6010 : xmm3); // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
6011 : auto tmp2_0 = _mm_unpacklo_epi16(
6012 : tmp0,
6013 : tmp2); // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
6014 : auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);
6015 : auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);
6016 : auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);
6017 : _mm_storeu_si128(
6018 : reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
6019 : tmp2_0);
6020 : _mm_storeu_si128(
6021 : reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
6022 : tmp2_1);
6023 : _mm_storeu_si128(
6024 : reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
6025 : tmp2_2);
6026 : _mm_storeu_si128(
6027 : reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
6028 : tmp2_3);
6029 : }
6030 : #if defined(__clang__)
6031 : #pragma clang loop vectorize(disable)
6032 : #endif
6033 : for (; i < nIters; ++i)
6034 : {
6035 : pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6036 : pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6037 : pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6038 : pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6039 : }
6040 : }
6041 :
6042 : #else
6043 :
6044 : #if defined(__GNUC__) && !defined(__clang__)
6045 : __attribute__((optimize("tree-vectorize")))
6046 : #endif
6047 : #if defined(__GNUC__)
6048 : __attribute__((noinline))
6049 : #endif
6050 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6051 : // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6052 : #pragma clang diagnostic push
6053 : #pragma clang diagnostic ignored "-Wpass-failed"
6054 : #endif
6055 : static void
6056 2 : GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6057 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
6058 : {
6059 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6060 : #pragma clang loop vectorize(enable)
6061 : #endif
6062 36 : for (size_t i = 0; i < nIters; ++i)
6063 : {
6064 34 : pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6065 34 : pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6066 34 : pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6067 34 : pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6068 : }
6069 2 : }
6070 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6071 : #pragma clang diagnostic pop
6072 : #endif
6073 :
6074 : #endif
6075 :
6076 : /************************************************************************/
6077 : /* GDALTranspose2D() */
6078 : /************************************************************************/
6079 :
6080 : /**
6081 : * Transpose a 2D array in a efficient (cache-oblivious) way.
6082 : *
6083 : * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight.
6084 : * @param eSrcType Data type of pSrc.
6085 : * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth.
6086 : * @param eDstType Data type of pDst.
6087 : * @param nSrcWidth Width of pSrc array.
6088 : * @param nSrcHeight Height of pSrc array.
6089 : * @since GDAL 3.11
6090 : */
6091 :
6092 245 : void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst,
6093 : GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight)
6094 : {
6095 245 : if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8))
6096 : {
6097 25 : if (nSrcHeight == 2)
6098 : {
6099 4 : GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc),
6100 : static_cast<uint8_t *>(pDst), nSrcWidth);
6101 4 : return;
6102 : }
6103 21 : if (nSrcHeight == 4)
6104 : {
6105 2 : GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc),
6106 : static_cast<uint8_t *>(pDst), nSrcWidth);
6107 2 : return;
6108 : }
6109 : #if (defined(HAVE_SSSE3_AT_COMPILE_TIME) && \
6110 : (defined(__x86_64) || defined(_M_X64)))
6111 19 : if (CPLHaveRuntimeSSSE3())
6112 : {
6113 19 : GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6114 : static_cast<uint8_t *>(pDst), nSrcWidth,
6115 : nSrcHeight);
6116 19 : return;
6117 : }
6118 : #elif defined(USE_NEON_OPTIMIZATIONS)
6119 : {
6120 : GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6121 : static_cast<uint8_t *>(pDst), nSrcWidth,
6122 : nSrcHeight);
6123 : return;
6124 : }
6125 : #endif
6126 : }
6127 :
6128 : #define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX) \
6129 : GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>( \
6130 : pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight)
6131 :
6132 : // clang-format off
6133 220 : switch (eDstType)
6134 : {
6135 13 : case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t, false); break;
6136 13 : case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t, false); break;
6137 22 : case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t, false); break;
6138 14 : case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t, false); break;
6139 22 : case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t, false); break;
6140 14 : case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t, false); break;
6141 14 : case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t, false); break;
6142 14 : case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t, false); break;
6143 15 : case GDT_Float32: CALL_GDALTranspose2D_internal(float, false); break;
6144 23 : case GDT_Float64: CALL_GDALTranspose2D_internal(double, false); break;
6145 14 : case GDT_CInt16: CALL_GDALTranspose2D_internal(int16_t, true); break;
6146 14 : case GDT_CInt32: CALL_GDALTranspose2D_internal(int32_t, true); break;
6147 14 : case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break;
6148 14 : case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break;
6149 0 : case GDT_Unknown:
6150 : case GDT_TypeCount:
6151 0 : break;
6152 : }
6153 : // clang-format on
6154 :
6155 : #undef CALL_GDALTranspose2D_internal
6156 : }
6157 :
6158 : /************************************************************************/
6159 : /* ExtractBitAndConvertTo255() */
6160 : /************************************************************************/
6161 :
6162 : #if defined(__GNUC__) || defined(_MSC_VER)
6163 : // Signedness of char implementation dependent, so be explicit.
6164 : // Assumes 2-complement integer types and sign extension of right shifting
6165 : // GCC guarantees such:
6166 : // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
6167 95050 : static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6168 : {
6169 95050 : return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >>
6170 95050 : 7);
6171 : }
6172 : #else
6173 : // Portable way
6174 : static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6175 : {
6176 : return (byVal & (1 << nBit)) ? 255 : 0;
6177 : }
6178 : #endif
6179 :
6180 : /************************************************************************/
6181 : /* ExpandEightPackedBitsToByteAt255() */
6182 : /************************************************************************/
6183 :
6184 11697 : static inline void ExpandEightPackedBitsToByteAt255(GByte byVal,
6185 : GByte abyOutput[8])
6186 : {
6187 11697 : abyOutput[0] = ExtractBitAndConvertTo255(byVal, 7);
6188 11697 : abyOutput[1] = ExtractBitAndConvertTo255(byVal, 6);
6189 11697 : abyOutput[2] = ExtractBitAndConvertTo255(byVal, 5);
6190 11697 : abyOutput[3] = ExtractBitAndConvertTo255(byVal, 4);
6191 11697 : abyOutput[4] = ExtractBitAndConvertTo255(byVal, 3);
6192 11697 : abyOutput[5] = ExtractBitAndConvertTo255(byVal, 2);
6193 11697 : abyOutput[6] = ExtractBitAndConvertTo255(byVal, 1);
6194 11697 : abyOutput[7] = ExtractBitAndConvertTo255(byVal, 0);
6195 11697 : }
6196 :
6197 : /************************************************************************/
6198 : /* GDALExpandPackedBitsToByteAt0Or255() */
6199 : /************************************************************************/
6200 :
6201 : /** Expand packed-bits (ordered from most-significant bit to least one)
6202 : into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6203 : at 1 to a byte at 255.
6204 :
6205 : The function does (in a possibly more optimized way) the following:
6206 : \code{.cpp}
6207 : for (size_t i = 0; i < nInputBits; ++i )
6208 : {
6209 : pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0;
6210 : }
6211 : \endcode
6212 :
6213 : @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6214 : @param pabyOutput Output array of nInputBits bytes.
6215 : @param nInputBits Number of valid bits in pabyInput.
6216 :
6217 : @since 3.11
6218 : */
6219 :
6220 30937 : void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput,
6221 : GByte *CPL_RESTRICT pabyOutput,
6222 : size_t nInputBits)
6223 : {
6224 30937 : const size_t nInputWholeBytes = nInputBits / 8;
6225 30937 : size_t iByte = 0;
6226 :
6227 : #ifdef HAVE_SSE2
6228 : // Mask to isolate each bit
6229 30937 : const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4,
6230 : 8, 16, 32, 64, -128);
6231 30937 : const __m128i zero = _mm_setzero_si128();
6232 30937 : const __m128i all_ones = _mm_set1_epi8(-1);
6233 : #ifdef __SSSE3__
6234 : const __m128i dispatch_two_bytes =
6235 : _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
6236 : #endif
6237 30937 : constexpr size_t SSE_REG_SIZE = sizeof(bit_mask);
6238 79750 : for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE)
6239 : {
6240 48813 : __m128i reg_ori = _mm_loadu_si128(
6241 48813 : reinterpret_cast<const __m128i *>(pabyInput + iByte));
6242 :
6243 48813 : constexpr int NUM_PROCESSED_BYTES_PER_REG = 2;
6244 439317 : for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k)
6245 : {
6246 : // Given reg_ori = (A, B, ... 14 other bytes ...),
6247 : // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B)
6248 : #ifdef __SSSE3__
6249 : __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes);
6250 : #else
6251 390504 : __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori);
6252 390504 : reg = _mm_unpacklo_epi16(reg, reg);
6253 390504 : reg = _mm_unpacklo_epi32(reg, reg);
6254 : #endif
6255 :
6256 : // Test if bits of interest are set
6257 390504 : reg = _mm_and_si128(reg, bit_mask);
6258 :
6259 : // Now test if those bits are set, by comparing to zero. So the
6260 : // result will be that bytes where bits are set will be at 0, and
6261 : // ones where they are cleared will be at 0xFF. So the inverse of
6262 : // the end result we want!
6263 390504 : reg = _mm_cmpeq_epi8(reg, zero);
6264 :
6265 : // Invert the result
6266 390504 : reg = _mm_andnot_si128(reg, all_ones);
6267 :
6268 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg);
6269 :
6270 390504 : pabyOutput += SSE_REG_SIZE;
6271 :
6272 : // Right-shift of 2 bytes
6273 390504 : reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG);
6274 : }
6275 : }
6276 :
6277 : #endif // HAVE_SSE2
6278 :
6279 42634 : for (; iByte < nInputWholeBytes; ++iByte)
6280 : {
6281 11697 : ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput);
6282 11697 : pabyOutput += 8;
6283 : }
6284 32411 : for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6285 : {
6286 1474 : *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit);
6287 1474 : ++pabyOutput;
6288 : }
6289 30937 : }
6290 :
6291 : /************************************************************************/
6292 : /* ExpandEightPackedBitsToByteAt1() */
6293 : /************************************************************************/
6294 :
6295 136113 : static inline void ExpandEightPackedBitsToByteAt1(GByte byVal,
6296 : GByte abyOutput[8])
6297 : {
6298 136113 : abyOutput[0] = (byVal >> 7) & 0x1;
6299 136113 : abyOutput[1] = (byVal >> 6) & 0x1;
6300 136113 : abyOutput[2] = (byVal >> 5) & 0x1;
6301 136113 : abyOutput[3] = (byVal >> 4) & 0x1;
6302 136113 : abyOutput[4] = (byVal >> 3) & 0x1;
6303 136113 : abyOutput[5] = (byVal >> 2) & 0x1;
6304 136113 : abyOutput[6] = (byVal >> 1) & 0x1;
6305 136113 : abyOutput[7] = (byVal >> 0) & 0x1;
6306 136113 : }
6307 :
6308 : /************************************************************************/
6309 : /* GDALExpandPackedBitsToByteAt0Or1() */
6310 : /************************************************************************/
6311 :
6312 : /** Expand packed-bits (ordered from most-significant bit to least one)
6313 : into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6314 : at 1 to a byte at 1.
6315 :
6316 : The function does (in a possibly more optimized way) the following:
6317 : \code{.cpp}
6318 : for (size_t i = 0; i < nInputBits; ++i )
6319 : {
6320 : pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
6321 : }
6322 : \endcode
6323 :
6324 : @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6325 : @param pabyOutput Output array of nInputBits bytes.
6326 : @param nInputBits Number of valid bits in pabyInput.
6327 :
6328 : @since 3.11
6329 : */
6330 :
6331 7041 : void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput,
6332 : GByte *CPL_RESTRICT pabyOutput,
6333 : size_t nInputBits)
6334 : {
6335 7041 : const size_t nInputWholeBytes = nInputBits / 8;
6336 7041 : size_t iByte = 0;
6337 143154 : for (; iByte < nInputWholeBytes; ++iByte)
6338 : {
6339 136113 : ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput);
6340 136113 : pabyOutput += 8;
6341 : }
6342 18902 : for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6343 : {
6344 11861 : *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1;
6345 11861 : ++pabyOutput;
6346 : }
6347 7041 : }
|