Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Contains default implementation of GDALRasterBand::IRasterIO()
5 : * and supporting functions of broader utility.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1998, Frank Warmerdam
10 : * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11 : *
12 : * SPDX-License-Identifier: MIT
13 : ****************************************************************************/
14 :
15 : #include "cpl_port.h"
16 : #include "gdal.h"
17 : #include "gdal_priv.h"
18 :
19 : #include <cassert>
20 : #include <climits>
21 : #include <cmath>
22 : #include <cstddef>
23 : #include <cstdio>
24 : #include <cstdlib>
25 : #include <cstring>
26 :
27 : #include <algorithm>
28 : #include <limits>
29 : #include <stdexcept>
30 : #include <type_traits>
31 :
32 : #include "cpl_conv.h"
33 : #include "cpl_cpu_features.h"
34 : #include "cpl_error.h"
35 : #include "cpl_progress.h"
36 : #include "cpl_string.h"
37 : #include "cpl_vsi.h"
38 : #include "gdal_priv_templates.hpp"
39 : #include "gdal_vrt.h"
40 : #include "gdalwarper.h"
41 : #include "memdataset.h"
42 : #include "vrtdataset.h"
43 :
44 : #if defined(__x86_64) || defined(_M_X64)
45 : #include <emmintrin.h>
46 : #define HAVE_SSE2
47 : #elif defined(USE_NEON_OPTIMIZATIONS)
48 : #include "include_sse2neon.h"
49 : #define HAVE_SSE2
50 : #endif
51 :
52 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
53 : #include "rasterio_ssse3.h"
54 : #endif
55 :
56 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
57 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
58 : int nDstPixelStride, GPtrDiff_t nWordCount);
59 :
60 : /************************************************************************/
61 : /* DownsamplingIntegerXFactor() */
62 : /************************************************************************/
63 :
64 : template <bool bSameDataType, int DATA_TYPE_SIZE>
65 413252 : static bool DownsamplingIntegerXFactor(
66 : GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
67 : GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
68 : GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
69 : int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
70 : {
71 413252 : const int nBandDataSize =
72 : bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
73 413252 : int nOuterLoopIters = nBufXSize - 1;
74 413252 : const int nIncSrcOffset = nSrcXInc * nBandDataSize;
75 : const GByte *CPL_RESTRICT pabySrcData;
76 413252 : int nEndBlockX = nBlockXSize + nStartBlockX;
77 :
78 413252 : if (iSrcX < nEndBlockX)
79 : {
80 226152 : CPLAssert(poBlock);
81 226152 : goto no_reload_block;
82 : }
83 187100 : goto reload_block;
84 :
85 : // Don't do the last iteration in the loop, as iSrcX might go beyond
86 : // nRasterXSize - 1
87 932888 : while (--nOuterLoopIters >= 1)
88 : {
89 189034 : iSrcX += nSrcXInc;
90 189034 : pabySrcData += nIncSrcOffset;
91 189034 : pabyDstData += nPixelSpace;
92 :
93 : /* --------------------------------------------------------------------
94 : */
95 : /* Ensure we have the appropriate block loaded. */
96 : /* --------------------------------------------------------------------
97 : */
98 189034 : if (iSrcX >= nEndBlockX)
99 : {
100 189034 : reload_block:
101 : {
102 388724 : const int nLBlockX = iSrcX / nBlockXSize;
103 388724 : nStartBlockX = nLBlockX * nBlockXSize;
104 388724 : nEndBlockX = nStartBlockX + nBlockXSize;
105 :
106 388724 : if (poBlock != nullptr)
107 316739 : poBlock->DropLock();
108 :
109 388724 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
110 388724 : if (poBlock == nullptr)
111 : {
112 1 : return false;
113 : }
114 : }
115 :
116 388723 : no_reload_block:
117 : const GByte *pabySrcBlock =
118 932888 : static_cast<const GByte *>(poBlock->GetDataRef());
119 932888 : GPtrDiff_t iSrcOffset =
120 932888 : (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
121 932888 : pabySrcData = pabySrcBlock + iSrcOffset;
122 : }
123 :
124 : /* --------------------------------------------------------------------
125 : */
126 : /* Copy the maximum run of pixels. */
127 : /* --------------------------------------------------------------------
128 : */
129 :
130 932888 : const int nIters = std::min(
131 932888 : (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
132 : if (bSameDataType)
133 : {
134 932483 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
135 932483 : if (nIters > 1)
136 : {
137 : if (DATA_TYPE_SIZE == 1)
138 : {
139 276307 : pabySrcData += nIncSrcOffset;
140 276307 : pabyDstData += nPixelSpace;
141 276307 : GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
142 276307 : nPixelSpace, nIters - 1);
143 276307 : pabySrcData +=
144 276307 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
145 276307 : pabyDstData +=
146 276307 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
147 : }
148 : else
149 : {
150 4443828 : for (int i = 0; i < nIters - 1; i++)
151 : {
152 4245254 : pabySrcData += nIncSrcOffset;
153 4245254 : pabyDstData += nPixelSpace;
154 4245254 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
155 : }
156 : }
157 474881 : iSrcX += nSrcXInc * (nIters - 1);
158 474881 : nOuterLoopIters -= nIters - 1;
159 : }
160 : }
161 : else
162 : {
163 : // Type to type conversion ...
164 405 : GDALCopyWords(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
165 405 : eBufType, nPixelSpace, std::max(1, nIters));
166 405 : if (nIters > 1)
167 : {
168 198 : pabySrcData +=
169 198 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
170 198 : pabyDstData +=
171 198 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
172 198 : iSrcX += nSrcXInc * (nIters - 1);
173 198 : nOuterLoopIters -= nIters - 1;
174 : }
175 : }
176 : }
177 :
178 : // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
179 743854 : if (nOuterLoopIters == 0)
180 : {
181 330603 : const int nRasterXSize = poBand->GetXSize();
182 330603 : iSrcX =
183 661206 : static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
184 330603 : static_cast<GInt64>(nRasterXSize - 1)));
185 330603 : pabyDstData += nPixelSpace;
186 330603 : if (iSrcX < nEndBlockX)
187 : {
188 318013 : goto no_reload_block;
189 : }
190 12590 : goto reload_block;
191 : }
192 413251 : return true;
193 : }
194 :
195 : /************************************************************************/
196 : /* IRasterIO() */
197 : /* */
198 : /* Default internal implementation of RasterIO() ... utilizes */
199 : /* the Block access methods to satisfy the request. This would */
200 : /* normally only be overridden by formats with overviews. */
201 : /************************************************************************/
202 :
203 5686630 : CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
204 : int nXSize, int nYSize, void *pData,
205 : int nBufXSize, int nBufYSize,
206 : GDALDataType eBufType, GSpacing nPixelSpace,
207 : GSpacing nLineSpace,
208 : GDALRasterIOExtraArg *psExtraArg)
209 :
210 : {
211 5686630 : if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
212 : {
213 0 : CPLError(eFlushBlockErr, CPLE_AppDefined,
214 : "An error occurred while writing a dirty block "
215 : "from GDALRasterBand::IRasterIO");
216 0 : CPLErr eErr = eFlushBlockErr;
217 0 : eFlushBlockErr = CE_None;
218 0 : return eErr;
219 : }
220 5686630 : if (nBlockXSize <= 0 || nBlockYSize <= 0)
221 : {
222 1987 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
223 0 : return CE_Failure;
224 : }
225 :
226 5684640 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
227 5684640 : const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
228 5682700 : GByte dummyBlock[2] = {0, 0};
229 5682700 : GByte *pabySrcBlock =
230 : dummyBlock; /* to avoid Coverity warning about nullptr dereference */
231 5682700 : GDALRasterBlock *poBlock = nullptr;
232 5682700 : const bool bUseIntegerRequestCoords =
233 5721960 : (!psExtraArg->bFloatingPointWindowValidity ||
234 39263 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
235 15903 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
236 :
237 : /* ==================================================================== */
238 : /* A common case is the data requested with the destination */
239 : /* is packed, and the block width is the raster width. */
240 : /* ==================================================================== */
241 5604230 : if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
242 2926780 : nBlockXSize == GetXSize() && nBufXSize == nXSize &&
243 11286100 : nBufYSize == nYSize && bUseIntegerRequestCoords)
244 : {
245 2793070 : CPLErr eErr = CE_None;
246 2793070 : int nLBlockY = -1;
247 :
248 8425560 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
249 : {
250 5628170 : const int iSrcY = iBufYOff + nYOff;
251 :
252 5628170 : if (iSrcY < nLBlockY * nBlockYSize ||
253 5627410 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
254 : {
255 3033080 : nLBlockY = iSrcY / nBlockYSize;
256 3033080 : bool bJustInitialize =
257 95893 : eRWFlag == GF_Write && nXOff == 0 &&
258 3180410 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
259 51439 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
260 :
261 : // Is this a partial tile at right and/or bottom edges of
262 : // the raster, and that is going to be completely written?
263 : // If so, do not load it from storage, but zero it so that
264 : // the content outsize of the validity area is initialized.
265 3033080 : bool bMemZeroBuffer = false;
266 95893 : if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
267 21147 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
268 3129060 : nYOff + nYSize == GetYSize() &&
269 89 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
270 : {
271 89 : bJustInitialize = true;
272 89 : bMemZeroBuffer = true;
273 : }
274 :
275 3033080 : if (poBlock)
276 237292 : poBlock->DropLock();
277 :
278 3033080 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
279 3029700 : poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
280 3034860 : if (poBlock == nullptr)
281 : {
282 1066 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
283 : nullptr)
284 : {
285 0 : CPLError(CE_Failure, CPLE_AppDefined,
286 : "GetBlockRef failed at X block offset %d, "
287 : "Y block offset %d%s",
288 : 0, nLBlockY,
289 0 : (nErrorCounter != CPLGetErrorCounter())
290 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
291 : : "");
292 : }
293 1066 : eErr = CE_Failure;
294 1066 : break;
295 : }
296 :
297 3033800 : if (eRWFlag == GF_Write)
298 95893 : poBlock->MarkDirty();
299 :
300 3033800 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
301 3033740 : if (bMemZeroBuffer)
302 : {
303 89 : memset(pabySrcBlock, 0,
304 89 : static_cast<GPtrDiff_t>(nBandDataSize) *
305 89 : nBlockXSize * nBlockYSize);
306 : }
307 : }
308 :
309 5628830 : const auto nSrcByteOffset =
310 5628830 : (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
311 5628830 : nBlockXSize +
312 5628830 : nXOff) *
313 5628830 : nBandDataSize;
314 :
315 5628830 : if (eDataType == eBufType)
316 : {
317 1731600 : if (eRWFlag == GF_Read)
318 1489880 : memcpy(static_cast<GByte *>(pData) +
319 1489880 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
320 1489880 : pabySrcBlock + nSrcByteOffset,
321 : static_cast<size_t>(nLineSpace));
322 : else
323 241713 : memcpy(pabySrcBlock + nSrcByteOffset,
324 241713 : static_cast<GByte *>(pData) +
325 241713 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
326 : static_cast<size_t>(nLineSpace));
327 : }
328 : else
329 : {
330 : // Type to type conversion.
331 :
332 3897240 : if (eRWFlag == GF_Read)
333 3882700 : GDALCopyWords(
334 3882700 : pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
335 : static_cast<GByte *>(pData) +
336 3882700 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
337 : eBufType, static_cast<int>(nPixelSpace), nBufXSize);
338 : else
339 14531 : GDALCopyWords(static_cast<GByte *>(pData) +
340 14531 : static_cast<GPtrDiff_t>(iBufYOff) *
341 : nLineSpace,
342 : eBufType, static_cast<int>(nPixelSpace),
343 14531 : pabySrcBlock + nSrcByteOffset, eDataType,
344 : nBandDataSize, nBufXSize);
345 : }
346 :
347 5691520 : if (psExtraArg->pfnProgress != nullptr &&
348 59019 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
349 : psExtraArg->pProgressData))
350 : {
351 5 : eErr = CE_Failure;
352 5 : break;
353 : }
354 : }
355 :
356 2798460 : if (poBlock)
357 2796260 : poBlock->DropLock();
358 :
359 2797290 : return eErr;
360 : }
361 :
362 : /* ==================================================================== */
363 : /* Do we have overviews that would be appropriate to satisfy */
364 : /* this request? */
365 : /* ==================================================================== */
366 2888840 : if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
367 : eRWFlag == GF_Read)
368 : {
369 : GDALRasterIOExtraArg sExtraArg;
370 2832 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
371 :
372 : const int nOverview =
373 2832 : GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
374 : nBufXSize, nBufYSize, &sExtraArg);
375 2832 : if (nOverview >= 0)
376 : {
377 2812 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
378 2812 : if (poOverviewBand == nullptr)
379 2812 : return CE_Failure;
380 :
381 2812 : return poOverviewBand->RasterIO(
382 : eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
383 2812 : nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
384 : }
385 : }
386 :
387 699328 : if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
388 0 : nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
389 3587280 : nLineSpace == nPixelSpace * nBufXSize &&
390 0 : CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
391 : {
392 0 : memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
393 0 : return CE_None;
394 : }
395 :
396 : /* ==================================================================== */
397 : /* The second case when we don't need subsample data but likely */
398 : /* need data type conversion. */
399 : /* ==================================================================== */
400 2887950 : if ( // nPixelSpace == nBufDataSize &&
401 2887950 : nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
402 : {
403 : #if DEBUG_VERBOSE
404 : printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
405 : nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
406 : #endif
407 :
408 : /* --------------------------------------------------------------------
409 : */
410 : /* Loop over buffer computing source locations. */
411 : /* --------------------------------------------------------------------
412 : */
413 : // Calculate starting values out of loop
414 2525080 : const int nLBlockXStart = nXOff / nBlockXSize;
415 2525080 : const int nXSpanEnd = nBufXSize + nXOff;
416 :
417 2525080 : int nYInc = 0;
418 5084770 : for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
419 2559690 : iBufYOff += nYInc, iSrcY += nYInc)
420 : {
421 2559640 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
422 : static_cast<GPtrDiff_t>(nLineSpace);
423 2559640 : int nLBlockY = iSrcY / nBlockYSize;
424 2559640 : int nLBlockX = nLBlockXStart;
425 2559640 : int iSrcX = nXOff;
426 5330610 : while (iSrcX < nXSpanEnd)
427 : {
428 2770920 : int nXSpan = nLBlockX * nBlockXSize;
429 2770920 : if (nXSpan < INT_MAX - nBlockXSize)
430 2770920 : nXSpan += nBlockXSize;
431 : else
432 2 : nXSpan = INT_MAX;
433 2770920 : const int nXRight = nXSpan;
434 2770920 : nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
435 2770920 : const size_t nXSpanSize =
436 2770920 : nXSpan * static_cast<size_t>(nPixelSpace);
437 :
438 2770920 : bool bJustInitialize =
439 2042040 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
440 37152 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
441 4838540 : nXOff <= nLBlockX * nBlockXSize &&
442 25575 : nXOff + nXSize >= nXRight;
443 :
444 : // Is this a partial tile at right and/or bottom edges of
445 : // the raster, and that is going to be completely written?
446 : // If so, do not load it from storage, but zero it so that
447 : // the content outsize of the validity area is initialized.
448 2770920 : bool bMemZeroBuffer = false;
449 2042040 : if (eRWFlag == GF_Write && !bJustInitialize &&
450 2017720 : nXOff <= nLBlockX * nBlockXSize &&
451 2016100 : nYOff <= nLBlockY * nBlockYSize &&
452 12074 : (nXOff + nXSize >= nXRight ||
453 : // cppcheck-suppress knownConditionTrueFalse
454 4815620 : (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
455 11896 : (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
456 10657 : (nYOff + nYSize == GetYSize() &&
457 1870 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
458 : {
459 3109 : bJustInitialize = true;
460 3109 : bMemZeroBuffer = true;
461 : }
462 :
463 : /* --------------------------------------------------------------------
464 : */
465 : /* Ensure we have the appropriate block loaded. */
466 : /* --------------------------------------------------------------------
467 : */
468 2770920 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
469 2771080 : poBlock =
470 2770840 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
471 2771080 : if (!poBlock)
472 : {
473 71 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
474 : nullptr)
475 : {
476 0 : CPLError(CE_Failure, CPLE_AppDefined,
477 : "GetBlockRef failed at X block offset %d, "
478 : "Y block offset %d%s",
479 : nLBlockX, nLBlockY,
480 0 : (nErrorCounter != CPLGetErrorCounter())
481 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
482 : : "");
483 : }
484 71 : return (CE_Failure);
485 : }
486 :
487 2771000 : if (eRWFlag == GF_Write)
488 2042040 : poBlock->MarkDirty();
489 :
490 2771000 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
491 2771000 : if (bMemZeroBuffer)
492 : {
493 3109 : memset(pabySrcBlock, 0,
494 3109 : static_cast<GPtrDiff_t>(nBandDataSize) *
495 3109 : nBlockXSize * nBlockYSize);
496 : }
497 : /* --------------------------------------------------------------------
498 : */
499 : /* Copy over this chunk of data. */
500 : /* --------------------------------------------------------------------
501 : */
502 2771000 : GPtrDiff_t iSrcOffset =
503 2771000 : (static_cast<GPtrDiff_t>(iSrcX) -
504 2771000 : static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
505 2771000 : (static_cast<GPtrDiff_t>(iSrcY) -
506 2771000 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
507 2771000 : nBlockXSize) *
508 2771000 : nBandDataSize;
509 : // Fill up as many rows as possible for the loaded block.
510 5541990 : const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
511 2771000 : nBufYSize - iBufYOff);
512 58531700 : for (int k = 0; k < kmax; k++)
513 : {
514 55760900 : if (eDataType == eBufType && nPixelSpace == nBufDataSize)
515 : {
516 51837100 : if (eRWFlag == GF_Read)
517 47412500 : memcpy(static_cast<GByte *>(pData) + iBufOffset +
518 47412500 : static_cast<GPtrDiff_t>(k) * nLineSpace,
519 47412500 : pabySrcBlock + iSrcOffset, nXSpanSize);
520 : else
521 4424610 : memcpy(pabySrcBlock + iSrcOffset,
522 4424610 : static_cast<GByte *>(pData) + iBufOffset +
523 4424610 : static_cast<GPtrDiff_t>(k) * nLineSpace,
524 : nXSpanSize);
525 : }
526 : else
527 : {
528 : /* type to type conversion */
529 3923790 : if (eRWFlag == GF_Read)
530 3899510 : GDALCopyWords(
531 3899510 : pabySrcBlock + iSrcOffset, eDataType,
532 : nBandDataSize,
533 3899510 : static_cast<GByte *>(pData) + iBufOffset +
534 3899510 : static_cast<GPtrDiff_t>(k) * nLineSpace,
535 : eBufType, static_cast<int>(nPixelSpace),
536 : nXSpan);
537 : else
538 24282 : GDALCopyWords(
539 24282 : static_cast<GByte *>(pData) + iBufOffset +
540 24282 : static_cast<GPtrDiff_t>(k) * nLineSpace,
541 : eBufType, static_cast<int>(nPixelSpace),
542 24282 : pabySrcBlock + iSrcOffset, eDataType,
543 : nBandDataSize, nXSpan);
544 : }
545 :
546 55760700 : iSrcOffset +=
547 55760700 : static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
548 : }
549 :
550 : iBufOffset =
551 2770870 : CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
552 2770890 : nLBlockX++;
553 2770890 : iSrcX += nXSpan;
554 :
555 2770890 : poBlock->DropLock();
556 2770970 : poBlock = nullptr;
557 : }
558 :
559 : /* Compute the increment to go on a block boundary */
560 2559690 : nYInc = nBlockYSize - (iSrcY % nBlockYSize);
561 :
562 2561450 : if (psExtraArg->pfnProgress != nullptr &&
563 1761 : !psExtraArg->pfnProgress(
564 2561450 : 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
565 : psExtraArg->pProgressData))
566 : {
567 0 : return CE_Failure;
568 : }
569 : }
570 :
571 2525130 : return CE_None;
572 : }
573 :
574 : /* ==================================================================== */
575 : /* Loop reading required source blocks to satisfy output */
576 : /* request. This is the most general implementation. */
577 : /* ==================================================================== */
578 :
579 362870 : double dfXOff = nXOff;
580 362870 : double dfYOff = nYOff;
581 362870 : double dfXSize = nXSize;
582 362870 : double dfYSize = nYSize;
583 362870 : if (psExtraArg->bFloatingPointWindowValidity)
584 : {
585 28160 : dfXOff = psExtraArg->dfXOff;
586 28160 : dfYOff = psExtraArg->dfYOff;
587 28160 : dfXSize = psExtraArg->dfXSize;
588 28160 : dfYSize = psExtraArg->dfYSize;
589 : }
590 :
591 : /* -------------------------------------------------------------------- */
592 : /* Compute stepping increment. */
593 : /* -------------------------------------------------------------------- */
594 362870 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
595 362870 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
596 362870 : CPLErr eErr = CE_None;
597 :
598 362870 : if (eRWFlag == GF_Write)
599 : {
600 : /* --------------------------------------------------------------------
601 : */
602 : /* Write case */
603 : /* Loop over raster window computing source locations in the buffer.
604 : */
605 : /* --------------------------------------------------------------------
606 : */
607 166650 : GByte *pabyDstBlock = nullptr;
608 166650 : int nLBlockX = -1;
609 166650 : int nLBlockY = -1;
610 :
611 1259590 : for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
612 : {
613 1092940 : const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
614 :
615 12063600 : for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
616 : {
617 10970600 : const int iBufXOff =
618 10970600 : static_cast<int>((iDstX - nXOff) / dfSrcXInc);
619 10970600 : GPtrDiff_t iBufOffset =
620 10970600 : static_cast<GPtrDiff_t>(iBufYOff) *
621 : static_cast<GPtrDiff_t>(nLineSpace) +
622 10970600 : iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
623 :
624 : // FIXME: this code likely doesn't work if the dirty block gets
625 : // flushed to disk before being completely written.
626 : // In the meantime, bJustInitialize should probably be set to
627 : // FALSE even if it is not ideal performance wise, and for
628 : // lossy compression.
629 :
630 : /* --------------------------------------------------------------------
631 : */
632 : /* Ensure we have the appropriate block loaded. */
633 : /* --------------------------------------------------------------------
634 : */
635 10970600 : if (iDstX < nLBlockX * nBlockXSize ||
636 10721300 : iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
637 10264600 : iDstY < nLBlockY * nBlockYSize ||
638 10264600 : iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
639 : {
640 738642 : nLBlockX = iDstX / nBlockXSize;
641 738642 : nLBlockY = iDstY / nBlockYSize;
642 :
643 738642 : const bool bJustInitialize =
644 1065870 : nYOff <= nLBlockY * nBlockYSize &&
645 327231 : nYOff + nYSize - nBlockYSize >=
646 327231 : nLBlockY * nBlockYSize &&
647 1116140 : nXOff <= nLBlockX * nBlockXSize &&
648 50265 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
649 : /*bool bMemZeroBuffer = FALSE;
650 : if( !bJustInitialize &&
651 : nXOff <= nLBlockX * nBlockXSize &&
652 : nYOff <= nLBlockY * nBlockYSize &&
653 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
654 : (nXOff + nXSize == GetXSize() &&
655 : (nLBlockX+1) * nBlockXSize > GetXSize())) &&
656 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
657 : (nYOff + nYSize == GetYSize() &&
658 : (nLBlockY+1) * nBlockYSize > GetYSize())) )
659 : {
660 : bJustInitialize = TRUE;
661 : bMemZeroBuffer = TRUE;
662 : }*/
663 738642 : if (poBlock != nullptr)
664 571992 : poBlock->DropLock();
665 :
666 738642 : poBlock =
667 738642 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
668 738642 : if (poBlock == nullptr)
669 : {
670 0 : return (CE_Failure);
671 : }
672 :
673 738642 : poBlock->MarkDirty();
674 :
675 738642 : pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
676 : /*if( bMemZeroBuffer )
677 : {
678 : memset(pabyDstBlock, 0,
679 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
680 : * nBlockYSize);
681 : }*/
682 : }
683 :
684 : // To make Coverity happy. Should not happen by design.
685 10970600 : if (pabyDstBlock == nullptr)
686 : {
687 0 : CPLAssert(false);
688 : eErr = CE_Failure;
689 : break;
690 : }
691 :
692 : /* --------------------------------------------------------------------
693 : */
694 : /* Copy over this pixel of data. */
695 : /* --------------------------------------------------------------------
696 : */
697 10970600 : GPtrDiff_t iDstOffset =
698 10970600 : (static_cast<GPtrDiff_t>(iDstX) -
699 10970600 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
700 10970600 : (static_cast<GPtrDiff_t>(iDstY) -
701 10970600 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
702 10970600 : nBlockXSize) *
703 10970600 : nBandDataSize;
704 :
705 10970600 : if (eDataType == eBufType)
706 : {
707 10967500 : memcpy(pabyDstBlock + iDstOffset,
708 10967500 : static_cast<GByte *>(pData) + iBufOffset,
709 : nBandDataSize);
710 : }
711 : else
712 : {
713 : /* type to type conversion ... ouch, this is expensive way
714 : of handling single words */
715 :
716 3096 : GDALCopyWords(static_cast<GByte *>(pData) + iBufOffset,
717 3096 : eBufType, 0, pabyDstBlock + iDstOffset,
718 : eDataType, 0, 1);
719 : }
720 : }
721 :
722 1092940 : if (psExtraArg->pfnProgress != nullptr &&
723 0 : !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
724 : psExtraArg->pProgressData))
725 : {
726 0 : eErr = CE_Failure;
727 0 : break;
728 : }
729 : }
730 : }
731 : else
732 : {
733 196220 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
734 : {
735 7612 : if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
736 2490 : psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
737 2488 : psExtraArg->eResampleAlg == GRIORA_Bilinear ||
738 5127 : psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
739 2455 : GetColorTable() != nullptr)
740 : {
741 0 : CPLError(CE_Warning, CPLE_NotSupported,
742 : "Resampling method not supported on paletted band. "
743 : "Falling back to nearest neighbour");
744 : }
745 2564 : else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
746 3 : GDALDataTypeIsComplex(eDataType))
747 : {
748 0 : CPLError(CE_Warning, CPLE_NotSupported,
749 : "Resampling method not supported on complex data type "
750 : "band. Falling back to nearest neighbour");
751 : }
752 : else
753 : {
754 2561 : return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
755 : pData, nBufXSize, nBufYSize, eBufType,
756 2561 : nPixelSpace, nLineSpace, psExtraArg);
757 : }
758 : }
759 :
760 193619 : int nLimitBlockY = 0;
761 193619 : const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
762 193619 : int nStartBlockX = -nBlockXSize;
763 193619 : const double EPS = 1e-10;
764 193619 : int nLBlockY = -1;
765 193619 : const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
766 193619 : const bool bIntegerXFactor =
767 170972 : bUseIntegerRequestCoords &&
768 265604 : static_cast<int>(dfSrcXInc) == dfSrcXInc &&
769 71985 : static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
770 :
771 : /* --------------------------------------------------------------------
772 : */
773 : /* Read case */
774 : /* Loop over buffer computing source locations. */
775 : /* --------------------------------------------------------------------
776 : */
777 1955240 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
778 : {
779 : // Add small epsilon to avoid some numeric precision issues.
780 1761630 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
781 1761630 : const int iSrcY = static_cast<int>(std::min(
782 1761630 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
783 :
784 1761630 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
785 : static_cast<GPtrDiff_t>(nLineSpace);
786 :
787 1761630 : if (iSrcY >= nLimitBlockY)
788 : {
789 234813 : nLBlockY = iSrcY / nBlockYSize;
790 234813 : nLimitBlockY = nLBlockY * nBlockYSize;
791 234813 : if (nLimitBlockY < INT_MAX - nBlockYSize)
792 234813 : nLimitBlockY += nBlockYSize;
793 : else
794 0 : nLimitBlockY = INT_MAX;
795 : // Make sure a new block is loaded.
796 234813 : nStartBlockX = -nBlockXSize;
797 : }
798 1526820 : else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
799 : {
800 : // Make sure a new block is loaded.
801 429795 : nStartBlockX = -nBlockXSize;
802 : }
803 :
804 1761630 : GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
805 1761630 : static_cast<GPtrDiff_t>(nBlockXSize);
806 :
807 1761630 : if (bIntegerXFactor)
808 : {
809 413252 : int iSrcX = static_cast<int>(dfSrcXStart);
810 413252 : const int nSrcXInc = static_cast<int>(dfSrcXInc);
811 413252 : GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
812 413252 : bool bRet = false;
813 413252 : if (bByteCopy)
814 : {
815 302865 : bRet = DownsamplingIntegerXFactor<true, 1>(
816 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
817 : static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
818 : GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
819 : }
820 110387 : else if (eDataType == eBufType)
821 : {
822 110182 : switch (nBandDataSize)
823 : {
824 110102 : case 2:
825 110102 : bRet = DownsamplingIntegerXFactor<true, 2>(
826 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
827 : pabyDstData, static_cast<int>(nPixelSpace),
828 : nBufXSize, eDataType, eDataType, nStartBlockX,
829 : nBlockXSize, poBlock, nLBlockY);
830 110102 : break;
831 22 : case 4:
832 22 : bRet = DownsamplingIntegerXFactor<true, 4>(
833 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
834 : pabyDstData, static_cast<int>(nPixelSpace),
835 : nBufXSize, eDataType, eDataType, nStartBlockX,
836 : nBlockXSize, poBlock, nLBlockY);
837 22 : break;
838 56 : case 8:
839 56 : bRet = DownsamplingIntegerXFactor<true, 8>(
840 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
841 : pabyDstData, static_cast<int>(nPixelSpace),
842 : nBufXSize, eDataType, eDataType, nStartBlockX,
843 : nBlockXSize, poBlock, nLBlockY);
844 56 : break;
845 2 : case 16:
846 2 : bRet = DownsamplingIntegerXFactor<true, 16>(
847 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
848 : pabyDstData, static_cast<int>(nPixelSpace),
849 : nBufXSize, eDataType, eDataType, nStartBlockX,
850 : nBlockXSize, poBlock, nLBlockY);
851 2 : break;
852 0 : default:
853 0 : CPLAssert(false);
854 : break;
855 : }
856 : }
857 : else
858 : {
859 205 : bRet = DownsamplingIntegerXFactor<false, 0>(
860 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
861 : static_cast<int>(nPixelSpace), nBufXSize, eDataType,
862 : eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
863 : }
864 413252 : if (!bRet)
865 1 : eErr = CE_Failure;
866 : }
867 : else
868 : {
869 1348380 : double dfSrcX = dfSrcXStart;
870 570400000 : for (int iBufXOff = 0; iBufXOff < nBufXSize;
871 569052000 : iBufXOff++, dfSrcX += dfSrcXInc)
872 : {
873 : // TODO?: try to avoid the clamping for most iterations
874 : const int iSrcX = static_cast<int>(
875 1138100000 : std::min(std::max(0.0, dfSrcX),
876 569052000 : static_cast<double>(nRasterXSize - 1)));
877 :
878 : /* --------------------------------------------------------------------
879 : */
880 : /* Ensure we have the appropriate block loaded. */
881 : /* --------------------------------------------------------------------
882 : */
883 569052000 : if (iSrcX >= nBlockXSize + nStartBlockX)
884 : {
885 1705380 : const int nLBlockX = iSrcX / nBlockXSize;
886 1705380 : nStartBlockX = nLBlockX * nBlockXSize;
887 :
888 1705380 : if (poBlock != nullptr)
889 1583740 : poBlock->DropLock();
890 :
891 1705380 : poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
892 1705380 : if (poBlock == nullptr)
893 : {
894 9 : eErr = CE_Failure;
895 9 : break;
896 : }
897 :
898 : pabySrcBlock =
899 1705370 : static_cast<GByte *>(poBlock->GetDataRef());
900 : }
901 569052000 : const GPtrDiff_t nDiffX =
902 569052000 : static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
903 :
904 : /* --------------------------------------------------------------------
905 : */
906 : /* Copy over this pixel of data. */
907 : /* --------------------------------------------------------------------
908 : */
909 :
910 569052000 : if (bByteCopy)
911 : {
912 515613000 : GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
913 515613000 : static_cast<GByte *>(pData)[iBufOffset] =
914 515613000 : pabySrcBlock[iSrcOffset];
915 : }
916 53439100 : else if (eDataType == eBufType)
917 : {
918 48225500 : GPtrDiff_t iSrcOffset =
919 48225500 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
920 48225500 : memcpy(static_cast<GByte *>(pData) + iBufOffset,
921 48225500 : pabySrcBlock + iSrcOffset, nBandDataSize);
922 : }
923 : else
924 : {
925 : // Type to type conversion ...
926 5213610 : GPtrDiff_t iSrcOffset =
927 5213610 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
928 5213610 : GDALCopyWords(pabySrcBlock + iSrcOffset, eDataType, 0,
929 5213610 : static_cast<GByte *>(pData) + iBufOffset,
930 : eBufType, 0, 1);
931 : }
932 :
933 569052000 : iBufOffset += static_cast<int>(nPixelSpace);
934 : }
935 : }
936 1761630 : if (eErr == CE_Failure)
937 11 : break;
938 :
939 1972790 : if (psExtraArg->pfnProgress != nullptr &&
940 211166 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
941 : psExtraArg->pProgressData))
942 : {
943 1 : eErr = CE_Failure;
944 1 : break;
945 : }
946 : }
947 : }
948 :
949 360269 : if (poBlock != nullptr)
950 360259 : poBlock->DropLock();
951 :
952 360269 : return eErr;
953 : }
954 :
955 : /************************************************************************/
956 : /* GDALRasterIOTransformer() */
957 : /************************************************************************/
958 :
959 : struct GDALRasterIOTransformerStruct
960 : {
961 : double dfXOff;
962 : double dfYOff;
963 : double dfXRatioDstToSrc;
964 : double dfYRatioDstToSrc;
965 : };
966 :
967 6748 : static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
968 : int nPointCount, double *x, double *y,
969 : double * /* z */, int *panSuccess)
970 : {
971 6748 : GDALRasterIOTransformerStruct *psParams =
972 : static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
973 6748 : if (bDstToSrc)
974 : {
975 252996 : for (int i = 0; i < nPointCount; i++)
976 : {
977 246836 : x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
978 246836 : y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
979 246836 : panSuccess[i] = TRUE;
980 : }
981 : }
982 : else
983 : {
984 1176 : for (int i = 0; i < nPointCount; i++)
985 : {
986 588 : x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
987 588 : y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
988 588 : panSuccess[i] = TRUE;
989 : }
990 : }
991 6748 : return TRUE;
992 : }
993 :
994 : /************************************************************************/
995 : /* RasterIOResampled() */
996 : /************************************************************************/
997 :
998 : //! @cond Doxygen_Suppress
999 2561 : CPLErr GDALRasterBand::RasterIOResampled(
1000 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1001 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1002 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1003 : {
1004 : // Determine if we use warping resampling or overview resampling
1005 : const bool bUseWarp =
1006 2561 : (GDALDataTypeIsComplex(eDataType) &&
1007 2718 : psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
1008 157 : psExtraArg->eResampleAlg != GRIORA_Mode);
1009 :
1010 2561 : double dfXOff = nXOff;
1011 2561 : double dfYOff = nYOff;
1012 2561 : double dfXSize = nXSize;
1013 2561 : double dfYSize = nYSize;
1014 2561 : if (psExtraArg->bFloatingPointWindowValidity)
1015 : {
1016 2114 : dfXOff = psExtraArg->dfXOff;
1017 2114 : dfYOff = psExtraArg->dfYOff;
1018 2114 : dfXSize = psExtraArg->dfXSize;
1019 2114 : dfYSize = psExtraArg->dfYSize;
1020 : }
1021 :
1022 2561 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1023 2561 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1024 :
1025 : // Determine the coordinates in the "virtual" output raster to see
1026 : // if there are not integers, in which case we will use them as a shift
1027 : // so that subwindow extracts give the exact same results as entire raster
1028 : // scaling.
1029 2561 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1030 2561 : bool bHasXOffVirtual = false;
1031 2561 : int nDestXOffVirtual = 0;
1032 2561 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1033 : {
1034 2235 : bHasXOffVirtual = true;
1035 2235 : dfXOff = nXOff;
1036 2235 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1037 : }
1038 :
1039 2561 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1040 2561 : bool bHasYOffVirtual = false;
1041 2561 : int nDestYOffVirtual = 0;
1042 2561 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1043 : {
1044 2229 : bHasYOffVirtual = true;
1045 2229 : dfYOff = nYOff;
1046 2229 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1047 : }
1048 :
1049 : // Create a MEM dataset that wraps the output buffer.
1050 : GDALDataset *poMEMDS;
1051 2561 : void *pTempBuffer = nullptr;
1052 2561 : GSpacing nPSMem = nPixelSpace;
1053 2561 : GSpacing nLSMem = nLineSpace;
1054 2561 : void *pDataMem = pData;
1055 2561 : GDALDataType eDTMem = eBufType;
1056 2561 : if (eBufType != eDataType)
1057 : {
1058 40 : nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1059 40 : nLSMem = nPSMem * nBufXSize;
1060 : pTempBuffer =
1061 40 : VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1062 40 : if (pTempBuffer == nullptr)
1063 0 : return CE_Failure;
1064 40 : pDataMem = pTempBuffer;
1065 40 : eDTMem = eDataType;
1066 : }
1067 :
1068 : poMEMDS =
1069 2561 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1070 : nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1071 2561 : GByte *pabyData = static_cast<GByte *>(pDataMem) -
1072 2561 : nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1073 2561 : GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1074 : poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1075 2561 : poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1076 :
1077 2561 : const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1078 2561 : const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
1079 2561 : if (pszNBITS)
1080 6 : GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
1081 6 : "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1082 :
1083 2561 : CPLErr eErr = CE_None;
1084 :
1085 : // Do the resampling.
1086 2561 : if (bUseWarp)
1087 : {
1088 149 : int bHasNoData = FALSE;
1089 149 : double dfNoDataValue = GetNoDataValue(&bHasNoData);
1090 :
1091 149 : VRTDatasetH hVRTDS = nullptr;
1092 149 : GDALRasterBandH hVRTBand = nullptr;
1093 149 : if (GetDataset() == nullptr)
1094 : {
1095 : /* Create VRT dataset that wraps the whole dataset */
1096 0 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1097 0 : VRTAddBand(hVRTDS, eDataType, nullptr);
1098 0 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1099 0 : VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1100 : 0, 0, nRasterXSize, nRasterYSize, nullptr,
1101 : VRT_NODATA_UNSET);
1102 :
1103 : /* Add a mask band if needed */
1104 0 : if (GetMaskFlags() != GMF_ALL_VALID)
1105 : {
1106 0 : GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1107 : VRTSourcedRasterBand *poVRTMaskBand =
1108 : reinterpret_cast<VRTSourcedRasterBand *>(
1109 : reinterpret_cast<GDALRasterBand *>(hVRTBand)
1110 0 : ->GetMaskBand());
1111 0 : poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1112 0 : nRasterYSize, 0, 0,
1113 0 : nRasterXSize, nRasterYSize);
1114 : }
1115 : }
1116 :
1117 149 : GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1118 149 : switch (psExtraArg->eResampleAlg)
1119 : {
1120 0 : case GRIORA_NearestNeighbour:
1121 0 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1122 0 : break;
1123 147 : case GRIORA_Bilinear:
1124 147 : psWarpOptions->eResampleAlg = GRA_Bilinear;
1125 147 : break;
1126 0 : case GRIORA_Cubic:
1127 0 : psWarpOptions->eResampleAlg = GRA_Cubic;
1128 0 : break;
1129 0 : case GRIORA_CubicSpline:
1130 0 : psWarpOptions->eResampleAlg = GRA_CubicSpline;
1131 0 : break;
1132 0 : case GRIORA_Lanczos:
1133 0 : psWarpOptions->eResampleAlg = GRA_Lanczos;
1134 0 : break;
1135 0 : case GRIORA_Average:
1136 0 : psWarpOptions->eResampleAlg = GRA_Average;
1137 0 : break;
1138 2 : case GRIORA_RMS:
1139 2 : psWarpOptions->eResampleAlg = GRA_RMS;
1140 2 : break;
1141 0 : case GRIORA_Mode:
1142 0 : psWarpOptions->eResampleAlg = GRA_Mode;
1143 0 : break;
1144 0 : default:
1145 0 : CPLAssert(false);
1146 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1147 : break;
1148 : }
1149 149 : psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1150 149 : psWarpOptions->hDstDS = poMEMDS;
1151 149 : psWarpOptions->nBandCount = 1;
1152 149 : int nSrcBandNumber = hVRTDS ? 1 : nBand;
1153 149 : int nDstBandNumber = 1;
1154 149 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1155 149 : psWarpOptions->panDstBands = &nDstBandNumber;
1156 298 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1157 149 : ? psExtraArg->pfnProgress
1158 : : GDALDummyProgress;
1159 149 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1160 149 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1161 149 : if (bHasNoData)
1162 : {
1163 0 : psWarpOptions->papszWarpOptions = CSLSetNameValue(
1164 : psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1165 0 : if (psWarpOptions->padfSrcNoDataReal == nullptr)
1166 : {
1167 0 : psWarpOptions->padfSrcNoDataReal =
1168 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1169 0 : psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1170 : }
1171 :
1172 0 : if (psWarpOptions->padfDstNoDataReal == nullptr)
1173 : {
1174 0 : psWarpOptions->padfDstNoDataReal =
1175 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1176 0 : psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1177 : }
1178 : }
1179 :
1180 : GDALRasterIOTransformerStruct sTransformer;
1181 149 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1182 149 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1183 149 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1184 149 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1185 149 : psWarpOptions->pTransformerArg = &sTransformer;
1186 :
1187 : GDALWarpOperationH hWarpOperation =
1188 149 : GDALCreateWarpOperation(psWarpOptions);
1189 149 : eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1190 : nDestYOffVirtual, nBufXSize, nBufYSize);
1191 149 : GDALDestroyWarpOperation(hWarpOperation);
1192 :
1193 149 : psWarpOptions->panSrcBands = nullptr;
1194 149 : psWarpOptions->panDstBands = nullptr;
1195 149 : GDALDestroyWarpOptions(psWarpOptions);
1196 :
1197 149 : if (hVRTDS)
1198 0 : GDALClose(hVRTDS);
1199 : }
1200 : else
1201 : {
1202 2412 : const char *pszResampling =
1203 2594 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1204 293 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1205 220 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1206 213 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1207 159 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1208 95 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1209 43 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1210 3 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1211 : : "UNKNOWN";
1212 :
1213 2412 : int nKernelRadius = 0;
1214 : GDALResampleFunction pfnResampleFunc =
1215 2412 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1216 2412 : CPLAssert(pfnResampleFunc);
1217 : GDALDataType eWrkDataType =
1218 2412 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1219 2412 : int nHasNoData = 0;
1220 2412 : double dfNoDataValue = GetNoDataValue(&nHasNoData);
1221 2412 : const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1222 2412 : if (!bHasNoData)
1223 2348 : dfNoDataValue = 0.0;
1224 :
1225 2412 : int nDstBlockXSize = nBufXSize;
1226 2412 : int nDstBlockYSize = nBufYSize;
1227 2412 : int nFullResXChunk = 0;
1228 2412 : int nFullResYChunk = 0;
1229 : while (true)
1230 : {
1231 2412 : nFullResXChunk =
1232 2412 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1233 2412 : nFullResYChunk =
1234 2412 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1235 2412 : if (nFullResXChunk > nRasterXSize)
1236 2223 : nFullResXChunk = nRasterXSize;
1237 2412 : if (nFullResYChunk > nRasterYSize)
1238 206 : nFullResYChunk = nRasterYSize;
1239 2412 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1240 2366 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1241 : 1024 * 1024))
1242 : break;
1243 : // When operating on the full width of a raster whose block width is
1244 : // the raster width, prefer doing chunks in height.
1245 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1246 : nDstBlockYSize > 1)
1247 0 : nDstBlockYSize /= 2;
1248 : /* Otherwise cut the maximal dimension */
1249 0 : else if (nDstBlockXSize > 1 &&
1250 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1251 0 : nDstBlockXSize /= 2;
1252 : else
1253 0 : nDstBlockYSize /= 2;
1254 : }
1255 :
1256 2412 : int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1257 2412 : int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1258 2412 : if (nOvrXFactor == 0)
1259 2024 : nOvrXFactor = 1;
1260 2412 : if (nOvrYFactor == 0)
1261 2023 : nOvrYFactor = 1;
1262 2412 : int nFullResXSizeQueried =
1263 2412 : nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1264 2412 : int nFullResYSizeQueried =
1265 2412 : nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1266 :
1267 2412 : if (nFullResXSizeQueried > nRasterXSize)
1268 2125 : nFullResXSizeQueried = nRasterXSize;
1269 2412 : if (nFullResYSizeQueried > nRasterYSize)
1270 119 : nFullResYSizeQueried = nRasterYSize;
1271 :
1272 : void *pChunk =
1273 2412 : VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1274 : nFullResXSizeQueried, nFullResYSizeQueried);
1275 2412 : GByte *pabyChunkNoDataMask = nullptr;
1276 :
1277 2412 : GDALRasterBand *poMaskBand = GetMaskBand();
1278 2412 : int l_nMaskFlags = GetMaskFlags();
1279 :
1280 2412 : bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1281 2412 : if (bUseNoDataMask)
1282 : {
1283 120 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1284 : nFullResXSizeQueried, nFullResYSizeQueried));
1285 : }
1286 2412 : if (pChunk == nullptr ||
1287 120 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1288 : {
1289 0 : GDALClose(poMEMDS);
1290 0 : CPLFree(pChunk);
1291 0 : CPLFree(pabyChunkNoDataMask);
1292 0 : VSIFree(pTempBuffer);
1293 0 : return CE_Failure;
1294 : }
1295 :
1296 2412 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1297 2412 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1298 2412 : int nBlocksDone = 0;
1299 :
1300 : int nDstYOff;
1301 4824 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1302 2412 : nDstYOff += nDstBlockYSize)
1303 : {
1304 : int nDstYCount;
1305 2412 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1306 2412 : nDstYCount = nDstBlockYSize;
1307 : else
1308 0 : nDstYCount = nBufYSize - nDstYOff;
1309 :
1310 2412 : int nChunkYOff =
1311 2412 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1312 2412 : int nChunkYOff2 = nYOff + 1 +
1313 2412 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1314 : dfYRatioDstToSrc));
1315 2412 : if (nChunkYOff2 > nRasterYSize)
1316 313 : nChunkYOff2 = nRasterYSize;
1317 2412 : int nYCount = nChunkYOff2 - nChunkYOff;
1318 2412 : CPLAssert(nYCount <= nFullResYChunk);
1319 :
1320 2412 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1321 2412 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1322 2412 : if (nChunkYOffQueried < 0)
1323 : {
1324 221 : nChunkYSizeQueried += nChunkYOffQueried;
1325 221 : nChunkYOffQueried = 0;
1326 : }
1327 2412 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1328 321 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1329 2412 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1330 :
1331 2412 : int nDstXOff = 0;
1332 4824 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1333 2412 : nDstXOff += nDstBlockXSize)
1334 : {
1335 2412 : int nDstXCount = 0;
1336 2412 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1337 2412 : nDstXCount = nDstBlockXSize;
1338 : else
1339 0 : nDstXCount = nBufXSize - nDstXOff;
1340 :
1341 2412 : int nChunkXOff =
1342 2412 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1343 2412 : int nChunkXOff2 =
1344 2412 : nXOff + 1 +
1345 2412 : static_cast<int>(
1346 2412 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1347 2412 : if (nChunkXOff2 > nRasterXSize)
1348 2224 : nChunkXOff2 = nRasterXSize;
1349 2412 : int nXCount = nChunkXOff2 - nChunkXOff;
1350 2412 : CPLAssert(nXCount <= nFullResXChunk);
1351 :
1352 2412 : int nChunkXOffQueried =
1353 2412 : nChunkXOff - nKernelRadius * nOvrXFactor;
1354 2412 : int nChunkXSizeQueried =
1355 2412 : nXCount + 2 * nKernelRadius * nOvrXFactor;
1356 2412 : if (nChunkXOffQueried < 0)
1357 : {
1358 2138 : nChunkXSizeQueried += nChunkXOffQueried;
1359 2138 : nChunkXOffQueried = 0;
1360 : }
1361 2412 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1362 2124 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1363 2412 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1364 :
1365 : // Read the source buffers.
1366 2412 : eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1367 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1368 : nChunkXSizeQueried, nChunkYSizeQueried,
1369 : eWrkDataType, 0, 0, nullptr);
1370 :
1371 2412 : bool bSkipResample = false;
1372 2412 : bool bNoDataMaskFullyOpaque = false;
1373 2412 : if (eErr == CE_None && bUseNoDataMask)
1374 : {
1375 120 : eErr = poMaskBand->RasterIO(
1376 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1377 : nChunkXSizeQueried, nChunkYSizeQueried,
1378 : pabyChunkNoDataMask, nChunkXSizeQueried,
1379 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1380 :
1381 : /* Optimizations if mask if fully opaque or transparent */
1382 120 : int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1383 120 : GByte bVal = pabyChunkNoDataMask[0];
1384 120 : int i = 1;
1385 235310 : for (; i < nPixels; i++)
1386 : {
1387 235267 : if (pabyChunkNoDataMask[i] != bVal)
1388 77 : break;
1389 : }
1390 120 : if (i == nPixels)
1391 : {
1392 43 : if (bVal == 0)
1393 : {
1394 712 : for (int j = 0; j < nDstYCount; j++)
1395 : {
1396 686 : GDALCopyWords(&dfNoDataValue, GDT_Float64, 0,
1397 : static_cast<GByte *>(pDataMem) +
1398 686 : nLSMem * (j + nDstYOff) +
1399 686 : nDstXOff * nPSMem,
1400 : eDTMem, static_cast<int>(nPSMem),
1401 : nDstXCount);
1402 : }
1403 26 : bSkipResample = true;
1404 : }
1405 : else
1406 : {
1407 17 : bNoDataMaskFullyOpaque = true;
1408 : }
1409 : }
1410 : }
1411 :
1412 2412 : if (!bSkipResample && eErr == CE_None)
1413 : {
1414 2384 : const bool bPropagateNoData = false;
1415 2384 : void *pDstBuffer = nullptr;
1416 2384 : GDALDataType eDstBufferDataType = GDT_Unknown;
1417 : GDALRasterBand *poMEMBand =
1418 2384 : GDALRasterBand::FromHandle(hMEMBand);
1419 2384 : GDALOverviewResampleArgs args;
1420 2384 : args.eSrcDataType = eDataType;
1421 2384 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1422 2384 : args.nOvrXSize = poMEMBand->GetXSize();
1423 2384 : args.nOvrYSize = poMEMBand->GetYSize();
1424 2384 : args.nOvrNBITS = nNBITS;
1425 2384 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1426 2384 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1427 2384 : args.dfSrcXDelta =
1428 2384 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1429 2384 : args.dfSrcYDelta =
1430 2384 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1431 2384 : args.eWrkDataType = eWrkDataType;
1432 2384 : args.pabyChunkNodataMask =
1433 2384 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
1434 2384 : args.nChunkXOff =
1435 2384 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1436 2384 : args.nChunkXSize = nChunkXSizeQueried;
1437 2384 : args.nChunkYOff =
1438 2384 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1439 2384 : args.nChunkYSize = nChunkYSizeQueried;
1440 2384 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1441 2384 : args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
1442 2384 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1443 2384 : args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
1444 2384 : args.pszResampling = pszResampling;
1445 2384 : args.bHasNoData = bHasNoData;
1446 2384 : args.dfNoDataValue = dfNoDataValue;
1447 2384 : args.poColorTable = GetColorTable();
1448 2384 : args.bPropagateNoData = bPropagateNoData;
1449 2384 : eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
1450 : &eDstBufferDataType);
1451 2384 : if (eErr == CE_None)
1452 : {
1453 2384 : eErr = poMEMBand->RasterIO(
1454 : GF_Write, nDstXOff + nDestXOffVirtual,
1455 : nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1456 : pDstBuffer, nDstXCount, nDstYCount,
1457 : eDstBufferDataType, 0, 0, nullptr);
1458 : }
1459 2384 : CPLFree(pDstBuffer);
1460 : }
1461 :
1462 2412 : nBlocksDone++;
1463 2441 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1464 29 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1465 : "", psExtraArg->pProgressData))
1466 : {
1467 1 : eErr = CE_Failure;
1468 : }
1469 : }
1470 : }
1471 :
1472 2412 : CPLFree(pChunk);
1473 2412 : CPLFree(pabyChunkNoDataMask);
1474 : }
1475 :
1476 2561 : if (eBufType != eDataType)
1477 : {
1478 40 : CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1479 : GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1480 : pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1481 : nullptr));
1482 : }
1483 2561 : GDALClose(poMEMDS);
1484 2561 : VSIFree(pTempBuffer);
1485 :
1486 2561 : return eErr;
1487 : }
1488 :
1489 : /************************************************************************/
1490 : /* RasterIOResampled() */
1491 : /************************************************************************/
1492 :
1493 279 : CPLErr GDALDataset::RasterIOResampled(
1494 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1495 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1496 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1497 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1498 :
1499 : {
1500 : #if 0
1501 : // Determine if we use warping resampling or overview resampling
1502 : bool bUseWarp = false;
1503 : if( GDALDataTypeIsComplex( eDataType ) )
1504 : bUseWarp = true;
1505 : #endif
1506 :
1507 279 : double dfXOff = nXOff;
1508 279 : double dfYOff = nYOff;
1509 279 : double dfXSize = nXSize;
1510 279 : double dfYSize = nYSize;
1511 279 : if (psExtraArg->bFloatingPointWindowValidity)
1512 : {
1513 157 : dfXOff = psExtraArg->dfXOff;
1514 157 : dfYOff = psExtraArg->dfYOff;
1515 157 : dfXSize = psExtraArg->dfXSize;
1516 157 : dfYSize = psExtraArg->dfYSize;
1517 : }
1518 :
1519 279 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1520 279 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1521 :
1522 : // Determine the coordinates in the "virtual" output raster to see
1523 : // if there are not integers, in which case we will use them as a shift
1524 : // so that subwindow extracts give the exact same results as entire raster
1525 : // scaling.
1526 279 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1527 279 : bool bHasXOffVirtual = false;
1528 279 : int nDestXOffVirtual = 0;
1529 279 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1530 : {
1531 162 : bHasXOffVirtual = true;
1532 162 : dfXOff = nXOff;
1533 162 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1534 : }
1535 :
1536 279 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1537 279 : bool bHasYOffVirtual = false;
1538 279 : int nDestYOffVirtual = 0;
1539 279 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1540 : {
1541 121 : bHasYOffVirtual = true;
1542 121 : dfYOff = nYOff;
1543 121 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1544 : }
1545 :
1546 : // Create a MEM dataset that wraps the output buffer.
1547 : GDALDataset *poMEMDS =
1548 279 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1549 : nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1550 : GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1551 277 : CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
1552 265 : int nNBITS = 0;
1553 1203 : for (int i = 0; i < nBandCount; i++)
1554 : {
1555 932 : char szBuffer[32] = {'\0'};
1556 1880 : int nRet = CPLPrintPointer(
1557 : szBuffer,
1558 932 : static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
1559 932 : nLineSpace * nDestYOffVirtual + nBandSpace * i,
1560 : sizeof(szBuffer));
1561 948 : szBuffer[nRet] = 0;
1562 :
1563 948 : char szBuffer0[64] = {'\0'};
1564 948 : snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1565 :
1566 948 : char szBuffer1[64] = {'\0'};
1567 948 : snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
1568 : static_cast<GIntBig>(nPixelSpace));
1569 :
1570 948 : char szBuffer2[64] = {'\0'};
1571 948 : snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
1572 : static_cast<GIntBig>(nLineSpace));
1573 :
1574 948 : char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
1575 :
1576 948 : poMEMDS->AddBand(eBufType, apszOptions);
1577 :
1578 935 : GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1579 919 : papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
1580 : const char *pszNBITS =
1581 923 : poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1582 925 : if (pszNBITS)
1583 : {
1584 0 : nNBITS = atoi(pszNBITS);
1585 0 : poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1586 0 : "IMAGE_STRUCTURE");
1587 : }
1588 : }
1589 :
1590 271 : CPLErr eErr = CE_None;
1591 :
1592 : // TODO(schwehr): Why disabled? Why not just delete?
1593 : // Looks like this code was initially added as disable by copying
1594 : // from RasterIO here:
1595 : // https://trac.osgeo.org/gdal/changeset/29572
1596 : #if 0
1597 : // Do the resampling.
1598 : if( bUseWarp )
1599 : {
1600 : VRTDatasetH hVRTDS = nullptr;
1601 : GDALRasterBandH hVRTBand = nullptr;
1602 : if( GetDataset() == nullptr )
1603 : {
1604 : /* Create VRT dataset that wraps the whole dataset */
1605 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1606 : VRTAddBand( hVRTDS, eDataType, nullptr );
1607 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1608 : VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1609 : (GDALRasterBandH)this,
1610 : 0, 0,
1611 : nRasterXSize, nRasterYSize,
1612 : 0, 0,
1613 : nRasterXSize, nRasterYSize,
1614 : nullptr, VRT_NODATA_UNSET );
1615 :
1616 : /* Add a mask band if needed */
1617 : if( GetMaskFlags() != GMF_ALL_VALID )
1618 : {
1619 : ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1620 : VRTSourcedRasterBand* poVRTMaskBand =
1621 : (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1622 : poVRTMaskBand->
1623 : AddMaskBandSource( this,
1624 : 0, 0,
1625 : nRasterXSize, nRasterYSize,
1626 : 0, 0,
1627 : nRasterXSize, nRasterYSize);
1628 : }
1629 : }
1630 :
1631 : GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1632 : psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1633 : psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1634 : psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1635 : psWarpOptions->nBandCount = 1;
1636 : int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1637 : int nDstBandNumber = 1;
1638 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1639 : psWarpOptions->panDstBands = &nDstBandNumber;
1640 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1641 : psExtraArg->pfnProgress : GDALDummyProgress;
1642 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1643 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1644 : GDALRasterIOTransformerStruct sTransformer;
1645 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1646 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1647 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1648 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1649 : psWarpOptions->pTransformerArg = &sTransformer;
1650 :
1651 : GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1652 : eErr = GDALChunkAndWarpImage( hWarpOperation,
1653 : nDestXOffVirtual, nDestYOffVirtual,
1654 : nBufXSize, nBufYSize );
1655 : GDALDestroyWarpOperation( hWarpOperation );
1656 :
1657 : psWarpOptions->panSrcBands = nullptr;
1658 : psWarpOptions->panDstBands = nullptr;
1659 : GDALDestroyWarpOptions( psWarpOptions );
1660 :
1661 : if( hVRTDS )
1662 : GDALClose(hVRTDS);
1663 : }
1664 : else
1665 : #endif
1666 : {
1667 271 : const char *pszResampling =
1668 427 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1669 156 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1670 0 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1671 0 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1672 0 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1673 0 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1674 0 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1675 0 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1676 : : "UNKNOWN";
1677 :
1678 271 : GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1679 268 : GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1680 : int nBlockXSize, nBlockYSize;
1681 271 : poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1682 :
1683 : int nKernelRadius;
1684 : GDALResampleFunction pfnResampleFunc =
1685 259 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1686 259 : CPLAssert(pfnResampleFunc);
1687 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1688 : GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1689 : GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1690 : #endif
1691 : GDALDataType eWrkDataType =
1692 259 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1693 :
1694 259 : int nDstBlockXSize = nBufXSize;
1695 259 : int nDstBlockYSize = nBufYSize;
1696 : int nFullResXChunk, nFullResYChunk;
1697 : while (true)
1698 : {
1699 259 : nFullResXChunk =
1700 259 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1701 259 : nFullResYChunk =
1702 259 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1703 259 : if (nFullResXChunk > nRasterXSize)
1704 139 : nFullResXChunk = nRasterXSize;
1705 259 : if (nFullResYChunk > nRasterYSize)
1706 33 : nFullResYChunk = nRasterYSize;
1707 259 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1708 257 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1709 : 1024 * 1024))
1710 : break;
1711 : // When operating on the full width of a raster whose block width is
1712 : // the raster width, prefer doing chunks in height.
1713 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1714 : nDstBlockYSize > 1)
1715 0 : nDstBlockYSize /= 2;
1716 : /* Otherwise cut the maximal dimension */
1717 0 : else if (nDstBlockXSize > 1 &&
1718 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1719 0 : nDstBlockXSize /= 2;
1720 : else
1721 0 : nDstBlockYSize /= 2;
1722 : }
1723 :
1724 524 : int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1725 259 : static_cast<int>(0.5 + dfYRatioDstToSrc));
1726 265 : if (nOvrFactor == 0)
1727 90 : nOvrFactor = 1;
1728 265 : int nFullResXSizeQueried =
1729 265 : nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1730 265 : int nFullResYSizeQueried =
1731 265 : nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1732 :
1733 265 : if (nFullResXSizeQueried > nRasterXSize)
1734 158 : nFullResXSizeQueried = nRasterXSize;
1735 265 : if (nFullResYSizeQueried > nRasterYSize)
1736 36 : nFullResYSizeQueried = nRasterYSize;
1737 :
1738 265 : void *pChunk = VSI_MALLOC3_VERBOSE(
1739 : cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1740 : nBandCount),
1741 : nFullResXSizeQueried, nFullResYSizeQueried);
1742 277 : GByte *pabyChunkNoDataMask = nullptr;
1743 :
1744 277 : GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1745 272 : int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1746 :
1747 274 : bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1748 274 : if (bUseNoDataMask)
1749 : {
1750 55 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1751 : nFullResXSizeQueried, nFullResYSizeQueried));
1752 : }
1753 274 : if (pChunk == nullptr ||
1754 55 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1755 : {
1756 12 : GDALClose(poMEMDS);
1757 0 : CPLFree(pChunk);
1758 0 : CPLFree(pabyChunkNoDataMask);
1759 0 : CPLFree(papoDstBands);
1760 0 : return CE_Failure;
1761 : }
1762 :
1763 262 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1764 262 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1765 262 : int nBlocksDone = 0;
1766 :
1767 : int nDstYOff;
1768 544 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1769 282 : nDstYOff += nDstBlockYSize)
1770 : {
1771 : int nDstYCount;
1772 258 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1773 261 : nDstYCount = nDstBlockYSize;
1774 : else
1775 0 : nDstYCount = nBufYSize - nDstYOff;
1776 :
1777 258 : int nChunkYOff =
1778 258 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1779 258 : int nChunkYOff2 = nYOff + 1 +
1780 258 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1781 : dfYRatioDstToSrc));
1782 258 : if (nChunkYOff2 > nRasterYSize)
1783 56 : nChunkYOff2 = nRasterYSize;
1784 258 : int nYCount = nChunkYOff2 - nChunkYOff;
1785 258 : CPLAssert(nYCount <= nFullResYChunk);
1786 :
1787 258 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1788 258 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1789 258 : if (nChunkYOffQueried < 0)
1790 : {
1791 56 : nChunkYSizeQueried += nChunkYOffQueried;
1792 56 : nChunkYOffQueried = 0;
1793 : }
1794 258 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1795 66 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1796 258 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1797 :
1798 : int nDstXOff;
1799 538 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1800 280 : nDstXOff += nDstBlockXSize)
1801 : {
1802 : int nDstXCount;
1803 256 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1804 256 : nDstXCount = nDstBlockXSize;
1805 : else
1806 0 : nDstXCount = nBufXSize - nDstXOff;
1807 :
1808 256 : int nChunkXOff =
1809 256 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1810 256 : int nChunkXOff2 =
1811 256 : nXOff + 1 +
1812 256 : static_cast<int>(
1813 256 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1814 256 : if (nChunkXOff2 > nRasterXSize)
1815 144 : nChunkXOff2 = nRasterXSize;
1816 256 : int nXCount = nChunkXOff2 - nChunkXOff;
1817 256 : CPLAssert(nXCount <= nFullResXChunk);
1818 :
1819 256 : int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1820 256 : int nChunkXSizeQueried =
1821 256 : nXCount + 2 * nKernelRadius * nOvrFactor;
1822 256 : if (nChunkXOffQueried < 0)
1823 : {
1824 136 : nChunkXSizeQueried += nChunkXOffQueried;
1825 136 : nChunkXOffQueried = 0;
1826 : }
1827 256 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1828 144 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1829 256 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1830 :
1831 256 : bool bSkipResample = false;
1832 256 : bool bNoDataMaskFullyOpaque = false;
1833 256 : if (eErr == CE_None && bUseNoDataMask)
1834 : {
1835 55 : eErr = poMaskBand->RasterIO(
1836 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1837 : nChunkXSizeQueried, nChunkYSizeQueried,
1838 : pabyChunkNoDataMask, nChunkXSizeQueried,
1839 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1840 :
1841 : /* Optimizations if mask if fully opaque or transparent */
1842 55 : const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1843 55 : const GByte bVal = pabyChunkNoDataMask[0];
1844 55 : int i = 1; // Used after for.
1845 123794 : for (; i < nPixels; i++)
1846 : {
1847 123777 : if (pabyChunkNoDataMask[i] != bVal)
1848 38 : break;
1849 : }
1850 55 : if (i == nPixels)
1851 : {
1852 17 : if (bVal == 0)
1853 : {
1854 16 : GByte abyZero[16] = {0};
1855 64 : for (int iBand = 0; iBand < nBandCount; iBand++)
1856 : {
1857 2016 : for (int j = 0; j < nDstYCount; j++)
1858 : {
1859 1968 : GDALCopyWords(
1860 : abyZero, GDT_Byte, 0,
1861 : static_cast<GByte *>(pData) +
1862 1968 : iBand * nBandSpace +
1863 1968 : nLineSpace * (j + nDstYOff) +
1864 1968 : nDstXOff * nPixelSpace,
1865 : eBufType, static_cast<int>(nPixelSpace),
1866 : nDstXCount);
1867 : }
1868 : }
1869 16 : bSkipResample = true;
1870 : }
1871 : else
1872 : {
1873 1 : bNoDataMaskFullyOpaque = true;
1874 : }
1875 : }
1876 : }
1877 :
1878 256 : if (!bSkipResample && eErr == CE_None)
1879 : {
1880 : /* Read the source buffers */
1881 244 : eErr = RasterIO(
1882 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1883 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1884 : nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1885 : nBandCount, panBandMap, 0, 0, 0, nullptr);
1886 : }
1887 :
1888 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1889 : if (pfnResampleFuncMultiBands && !bSkipResample &&
1890 : eErr == CE_None)
1891 : {
1892 : eErr = pfnResampleFuncMultiBands(
1893 : dfXRatioDstToSrc, dfYRatioDstToSrc,
1894 : dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1895 : dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1896 : eWrkDataType, (GByte *)pChunk, nBandCount,
1897 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1898 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1899 : nChunkXSizeQueried,
1900 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1901 : nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1902 : nDstXOff + nDestXOffVirtual + nDstXCount,
1903 : nDstYOff + nDestYOffVirtual,
1904 : nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1905 : pszResampling, FALSE /*bHasNoData*/,
1906 : 0.0 /* dfNoDataValue */, nullptr /* color table*/,
1907 : eDataType);
1908 : }
1909 : else
1910 : #endif
1911 : {
1912 : size_t nChunkBandOffset =
1913 274 : static_cast<size_t>(nChunkXSizeQueried) *
1914 274 : nChunkYSizeQueried *
1915 274 : GDALGetDataTypeSizeBytes(eWrkDataType);
1916 1190 : for (int i = 0;
1917 1190 : i < nBandCount && !bSkipResample && eErr == CE_None;
1918 : i++)
1919 : {
1920 910 : const bool bPropagateNoData = false;
1921 910 : void *pDstBuffer = nullptr;
1922 910 : GDALDataType eDstBufferDataType = GDT_Unknown;
1923 : GDALRasterBand *poMEMBand =
1924 910 : poMEMDS->GetRasterBand(i + 1);
1925 910 : GDALOverviewResampleArgs args;
1926 910 : args.eSrcDataType = eDataType;
1927 910 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1928 909 : args.nOvrXSize = poMEMBand->GetXSize();
1929 908 : args.nOvrYSize = poMEMBand->GetYSize();
1930 905 : args.nOvrNBITS = nNBITS;
1931 905 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1932 905 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1933 905 : args.dfSrcXDelta =
1934 905 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1935 905 : args.dfSrcYDelta =
1936 905 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1937 905 : args.eWrkDataType = eWrkDataType;
1938 905 : args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
1939 905 : ? nullptr
1940 : : pabyChunkNoDataMask;
1941 905 : args.nChunkXOff =
1942 905 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1943 905 : args.nChunkXSize = nChunkXSizeQueried;
1944 905 : args.nChunkYOff =
1945 905 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1946 905 : args.nChunkYSize = nChunkYSizeQueried;
1947 905 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1948 905 : args.nDstXOff2 =
1949 905 : nDstXOff + nDestXOffVirtual + nDstXCount;
1950 905 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1951 905 : args.nDstYOff2 =
1952 905 : nDstYOff + nDestYOffVirtual + nDstYCount;
1953 905 : args.pszResampling = pszResampling;
1954 905 : args.bHasNoData = false;
1955 905 : args.dfNoDataValue = 0.0;
1956 905 : args.poColorTable = nullptr;
1957 905 : args.bPropagateNoData = bPropagateNoData;
1958 :
1959 : eErr =
1960 1815 : pfnResampleFunc(args,
1961 905 : reinterpret_cast<GByte *>(pChunk) +
1962 905 : i * nChunkBandOffset,
1963 : &pDstBuffer, &eDstBufferDataType);
1964 910 : if (eErr == CE_None)
1965 : {
1966 910 : eErr = poMEMBand->RasterIO(
1967 : GF_Write, nDstXOff + nDestXOffVirtual,
1968 : nDstYOff + nDestYOffVirtual, nDstXCount,
1969 : nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1970 : eDstBufferDataType, 0, 0, nullptr);
1971 : }
1972 910 : CPLFree(pDstBuffer);
1973 : }
1974 : }
1975 :
1976 280 : nBlocksDone++;
1977 282 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1978 2 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1979 : "", psExtraArg->pProgressData))
1980 : {
1981 0 : eErr = CE_Failure;
1982 : }
1983 : }
1984 : }
1985 :
1986 286 : CPLFree(pChunk);
1987 280 : CPLFree(pabyChunkNoDataMask);
1988 : }
1989 :
1990 280 : CPLFree(papoDstBands);
1991 280 : GDALClose(poMEMDS);
1992 :
1993 280 : return eErr;
1994 : }
1995 :
1996 : //! @endcond
1997 :
1998 : /************************************************************************/
1999 : /* GDALSwapWords() */
2000 : /************************************************************************/
2001 :
2002 : /**
2003 : * Byte swap words in-place.
2004 : *
2005 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2006 : * a memory array. No assumption is made that the words being swapped are
2007 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2008 : * to determine if the current platform is big endian or little endian. Use
2009 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
2010 : * of a function call.
2011 : *
2012 : * @param pData pointer to start of data buffer.
2013 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2014 : * @param nWordCount the number of words to be swapped in this call.
2015 : * @param nWordSkip the byte offset from the start of one word to the start of
2016 : * the next. For packed buffers this is the same as nWordSize.
2017 : */
2018 :
2019 438599 : void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
2020 : int nWordSkip)
2021 :
2022 : {
2023 438599 : if (nWordCount > 0)
2024 438599 : VALIDATE_POINTER0(pData, "GDALSwapWords");
2025 :
2026 438599 : GByte *pabyData = static_cast<GByte *>(pData);
2027 :
2028 438599 : switch (nWordSize)
2029 : {
2030 7204 : case 1:
2031 7204 : break;
2032 :
2033 418136 : case 2:
2034 418136 : CPLAssert(nWordSkip >= 2 || nWordCount == 1);
2035 289092000 : for (int i = 0; i < nWordCount; i++)
2036 : {
2037 288673000 : CPL_SWAP16PTR(pabyData);
2038 288673000 : pabyData += nWordSkip;
2039 : }
2040 418136 : break;
2041 :
2042 10688 : case 4:
2043 10688 : CPLAssert(nWordSkip >= 4 || nWordCount == 1);
2044 10688 : if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
2045 : {
2046 29148000 : for (int i = 0; i < nWordCount; i++)
2047 : {
2048 29137300 : *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
2049 : *reinterpret_cast<const GUInt32 *>(pabyData));
2050 29137300 : pabyData += nWordSkip;
2051 10685 : }
2052 : }
2053 : else
2054 : {
2055 9 : for (int i = 0; i < nWordCount; i++)
2056 : {
2057 6 : CPL_SWAP32PTR(pabyData);
2058 6 : pabyData += nWordSkip;
2059 : }
2060 : }
2061 10688 : break;
2062 :
2063 2571 : case 8:
2064 2571 : CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2065 : #ifdef CPL_HAS_GINT64
2066 2571 : if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2067 : {
2068 3359870 : for (int i = 0; i < nWordCount; i++)
2069 : {
2070 3357300 : *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
2071 : *reinterpret_cast<const GUInt64 *>(pabyData));
2072 3357300 : pabyData += nWordSkip;
2073 2570 : }
2074 : }
2075 : else
2076 : #endif
2077 : {
2078 3 : for (int i = 0; i < nWordCount; i++)
2079 : {
2080 2 : CPL_SWAP64PTR(pabyData);
2081 2 : pabyData += nWordSkip;
2082 : }
2083 : }
2084 2571 : break;
2085 :
2086 0 : default:
2087 0 : CPLAssert(false);
2088 : }
2089 : }
2090 :
2091 : /************************************************************************/
2092 : /* GDALSwapWordsEx() */
2093 : /************************************************************************/
2094 :
2095 : /**
2096 : * Byte swap words in-place.
2097 : *
2098 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2099 : * a memory array. No assumption is made that the words being swapped are
2100 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2101 : * to determine if the current platform is big endian or little endian. Use
2102 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
2103 : * of a function call.
2104 : *
2105 : * @param pData pointer to start of data buffer.
2106 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2107 : * @param nWordCount the number of words to be swapped in this call.
2108 : * @param nWordSkip the byte offset from the start of one word to the start of
2109 : * the next. For packed buffers this is the same as nWordSize.
2110 : * @since GDAL 2.1
2111 : */
2112 6328 : void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
2113 : int nWordSkip)
2114 : {
2115 6328 : GByte *pabyData = static_cast<GByte *>(pData);
2116 12656 : while (nWordCount)
2117 : {
2118 : // Pick-up a multiple of 8 as max chunk size.
2119 6328 : const int nWordCountSmall =
2120 6328 : (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2121 6328 : GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2122 6328 : pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2123 6328 : nWordCount -= nWordCountSmall;
2124 : }
2125 6328 : }
2126 :
2127 : // Place the new GDALCopyWords helpers in an anonymous namespace
2128 : namespace
2129 : {
2130 :
2131 : /************************************************************************/
2132 : /* GDALCopyWordsT() */
2133 : /************************************************************************/
2134 : /**
2135 : * Template function, used to copy data from pSrcData into buffer
2136 : * pDstData, with stride nSrcPixelStride in the source data and
2137 : * stride nDstPixelStride in the destination data. This template can
2138 : * deal with the case where the input data type is real or complex and
2139 : * the output is real.
2140 : *
2141 : * @param pSrcData the source data buffer
2142 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2143 : * of interest.
2144 : * @param pDstData the destination buffer.
2145 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2146 : * interest.
2147 : * @param nWordCount the total number of pixel words to copy
2148 : *
2149 : * @code
2150 : * // Assume an input buffer of type GUInt16 named pBufferIn
2151 : * GByte *pBufferOut = new GByte[numBytesOut];
2152 : * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut);
2153 : * @endcode
2154 : * @note
2155 : * This is a private function, and should not be exposed outside of
2156 : * rasterio.cpp. External users should call the GDALCopyWords driver function.
2157 : */
2158 :
2159 : template <class Tin, class Tout>
2160 46343755 : static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2161 : int nSrcPixelStride,
2162 : Tout *const CPL_RESTRICT pDstData,
2163 : int nDstPixelStride,
2164 : GPtrDiff_t nWordCount)
2165 : {
2166 46343755 : decltype(nWordCount) nDstOffset = 0;
2167 :
2168 46343755 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2169 46343755 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2170 442519228 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2171 : {
2172 396168361 : const Tin tValue =
2173 396168361 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2174 396168361 : Tout *const pOutPixel =
2175 396168361 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2176 :
2177 396168361 : GDALCopyWord(tValue, *pOutPixel);
2178 :
2179 396175461 : nDstOffset += nDstPixelStride;
2180 : }
2181 46350885 : }
2182 :
2183 : template <class Tin, class Tout>
2184 38234082 : static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2185 : int nSrcPixelStride,
2186 : Tout *const CPL_RESTRICT pDstData,
2187 : int nDstPixelStride, GPtrDiff_t nWordCount)
2188 : {
2189 38234082 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2190 : nWordCount);
2191 38234222 : }
2192 :
2193 : template <class Tin, class Tout>
2194 194232 : static void inline GDALCopyWordsT_8atatime(
2195 : const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2196 : Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2197 : GPtrDiff_t nWordCount)
2198 : {
2199 194232 : decltype(nWordCount) nDstOffset = 0;
2200 :
2201 194232 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2202 194232 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2203 194232 : decltype(nWordCount) n = 0;
2204 194232 : if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2205 : nDstPixelStride == static_cast<int>(sizeof(Tout)))
2206 : {
2207 22731350 : for (; n < nWordCount - 7; n += 8)
2208 : {
2209 22536468 : const Tin *pInValues = reinterpret_cast<const Tin *>(
2210 22536468 : pSrcDataPtr + (n * nSrcPixelStride));
2211 22536468 : Tout *const pOutPixels =
2212 22536468 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2213 :
2214 22536468 : GDALCopy8Words(pInValues, pOutPixels);
2215 :
2216 22537998 : nDstOffset += 8 * nDstPixelStride;
2217 : }
2218 : }
2219 687113 : for (; n < nWordCount; n++)
2220 : {
2221 492891 : const Tin tValue =
2222 492891 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2223 492891 : Tout *const pOutPixel =
2224 492891 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2225 :
2226 492891 : GDALCopyWord(tValue, *pOutPixel);
2227 :
2228 491368 : nDstOffset += nDstPixelStride;
2229 : }
2230 194222 : }
2231 :
2232 : #ifdef HAVE_SSE2
2233 :
2234 : template <class Tout>
2235 38739 : void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2236 : int nSrcPixelStride,
2237 : Tout *const CPL_RESTRICT pDstData,
2238 : int nDstPixelStride, GPtrDiff_t nWordCount)
2239 : {
2240 : static_assert(std::is_integral<Tout>::value &&
2241 : sizeof(Tout) == sizeof(uint16_t),
2242 : "Bad Tout");
2243 38739 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2244 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2245 : {
2246 32688 : decltype(nWordCount) n = 0;
2247 32688 : const __m128i xmm_zero = _mm_setzero_si128();
2248 32688 : GByte *CPL_RESTRICT pabyDstDataPtr =
2249 : reinterpret_cast<GByte *>(pDstData);
2250 1499275 : for (; n < nWordCount - 15; n += 16)
2251 : {
2252 1466587 : __m128i xmm = _mm_loadu_si128(
2253 1466587 : reinterpret_cast<const __m128i *>(pSrcData + n));
2254 1466587 : __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2255 1466587 : __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2256 : _mm_storeu_si128(
2257 1466587 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2258 : _mm_storeu_si128(
2259 1466587 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2260 : }
2261 106775 : for (; n < nWordCount; n++)
2262 : {
2263 74087 : pDstData[n] = pSrcData[n];
2264 32688 : }
2265 : }
2266 : else
2267 : {
2268 6051 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2269 : nDstPixelStride, nWordCount);
2270 : }
2271 38739 : }
2272 :
2273 : template <>
2274 25763 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2275 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2276 : int nDstPixelStride, GPtrDiff_t nWordCount)
2277 : {
2278 25763 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2279 : nDstPixelStride, nWordCount);
2280 25763 : }
2281 :
2282 : template <>
2283 12976 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2284 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2285 : int nDstPixelStride, GPtrDiff_t nWordCount)
2286 : {
2287 12976 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2288 : nDstPixelStride, nWordCount);
2289 12976 : }
2290 :
2291 : template <class Tout>
2292 12095237 : void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2293 : int nSrcPixelStride,
2294 : Tout *const CPL_RESTRICT pDstData,
2295 : int nDstPixelStride, GPtrDiff_t nWordCount)
2296 : {
2297 : static_assert(std::is_integral<Tout>::value &&
2298 : sizeof(Tout) == sizeof(uint32_t),
2299 : "Bad Tout");
2300 12095237 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2301 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2302 : {
2303 6374127 : decltype(nWordCount) n = 0;
2304 6374127 : const __m128i xmm_zero = _mm_setzero_si128();
2305 6374127 : GByte *CPL_RESTRICT pabyDstDataPtr =
2306 : reinterpret_cast<GByte *>(pDstData);
2307 71873351 : for (; n < nWordCount - 15; n += 16)
2308 : {
2309 65430824 : __m128i xmm = _mm_loadu_si128(
2310 65430824 : reinterpret_cast<const __m128i *>(pSrcData + n));
2311 65508424 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2312 65554324 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2313 65386624 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2314 65401624 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2315 65431224 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2316 65499224 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2317 : _mm_storeu_si128(
2318 65499224 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2319 : _mm_storeu_si128(
2320 65499224 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2321 : _mm_storeu_si128(
2322 65499224 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2323 : _mm_storeu_si128(
2324 65499224 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2325 : }
2326 15168832 : for (; n < nWordCount; n++)
2327 : {
2328 8726335 : pDstData[n] = pSrcData[n];
2329 6442467 : }
2330 : }
2331 : else
2332 : {
2333 5721090 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2334 : nDstPixelStride, nWordCount);
2335 : }
2336 12169837 : }
2337 :
2338 : template <>
2339 437 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2340 : int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2341 : int nDstPixelStride, GPtrDiff_t nWordCount)
2342 : {
2343 437 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2344 : nDstPixelStride, nWordCount);
2345 437 : }
2346 :
2347 : template <>
2348 12096900 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2349 : int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2350 : int nDstPixelStride, GPtrDiff_t nWordCount)
2351 : {
2352 12096900 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2353 : nDstPixelStride, nWordCount);
2354 12103800 : }
2355 :
2356 : template <>
2357 2470370 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2358 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2359 : int nDstPixelStride, GPtrDiff_t nWordCount)
2360 : {
2361 2470370 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2362 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2363 : {
2364 110924 : decltype(nWordCount) n = 0;
2365 110924 : const __m128i xmm_zero = _mm_setzero_si128();
2366 110924 : GByte *CPL_RESTRICT pabyDstDataPtr =
2367 : reinterpret_cast<GByte *>(pDstData);
2368 3271060 : for (; n < nWordCount - 15; n += 16)
2369 : {
2370 3160140 : __m128i xmm = _mm_loadu_si128(
2371 3160140 : reinterpret_cast<const __m128i *>(pSrcData + n));
2372 3160140 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2373 3160140 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2374 3160140 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2375 3160140 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2376 3160140 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2377 3160140 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2378 3160140 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2379 3160140 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2380 3160140 : __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2381 3160140 : __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2382 3160140 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2383 : xmm0_f);
2384 : _mm_storeu_ps(
2385 3160140 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2386 : _mm_storeu_ps(
2387 3160140 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2388 : _mm_storeu_ps(
2389 3160140 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2390 : }
2391 471306 : for (; n < nWordCount; n++)
2392 : {
2393 360382 : pDstData[n] = pSrcData[n];
2394 110924 : }
2395 : }
2396 : else
2397 : {
2398 2359440 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2399 : nDstPixelStride, nWordCount);
2400 : }
2401 2470370 : }
2402 :
2403 : template <>
2404 145276 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2405 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2406 : int nDstPixelStride, GPtrDiff_t nWordCount)
2407 : {
2408 145276 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2409 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2410 : {
2411 122459 : decltype(nWordCount) n = 0;
2412 122459 : const __m128i xmm_zero = _mm_setzero_si128();
2413 122459 : GByte *CPL_RESTRICT pabyDstDataPtr =
2414 : reinterpret_cast<GByte *>(pDstData);
2415 1409260 : for (; n < nWordCount - 15; n += 16)
2416 : {
2417 1286800 : __m128i xmm = _mm_loadu_si128(
2418 1286800 : reinterpret_cast<const __m128i *>(pSrcData + n));
2419 1286800 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2420 1286800 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2421 1286800 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2422 1286800 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2423 1286800 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2424 1286800 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2425 :
2426 1286800 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2427 1286800 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2428 1286800 : __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2429 1286800 : __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2430 1286800 : xmm0 = _mm_srli_si128(xmm0, 8);
2431 1286800 : xmm1 = _mm_srli_si128(xmm1, 8);
2432 1286800 : xmm2 = _mm_srli_si128(xmm2, 8);
2433 1286800 : xmm3 = _mm_srli_si128(xmm3, 8);
2434 1286800 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2435 1286800 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2436 1286800 : __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2437 1286800 : __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2438 :
2439 1286800 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2440 : xmm0_low_d);
2441 : _mm_storeu_pd(
2442 1286800 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2443 : xmm0_high_d);
2444 : _mm_storeu_pd(
2445 1286800 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2446 : xmm1_low_d);
2447 : _mm_storeu_pd(
2448 1286800 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2449 : xmm1_high_d);
2450 : _mm_storeu_pd(
2451 1286800 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2452 : xmm2_low_d);
2453 : _mm_storeu_pd(
2454 1286800 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2455 : xmm2_high_d);
2456 : _mm_storeu_pd(
2457 1286800 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2458 : xmm3_low_d);
2459 : _mm_storeu_pd(
2460 1286800 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2461 : xmm3_high_d);
2462 : }
2463 232943 : for (; n < nWordCount; n++)
2464 : {
2465 110484 : pDstData[n] = pSrcData[n];
2466 122459 : }
2467 : }
2468 : else
2469 : {
2470 22817 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2471 : nDstPixelStride, nWordCount);
2472 : }
2473 145276 : }
2474 :
2475 : template <>
2476 5992 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2477 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2478 : int nDstPixelStride, GPtrDiff_t nWordCount)
2479 : {
2480 5992 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2481 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2482 : {
2483 5017 : decltype(nWordCount) n = 0;
2484 : // In SSE2, min_epu16 does not exist, so shift from
2485 : // UInt16 to SInt16 to be able to use min_epi16
2486 5017 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2487 5017 : const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2488 138457 : for (; n < nWordCount - 7; n += 8)
2489 : {
2490 133440 : __m128i xmm = _mm_loadu_si128(
2491 133440 : reinterpret_cast<const __m128i *>(pSrcData + n));
2492 133440 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2493 133440 : xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2494 133440 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2495 133440 : xmm = _mm_packus_epi16(xmm, xmm);
2496 133440 : GDALCopyXMMToInt64(xmm,
2497 133440 : reinterpret_cast<GPtrDiff_t *>(pDstData + n));
2498 : }
2499 15907 : for (; n < nWordCount; n++)
2500 : {
2501 10890 : pDstData[n] =
2502 10890 : pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2503 5017 : }
2504 : }
2505 : else
2506 : {
2507 975 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2508 : nDstPixelStride, nWordCount);
2509 : }
2510 5992 : }
2511 :
2512 : template <>
2513 21 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2514 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2515 : int nDstPixelStride, GPtrDiff_t nWordCount)
2516 : {
2517 21 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2518 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2519 : {
2520 15 : decltype(nWordCount) n = 0;
2521 : // In SSE2, min_epu16 does not exist, so shift from
2522 : // UInt16 to SInt16 to be able to use min_epi16
2523 15 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2524 15 : const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2525 31 : for (; n < nWordCount - 7; n += 8)
2526 : {
2527 16 : __m128i xmm = _mm_loadu_si128(
2528 16 : reinterpret_cast<const __m128i *>(pSrcData + n));
2529 16 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2530 16 : xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2531 16 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2532 16 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm);
2533 : }
2534 55 : for (; n < nWordCount; n++)
2535 : {
2536 40 : pDstData[n] =
2537 40 : pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2538 15 : }
2539 : }
2540 : else
2541 : {
2542 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2543 : nDstPixelStride, nWordCount);
2544 : }
2545 21 : }
2546 :
2547 : template <>
2548 412 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2549 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2550 : int nDstPixelStride, GPtrDiff_t nWordCount)
2551 : {
2552 412 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2553 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2554 : {
2555 406 : decltype(nWordCount) n = 0;
2556 406 : const __m128i xmm_zero = _mm_setzero_si128();
2557 406 : GByte *CPL_RESTRICT pabyDstDataPtr =
2558 : reinterpret_cast<GByte *>(pDstData);
2559 1500 : for (; n < nWordCount - 7; n += 8)
2560 : {
2561 1094 : __m128i xmm = _mm_loadu_si128(
2562 1094 : reinterpret_cast<const __m128i *>(pSrcData + n));
2563 1094 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2564 1094 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2565 1094 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2566 1094 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2567 1094 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2568 : xmm0_f);
2569 : _mm_storeu_ps(
2570 1094 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2571 : }
2572 1483 : for (; n < nWordCount; n++)
2573 : {
2574 1077 : pDstData[n] = pSrcData[n];
2575 406 : }
2576 : }
2577 : else
2578 : {
2579 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2580 : nDstPixelStride, nWordCount);
2581 : }
2582 412 : }
2583 :
2584 : template <>
2585 279 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2586 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2587 : int nDstPixelStride, GPtrDiff_t nWordCount)
2588 : {
2589 279 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2590 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2591 : {
2592 171 : decltype(nWordCount) n = 0;
2593 171 : const __m128i xmm_zero = _mm_setzero_si128();
2594 171 : GByte *CPL_RESTRICT pabyDstDataPtr =
2595 : reinterpret_cast<GByte *>(pDstData);
2596 219 : for (; n < nWordCount - 7; n += 8)
2597 : {
2598 48 : __m128i xmm = _mm_loadu_si128(
2599 48 : reinterpret_cast<const __m128i *>(pSrcData + n));
2600 48 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2601 48 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2602 :
2603 48 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2604 48 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2605 48 : xmm0 = _mm_srli_si128(xmm0, 8);
2606 48 : xmm1 = _mm_srli_si128(xmm1, 8);
2607 48 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2608 48 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2609 :
2610 48 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2611 : xmm0_low_d);
2612 : _mm_storeu_pd(
2613 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2614 : xmm0_high_d);
2615 : _mm_storeu_pd(
2616 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2617 : xmm1_low_d);
2618 : _mm_storeu_pd(
2619 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2620 : xmm1_high_d);
2621 : }
2622 429 : for (; n < nWordCount; n++)
2623 : {
2624 258 : pDstData[n] = pSrcData[n];
2625 171 : }
2626 : }
2627 : else
2628 : {
2629 108 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2630 : nDstPixelStride, nWordCount);
2631 : }
2632 279 : }
2633 :
2634 : template <>
2635 789 : void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
2636 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2637 : int nDstPixelStride, GPtrDiff_t nWordCount)
2638 : {
2639 789 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2640 : nDstPixelStride, nWordCount);
2641 789 : }
2642 :
2643 : #endif // HAVE_SSE2
2644 :
2645 : template <>
2646 116659 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2647 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2648 : int nDstPixelStride, GPtrDiff_t nWordCount)
2649 : {
2650 116659 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2651 : nDstPixelStride, nWordCount);
2652 116660 : }
2653 :
2654 : template <>
2655 15146 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2656 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2657 : int nDstPixelStride, GPtrDiff_t nWordCount)
2658 : {
2659 15146 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2660 : nDstPixelStride, nWordCount);
2661 15146 : }
2662 :
2663 : template <>
2664 61642 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2665 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2666 : int nDstPixelStride, GPtrDiff_t nWordCount)
2667 : {
2668 61642 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2669 : nDstPixelStride, nWordCount);
2670 61638 : }
2671 :
2672 : /************************************************************************/
2673 : /* GDALCopyWordsComplexT() */
2674 : /************************************************************************/
2675 : /**
2676 : * Template function, used to copy data from pSrcData into buffer
2677 : * pDstData, with stride nSrcPixelStride in the source data and
2678 : * stride nDstPixelStride in the destination data. Deals with the
2679 : * complex case, where input is complex and output is complex.
2680 : *
2681 : * @param pSrcData the source data buffer
2682 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2683 : * of interest.
2684 : * @param pDstData the destination buffer.
2685 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2686 : * interest.
2687 : * @param nWordCount the total number of pixel words to copy
2688 : *
2689 : */
2690 : template <class Tin, class Tout>
2691 125132 : inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
2692 : int nSrcPixelStride,
2693 : Tout *const CPL_RESTRICT pDstData,
2694 : int nDstPixelStride, GPtrDiff_t nWordCount)
2695 : {
2696 125132 : decltype(nWordCount) nDstOffset = 0;
2697 125132 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2698 125132 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2699 :
2700 7337033 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2701 : {
2702 7211896 : const Tin *const pPixelIn =
2703 7211896 : reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2704 7211896 : Tout *const pPixelOut =
2705 7211896 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2706 :
2707 7211896 : GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2708 7211896 : GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2709 :
2710 7211896 : nDstOffset += nDstPixelStride;
2711 : }
2712 125132 : }
2713 :
2714 : /************************************************************************/
2715 : /* GDALCopyWordsComplexOutT() */
2716 : /************************************************************************/
2717 : /**
2718 : * Template function, used to copy data from pSrcData into buffer
2719 : * pDstData, with stride nSrcPixelStride in the source data and
2720 : * stride nDstPixelStride in the destination data. Deals with the
2721 : * case where the value is real coming in, but complex going out.
2722 : *
2723 : * @param pSrcData the source data buffer
2724 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2725 : * of interest, in bytes.
2726 : * @param pDstData the destination buffer.
2727 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2728 : * interest, in bytes.
2729 : * @param nWordCount the total number of pixel words to copy
2730 : *
2731 : */
2732 : template <class Tin, class Tout>
2733 3164 : inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
2734 : int nSrcPixelStride,
2735 : Tout *const CPL_RESTRICT pDstData,
2736 : int nDstPixelStride, GPtrDiff_t nWordCount)
2737 : {
2738 3164 : decltype(nWordCount) nDstOffset = 0;
2739 :
2740 3164 : const Tout tOutZero = static_cast<Tout>(0);
2741 :
2742 3164 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2743 3164 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2744 :
2745 1112451 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2746 : {
2747 1109287 : const Tin tValue =
2748 1109287 : *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2749 1109287 : Tout *const pPixelOut =
2750 1109287 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2751 1109287 : GDALCopyWord(tValue, *pPixelOut);
2752 :
2753 1109287 : pPixelOut[1] = tOutZero;
2754 :
2755 1109287 : nDstOffset += nDstPixelStride;
2756 : }
2757 3164 : }
2758 :
2759 : /************************************************************************/
2760 : /* GDALCopyWordsFromT() */
2761 : /************************************************************************/
2762 : /**
2763 : * Template driver function. Given the input type T, call the appropriate
2764 : * GDALCopyWordsT function template for the desired output type. You should
2765 : * never call this function directly (call GDALCopyWords instead).
2766 : *
2767 : * @param pSrcData source data buffer
2768 : * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2769 : * @param bInComplex input is complex
2770 : * @param pDstData destination data buffer
2771 : * @param eDstType destination data type
2772 : * @param nDstPixelStride pixel stride in output buffer, in pixel words
2773 : * @param nWordCount number of pixel words to be copied
2774 : */
2775 : template <class T>
2776 53312260 : inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
2777 : int nSrcPixelStride, bool bInComplex,
2778 : void *CPL_RESTRICT pDstData,
2779 : GDALDataType eDstType, int nDstPixelStride,
2780 : GPtrDiff_t nWordCount)
2781 : {
2782 53312260 : switch (eDstType)
2783 : {
2784 4556357 : case GDT_Byte:
2785 4556357 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2786 : static_cast<unsigned char *>(pDstData),
2787 : nDstPixelStride, nWordCount);
2788 4556590 : break;
2789 457 : case GDT_Int8:
2790 457 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2791 : static_cast<signed char *>(pDstData),
2792 : nDstPixelStride, nWordCount);
2793 457 : break;
2794 101112 : case GDT_UInt16:
2795 101112 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2796 : static_cast<unsigned short *>(pDstData),
2797 : nDstPixelStride, nWordCount);
2798 101114 : break;
2799 4125678 : case GDT_Int16:
2800 4125678 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2801 : static_cast<short *>(pDstData), nDstPixelStride,
2802 : nWordCount);
2803 4125678 : break;
2804 4179 : case GDT_UInt32:
2805 4179 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2806 : static_cast<unsigned int *>(pDstData),
2807 : nDstPixelStride, nWordCount);
2808 4179 : break;
2809 25277683 : case GDT_Int32:
2810 25277683 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2811 : static_cast<int *>(pDstData), nDstPixelStride,
2812 : nWordCount);
2813 25288283 : break;
2814 592 : case GDT_UInt64:
2815 592 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2816 : static_cast<std::uint64_t *>(pDstData),
2817 : nDstPixelStride, nWordCount);
2818 592 : break;
2819 4157 : case GDT_Int64:
2820 4157 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2821 : static_cast<std::int64_t *>(pDstData),
2822 : nDstPixelStride, nWordCount);
2823 4157 : break;
2824 3869075 : case GDT_Float32:
2825 3869075 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2826 : static_cast<float *>(pDstData), nDstPixelStride,
2827 : nWordCount);
2828 3869075 : break;
2829 15243095 : case GDT_Float64:
2830 15243095 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2831 : static_cast<double *>(pDstData), nDstPixelStride,
2832 : nWordCount);
2833 15243145 : break;
2834 122400 : case GDT_CInt16:
2835 122400 : if (bInComplex)
2836 : {
2837 121390 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2838 : static_cast<short *>(pDstData),
2839 : nDstPixelStride, nWordCount);
2840 : }
2841 : else // input is not complex, so we need to promote to a complex
2842 : // buffer
2843 : {
2844 1010 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2845 : static_cast<short *>(pDstData),
2846 : nDstPixelStride, nWordCount);
2847 : }
2848 122400 : break;
2849 779 : case GDT_CInt32:
2850 779 : if (bInComplex)
2851 : {
2852 391 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2853 : static_cast<int *>(pDstData),
2854 : nDstPixelStride, nWordCount);
2855 : }
2856 : else // input is not complex, so we need to promote to a complex
2857 : // buffer
2858 : {
2859 388 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2860 : static_cast<int *>(pDstData),
2861 : nDstPixelStride, nWordCount);
2862 : }
2863 779 : break;
2864 3170 : case GDT_CFloat32:
2865 3170 : if (bInComplex)
2866 : {
2867 2589 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2868 : static_cast<float *>(pDstData),
2869 : nDstPixelStride, nWordCount);
2870 : }
2871 : else // input is not complex, so we need to promote to a complex
2872 : // buffer
2873 : {
2874 581 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2875 : static_cast<float *>(pDstData),
2876 : nDstPixelStride, nWordCount);
2877 : }
2878 3170 : break;
2879 1947 : case GDT_CFloat64:
2880 1947 : if (bInComplex)
2881 : {
2882 762 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2883 : static_cast<double *>(pDstData),
2884 : nDstPixelStride, nWordCount);
2885 : }
2886 : else // input is not complex, so we need to promote to a complex
2887 : // buffer
2888 : {
2889 1185 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2890 : static_cast<double *>(pDstData),
2891 : nDstPixelStride, nWordCount);
2892 : }
2893 1947 : break;
2894 0 : case GDT_Unknown:
2895 : case GDT_TypeCount:
2896 0 : CPLAssert(false);
2897 : }
2898 53323015 : }
2899 :
2900 : } // end anonymous namespace
2901 :
2902 : /************************************************************************/
2903 : /* GDALReplicateWord() */
2904 : /************************************************************************/
2905 :
2906 : template <class T>
2907 227440 : inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
2908 : GPtrDiff_t nWordCount)
2909 : {
2910 227440 : const T valSet = *static_cast<const T *>(pDstData);
2911 227440 : if (nDstPixelStride == static_cast<int>(sizeof(T)))
2912 : {
2913 199069 : T *pDstPtr = static_cast<T *>(pDstData) + 1;
2914 8614812 : while (nWordCount >= 4)
2915 : {
2916 8415740 : nWordCount -= 4;
2917 8415740 : pDstPtr[0] = valSet;
2918 8415740 : pDstPtr[1] = valSet;
2919 8415740 : pDstPtr[2] = valSet;
2920 8415740 : pDstPtr[3] = valSet;
2921 8415740 : pDstPtr += 4;
2922 : }
2923 664645 : while (nWordCount > 0)
2924 : {
2925 465576 : --nWordCount;
2926 465576 : *pDstPtr = valSet;
2927 465576 : pDstPtr++;
2928 : }
2929 : }
2930 : else
2931 : {
2932 28371 : GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2933 954250 : while (nWordCount > 0)
2934 : {
2935 925879 : --nWordCount;
2936 925879 : *reinterpret_cast<T *>(pabyDstPtr) = valSet;
2937 925879 : pabyDstPtr += nDstPixelStride;
2938 : }
2939 : }
2940 227440 : }
2941 :
2942 592282 : static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
2943 : GDALDataType eSrcType,
2944 : void *CPL_RESTRICT pDstData,
2945 : GDALDataType eDstType, int nDstPixelStride,
2946 : GPtrDiff_t nWordCount)
2947 : {
2948 : /* -----------------------------------------------------------------------
2949 : */
2950 : /* Special case when the source data is always the same value */
2951 : /* (for VRTSourcedRasterBand::IRasterIO and
2952 : * VRTDerivedRasterBand::IRasterIO*/
2953 : /* for example) */
2954 : /* -----------------------------------------------------------------------
2955 : */
2956 : // Let the general translation case do the necessary conversions
2957 : // on the first destination element.
2958 592282 : GDALCopyWords(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
2959 :
2960 : // Now copy the first element to the nWordCount - 1 following destination
2961 : // elements.
2962 592282 : nWordCount--;
2963 592282 : GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
2964 :
2965 592282 : switch (eDstType)
2966 : {
2967 364758 : case GDT_Byte:
2968 : case GDT_Int8:
2969 : {
2970 364758 : if (nDstPixelStride == 1)
2971 : {
2972 331006 : if (nWordCount > 0)
2973 331006 : memset(pabyDstWord,
2974 331006 : *reinterpret_cast<const GByte *>(pDstData),
2975 : nWordCount);
2976 : }
2977 : else
2978 : {
2979 33752 : GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
2980 5438530 : while (nWordCount > 0)
2981 : {
2982 5404780 : --nWordCount;
2983 5404780 : *pabyDstWord = valSet;
2984 5404780 : pabyDstWord += nDstPixelStride;
2985 : }
2986 : }
2987 364758 : break;
2988 : }
2989 :
2990 : #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
2991 : case enum_type: \
2992 : { \
2993 : GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
2994 : break; \
2995 : }
2996 :
2997 333 : CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
2998 169647 : CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
2999 56 : CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3000 140 : CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3001 21 : CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3002 22 : CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3003 52210 : CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3004 5011 : CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3005 :
3006 : #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
3007 : case enum_type: \
3008 : { \
3009 : c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \
3010 : c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \
3011 : while (nWordCount > 0) \
3012 : { \
3013 : --nWordCount; \
3014 : reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \
3015 : reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \
3016 : pabyDstWord += nDstPixelStride; \
3017 : } \
3018 : break; \
3019 : }
3020 :
3021 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3022 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3023 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3024 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3025 :
3026 0 : case GDT_Unknown:
3027 : case GDT_TypeCount:
3028 0 : CPLAssert(false);
3029 : }
3030 592282 : }
3031 :
3032 : /************************************************************************/
3033 : /* GDALUnrolledCopy() */
3034 : /************************************************************************/
3035 :
3036 : template <class T, int srcStride, int dstStride>
3037 5319771 : static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3038 : const T *CPL_RESTRICT pSrc,
3039 : GPtrDiff_t nIters)
3040 : {
3041 5319771 : if (nIters >= 16)
3042 : {
3043 138118800 : for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3044 : {
3045 132928679 : pDest[0 * dstStride] = pSrc[0 * srcStride];
3046 132928679 : pDest[1 * dstStride] = pSrc[1 * srcStride];
3047 132928679 : pDest[2 * dstStride] = pSrc[2 * srcStride];
3048 132928679 : pDest[3 * dstStride] = pSrc[3 * srcStride];
3049 132928679 : pDest[4 * dstStride] = pSrc[4 * srcStride];
3050 132928679 : pDest[5 * dstStride] = pSrc[5 * srcStride];
3051 132928679 : pDest[6 * dstStride] = pSrc[6 * srcStride];
3052 132928679 : pDest[7 * dstStride] = pSrc[7 * srcStride];
3053 132928679 : pDest[8 * dstStride] = pSrc[8 * srcStride];
3054 132928679 : pDest[9 * dstStride] = pSrc[9 * srcStride];
3055 132928679 : pDest[10 * dstStride] = pSrc[10 * srcStride];
3056 132928679 : pDest[11 * dstStride] = pSrc[11 * srcStride];
3057 132928679 : pDest[12 * dstStride] = pSrc[12 * srcStride];
3058 132928679 : pDest[13 * dstStride] = pSrc[13 * srcStride];
3059 132928679 : pDest[14 * dstStride] = pSrc[14 * srcStride];
3060 132928679 : pDest[15 * dstStride] = pSrc[15 * srcStride];
3061 132928679 : pDest += 16 * dstStride;
3062 132928679 : pSrc += 16 * srcStride;
3063 : }
3064 5190161 : nIters = nIters % 16;
3065 : }
3066 7571813 : for (GPtrDiff_t i = 0; i < nIters; i++)
3067 : {
3068 2252029 : pDest[i * dstStride] = *pSrc;
3069 2252029 : pSrc += srcStride;
3070 : }
3071 5319771 : }
3072 :
3073 : template <class T, int srcStride, int dstStride>
3074 5314586 : static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3075 : const T *CPL_RESTRICT pSrc,
3076 : GPtrDiff_t nIters)
3077 : {
3078 5314586 : GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
3079 5314628 : }
3080 :
3081 : #ifdef HAVE_SSE2
3082 :
3083 : template <>
3084 303485 : void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3085 : const GByte *CPL_RESTRICT pSrc,
3086 : GPtrDiff_t nIters)
3087 : {
3088 303485 : decltype(nIters) i = 0;
3089 303485 : if (nIters > 16)
3090 : {
3091 145295 : const __m128i xmm_mask = _mm_set1_epi16(0xff);
3092 : // If we were sure that there would always be 1 trailing byte, we could
3093 : // check against nIters - 15
3094 2535420 : for (; i < nIters - 16; i += 16)
3095 : {
3096 : __m128i xmm0 =
3097 2390130 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3098 : __m128i xmm1 =
3099 4780250 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3100 : // Set higher 8bit of each int16 packed word to 0
3101 2390130 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3102 2390130 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3103 : // Pack int16 to uint8 and merge back both vector
3104 2390130 : xmm0 = _mm_packus_epi16(xmm0, xmm1);
3105 :
3106 : // Store result
3107 2390130 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3108 :
3109 2390130 : pSrc += 2 * 16;
3110 : }
3111 : }
3112 3866570 : for (; i < nIters; i++)
3113 : {
3114 3563080 : pDest[i] = *pSrc;
3115 3563080 : pSrc += 2;
3116 : }
3117 303485 : }
3118 :
3119 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3120 :
3121 : template <>
3122 184624 : void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3123 : const GByte *CPL_RESTRICT pSrc,
3124 : GPtrDiff_t nIters)
3125 : {
3126 184624 : if (nIters > 16 && CPLHaveRuntimeSSSE3())
3127 : {
3128 179427 : GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3129 : }
3130 : else
3131 : {
3132 5197 : GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
3133 : }
3134 184624 : }
3135 :
3136 : #endif
3137 :
3138 : template <>
3139 100208 : void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3140 : const GByte *CPL_RESTRICT pSrc,
3141 : GPtrDiff_t nIters)
3142 : {
3143 100208 : decltype(nIters) i = 0;
3144 100208 : if (nIters > 16)
3145 : {
3146 94915 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
3147 : // If we were sure that there would always be 3 trailing bytes, we could
3148 : // check against nIters - 15
3149 8731670 : for (; i < nIters - 16; i += 16)
3150 : {
3151 : __m128i xmm0 =
3152 8636590 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3153 : __m128i xmm1 =
3154 8636590 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3155 : __m128i xmm2 =
3156 8636590 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3157 : __m128i xmm3 =
3158 17273200 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3159 : // Set higher 24bit of each int32 packed word to 0
3160 8636590 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3161 8636590 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3162 8636590 : xmm2 = _mm_and_si128(xmm2, xmm_mask);
3163 8636590 : xmm3 = _mm_and_si128(xmm3, xmm_mask);
3164 : // Pack int32 to int16
3165 8636900 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
3166 8636670 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
3167 : // Pack int16 to uint8
3168 8636760 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
3169 :
3170 : // Store result
3171 8636760 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3172 :
3173 8636760 : pSrc += 4 * 16;
3174 : }
3175 : }
3176 1100560 : for (; i < nIters; i++)
3177 : {
3178 1000190 : pDest[i] = *pSrc;
3179 1000190 : pSrc += 4;
3180 : }
3181 100372 : }
3182 : #endif // HAVE_SSE2
3183 :
3184 : /************************************************************************/
3185 : /* GDALFastCopy() */
3186 : /************************************************************************/
3187 :
3188 : template <class T>
3189 39773800 : static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3190 : const T *CPL_RESTRICT pSrc, int nSrcStride,
3191 : GPtrDiff_t nIters)
3192 : {
3193 39773800 : constexpr int sizeofT = static_cast<int>(sizeof(T));
3194 39773800 : if (nIters == 1)
3195 : {
3196 22288380 : *pDest = *pSrc;
3197 : }
3198 17485412 : else if (nDestStride == sizeofT)
3199 : {
3200 12233224 : if (nSrcStride == sizeofT)
3201 : {
3202 11502469 : memcpy(pDest, pSrc, nIters * sizeof(T));
3203 : }
3204 730703 : else if (nSrcStride == 2 * sizeofT)
3205 : {
3206 306426 : GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3207 : }
3208 424277 : else if (nSrcStride == 3 * sizeofT)
3209 : {
3210 289484 : GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3211 : }
3212 134793 : else if (nSrcStride == 4 * sizeofT)
3213 : {
3214 129076 : GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3215 : }
3216 : else
3217 : {
3218 12969320 : while (nIters-- > 0)
3219 : {
3220 12963630 : *pDest = *pSrc;
3221 12963630 : pSrc += nSrcStride / sizeofT;
3222 12963630 : pDest++;
3223 : }
3224 : }
3225 : }
3226 5252258 : else if (nSrcStride == sizeofT)
3227 : {
3228 5237338 : if (nDestStride == 2 * sizeofT)
3229 : {
3230 125525 : GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3231 : }
3232 5111809 : else if (nDestStride == 3 * sizeofT)
3233 : {
3234 4409379 : GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3235 : }
3236 702427 : else if (nDestStride == 4 * sizeofT)
3237 : {
3238 643004 : GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3239 : }
3240 : else
3241 : {
3242 12650500 : while (nIters-- > 0)
3243 : {
3244 12591080 : *pDest = *pSrc;
3245 12591080 : pSrc++;
3246 12591080 : pDest += nDestStride / sizeofT;
3247 : }
3248 : }
3249 : }
3250 : else
3251 : {
3252 1113808 : while (nIters-- > 0)
3253 : {
3254 1098888 : *pDest = *pSrc;
3255 1098888 : pSrc += nSrcStride / sizeofT;
3256 1098888 : pDest += nDestStride / sizeofT;
3257 : }
3258 : }
3259 39773800 : }
3260 :
3261 : /************************************************************************/
3262 : /* GDALFastCopyByte() */
3263 : /************************************************************************/
3264 :
3265 276307 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
3266 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3267 : int nDstPixelStride, GPtrDiff_t nWordCount)
3268 : {
3269 276307 : GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
3270 : nWordCount);
3271 276307 : }
3272 :
3273 : /************************************************************************/
3274 : /* GDALCopyWords() */
3275 : /************************************************************************/
3276 :
3277 : /**
3278 : * Copy pixel words from buffer to buffer.
3279 : *
3280 : * @see GDALCopyWords64()
3281 : */
3282 101097000 : void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
3283 : GDALDataType eSrcType, int nSrcPixelStride,
3284 : void *CPL_RESTRICT pDstData,
3285 : GDALDataType eDstType, int nDstPixelStride,
3286 : int nWordCount)
3287 : {
3288 101097000 : GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
3289 : nDstPixelStride, nWordCount);
3290 101096000 : }
3291 :
3292 : /************************************************************************/
3293 : /* GDALCopyWords64() */
3294 : /************************************************************************/
3295 :
3296 : /**
3297 : * Copy pixel words from buffer to buffer.
3298 : *
3299 : * This function is used to copy pixel word values from one memory buffer
3300 : * to another, with support for conversion between data types, and differing
3301 : * step factors. The data type conversion is done using the following
3302 : * rules:
3303 : * <ul>
3304 : * <li>Values assigned to a lower range integer type are clipped. For
3305 : * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3306 : * less the 0 to be set to 0, and values larger than 255 to be set to 255.
3307 : * </li>
3308 : * <li>
3309 : * Assignment from floating point to integer rounds to closest integer.
3310 : * +Infinity is mapped to the largest integer. -Infinity is mapped to the
3311 : * smallest integer. NaN is mapped to 0.
3312 : * </li>
3313 : * <li>
3314 : * Assignment from non-complex to complex will result in the imaginary part
3315 : * being set to zero on output.
3316 : * </li>
3317 : * <li> Assignment from complex to
3318 : * non-complex will result in the complex portion being lost and the real
3319 : * component being preserved (<i>not magnitude!</i>).
3320 : * </li>
3321 : * </ul>
3322 : *
3323 : * No assumptions are made about the source or destination words occurring
3324 : * on word boundaries. It is assumed that all values are in native machine
3325 : * byte order.
3326 : *
3327 : * @param pSrcData Pointer to source data to be converted.
3328 : * @param eSrcType the source data type (see GDALDataType enum)
3329 : * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3330 : * in bytes
3331 : * @param pDstData Pointer to buffer where destination data should go
3332 : * @param eDstType the destination data type (see GDALDataType enum)
3333 : * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3334 : * words), in bytes
3335 : * @param nWordCount number of words to be copied
3336 : *
3337 : * @note
3338 : * When adding a new data type to GDAL, you must do the following to
3339 : * support it properly within the GDALCopyWords function:
3340 : * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3341 : * This should invoke the appropriate GDALCopyWordsFromT wrapper.
3342 : * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3343 : * This should call the appropriate GDALCopyWordsT template.
3344 : * 3. If appropriate, overload the appropriate CopyWord template in the
3345 : * above namespace. This will ensure that any conversion issues are
3346 : * handled (cases like the float -> int32 case, where the min/max)
3347 : * values are subject to roundoff error.
3348 : */
3349 :
3350 107721000 : void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
3351 : GDALDataType eSrcType, int nSrcPixelStride,
3352 : void *CPL_RESTRICT pDstData,
3353 : GDALDataType eDstType, int nDstPixelStride,
3354 : GPtrDiff_t nWordCount)
3355 :
3356 : {
3357 : // On platforms where alignment matters, be careful
3358 107721000 : const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
3359 107710000 : const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
3360 107711000 : if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0))
3361 : {
3362 2 : CPLError(CE_Failure, CPLE_NotSupported,
3363 : "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
3364 : "argument");
3365 2 : return;
3366 : }
3367 107711000 : if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
3368 59699200 : ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
3369 59691100 : (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 ||
3370 59689800 : (nSrcPixelStride % nSrcDataTypeSize) != 0 ||
3371 59687900 : (nDstPixelStride % nDstDataTypeSize) != 0))
3372 : {
3373 905 : if (eSrcType == eDstType)
3374 : {
3375 34800 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3376 : {
3377 34000 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3378 : static_cast<const GByte *>(pSrcData) +
3379 34000 : nSrcPixelStride * i,
3380 : nDstDataTypeSize);
3381 : }
3382 : }
3383 : else
3384 : {
3385 210 : const auto getAlignedPtr = [](GByte *ptr, int align)
3386 : {
3387 : return ptr +
3388 210 : ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) %
3389 210 : align);
3390 : };
3391 :
3392 : // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3393 : // be sure to get correctly aligned pointer.
3394 105 : constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
3395 : GByte abySrcBuffer[2 * SIZEOF_CFLOAT64];
3396 : GByte abyDstBuffer[2 * SIZEOF_CFLOAT64];
3397 : GByte *pabySrcBuffer =
3398 105 : getAlignedPtr(abySrcBuffer, nSrcDataTypeSize);
3399 : GByte *pabyDstBuffer =
3400 105 : getAlignedPtr(abyDstBuffer, nDstDataTypeSize);
3401 3360 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3402 : {
3403 3255 : memcpy(pabySrcBuffer,
3404 : static_cast<const GByte *>(pSrcData) +
3405 3255 : nSrcPixelStride * i,
3406 : nSrcDataTypeSize);
3407 3255 : GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer,
3408 : eDstType, 0, 1);
3409 3255 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3410 : pabyDstBuffer, nDstDataTypeSize);
3411 : }
3412 : }
3413 905 : return;
3414 : }
3415 :
3416 : // Deal with the case where we're replicating a single word into the
3417 : // provided buffer
3418 107711000 : if (nSrcPixelStride == 0 && nWordCount > 1)
3419 : {
3420 592282 : GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
3421 : nDstPixelStride, nWordCount);
3422 592282 : return;
3423 : }
3424 :
3425 107118000 : if (eSrcType == eDstType)
3426 : {
3427 53966300 : if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
3428 : {
3429 18533000 : GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride,
3430 : static_cast<const GByte *>(pSrcData), nSrcPixelStride,
3431 : nWordCount);
3432 18533500 : return;
3433 : }
3434 :
3435 35433300 : if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 &&
3436 20964600 : (nDstPixelStride % 2) == 0)
3437 : {
3438 20964600 : GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
3439 : static_cast<const short *>(pSrcData), nSrcPixelStride,
3440 : nWordCount);
3441 20964100 : return;
3442 : }
3443 :
3444 14468700 : if (nWordCount == 1)
3445 : {
3446 : #if defined(CSA_BUILD) || defined(__COVERITY__)
3447 : // Avoid false positives...
3448 : memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3449 : #else
3450 14056100 : if (nSrcDataTypeSize == 2)
3451 0 : memcpy(pDstData, pSrcData, 2);
3452 14056100 : else if (nSrcDataTypeSize == 4)
3453 14014000 : memcpy(pDstData, pSrcData, 4);
3454 42152 : else if (nSrcDataTypeSize == 8)
3455 25636 : memcpy(pDstData, pSrcData, 8);
3456 : else /* if( eSrcType == GDT_CFloat64 ) */
3457 16516 : memcpy(pDstData, pSrcData, 16);
3458 : #endif
3459 14056100 : return;
3460 : }
3461 :
3462 : // Let memcpy() handle the case where we're copying a packed buffer
3463 : // of pixels.
3464 412638 : if (nSrcPixelStride == nDstPixelStride)
3465 : {
3466 259091 : if (nSrcPixelStride == nSrcDataTypeSize)
3467 : {
3468 256921 : memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
3469 256921 : return;
3470 : }
3471 : }
3472 : }
3473 :
3474 : // Handle the more general case -- deals with conversion of data types
3475 : // directly.
3476 53307600 : switch (eSrcType)
3477 : {
3478 14751600 : case GDT_Byte:
3479 14751600 : GDALCopyWordsFromT<unsigned char>(
3480 : static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
3481 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3482 14760800 : break;
3483 942 : case GDT_Int8:
3484 942 : GDALCopyWordsFromT<signed char>(
3485 : static_cast<const signed char *>(pSrcData), nSrcPixelStride,
3486 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3487 942 : break;
3488 52171 : case GDT_UInt16:
3489 52171 : GDALCopyWordsFromT<unsigned short>(
3490 : static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
3491 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3492 52171 : break;
3493 4541780 : case GDT_Int16:
3494 4541780 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3495 : nSrcPixelStride, false, pDstData,
3496 : eDstType, nDstPixelStride, nWordCount);
3497 4541800 : break;
3498 5813 : case GDT_UInt32:
3499 5813 : GDALCopyWordsFromT<unsigned int>(
3500 : static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
3501 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3502 5813 : break;
3503 12254600 : case GDT_Int32:
3504 12254600 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3505 : nSrcPixelStride, false, pDstData, eDstType,
3506 : nDstPixelStride, nWordCount);
3507 12254600 : break;
3508 496 : case GDT_UInt64:
3509 496 : GDALCopyWordsFromT<std::uint64_t>(
3510 : static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
3511 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3512 496 : break;
3513 7126 : case GDT_Int64:
3514 7126 : GDALCopyWordsFromT<std::int64_t>(
3515 : static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
3516 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3517 7126 : break;
3518 278504 : case GDT_Float32:
3519 278504 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3520 : nSrcPixelStride, false, pDstData,
3521 : eDstType, nDstPixelStride, nWordCount);
3522 278499 : break;
3523 20677600 : case GDT_Float64:
3524 20677600 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3525 : nSrcPixelStride, false, pDstData,
3526 : eDstType, nDstPixelStride, nWordCount);
3527 20677700 : break;
3528 566927 : case GDT_CInt16:
3529 566927 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3530 : nSrcPixelStride, true, pDstData, eDstType,
3531 : nDstPixelStride, nWordCount);
3532 566927 : break;
3533 383 : case GDT_CInt32:
3534 383 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3535 : nSrcPixelStride, true, pDstData, eDstType,
3536 : nDstPixelStride, nWordCount);
3537 383 : break;
3538 1323 : case GDT_CFloat32:
3539 1323 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3540 : nSrcPixelStride, true, pDstData, eDstType,
3541 : nDstPixelStride, nWordCount);
3542 1323 : break;
3543 172460 : case GDT_CFloat64:
3544 172460 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3545 : nSrcPixelStride, true, pDstData,
3546 : eDstType, nDstPixelStride, nWordCount);
3547 172460 : break;
3548 0 : case GDT_Unknown:
3549 : case GDT_TypeCount:
3550 0 : CPLAssert(false);
3551 : }
3552 : }
3553 :
3554 : /************************************************************************/
3555 : /* GDALCopyBits() */
3556 : /************************************************************************/
3557 :
3558 : /**
3559 : * Bitwise word copying.
3560 : *
3561 : * A function for moving sets of partial bytes around. Loosely
3562 : * speaking this is a bitwise analog to GDALCopyWords().
3563 : *
3564 : * It copies nStepCount "words" where each word is nBitCount bits long.
3565 : * The nSrcStep and nDstStep are the number of bits from the start of one
3566 : * word to the next (same as nBitCount if they are packed). The nSrcOffset
3567 : * and nDstOffset are the offset into the source and destination buffers
3568 : * to start at, also measured in bits.
3569 : *
3570 : * All bit offsets are assumed to start from the high order bit in a byte
3571 : * (i.e. most significant bit first). Currently this function is not very
3572 : * optimized, but it may be improved for some common cases in the future
3573 : * as needed.
3574 : *
3575 : * @param pabySrcData the source data buffer.
3576 : * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3577 : * first word to copy.
3578 : * @param nSrcStep the offset in bits from the start one source word to the
3579 : * start of the next.
3580 : * @param pabyDstData the destination data buffer.
3581 : * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3582 : * first word to copy over.
3583 : * @param nDstStep the offset in bits from the start one word to the
3584 : * start of the next.
3585 : * @param nBitCount the number of bits in a word to be copied.
3586 : * @param nStepCount the number of words to copy.
3587 : */
3588 :
3589 0 : void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3590 : GByte *pabyDstData, int nDstOffset, int nDstStep,
3591 : int nBitCount, int nStepCount)
3592 :
3593 : {
3594 0 : VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
3595 :
3596 0 : for (int iStep = 0; iStep < nStepCount; iStep++)
3597 : {
3598 0 : for (int iBit = 0; iBit < nBitCount; iBit++)
3599 : {
3600 0 : if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
3601 0 : pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
3602 : else
3603 0 : pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
3604 :
3605 0 : nSrcOffset++;
3606 0 : nDstOffset++;
3607 : }
3608 :
3609 0 : nSrcOffset += (nSrcStep - nBitCount);
3610 0 : nDstOffset += (nDstStep - nBitCount);
3611 : }
3612 : }
3613 :
3614 : /************************************************************************/
3615 : /* GDALGetBestOverviewLevel() */
3616 : /* */
3617 : /* Returns the best overview level to satisfy the query or -1 if none */
3618 : /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */
3619 : /* returning a valid overview level */
3620 : /************************************************************************/
3621 :
3622 0 : int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
3623 : int &nXSize, int &nYSize, int nBufXSize,
3624 : int nBufYSize)
3625 : {
3626 0 : return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
3627 0 : nBufXSize, nBufYSize, nullptr);
3628 : }
3629 :
3630 322801 : int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
3631 : int &nYOff, int &nXSize, int &nYSize,
3632 : int nBufXSize, int nBufYSize,
3633 : GDALRasterIOExtraArg *psExtraArg)
3634 : {
3635 : /* -------------------------------------------------------------------- */
3636 : /* Compute the desired downsampling factor. It is */
3637 : /* based on the least reduced axis, and represents the number */
3638 : /* of source pixels to one destination pixel. */
3639 : /* -------------------------------------------------------------------- */
3640 322801 : const double dfDesiredDownsamplingFactor =
3641 322801 : ((nXSize / static_cast<double>(nBufXSize)) <
3642 160465 : (nYSize / static_cast<double>(nBufYSize)) ||
3643 : nBufYSize == 1)
3644 354177 : ? nXSize / static_cast<double>(nBufXSize)
3645 129089 : : nYSize / static_cast<double>(nBufYSize);
3646 :
3647 : /* -------------------------------------------------------------------- */
3648 : /* Find the overview level that largest downsampling factor (most */
3649 : /* downsampled) that is still less than (or only a little more) */
3650 : /* downsampled than the request. */
3651 : /* -------------------------------------------------------------------- */
3652 322801 : const int nOverviewCount = poBand->GetOverviewCount();
3653 322801 : GDALRasterBand *poBestOverview = nullptr;
3654 322801 : double dfBestDownsamplingFactor = 0;
3655 322801 : int nBestOverviewLevel = -1;
3656 :
3657 : const char *pszOversampligThreshold =
3658 322801 : CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
3659 :
3660 : // Note: keep this logic for overview selection in sync between
3661 : // gdalwarp_lib.cpp and rasterio.cpp
3662 : // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
3663 : const double dfOversamplingThreshold =
3664 645593 : pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
3665 322792 : : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
3666 645584 : ? 1.0
3667 322801 : : 1.2;
3668 325491 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3669 : {
3670 5526 : GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
3671 11052 : if (poOverview == nullptr ||
3672 11051 : poOverview->GetXSize() > poBand->GetXSize() ||
3673 5525 : poOverview->GetYSize() > poBand->GetYSize())
3674 : {
3675 1 : continue;
3676 : }
3677 :
3678 : // Compute downsampling factor of this overview
3679 : const double dfDownsamplingFactor = std::min(
3680 5525 : poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
3681 11050 : poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
3682 :
3683 : // Is it nearly the requested factor and better (lower) than
3684 : // the current best factor?
3685 : // Use an epsilon because of numerical instability.
3686 5525 : constexpr double EPSILON = 1e-1;
3687 5633 : if (dfDownsamplingFactor >=
3688 5525 : dfDesiredDownsamplingFactor * dfOversamplingThreshold +
3689 5417 : EPSILON ||
3690 : dfDownsamplingFactor <= dfBestDownsamplingFactor)
3691 : {
3692 108 : continue;
3693 : }
3694 :
3695 : // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3696 5417 : const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
3697 :
3698 5417 : if (pszResampling != nullptr &&
3699 69 : STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
3700 16 : continue;
3701 :
3702 : // OK, this is our new best overview.
3703 5401 : poBestOverview = poOverview;
3704 5401 : nBestOverviewLevel = iOverview;
3705 5401 : dfBestDownsamplingFactor = dfDownsamplingFactor;
3706 :
3707 5401 : if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
3708 : EPSILON)
3709 : {
3710 2836 : break;
3711 : }
3712 : }
3713 :
3714 : /* -------------------------------------------------------------------- */
3715 : /* If we didn't find an overview that helps us, just return */
3716 : /* indicating failure and the full resolution image will be used. */
3717 : /* -------------------------------------------------------------------- */
3718 322801 : if (nBestOverviewLevel < 0)
3719 319898 : return -1;
3720 :
3721 : /* -------------------------------------------------------------------- */
3722 : /* Recompute the source window in terms of the selected */
3723 : /* overview. */
3724 : /* -------------------------------------------------------------------- */
3725 : const double dfXFactor =
3726 2903 : poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
3727 : const double dfYFactor =
3728 2903 : poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
3729 2903 : CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
3730 : poBestOverview->GetYSize());
3731 :
3732 8709 : const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
3733 2903 : static_cast<int>(nXOff / dfXFactor + 0.5));
3734 8709 : const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
3735 2903 : static_cast<int>(nYOff / dfYFactor + 0.5));
3736 2903 : int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
3737 2903 : int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
3738 2903 : if (nOXOff + nOXSize > poBestOverview->GetXSize())
3739 0 : nOXSize = poBestOverview->GetXSize() - nOXOff;
3740 2903 : if (nOYOff + nOYSize > poBestOverview->GetYSize())
3741 2 : nOYSize = poBestOverview->GetYSize() - nOYOff;
3742 :
3743 2903 : if (psExtraArg)
3744 : {
3745 2903 : if (psExtraArg->bFloatingPointWindowValidity)
3746 : {
3747 45 : psExtraArg->dfXOff /= dfXFactor;
3748 45 : psExtraArg->dfXSize /= dfXFactor;
3749 45 : psExtraArg->dfYOff /= dfYFactor;
3750 45 : psExtraArg->dfYSize /= dfYFactor;
3751 : }
3752 2858 : else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
3753 : {
3754 16 : psExtraArg->bFloatingPointWindowValidity = true;
3755 16 : psExtraArg->dfXOff = nXOff / dfXFactor;
3756 16 : psExtraArg->dfXSize = nXSize / dfXFactor;
3757 16 : psExtraArg->dfYOff = nYOff / dfYFactor;
3758 16 : psExtraArg->dfYSize = nYSize / dfYFactor;
3759 : }
3760 : }
3761 :
3762 2903 : nXOff = nOXOff;
3763 2903 : nYOff = nOYOff;
3764 2903 : nXSize = nOXSize;
3765 2903 : nYSize = nOYSize;
3766 :
3767 2903 : return nBestOverviewLevel;
3768 : }
3769 :
3770 : /************************************************************************/
3771 : /* OverviewRasterIO() */
3772 : /* */
3773 : /* Special work function to utilize available overviews to */
3774 : /* more efficiently satisfy downsampled requests. It will */
3775 : /* return CE_Failure if there are no appropriate overviews */
3776 : /* available but it doesn't emit any error messages. */
3777 : /************************************************************************/
3778 :
3779 : //! @cond Doxygen_Suppress
3780 2 : CPLErr GDALRasterBand::OverviewRasterIO(
3781 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3782 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3783 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
3784 :
3785 : {
3786 : GDALRasterIOExtraArg sExtraArg;
3787 2 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3788 :
3789 2 : const int nOverview = GDALBandGetBestOverviewLevel2(
3790 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg);
3791 2 : if (nOverview < 0)
3792 1 : return CE_Failure;
3793 :
3794 : /* -------------------------------------------------------------------- */
3795 : /* Recast the call in terms of the new raster layer. */
3796 : /* -------------------------------------------------------------------- */
3797 1 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
3798 1 : if (poOverviewBand == nullptr)
3799 0 : return CE_Failure;
3800 :
3801 1 : return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
3802 : pData, nBufXSize, nBufYSize, eBufType,
3803 1 : nPixelSpace, nLineSpace, &sExtraArg);
3804 : }
3805 :
3806 : /************************************************************************/
3807 : /* TryOverviewRasterIO() */
3808 : /************************************************************************/
3809 :
3810 161938 : CPLErr GDALRasterBand::TryOverviewRasterIO(
3811 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3812 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3813 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
3814 : int *pbTried)
3815 : {
3816 161938 : int nXOffMod = nXOff;
3817 161938 : int nYOffMod = nYOff;
3818 161938 : int nXSizeMod = nXSize;
3819 161938 : int nYSizeMod = nYSize;
3820 : GDALRasterIOExtraArg sExtraArg;
3821 :
3822 161938 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3823 :
3824 161938 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3825 : this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize,
3826 : &sExtraArg);
3827 :
3828 161938 : if (iOvrLevel >= 0)
3829 : {
3830 49 : GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel);
3831 49 : if (poOverviewBand)
3832 : {
3833 49 : *pbTried = TRUE;
3834 49 : return poOverviewBand->RasterIO(
3835 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData,
3836 : nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
3837 49 : &sExtraArg);
3838 : }
3839 : }
3840 :
3841 161889 : *pbTried = FALSE;
3842 161889 : return CE_None;
3843 : }
3844 :
3845 : /************************************************************************/
3846 : /* TryOverviewRasterIO() */
3847 : /************************************************************************/
3848 :
3849 158022 : CPLErr GDALDataset::TryOverviewRasterIO(
3850 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3851 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3852 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
3853 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
3854 : int *pbTried)
3855 : {
3856 158022 : int nXOffMod = nXOff;
3857 158022 : int nYOffMod = nYOff;
3858 158022 : int nXSizeMod = nXSize;
3859 158022 : int nYSizeMod = nYSize;
3860 : GDALRasterIOExtraArg sExtraArg;
3861 158022 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3862 :
3863 316044 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3864 158022 : papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
3865 : nBufYSize, &sExtraArg);
3866 :
3867 158058 : if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr &&
3868 36 : papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr)
3869 : {
3870 36 : *pbTried = TRUE;
3871 36 : return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO(
3872 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize,
3873 : nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace,
3874 36 : nLineSpace, nBandSpace, &sExtraArg);
3875 : }
3876 : else
3877 : {
3878 157986 : *pbTried = FALSE;
3879 157986 : return CE_None;
3880 : }
3881 : }
3882 :
3883 : /************************************************************************/
3884 : /* GetBestOverviewLevel() */
3885 : /* */
3886 : /* Returns the best overview level to satisfy the query or -1 if none */
3887 : /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */
3888 : /* overview level */
3889 : /************************************************************************/
3890 :
3891 4 : static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
3892 : int &nYOff, int &nXSize, int &nYSize,
3893 : int nBufXSize, int nBufYSize,
3894 : int nBandCount,
3895 : const int *panBandMap,
3896 : GDALRasterIOExtraArg *psExtraArg)
3897 : {
3898 4 : int nOverviewCount = 0;
3899 4 : GDALRasterBand *poFirstBand = nullptr;
3900 :
3901 : /* -------------------------------------------------------------------- */
3902 : /* Check that all bands have the same number of overviews and */
3903 : /* that they have all the same size and block dimensions */
3904 : /* -------------------------------------------------------------------- */
3905 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
3906 : {
3907 8 : GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
3908 8 : if (poBand == nullptr)
3909 0 : return -1;
3910 8 : if (iBand == 0)
3911 : {
3912 4 : poFirstBand = poBand;
3913 4 : nOverviewCount = poBand->GetOverviewCount();
3914 : }
3915 4 : else if (nOverviewCount != poBand->GetOverviewCount())
3916 : {
3917 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3918 : "mismatched overview count, use std method.");
3919 0 : return -1;
3920 : }
3921 : else
3922 : {
3923 4 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3924 : {
3925 0 : GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
3926 : GDALRasterBand *poOvrFirstBand =
3927 0 : poFirstBand->GetOverview(iOverview);
3928 0 : if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
3929 0 : continue;
3930 :
3931 0 : if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3932 0 : poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
3933 : {
3934 0 : CPLDebug("GDAL",
3935 : "GDALDataset::GetBestOverviewLevel() ... "
3936 : "mismatched overview sizes, use std method.");
3937 0 : return -1;
3938 : }
3939 0 : int nBlockXSizeFirst = 0;
3940 0 : int nBlockYSizeFirst = 0;
3941 0 : poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
3942 : &nBlockYSizeFirst);
3943 :
3944 0 : int nBlockXSizeCurrent = 0;
3945 0 : int nBlockYSizeCurrent = 0;
3946 0 : poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
3947 : &nBlockYSizeCurrent);
3948 :
3949 0 : if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3950 0 : nBlockYSizeFirst != nBlockYSizeCurrent)
3951 : {
3952 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3953 : "mismatched block sizes, use std method.");
3954 0 : return -1;
3955 : }
3956 : }
3957 : }
3958 : }
3959 4 : if (poFirstBand == nullptr)
3960 0 : return -1;
3961 :
3962 4 : return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
3963 : nYSize, nBufXSize, nBufYSize,
3964 4 : psExtraArg);
3965 : }
3966 :
3967 : /************************************************************************/
3968 : /* BlockBasedRasterIO() */
3969 : /* */
3970 : /* This convenience function implements a dataset level */
3971 : /* RasterIO() interface based on calling down to fetch blocks, */
3972 : /* much like the GDALRasterBand::IRasterIO(), but it handles */
3973 : /* all bands at once, so that a format driver that handles a */
3974 : /* request for different bands of the same block efficiently */
3975 : /* (i.e. without re-reading interleaved data) will efficiently. */
3976 : /* */
3977 : /* This method is intended to be called by an overridden */
3978 : /* IRasterIO() method in the driver specific GDALDataset */
3979 : /* derived class. */
3980 : /* */
3981 : /* Default internal implementation of RasterIO() ... utilizes */
3982 : /* the Block access methods to satisfy the request. This would */
3983 : /* normally only be overridden by formats with overviews. */
3984 : /* */
3985 : /* To keep things relatively simple, this method does not */
3986 : /* currently take advantage of some special cases addressed in */
3987 : /* GDALRasterBand::IRasterIO(), so it is likely best to only */
3988 : /* call it when you know it will help. That is in cases where */
3989 : /* data is at 1:1 to the buffer, and you know the driver is */
3990 : /* implementing interleaved IO efficiently on a block by block */
3991 : /* basis. Overviews will be used when possible. */
3992 : /************************************************************************/
3993 :
3994 63528 : CPLErr GDALDataset::BlockBasedRasterIO(
3995 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3996 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3997 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
3998 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
3999 :
4000 : {
4001 63528 : CPLAssert(nullptr != pData);
4002 :
4003 63528 : GByte **papabySrcBlock = nullptr;
4004 63528 : GDALRasterBlock *poBlock = nullptr;
4005 63528 : GDALRasterBlock **papoBlocks = nullptr;
4006 63528 : int nLBlockX = -1;
4007 63528 : int nLBlockY = -1;
4008 : int iBufYOff;
4009 : int iBufXOff;
4010 63528 : int nBlockXSize = 1;
4011 63528 : int nBlockYSize = 1;
4012 63528 : CPLErr eErr = CE_None;
4013 63528 : GDALDataType eDataType = GDT_Byte;
4014 :
4015 63528 : const bool bUseIntegerRequestCoords =
4016 63961 : (!psExtraArg->bFloatingPointWindowValidity ||
4017 433 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
4018 431 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
4019 :
4020 : /* -------------------------------------------------------------------- */
4021 : /* Ensure that all bands share a common block size and data type. */
4022 : /* -------------------------------------------------------------------- */
4023 300722 : for (int iBand = 0; iBand < nBandCount; iBand++)
4024 : {
4025 237194 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4026 :
4027 237192 : if (iBand == 0)
4028 : {
4029 63528 : poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4030 63528 : eDataType = poBand->GetRasterDataType();
4031 : }
4032 : else
4033 : {
4034 173664 : int nThisBlockXSize = 0;
4035 173664 : int nThisBlockYSize = 0;
4036 173664 : poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
4037 173667 : if (nThisBlockXSize != nBlockXSize ||
4038 173667 : nThisBlockYSize != nBlockYSize)
4039 : {
4040 1 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4041 : "mismatched block sizes, use std method.");
4042 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4043 : pData, nBufXSize, nBufYSize, eBufType,
4044 : nBandCount, panBandMap, nPixelSpace,
4045 0 : nLineSpace, nBandSpace, psExtraArg);
4046 : }
4047 :
4048 173666 : if (eDataType != poBand->GetRasterDataType() &&
4049 0 : (nXSize != nBufXSize || nYSize != nBufYSize))
4050 : {
4051 0 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4052 : "mismatched band data types, use std method.");
4053 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4054 : pData, nBufXSize, nBufYSize, eBufType,
4055 : nBandCount, panBandMap, nPixelSpace,
4056 0 : nLineSpace, nBandSpace, psExtraArg);
4057 : }
4058 : }
4059 : }
4060 :
4061 : /* ==================================================================== */
4062 : /* In this special case at full resolution we step through in */
4063 : /* blocks, turning the request over to the per-band */
4064 : /* IRasterIO(), but ensuring that all bands of one block are */
4065 : /* called before proceeding to the next. */
4066 : /* ==================================================================== */
4067 :
4068 63528 : if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
4069 : {
4070 : GDALRasterIOExtraArg sDummyExtraArg;
4071 63524 : INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4072 :
4073 63524 : int nChunkYSize = 0;
4074 63524 : int nChunkXSize = 0;
4075 :
4076 219117 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
4077 : {
4078 156620 : const int nChunkYOff = iBufYOff + nYOff;
4079 156620 : nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4080 156620 : if (nChunkYOff + nChunkYSize > nYOff + nYSize)
4081 58858 : nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4082 :
4083 837083 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
4084 : {
4085 681480 : const int nChunkXOff = iBufXOff + nXOff;
4086 681480 : nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4087 681480 : if (nChunkXOff + nChunkXSize > nXOff + nXSize)
4088 74871 : nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4089 :
4090 681480 : GByte *pabyChunkData =
4091 681480 : static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
4092 681480 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4093 :
4094 3311550 : for (int iBand = 0; iBand < nBandCount; iBand++)
4095 : {
4096 2631090 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4097 :
4098 5262150 : eErr = poBand->IRasterIO(
4099 : eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
4100 : nChunkYSize,
4101 2631060 : pabyChunkData +
4102 2631060 : static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4103 : nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
4104 2631060 : nLineSpace, &sDummyExtraArg);
4105 2631080 : if (eErr != CE_None)
4106 1013 : return eErr;
4107 : }
4108 : }
4109 :
4110 175839 : if (psExtraArg->pfnProgress != nullptr &&
4111 20236 : !psExtraArg->pfnProgress(
4112 175839 : 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
4113 : nBufYSize,
4114 : "", psExtraArg->pProgressData))
4115 : {
4116 8 : return CE_Failure;
4117 : }
4118 : }
4119 :
4120 62497 : return CE_None;
4121 : }
4122 :
4123 : /* Below code is not compatible with that case. It would need a complete */
4124 : /* separate code like done in GDALRasterBand::IRasterIO. */
4125 4 : if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4126 : {
4127 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4128 : nBufXSize, nBufYSize, eBufType, nBandCount,
4129 : panBandMap, nPixelSpace, nLineSpace,
4130 0 : nBandSpace, psExtraArg);
4131 : }
4132 :
4133 : /* We could have a smarter implementation, but that will do for now */
4134 4 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4135 0 : (nBufXSize != nXSize || nBufYSize != nYSize))
4136 : {
4137 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4138 : nBufXSize, nBufYSize, eBufType, nBandCount,
4139 : panBandMap, nPixelSpace, nLineSpace,
4140 0 : nBandSpace, psExtraArg);
4141 : }
4142 :
4143 : /* ==================================================================== */
4144 : /* Loop reading required source blocks to satisfy output */
4145 : /* request. This is the most general implementation. */
4146 : /* ==================================================================== */
4147 :
4148 4 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
4149 :
4150 : papabySrcBlock =
4151 4 : static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
4152 : papoBlocks =
4153 4 : static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));
4154 :
4155 : /* -------------------------------------------------------------------- */
4156 : /* Select an overview level if appropriate. */
4157 : /* -------------------------------------------------------------------- */
4158 :
4159 : GDALRasterIOExtraArg sExtraArg;
4160 4 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4161 4 : const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
4162 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
4163 : panBandMap, &sExtraArg);
4164 4 : if (nOverviewLevel >= 0)
4165 : {
4166 2 : GetRasterBand(panBandMap[0])
4167 2 : ->GetOverview(nOverviewLevel)
4168 2 : ->GetBlockSize(&nBlockXSize, &nBlockYSize);
4169 : }
4170 :
4171 4 : double dfXOff = nXOff;
4172 4 : double dfYOff = nYOff;
4173 4 : double dfXSize = nXSize;
4174 4 : double dfYSize = nYSize;
4175 4 : if (sExtraArg.bFloatingPointWindowValidity)
4176 : {
4177 2 : dfXOff = sExtraArg.dfXOff;
4178 2 : dfYOff = sExtraArg.dfYOff;
4179 2 : dfXSize = sExtraArg.dfXSize;
4180 2 : dfYSize = sExtraArg.dfYSize;
4181 : }
4182 :
4183 : /* -------------------------------------------------------------------- */
4184 : /* Compute stepping increment. */
4185 : /* -------------------------------------------------------------------- */
4186 4 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
4187 4 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
4188 :
4189 4 : constexpr double EPS = 1e-10;
4190 : /* -------------------------------------------------------------------- */
4191 : /* Loop over buffer computing source locations. */
4192 : /* -------------------------------------------------------------------- */
4193 36 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
4194 : {
4195 : GPtrDiff_t iSrcOffset;
4196 :
4197 : // Add small epsilon to avoid some numeric precision issues.
4198 32 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4199 32 : const int iSrcY = static_cast<int>(std::min(
4200 32 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
4201 :
4202 32 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
4203 : static_cast<GPtrDiff_t>(nLineSpace);
4204 :
4205 302 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
4206 : {
4207 270 : const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4208 270 : const int iSrcX = static_cast<int>(std::min(
4209 270 : std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1)));
4210 :
4211 : // FIXME: this code likely doesn't work if the dirty block gets
4212 : // flushed to disk before being completely written. In the meantime,
4213 : // bJustInitialize should probably be set to FALSE even if it is not
4214 : // ideal performance wise, and for lossy compression
4215 :
4216 : /* --------------------------------------------------------------------
4217 : */
4218 : /* Ensure we have the appropriate block loaded. */
4219 : /* --------------------------------------------------------------------
4220 : */
4221 270 : if (iSrcX < nLBlockX * nBlockXSize ||
4222 270 : iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
4223 266 : iSrcY < nLBlockY * nBlockYSize ||
4224 266 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
4225 : {
4226 4 : nLBlockX = iSrcX / nBlockXSize;
4227 4 : nLBlockY = iSrcY / nBlockYSize;
4228 :
4229 4 : const bool bJustInitialize =
4230 0 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
4231 0 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
4232 4 : nXOff <= nLBlockX * nBlockXSize &&
4233 0 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4234 : /*bool bMemZeroBuffer = FALSE;
4235 : if( eRWFlag == GF_Write && !bJustInitialize &&
4236 : nXOff <= nLBlockX * nBlockXSize &&
4237 : nYOff <= nLBlockY * nBlockYSize &&
4238 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4239 : (nXOff + nXSize == GetRasterXSize() &&
4240 : (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4241 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4242 : (nYOff + nYSize == GetRasterYSize() &&
4243 : (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4244 : {
4245 : bJustInitialize = TRUE;
4246 : bMemZeroBuffer = TRUE;
4247 : }*/
4248 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4249 : {
4250 8 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4251 8 : if (nOverviewLevel >= 0)
4252 2 : poBand = poBand->GetOverview(nOverviewLevel);
4253 16 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
4254 8 : bJustInitialize);
4255 8 : if (poBlock == nullptr)
4256 : {
4257 0 : eErr = CE_Failure;
4258 0 : goto CleanupAndReturn;
4259 : }
4260 :
4261 8 : if (eRWFlag == GF_Write)
4262 0 : poBlock->MarkDirty();
4263 :
4264 8 : if (papoBlocks[iBand] != nullptr)
4265 0 : papoBlocks[iBand]->DropLock();
4266 :
4267 8 : papoBlocks[iBand] = poBlock;
4268 :
4269 8 : papabySrcBlock[iBand] =
4270 8 : static_cast<GByte *>(poBlock->GetDataRef());
4271 : /*if( bMemZeroBuffer )
4272 : {
4273 : memset(papabySrcBlock[iBand], 0,
4274 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
4275 : * nBlockYSize);
4276 : }*/
4277 : }
4278 : }
4279 :
4280 : /* --------------------------------------------------------------------
4281 : */
4282 : /* Copy over this pixel of data. */
4283 : /* --------------------------------------------------------------------
4284 : */
4285 270 : iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
4286 270 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
4287 270 : (static_cast<GPtrDiff_t>(iSrcY) -
4288 270 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
4289 270 : nBlockXSize) *
4290 270 : nBandDataSize;
4291 :
4292 980 : for (int iBand = 0; iBand < nBandCount; iBand++)
4293 : {
4294 710 : GByte *pabySrcBlock = papabySrcBlock[iBand];
4295 710 : GPtrDiff_t iBandBufOffset =
4296 710 : iBufOffset + static_cast<GPtrDiff_t>(iBand) *
4297 : static_cast<GPtrDiff_t>(nBandSpace);
4298 :
4299 710 : if (eDataType == eBufType)
4300 : {
4301 710 : if (eRWFlag == GF_Read)
4302 710 : memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
4303 710 : pabySrcBlock + iSrcOffset, nBandDataSize);
4304 : else
4305 0 : memcpy(pabySrcBlock + iSrcOffset,
4306 : static_cast<const GByte *>(pData) +
4307 0 : iBandBufOffset,
4308 : nBandDataSize);
4309 : }
4310 : else
4311 : {
4312 : /* type to type conversion ... ouch, this is expensive way
4313 : of handling single words */
4314 :
4315 0 : if (eRWFlag == GF_Read)
4316 0 : GDALCopyWords(pabySrcBlock + iSrcOffset, eDataType, 0,
4317 : static_cast<GByte *>(pData) +
4318 0 : iBandBufOffset,
4319 : eBufType, 0, 1);
4320 : else
4321 0 : GDALCopyWords(static_cast<const GByte *>(pData) +
4322 0 : iBandBufOffset,
4323 0 : eBufType, 0, pabySrcBlock + iSrcOffset,
4324 : eDataType, 0, 1);
4325 : }
4326 : }
4327 :
4328 270 : iBufOffset += static_cast<int>(nPixelSpace);
4329 : }
4330 : }
4331 :
4332 : /* -------------------------------------------------------------------- */
4333 : /* CleanupAndReturn. */
4334 : /* -------------------------------------------------------------------- */
4335 4 : CleanupAndReturn:
4336 4 : CPLFree(papabySrcBlock);
4337 4 : if (papoBlocks != nullptr)
4338 : {
4339 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4340 : {
4341 8 : if (papoBlocks[iBand] != nullptr)
4342 8 : papoBlocks[iBand]->DropLock();
4343 : }
4344 4 : CPLFree(papoBlocks);
4345 : }
4346 :
4347 4 : return eErr;
4348 : }
4349 :
4350 : //! @endcond
4351 :
4352 : /************************************************************************/
4353 : /* GDALCopyWholeRasterGetSwathSize() */
4354 : /************************************************************************/
4355 :
4356 2825 : static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
4357 : GDALRasterBand *poDstPrototypeBand,
4358 : int nBandCount,
4359 : int bDstIsCompressed,
4360 : int bInterleave, int *pnSwathCols,
4361 : int *pnSwathLines)
4362 : {
4363 2825 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4364 2825 : int nSrcBlockXSize = 0;
4365 2825 : int nSrcBlockYSize = 0;
4366 2825 : int nBlockXSize = 0;
4367 2825 : int nBlockYSize = 0;
4368 :
4369 2825 : int nXSize = poSrcPrototypeBand->GetXSize();
4370 2825 : int nYSize = poSrcPrototypeBand->GetYSize();
4371 :
4372 2825 : poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
4373 2825 : poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4374 :
4375 2825 : const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4376 2825 : const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4377 :
4378 2825 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4379 2825 : if (bInterleave)
4380 1314 : nPixelSize *= nBandCount;
4381 :
4382 : // aim for one row of blocks. Do not settle for less.
4383 2825 : int nSwathCols = nXSize;
4384 2825 : int nSwathLines = nMaxBlockYSize;
4385 :
4386 : const char *pszSrcCompression =
4387 2825 : poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4388 2825 : if (pszSrcCompression == nullptr)
4389 : {
4390 2799 : auto poSrcDS = poSrcPrototypeBand->GetDataset();
4391 2799 : if (poSrcDS)
4392 : pszSrcCompression =
4393 2793 : poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4394 : }
4395 :
4396 : /* -------------------------------------------------------------------- */
4397 : /* What will our swath size be? */
4398 : /* -------------------------------------------------------------------- */
4399 : // When writing interleaved data in a compressed format, we want to be sure
4400 : // that each block will only be written once, so the swath size must not be
4401 : // greater than the block cache.
4402 2825 : const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4403 : int nTargetSwathSize;
4404 2825 : if (pszSwathSize != nullptr)
4405 0 : nTargetSwathSize = static_cast<int>(
4406 0 : std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4407 : else
4408 : {
4409 : // As a default, take one 1/4 of the cache size.
4410 2825 : nTargetSwathSize = static_cast<int>(
4411 2825 : std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4412 :
4413 : // but if the minimum idal swath buf size is less, then go for it to
4414 : // avoid unnecessarily abusing RAM usage.
4415 : // but try to use 10 MB at least.
4416 2825 : GIntBig nIdealSwathBufSize =
4417 2825 : static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4418 2825 : int nMinTargetSwathSize = 10 * 1000 * 1000;
4419 :
4420 2825 : if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
4421 2825 : GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
4422 : {
4423 2 : nMinTargetSwathSize = nTargetSwathSize;
4424 : }
4425 :
4426 2825 : if (nIdealSwathBufSize < nTargetSwathSize &&
4427 2815 : nIdealSwathBufSize < nMinTargetSwathSize)
4428 : {
4429 2812 : nIdealSwathBufSize = nMinTargetSwathSize;
4430 : }
4431 :
4432 2825 : if (pszSrcCompression != nullptr &&
4433 156 : EQUAL(pszSrcCompression, "JPEG2000") &&
4434 0 : (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4435 0 : (nSrcBlockYSize % nBlockYSize) == 0)))
4436 : {
4437 2 : nIdealSwathBufSize =
4438 4 : std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4439 2 : nSrcBlockYSize * nPixelSize);
4440 : }
4441 2825 : if (nTargetSwathSize > nIdealSwathBufSize)
4442 2811 : nTargetSwathSize = static_cast<int>(
4443 2811 : std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4444 : }
4445 :
4446 2825 : if (nTargetSwathSize < 1000000)
4447 8 : nTargetSwathSize = 1000000;
4448 :
4449 : /* But let's check that */
4450 3033 : if (bDstIsCompressed && bInterleave &&
4451 208 : nTargetSwathSize > GDALGetCacheMax64())
4452 : {
4453 0 : CPLError(CE_Warning, CPLE_AppDefined,
4454 : "When translating into a compressed interleave format, "
4455 : "the block cache size (" CPL_FRMT_GIB ") "
4456 : "should be at least the size of the swath (%d) "
4457 : "(GDAL_SWATH_SIZE config. option)",
4458 : GDALGetCacheMax64(), nTargetSwathSize);
4459 : }
4460 :
4461 : #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4462 : #define ROUND_TO(x, y) (((x) / (y)) * (y))
4463 :
4464 : // if both input and output datasets are tiled, that the tile dimensions
4465 : // are "compatible", try to stick to a swath dimension that is a multiple
4466 : // of input and output block dimensions.
4467 2825 : if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4468 34 : IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4469 34 : IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4470 34 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4471 34 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4472 : {
4473 34 : if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
4474 34 : nPixelSize <=
4475 34 : static_cast<GIntBig>(nTargetSwathSize))
4476 : {
4477 34 : nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4478 34 : nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4479 34 : if (nSwathCols == 0)
4480 0 : nSwathCols = nMaxBlockXSize;
4481 34 : if (nSwathCols > nXSize)
4482 32 : nSwathCols = nXSize;
4483 34 : nSwathLines = nMaxBlockYSize;
4484 :
4485 34 : if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4486 34 : static_cast<GIntBig>(nTargetSwathSize))
4487 : {
4488 0 : nSwathCols = nXSize;
4489 0 : nSwathLines = nBlockYSize;
4490 : }
4491 : }
4492 : }
4493 :
4494 2825 : const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4495 2825 : const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4496 2825 : if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
4497 : {
4498 1 : nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4499 1 : if (nSwathLines == 0)
4500 1 : nSwathLines = 1;
4501 :
4502 1 : CPLDebug(
4503 : "GDAL",
4504 : "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4505 : "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4506 : "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4507 1 : nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
4508 : }
4509 : // If we are processing single scans, try to handle several at once.
4510 : // If we are handling swaths already, only grow the swath if a row
4511 : // of blocks is substantially less than our target buffer size.
4512 2824 : else if (nSwathLines == 1 ||
4513 2324 : nMemoryPerCol * nSwathLines <
4514 2324 : static_cast<GIntBig>(nTargetSwathSize) / 10)
4515 : {
4516 2797 : nSwathLines = std::min(
4517 : nYSize,
4518 2797 : std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4519 :
4520 : /* If possible try to align to source and target block height */
4521 2797 : if ((nSwathLines % nMaxBlockYSize) != 0 &&
4522 958 : nSwathLines > nMaxBlockYSize &&
4523 958 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4524 930 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4525 148 : nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4526 : }
4527 :
4528 2825 : if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4529 0 : (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4530 0 : IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
4531 : {
4532 : // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4533 2 : if (nSwathLines < nSrcBlockYSize)
4534 : {
4535 0 : nSwathLines = nSrcBlockYSize;
4536 :
4537 : // Number of pixels that can be read/write simultaneously.
4538 0 : nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4539 0 : nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4540 0 : if (nSwathCols == 0)
4541 0 : nSwathCols = nSrcBlockXSize;
4542 0 : if (nSwathCols > nXSize)
4543 0 : nSwathCols = nXSize;
4544 :
4545 0 : CPLDebug(
4546 : "GDAL",
4547 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4548 : "too high block, "
4549 : "use partial width at one time");
4550 : }
4551 2 : else if ((nSwathLines % nSrcBlockYSize) != 0)
4552 : {
4553 : /* Round on a multiple of nSrcBlockYSize */
4554 0 : nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4555 0 : CPLDebug(
4556 : "GDAL",
4557 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4558 : "round nSwathLines to block height : %d",
4559 : nSwathLines);
4560 : }
4561 : }
4562 2823 : else if (bDstIsCompressed)
4563 : {
4564 365 : if (nSwathLines < nBlockYSize)
4565 : {
4566 142 : nSwathLines = nBlockYSize;
4567 :
4568 : // Number of pixels that can be read/write simultaneously.
4569 142 : nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4570 142 : nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4571 142 : if (nSwathCols == 0)
4572 0 : nSwathCols = nBlockXSize;
4573 142 : if (nSwathCols > nXSize)
4574 142 : nSwathCols = nXSize;
4575 :
4576 142 : CPLDebug(
4577 : "GDAL",
4578 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4579 : "too high block, "
4580 : "use partial width at one time");
4581 : }
4582 223 : else if ((nSwathLines % nBlockYSize) != 0)
4583 : {
4584 : // Round on a multiple of nBlockYSize.
4585 9 : nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4586 9 : CPLDebug(
4587 : "GDAL",
4588 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4589 : "round nSwathLines to block height : %d",
4590 : nSwathLines);
4591 : }
4592 : }
4593 :
4594 2825 : *pnSwathCols = nSwathCols;
4595 2825 : *pnSwathLines = nSwathLines;
4596 2825 : }
4597 :
4598 : /************************************************************************/
4599 : /* GDALDatasetCopyWholeRaster() */
4600 : /************************************************************************/
4601 :
4602 : /**
4603 : * \brief Copy all dataset raster data.
4604 : *
4605 : * This function copies the complete raster contents of one dataset to
4606 : * another similarly configured dataset. The source and destination
4607 : * dataset must have the same number of bands, and the same width
4608 : * and height. The bands do not have to have the same data type.
4609 : *
4610 : * This function is primarily intended to support implementation of
4611 : * driver specific CreateCopy() functions. It implements efficient copying,
4612 : * in particular "chunking" the copy in substantial blocks and, if appropriate,
4613 : * performing the transfer in a pixel interleaved fashion.
4614 : *
4615 : * Currently the only papszOptions value supported are :
4616 : * <ul>
4617 : * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
4618 : * write access pattern (this does not modify the layout of the destination
4619 : * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
4620 : * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
4621 : * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
4622 : * (GDAL >= 2.2)</li>
4623 : * </ul>
4624 : * More options may be supported in the future.
4625 : *
4626 : * @param hSrcDS the source dataset
4627 : * @param hDstDS the destination dataset
4628 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4629 : * @param pfnProgress progress reporting function.
4630 : * @param pProgressData callback data for progress function.
4631 : *
4632 : * @return CE_None on success, or CE_Failure on failure.
4633 : */
4634 :
4635 2797 : CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
4636 : GDALDatasetH hDstDS,
4637 : CSLConstList papszOptions,
4638 : GDALProgressFunc pfnProgress,
4639 : void *pProgressData)
4640 :
4641 : {
4642 2797 : VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4643 2797 : VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4644 :
4645 2797 : GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4646 2797 : GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4647 :
4648 2797 : if (pfnProgress == nullptr)
4649 3 : pfnProgress = GDALDummyProgress;
4650 :
4651 : /* -------------------------------------------------------------------- */
4652 : /* Confirm the datasets match in size and band counts. */
4653 : /* -------------------------------------------------------------------- */
4654 2797 : const int nXSize = poDstDS->GetRasterXSize();
4655 2797 : const int nYSize = poDstDS->GetRasterYSize();
4656 2797 : const int nBandCount = poDstDS->GetRasterCount();
4657 :
4658 2797 : if (poSrcDS->GetRasterXSize() != nXSize ||
4659 5594 : poSrcDS->GetRasterYSize() != nYSize ||
4660 2797 : poSrcDS->GetRasterCount() != nBandCount)
4661 : {
4662 0 : CPLError(CE_Failure, CPLE_AppDefined,
4663 : "Input and output dataset sizes or band counts do not\n"
4664 : "match in GDALDatasetCopyWholeRaster()");
4665 0 : return CE_Failure;
4666 : }
4667 :
4668 : /* -------------------------------------------------------------------- */
4669 : /* Report preliminary (0) progress. */
4670 : /* -------------------------------------------------------------------- */
4671 2797 : if (!pfnProgress(0.0, nullptr, pProgressData))
4672 : {
4673 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4674 : "User terminated CreateCopy()");
4675 1 : return CE_Failure;
4676 : }
4677 :
4678 : /* -------------------------------------------------------------------- */
4679 : /* Get our prototype band, and assume the others are similarly */
4680 : /* configured. */
4681 : /* -------------------------------------------------------------------- */
4682 2796 : if (nBandCount == 0)
4683 0 : return CE_None;
4684 :
4685 2796 : GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4686 2796 : GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4687 2796 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4688 :
4689 : /* -------------------------------------------------------------------- */
4690 : /* Do we want to try and do the operation in a pixel */
4691 : /* interleaved fashion? */
4692 : /* -------------------------------------------------------------------- */
4693 2796 : bool bInterleave = false;
4694 : const char *pszInterleave =
4695 2796 : poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4696 2796 : if (pszInterleave != nullptr &&
4697 1063 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4698 151 : bInterleave = true;
4699 :
4700 2796 : pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4701 2796 : if (pszInterleave != nullptr &&
4702 2022 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4703 1285 : bInterleave = true;
4704 :
4705 2796 : pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
4706 2796 : if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
4707 5 : bInterleave = true;
4708 2791 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
4709 7 : bInterleave = false;
4710 : // attributes is specific to the TileDB driver
4711 2784 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
4712 4 : bInterleave = true;
4713 2780 : else if (pszInterleave != nullptr)
4714 : {
4715 0 : CPLError(CE_Warning, CPLE_NotSupported,
4716 : "Unsupported value for option INTERLEAVE");
4717 : }
4718 :
4719 : // If the destination is compressed, we must try to write blocks just once,
4720 : // to save disk space (GTiff case for example), and to avoid data loss
4721 : // (JPEG compression for example).
4722 2796 : bool bDstIsCompressed = false;
4723 : const char *pszDstCompressed =
4724 2796 : CSLFetchNameValue(papszOptions, "COMPRESSED");
4725 2796 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4726 348 : bDstIsCompressed = true;
4727 :
4728 : /* -------------------------------------------------------------------- */
4729 : /* What will our swath size be? */
4730 : /* -------------------------------------------------------------------- */
4731 :
4732 2796 : int nSwathCols = 0;
4733 2796 : int nSwathLines = 0;
4734 2796 : GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
4735 : nBandCount, bDstIsCompressed, bInterleave,
4736 : &nSwathCols, &nSwathLines);
4737 :
4738 2796 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4739 2796 : if (bInterleave)
4740 1314 : nPixelSize *= nBandCount;
4741 :
4742 2796 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
4743 2796 : if (pSwathBuf == nullptr)
4744 : {
4745 0 : return CE_Failure;
4746 : }
4747 :
4748 2796 : CPLDebug("GDAL",
4749 : "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4750 : nSwathCols, nSwathLines, static_cast<int>(bInterleave));
4751 :
4752 : // Advise the source raster that we are going to read it completely
4753 : // Note: this might already have been done by GDALCreateCopy() in the
4754 : // likely case this function is indirectly called by it
4755 2796 : poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
4756 2796 : nullptr, nullptr);
4757 :
4758 : /* ==================================================================== */
4759 : /* Band oriented (uninterleaved) case. */
4760 : /* ==================================================================== */
4761 2796 : CPLErr eErr = CE_None;
4762 : const bool bCheckHoles =
4763 2796 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
4764 :
4765 2796 : if (!bInterleave)
4766 : {
4767 : GDALRasterIOExtraArg sExtraArg;
4768 1482 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4769 1482 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4770 :
4771 4446 : const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
4772 1482 : DIV_ROUND_UP(nYSize, nSwathLines) *
4773 1482 : DIV_ROUND_UP(nXSize, nSwathCols);
4774 1482 : GIntBig nBlocksDone = 0;
4775 :
4776 3668 : for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
4777 : {
4778 2186 : int nBand = iBand + 1;
4779 :
4780 4527 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4781 : {
4782 2341 : int nThisLines = nSwathLines;
4783 :
4784 2341 : if (iY + nThisLines > nYSize)
4785 271 : nThisLines = nYSize - iY;
4786 :
4787 4682 : for (int iX = 0; iX < nXSize && eErr == CE_None;
4788 2341 : iX += nSwathCols)
4789 : {
4790 2341 : int nThisCols = nSwathCols;
4791 :
4792 2341 : if (iX + nThisCols > nXSize)
4793 0 : nThisCols = nXSize - iX;
4794 :
4795 2341 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4796 2341 : if (bCheckHoles)
4797 : {
4798 : nStatus = poSrcDS->GetRasterBand(nBand)
4799 926 : ->GetDataCoverageStatus(
4800 : iX, iY, nThisCols, nThisLines,
4801 : GDAL_DATA_COVERAGE_STATUS_DATA);
4802 : }
4803 2341 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4804 : {
4805 2337 : sExtraArg.pfnProgress = GDALScaledProgress;
4806 4674 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4807 2337 : nBlocksDone / static_cast<double>(nTotalBlocks),
4808 2337 : (nBlocksDone + 0.5) /
4809 2337 : static_cast<double>(nTotalBlocks),
4810 : pfnProgress, pProgressData);
4811 2337 : if (sExtraArg.pProgressData == nullptr)
4812 1400 : sExtraArg.pfnProgress = nullptr;
4813 :
4814 2337 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4815 : nThisLines, pSwathBuf,
4816 : nThisCols, nThisLines, eDT, 1,
4817 : &nBand, 0, 0, 0, &sExtraArg);
4818 :
4819 2337 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4820 :
4821 2337 : if (eErr == CE_None)
4822 2333 : eErr = poDstDS->RasterIO(
4823 : GF_Write, iX, iY, nThisCols, nThisLines,
4824 : pSwathBuf, nThisCols, nThisLines, eDT, 1,
4825 : &nBand, 0, 0, 0, nullptr);
4826 : }
4827 :
4828 2341 : nBlocksDone++;
4829 4643 : if (eErr == CE_None &&
4830 2302 : !pfnProgress(nBlocksDone /
4831 2302 : static_cast<double>(nTotalBlocks),
4832 : nullptr, pProgressData))
4833 : {
4834 3 : eErr = CE_Failure;
4835 3 : CPLError(CE_Failure, CPLE_UserInterrupt,
4836 : "User terminated CreateCopy()");
4837 : }
4838 : }
4839 : }
4840 : }
4841 : }
4842 :
4843 : /* ==================================================================== */
4844 : /* Pixel interleaved case. */
4845 : /* ==================================================================== */
4846 : else /* if( bInterleave ) */
4847 : {
4848 : GDALRasterIOExtraArg sExtraArg;
4849 1314 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4850 1314 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4851 :
4852 1314 : const GIntBig nTotalBlocks =
4853 1314 : static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4854 1314 : DIV_ROUND_UP(nXSize, nSwathCols);
4855 1314 : GIntBig nBlocksDone = 0;
4856 :
4857 2836 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4858 : {
4859 1522 : int nThisLines = nSwathLines;
4860 :
4861 1522 : if (iY + nThisLines > nYSize)
4862 186 : nThisLines = nYSize - iY;
4863 :
4864 3049 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
4865 : {
4866 1527 : int nThisCols = nSwathCols;
4867 :
4868 1527 : if (iX + nThisCols > nXSize)
4869 3 : nThisCols = nXSize - iX;
4870 :
4871 1527 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4872 1527 : if (bCheckHoles)
4873 : {
4874 1336 : nStatus = 0;
4875 1389 : for (int iBand = 0; iBand < nBandCount; iBand++)
4876 : {
4877 1370 : nStatus |= poSrcDS->GetRasterBand(iBand + 1)
4878 1370 : ->GetDataCoverageStatus(
4879 : iX, iY, nThisCols, nThisLines,
4880 : GDAL_DATA_COVERAGE_STATUS_DATA);
4881 1370 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4882 1317 : break;
4883 : }
4884 : }
4885 1527 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4886 : {
4887 1508 : sExtraArg.pfnProgress = GDALScaledProgress;
4888 3016 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4889 1508 : nBlocksDone / static_cast<double>(nTotalBlocks),
4890 1508 : (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4891 : pfnProgress, pProgressData);
4892 1508 : if (sExtraArg.pProgressData == nullptr)
4893 288 : sExtraArg.pfnProgress = nullptr;
4894 :
4895 1508 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4896 : nThisLines, pSwathBuf, nThisCols,
4897 : nThisLines, eDT, nBandCount,
4898 : nullptr, 0, 0, 0, &sExtraArg);
4899 :
4900 1508 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4901 :
4902 1508 : if (eErr == CE_None)
4903 1507 : eErr = poDstDS->RasterIO(
4904 : GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
4905 : nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
4906 : 0, 0, nullptr);
4907 : }
4908 :
4909 1527 : nBlocksDone++;
4910 3050 : if (eErr == CE_None &&
4911 1523 : !pfnProgress(nBlocksDone /
4912 1523 : static_cast<double>(nTotalBlocks),
4913 : nullptr, pProgressData))
4914 : {
4915 1 : eErr = CE_Failure;
4916 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4917 : "User terminated CreateCopy()");
4918 : }
4919 : }
4920 : }
4921 : }
4922 :
4923 : /* -------------------------------------------------------------------- */
4924 : /* Cleanup */
4925 : /* -------------------------------------------------------------------- */
4926 2796 : CPLFree(pSwathBuf);
4927 :
4928 2796 : return eErr;
4929 : }
4930 :
4931 : /************************************************************************/
4932 : /* GDALRasterBandCopyWholeRaster() */
4933 : /************************************************************************/
4934 :
4935 : /**
4936 : * \brief Copy a whole raster band
4937 : *
4938 : * This function copies the complete raster contents of one band to
4939 : * another similarly configured band. The source and destination
4940 : * bands must have the same width and height. The bands do not have
4941 : * to have the same data type.
4942 : *
4943 : * It implements efficient copying, in particular "chunking" the copy in
4944 : * substantial blocks.
4945 : *
4946 : * Currently the only papszOptions value supported are :
4947 : * <ul>
4948 : * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4949 : * achieve best compression.</li>
4950 : * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4951 : * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li>
4952 : * </ul>
4953 : *
4954 : * @param hSrcBand the source band
4955 : * @param hDstBand the destination band
4956 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4957 : * @param pfnProgress progress reporting function.
4958 : * @param pProgressData callback data for progress function.
4959 : *
4960 : * @return CE_None on success, or CE_Failure on failure.
4961 : */
4962 :
4963 29 : CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
4964 : GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
4965 : const char *const *const papszOptions, GDALProgressFunc pfnProgress,
4966 : void *pProgressData)
4967 :
4968 : {
4969 29 : VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
4970 29 : VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
4971 :
4972 29 : GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
4973 29 : GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
4974 29 : CPLErr eErr = CE_None;
4975 :
4976 29 : if (pfnProgress == nullptr)
4977 11 : pfnProgress = GDALDummyProgress;
4978 :
4979 : /* -------------------------------------------------------------------- */
4980 : /* Confirm the datasets match in size and band counts. */
4981 : /* -------------------------------------------------------------------- */
4982 29 : int nXSize = poSrcBand->GetXSize();
4983 29 : int nYSize = poSrcBand->GetYSize();
4984 :
4985 29 : if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
4986 : {
4987 0 : CPLError(CE_Failure, CPLE_AppDefined,
4988 : "Input and output band sizes do not\n"
4989 : "match in GDALRasterBandCopyWholeRaster()");
4990 0 : return CE_Failure;
4991 : }
4992 :
4993 : /* -------------------------------------------------------------------- */
4994 : /* Report preliminary (0) progress. */
4995 : /* -------------------------------------------------------------------- */
4996 29 : if (!pfnProgress(0.0, nullptr, pProgressData))
4997 : {
4998 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
4999 : "User terminated CreateCopy()");
5000 0 : return CE_Failure;
5001 : }
5002 :
5003 29 : GDALDataType eDT = poDstBand->GetRasterDataType();
5004 :
5005 : // If the destination is compressed, we must try to write blocks just once,
5006 : // to save disk space (GTiff case for example), and to avoid data loss
5007 : // (JPEG compression for example).
5008 29 : bool bDstIsCompressed = false;
5009 : const char *pszDstCompressed =
5010 29 : CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
5011 29 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5012 17 : bDstIsCompressed = true;
5013 :
5014 : /* -------------------------------------------------------------------- */
5015 : /* What will our swath size be? */
5016 : /* -------------------------------------------------------------------- */
5017 :
5018 29 : int nSwathCols = 0;
5019 29 : int nSwathLines = 0;
5020 29 : GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
5021 : FALSE, &nSwathCols, &nSwathLines);
5022 :
5023 29 : const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5024 :
5025 29 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5026 29 : if (pSwathBuf == nullptr)
5027 : {
5028 0 : return CE_Failure;
5029 : }
5030 :
5031 29 : CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5032 : nSwathCols, nSwathLines);
5033 :
5034 : const bool bCheckHoles =
5035 29 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5036 :
5037 : // Advise the source raster that we are going to read it completely
5038 29 : poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
5039 :
5040 : /* ==================================================================== */
5041 : /* Band oriented (uninterleaved) case. */
5042 : /* ==================================================================== */
5043 :
5044 67 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5045 : {
5046 38 : int nThisLines = nSwathLines;
5047 :
5048 38 : if (iY + nThisLines > nYSize)
5049 3 : nThisLines = nYSize - iY;
5050 :
5051 76 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5052 : {
5053 38 : int nThisCols = nSwathCols;
5054 :
5055 38 : if (iX + nThisCols > nXSize)
5056 0 : nThisCols = nXSize - iX;
5057 :
5058 38 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5059 38 : if (bCheckHoles)
5060 : {
5061 0 : nStatus = poSrcBand->GetDataCoverageStatus(
5062 : iX, iY, nThisCols, nThisLines,
5063 : GDAL_DATA_COVERAGE_STATUS_DATA);
5064 : }
5065 38 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5066 : {
5067 38 : eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5068 : nThisLines, pSwathBuf, nThisCols,
5069 : nThisLines, eDT, 0, 0, nullptr);
5070 :
5071 38 : if (eErr == CE_None)
5072 38 : eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5073 : nThisLines, pSwathBuf, nThisCols,
5074 : nThisLines, eDT, 0, 0, nullptr);
5075 : }
5076 :
5077 76 : if (eErr == CE_None &&
5078 38 : !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
5079 : nullptr, pProgressData))
5080 : {
5081 0 : eErr = CE_Failure;
5082 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5083 : "User terminated CreateCopy()");
5084 : }
5085 : }
5086 : }
5087 :
5088 : /* -------------------------------------------------------------------- */
5089 : /* Cleanup */
5090 : /* -------------------------------------------------------------------- */
5091 29 : CPLFree(pSwathBuf);
5092 :
5093 29 : return eErr;
5094 : }
5095 :
5096 : /************************************************************************/
5097 : /* GDALCopyRasterIOExtraArg () */
5098 : /************************************************************************/
5099 :
5100 323291 : void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5101 : GDALRasterIOExtraArg *psSrcArg)
5102 : {
5103 323291 : INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5104 323291 : if (psSrcArg)
5105 : {
5106 323291 : psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5107 323291 : psDestArg->pfnProgress = psSrcArg->pfnProgress;
5108 323291 : psDestArg->pProgressData = psSrcArg->pProgressData;
5109 323291 : psDestArg->bFloatingPointWindowValidity =
5110 323291 : psSrcArg->bFloatingPointWindowValidity;
5111 323291 : if (psSrcArg->bFloatingPointWindowValidity)
5112 : {
5113 3108 : psDestArg->dfXOff = psSrcArg->dfXOff;
5114 3108 : psDestArg->dfYOff = psSrcArg->dfYOff;
5115 3108 : psDestArg->dfXSize = psSrcArg->dfXSize;
5116 3108 : psDestArg->dfYSize = psSrcArg->dfYSize;
5117 : }
5118 : }
5119 323291 : }
5120 :
5121 : /************************************************************************/
5122 : /* HasOnlyNoData() */
5123 : /************************************************************************/
5124 :
5125 4798052 : template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
5126 : {
5127 4798052 : return value == noDataValue;
5128 : }
5129 :
5130 560287 : template <> bool IsEqualToNoData<float>(float value, float noDataValue)
5131 : {
5132 560287 : return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
5133 : }
5134 :
5135 501120 : template <> bool IsEqualToNoData<double>(double value, double noDataValue)
5136 : {
5137 501120 : return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
5138 : }
5139 :
5140 : template <class T>
5141 2079 : static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
5142 : size_t nHeight, size_t nLineStride,
5143 : size_t nComponents)
5144 : {
5145 : // Fast test: check the 4 corners and the middle pixel.
5146 3419 : for (size_t iBand = 0; iBand < nComponents; iBand++)
5147 : {
5148 4895 : if (!(IsEqualToNoData(pBuffer[iBand], noDataValue) &&
5149 2362 : IsEqualToNoData(pBuffer[(nWidth - 1) * nComponents + iBand],
5150 2283 : noDataValue) &&
5151 2283 : IsEqualToNoData(
5152 2283 : pBuffer[((nHeight - 1) / 2 * nLineStride + (nWidth - 1) / 2) *
5153 2283 : nComponents +
5154 : iBand],
5155 1351 : noDataValue) &&
5156 1351 : IsEqualToNoData(
5157 1351 : pBuffer[(nHeight - 1) * nLineStride * nComponents + iBand],
5158 : noDataValue) &&
5159 1343 : IsEqualToNoData(
5160 1343 : pBuffer[((nHeight - 1) * nLineStride + nWidth - 1) *
5161 1343 : nComponents +
5162 : iBand],
5163 : noDataValue)))
5164 : {
5165 1193 : return false;
5166 : }
5167 : }
5168 :
5169 : // Test all pixels.
5170 17776 : for (size_t iY = 0; iY < nHeight; iY++)
5171 : {
5172 16914 : const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
5173 5866477 : for (size_t iX = 0; iX < nWidth * nComponents; iX++)
5174 : {
5175 5849583 : if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
5176 : {
5177 24 : return false;
5178 : }
5179 : }
5180 : }
5181 862 : return true;
5182 : }
5183 :
5184 : /************************************************************************/
5185 : /* GDALBufferHasOnlyNoData() */
5186 : /************************************************************************/
5187 :
5188 25359 : bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
5189 : size_t nWidth, size_t nHeight, size_t nLineStride,
5190 : size_t nComponents, int nBitsPerSample,
5191 : GDALBufferSampleFormat nSampleFormat)
5192 : {
5193 : // In the case where the nodata is 0, we can compare several bytes at
5194 : // once. Select the largest natural integer type for the architecture.
5195 : #if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5196 : // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5197 : typedef std::uint64_t WordType;
5198 : #else
5199 : typedef std::uint32_t WordType;
5200 : #endif
5201 25359 : if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
5202 : // Do not use this optimized code path for floating point numbers,
5203 : // as it can't detect negative zero.
5204 : nSampleFormat != GSF_FLOATING_POINT)
5205 : {
5206 23274 : const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
5207 23274 : const size_t nSize =
5208 23274 : (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
5209 23274 : size_t i = 0;
5210 : const size_t nInitialIters =
5211 46548 : std::min(sizeof(WordType) -
5212 23274 : static_cast<size_t>(
5213 : reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5214 : sizeof(WordType)),
5215 23274 : nSize);
5216 194907 : for (; i < nInitialIters; i++)
5217 : {
5218 174929 : if (pabyBuffer[i])
5219 3296 : return false;
5220 : }
5221 16064100 : for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
5222 : {
5223 16049600 : if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
5224 5446 : return false;
5225 : }
5226 51877 : for (; i < nSize; i++)
5227 : {
5228 37350 : if (pabyBuffer[i])
5229 5 : return false;
5230 : }
5231 14527 : return true;
5232 : }
5233 :
5234 2085 : if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
5235 : {
5236 2228 : return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
5237 1114 : HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
5238 1114 : static_cast<uint8_t>(dfNoDataValue), nWidth,
5239 1114 : nHeight, nLineStride, nComponents);
5240 : }
5241 971 : if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
5242 : {
5243 : // Use unsigned implementation by converting the nodatavalue to
5244 : // unsigned
5245 63 : return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
5246 31 : HasOnlyNoDataT(
5247 : static_cast<const uint8_t *>(pBuffer),
5248 31 : static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
5249 32 : nWidth, nHeight, nLineStride, nComponents);
5250 : }
5251 939 : if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
5252 : {
5253 21 : return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
5254 10 : HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
5255 10 : static_cast<uint16_t>(dfNoDataValue), nWidth,
5256 11 : nHeight, nLineStride, nComponents);
5257 : }
5258 928 : if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
5259 : {
5260 : // Use unsigned implementation by converting the nodatavalue to
5261 : // unsigned
5262 109 : return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
5263 54 : HasOnlyNoDataT(
5264 : static_cast<const uint16_t *>(pBuffer),
5265 54 : static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
5266 55 : nWidth, nHeight, nLineStride, nComponents);
5267 : }
5268 873 : if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
5269 : {
5270 73 : return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
5271 36 : HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
5272 : static_cast<uint32_t>(dfNoDataValue), nWidth,
5273 37 : nHeight, nLineStride, nComponents);
5274 : }
5275 836 : if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
5276 : {
5277 : // Use unsigned implementation by converting the nodatavalue to
5278 : // unsigned
5279 19 : return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
5280 9 : HasOnlyNoDataT(
5281 : static_cast<const uint32_t *>(pBuffer),
5282 9 : static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
5283 10 : nWidth, nHeight, nLineStride, nComponents);
5284 : }
5285 826 : if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
5286 : {
5287 56 : return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
5288 28 : HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
5289 : static_cast<uint64_t>(dfNoDataValue), nWidth,
5290 28 : nHeight, nLineStride, nComponents);
5291 : }
5292 798 : if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
5293 : {
5294 : // Use unsigned implementation by converting the nodatavalue to
5295 : // unsigned
5296 0 : return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
5297 0 : HasOnlyNoDataT(
5298 : static_cast<const uint64_t *>(pBuffer),
5299 0 : static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
5300 0 : nWidth, nHeight, nLineStride, nComponents);
5301 : }
5302 798 : if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
5303 : {
5304 1325 : return (std::isnan(dfNoDataValue) ||
5305 1354 : GDALIsValueInRange<float>(dfNoDataValue)) &&
5306 676 : HasOnlyNoDataT(static_cast<const float *>(pBuffer),
5307 : static_cast<float>(dfNoDataValue), nWidth,
5308 677 : nHeight, nLineStride, nComponents);
5309 : }
5310 121 : if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
5311 : {
5312 121 : return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
5313 : dfNoDataValue, nWidth, nHeight, nLineStride,
5314 121 : nComponents);
5315 : }
5316 0 : return false;
5317 : }
5318 :
5319 : #ifdef HAVE_SSE2
5320 :
5321 : /************************************************************************/
5322 : /* GDALDeinterleave3Byte() */
5323 : /************************************************************************/
5324 :
5325 : #if defined(__GNUC__) && !defined(__clang__)
5326 : __attribute__((optimize("no-tree-vectorize")))
5327 : #endif
5328 : static void
5329 69759 : GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5330 : GByte *CPL_RESTRICT pabyDest0,
5331 : GByte *CPL_RESTRICT pabyDest1,
5332 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5333 : #ifdef USE_NEON_OPTIMIZATIONS
5334 : {
5335 : return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5336 : nIters);
5337 : }
5338 : #else
5339 : {
5340 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5341 69759 : if (CPLHaveRuntimeSSSE3())
5342 : {
5343 69766 : return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5344 69767 : pabyDest2, nIters);
5345 : }
5346 : #endif
5347 :
5348 0 : size_t i = 0;
5349 0 : if (((reinterpret_cast<uintptr_t>(pabySrc) |
5350 0 : reinterpret_cast<uintptr_t>(pabyDest0) |
5351 0 : reinterpret_cast<uintptr_t>(pabyDest1) |
5352 0 : reinterpret_cast<uintptr_t>(pabyDest2)) %
5353 : sizeof(unsigned int)) == 0)
5354 : {
5355 : // Slightly better than GCC autovectorizer
5356 17 : for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
5357 : {
5358 15 : unsigned int word0 =
5359 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
5360 15 : unsigned int word1 =
5361 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
5362 15 : unsigned int word2 =
5363 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
5364 15 : reinterpret_cast<unsigned int *>(pabyDest0)[j] =
5365 15 : (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
5366 15 : ((word2 >> 8) << 24);
5367 15 : reinterpret_cast<unsigned int *>(pabyDest1)[j] =
5368 15 : ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
5369 15 : (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
5370 15 : pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
5371 15 : pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
5372 15 : pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
5373 15 : pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
5374 : }
5375 : }
5376 : #if defined(__clang__)
5377 : #pragma clang loop vectorize(disable)
5378 : #endif
5379 0 : for (; i < nIters; ++i)
5380 : {
5381 1 : pabyDest0[i] = pabySrc[3 * i + 0];
5382 1 : pabyDest1[i] = pabySrc[3 * i + 1];
5383 1 : pabyDest2[i] = pabySrc[3 * i + 2];
5384 : }
5385 : }
5386 : #endif
5387 :
5388 : /************************************************************************/
5389 : /* GDALDeinterleave4Byte() */
5390 : /************************************************************************/
5391 :
5392 : #if !defined(__GNUC__) || defined(__clang__)
5393 :
5394 : /************************************************************************/
5395 : /* deinterleave() */
5396 : /************************************************************************/
5397 :
5398 : template <bool SHIFT, bool MASK>
5399 : inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
5400 : __m128i &xmm2_ori, __m128i &xmm3_ori)
5401 : {
5402 : // Set higher 24bit of each int32 packed word to 0
5403 : if (SHIFT)
5404 : {
5405 : xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
5406 : xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
5407 : xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
5408 : xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
5409 : }
5410 : __m128i xmm0;
5411 : __m128i xmm1;
5412 : __m128i xmm2;
5413 : __m128i xmm3;
5414 : if (MASK)
5415 : {
5416 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
5417 : xmm0 = _mm_and_si128(xmm0_ori, xmm_mask);
5418 : xmm1 = _mm_and_si128(xmm1_ori, xmm_mask);
5419 : xmm2 = _mm_and_si128(xmm2_ori, xmm_mask);
5420 : xmm3 = _mm_and_si128(xmm3_ori, xmm_mask);
5421 : }
5422 : else
5423 : {
5424 : xmm0 = xmm0_ori;
5425 : xmm1 = xmm1_ori;
5426 : xmm2 = xmm2_ori;
5427 : xmm3 = xmm3_ori;
5428 : }
5429 : // Pack int32 to int16
5430 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
5431 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
5432 : // Pack int16 to uint8
5433 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
5434 : return xmm0;
5435 : }
5436 :
5437 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5438 : GByte *CPL_RESTRICT pabyDest0,
5439 : GByte *CPL_RESTRICT pabyDest1,
5440 : GByte *CPL_RESTRICT pabyDest2,
5441 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5442 : #ifdef USE_NEON_OPTIMIZATIONS
5443 : {
5444 : return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5445 : pabyDest3, nIters);
5446 : }
5447 : #else
5448 : {
5449 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5450 : if (CPLHaveRuntimeSSSE3())
5451 : {
5452 : return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5453 : pabyDest2, pabyDest3, nIters);
5454 : }
5455 : #endif
5456 :
5457 : // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
5458 : // do something slightly better.
5459 : size_t i = 0;
5460 : for (; i + 15 < nIters; i += 16)
5461 : {
5462 : __m128i xmm0_ori = _mm_loadu_si128(
5463 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
5464 : __m128i xmm1_ori = _mm_loadu_si128(
5465 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
5466 : __m128i xmm2_ori = _mm_loadu_si128(
5467 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
5468 : __m128i xmm3_ori = _mm_loadu_si128(
5469 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
5470 :
5471 : _mm_storeu_si128(
5472 : reinterpret_cast<__m128i *>(pabyDest0 + i),
5473 : deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5474 : _mm_storeu_si128(
5475 : reinterpret_cast<__m128i *>(pabyDest1 + i),
5476 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5477 : _mm_storeu_si128(
5478 : reinterpret_cast<__m128i *>(pabyDest2 + i),
5479 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5480 : _mm_storeu_si128(
5481 : reinterpret_cast<__m128i *>(pabyDest3 + i),
5482 : deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5483 : }
5484 :
5485 : #if defined(__clang__)
5486 : #pragma clang loop vectorize(disable)
5487 : #endif
5488 : for (; i < nIters; ++i)
5489 : {
5490 : pabyDest0[i] = pabySrc[4 * i + 0];
5491 : pabyDest1[i] = pabySrc[4 * i + 1];
5492 : pabyDest2[i] = pabySrc[4 * i + 2];
5493 : pabyDest3[i] = pabySrc[4 * i + 3];
5494 : }
5495 : }
5496 : #endif
5497 : #else
5498 : // GCC autovectorizer does an excellent job
5499 52670 : __attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
5500 : const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
5501 : GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
5502 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5503 : {
5504 525866000 : for (size_t i = 0; i < nIters; ++i)
5505 : {
5506 525813000 : pabyDest0[i] = pabySrc[4 * i + 0];
5507 525813000 : pabyDest1[i] = pabySrc[4 * i + 1];
5508 525813000 : pabyDest2[i] = pabySrc[4 * i + 2];
5509 525813000 : pabyDest3[i] = pabySrc[4 * i + 3];
5510 : }
5511 52670 : }
5512 : #endif
5513 :
5514 : #else
5515 :
5516 : /************************************************************************/
5517 : /* GDALDeinterleave3Byte() */
5518 : /************************************************************************/
5519 :
5520 : // TODO: Enabling below could help on non-Intel architectures where GCC knows
5521 : // how to auto-vectorize
5522 : // #if defined(__GNUC__)
5523 : //__attribute__((optimize("tree-vectorize")))
5524 : // #endif
5525 : static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5526 : GByte *CPL_RESTRICT pabyDest0,
5527 : GByte *CPL_RESTRICT pabyDest1,
5528 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5529 : {
5530 : for (size_t i = 0; i < nIters; ++i)
5531 : {
5532 : pabyDest0[i] = pabySrc[3 * i + 0];
5533 : pabyDest1[i] = pabySrc[3 * i + 1];
5534 : pabyDest2[i] = pabySrc[3 * i + 2];
5535 : }
5536 : }
5537 :
5538 : /************************************************************************/
5539 : /* GDALDeinterleave4Byte() */
5540 : /************************************************************************/
5541 :
5542 : // TODO: Enabling below could help on non-Intel architectures where gcc knows
5543 : // how to auto-vectorize
5544 : // #if defined(__GNUC__)
5545 : //__attribute__((optimize("tree-vectorize")))
5546 : // #endif
5547 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5548 : GByte *CPL_RESTRICT pabyDest0,
5549 : GByte *CPL_RESTRICT pabyDest1,
5550 : GByte *CPL_RESTRICT pabyDest2,
5551 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5552 : {
5553 : for (size_t i = 0; i < nIters; ++i)
5554 : {
5555 : pabyDest0[i] = pabySrc[4 * i + 0];
5556 : pabyDest1[i] = pabySrc[4 * i + 1];
5557 : pabyDest2[i] = pabySrc[4 * i + 2];
5558 : pabyDest3[i] = pabySrc[4 * i + 3];
5559 : }
5560 : }
5561 :
5562 : #endif
5563 :
5564 : /************************************************************************/
5565 : /* GDALDeinterleave() */
5566 : /************************************************************************/
5567 :
5568 : /*! Copy values from a pixel-interleave buffer to multiple per-component
5569 : buffers.
5570 :
5571 : In pseudo-code
5572 : \verbatim
5573 : for(size_t i = 0; i < nIters; ++i)
5574 : for(int iComp = 0; iComp < nComponents; iComp++ )
5575 : ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
5576 : \endverbatim
5577 :
5578 : The implementation is optimized for a few cases, like de-interleaving
5579 : of 3 or 4-components Byte buffers.
5580 :
5581 : \since GDAL 3.6
5582 : */
5583 123191 : void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
5584 : int nComponents, void **ppDestBuffer,
5585 : GDALDataType eDestDT, size_t nIters)
5586 : {
5587 123191 : if (eSourceDT == eDestDT)
5588 : {
5589 123168 : if (eSourceDT == GDT_Byte)
5590 : {
5591 122426 : if (nComponents == 3)
5592 : {
5593 69756 : const GByte *CPL_RESTRICT pabySrc =
5594 : static_cast<const GByte *>(pSourceBuffer);
5595 69756 : GByte *CPL_RESTRICT pabyDest0 =
5596 : static_cast<GByte *>(ppDestBuffer[0]);
5597 69756 : GByte *CPL_RESTRICT pabyDest1 =
5598 : static_cast<GByte *>(ppDestBuffer[1]);
5599 69756 : GByte *CPL_RESTRICT pabyDest2 =
5600 : static_cast<GByte *>(ppDestBuffer[2]);
5601 69756 : GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5602 : nIters);
5603 69770 : return;
5604 : }
5605 52670 : else if (nComponents == 4)
5606 : {
5607 52670 : const GByte *CPL_RESTRICT pabySrc =
5608 : static_cast<const GByte *>(pSourceBuffer);
5609 52670 : GByte *CPL_RESTRICT pabyDest0 =
5610 : static_cast<GByte *>(ppDestBuffer[0]);
5611 52670 : GByte *CPL_RESTRICT pabyDest1 =
5612 : static_cast<GByte *>(ppDestBuffer[1]);
5613 52670 : GByte *CPL_RESTRICT pabyDest2 =
5614 : static_cast<GByte *>(ppDestBuffer[2]);
5615 52670 : GByte *CPL_RESTRICT pabyDest3 =
5616 : static_cast<GByte *>(ppDestBuffer[3]);
5617 52670 : GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5618 : pabyDest3, nIters);
5619 52670 : return;
5620 : }
5621 : }
5622 : #if ((defined(__GNUC__) && !defined(__clang__)) || \
5623 : defined(__INTEL_CLANG_COMPILER)) && \
5624 : defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
5625 1475 : else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
5626 742 : CPLHaveRuntimeSSSE3())
5627 : {
5628 733 : if (nComponents == 3)
5629 : {
5630 239 : const GUInt16 *CPL_RESTRICT panSrc =
5631 : static_cast<const GUInt16 *>(pSourceBuffer);
5632 239 : GUInt16 *CPL_RESTRICT panDest0 =
5633 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5634 239 : GUInt16 *CPL_RESTRICT panDest1 =
5635 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5636 239 : GUInt16 *CPL_RESTRICT panDest2 =
5637 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5638 239 : GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
5639 : panDest2, nIters);
5640 239 : return;
5641 : }
5642 : #if !defined(__INTEL_CLANG_COMPILER)
5643 : // ICC autovectorizer doesn't do a good job, at least with icx
5644 : // 2022.1.0.20220316
5645 494 : else if (nComponents == 4)
5646 : {
5647 494 : const GUInt16 *CPL_RESTRICT panSrc =
5648 : static_cast<const GUInt16 *>(pSourceBuffer);
5649 494 : GUInt16 *CPL_RESTRICT panDest0 =
5650 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5651 494 : GUInt16 *CPL_RESTRICT panDest1 =
5652 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5653 494 : GUInt16 *CPL_RESTRICT panDest2 =
5654 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5655 494 : GUInt16 *CPL_RESTRICT panDest3 =
5656 : static_cast<GUInt16 *>(ppDestBuffer[3]);
5657 494 : GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
5658 : panDest2, panDest3, nIters);
5659 494 : return;
5660 : }
5661 : #endif
5662 : }
5663 : #endif
5664 : }
5665 :
5666 23 : const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
5667 22 : const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
5668 87 : for (int iComp = 0; iComp < nComponents; iComp++)
5669 : {
5670 65 : GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
5671 65 : iComp * nSourceDTSize,
5672 : eSourceDT, nComponents * nSourceDTSize,
5673 65 : ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
5674 : }
5675 : }
|