Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Contains default implementation of GDALRasterBand::IRasterIO()
5 : * and supporting functions of broader utility.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1998, Frank Warmerdam
10 : * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11 : *
12 : * SPDX-License-Identifier: MIT
13 : ****************************************************************************/
14 :
15 : #include "cpl_port.h"
16 : #include "gdal.h"
17 : #include "gdal_priv.h"
18 :
19 : #include <cassert>
20 : #include <climits>
21 : #include <cmath>
22 : #include <cstddef>
23 : #include <cstdio>
24 : #include <cstdlib>
25 : #include <cstring>
26 :
27 : #include <algorithm>
28 : #include <limits>
29 : #include <stdexcept>
30 : #include <type_traits>
31 :
32 : #include "cpl_conv.h"
33 : #include "cpl_cpu_features.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_progress.h"
37 : #include "cpl_string.h"
38 : #include "cpl_vsi.h"
39 : #include "gdal_priv_templates.hpp"
40 : #include "gdal_vrt.h"
41 : #include "gdalwarper.h"
42 : #include "memdataset.h"
43 : #include "vrtdataset.h"
44 :
45 : #if defined(__x86_64) || defined(_M_X64)
46 : #include <emmintrin.h>
47 : #define HAVE_SSE2
48 : #elif defined(USE_NEON_OPTIMIZATIONS)
49 : #include "include_sse2neon.h"
50 : #define HAVE_SSE2
51 : #endif
52 :
53 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
54 : #include "rasterio_ssse3.h"
55 : #ifdef __SSSE3__
56 : #include <tmmintrin.h>
57 : #endif
58 : #endif
59 :
// Forward declaration of a file-local (static) helper, needed here because
// DownsamplingIntegerXFactor() below calls it before its definition appears.
// The definition is not part of this excerpt. Judging from the parameter
// names and the call site, it presumably copies nWordCount single bytes from
// pSrcData (advancing nSrcPixelStride bytes per element) to pDstData
// (advancing nDstPixelStride bytes per element) -- confirm against the
// definition.
60 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
61 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
62 : int nDstPixelStride, GPtrDiff_t nWordCount);
63 :
64 : /************************************************************************/
65 : /* DownsamplingIntegerXFactor() */
66 : /************************************************************************/
67 :
// Fills one output line of nBufXSize pixels by picking every nSrcXInc-th
// source pixel, starting at column iSrcX, from the blocks of block-row
// nLBlockY of poBand.
//
// Template parameters:
//   bSameDataType   true when source and destination pixels have the same
//                   type; pixels are then copied verbatim (memcpy or
//                   GDALFastCopyByte). When false, GDALCopyWords64() converts
//                   from eDataType to eBufType.
//   DATA_TYPE_SIZE  pixel size in bytes; only used when bSameDataType is
//                   true (otherwise the size is queried from eDataType).
//
// Parameters:
//   poBand         band whose blocks are read.
//   iSrcX          source column of the first output pixel.
//   nSrcXInc       source columns to advance per output pixel.
//   iSrcOffsetCst  pixel offset, inside a block, added to the column offset
//                  (the visible caller passes the offset of the wanted row
//                  within the block).
//   pabyDstData    destination of the first output pixel.
//   nPixelSpace    byte stride between consecutive destination pixels.
//   nBufXSize      number of output pixels to produce.
//   eDataType      source pixel type (used when !bSameDataType).
//   eBufType       destination pixel type (used when !bSameDataType).
//   nStartBlockX   [in/out] first column covered by the block currently held
//                  in poBlock; updated whenever another block is loaded. The
//                  visible caller sets it to -nBlockXSize to force a reload.
//   nBlockXSize    block width in pixels.
//   poBlock        [in/out] currently locked block. When another block is
//                  needed the previous one (if any) has its lock dropped and
//                  the new one is fetched with GetLockedBlockRef(). The block
//                  left in poBlock on return is NOT unlocked here.
//   nLBlockY       block-row index passed to GetLockedBlockRef().
//
// Returns false if GetLockedBlockRef() returned nullptr (poBlock is then
// nullptr), true otherwise.
//
// Control flow note: the while loop below is entered by jumping straight into
// its body (labels reload_block / no_reload_block), both for the first pixel
// and, after the loop, for the last pixel.
68 : template <bool bSameDataType, int DATA_TYPE_SIZE>
69 413236 : static bool DownsamplingIntegerXFactor(
70 : GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
71 : GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
72 : GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
73 : int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
74 : {
75 413236 : const int nBandDataSize =
76 : bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
// Number of pixels still to be produced after the current one; the very last
// pixel is handled after the loop (see below).
77 413236 : int nOuterLoopIters = nBufXSize - 1;
// Byte distance between two consecutively sampled source pixels.
78 413236 : const int nIncSrcOffset = nSrcXInc * nBandDataSize;
// Always assigned at no_reload_block before its first use.
79 : const GByte *CPL_RESTRICT pabySrcData;
// One past the last column covered by the currently cached block.
80 413236 : int nEndBlockX = nBlockXSize + nStartBlockX;
81 :
// Enter the loop body directly: reuse the cached block when it already covers
// the first source column, otherwise load the right block first.
82 413236 : if (iSrcX < nEndBlockX)
83 : {
84 226134 : CPLAssert(poBlock);
85 226134 : goto no_reload_block;
86 : }
87 187102 : goto reload_block;
88 :
89 : // Don't do the last iteration in the loop, as iSrcX might go beyond
90 : // nRasterXSize - 1
91 932852 : while (--nOuterLoopIters >= 1)
92 : {
// On entry the pointers designate the last pixel copied; step to the next one.
93 189034 : iSrcX += nSrcXInc;
94 189034 : pabySrcData += nIncSrcOffset;
95 189034 : pabyDstData += nPixelSpace;
96 :
97 : /* --------------------------------------------------------------------
98 : */
99 : /* Ensure we have the appropriate block loaded. */
100 : /* --------------------------------------------------------------------
101 : */
102 189034 : if (iSrcX >= nEndBlockX)
103 : {
104 189034 : reload_block:
105 : {
106 388726 : const int nLBlockX = iSrcX / nBlockXSize;
107 388726 : nStartBlockX = nLBlockX * nBlockXSize;
108 388726 : nEndBlockX = nStartBlockX + nBlockXSize;
109 :
// Release the previously held block before requesting the next one.
110 388726 : if (poBlock != nullptr)
111 316739 : poBlock->DropLock();
112 :
113 388726 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
114 388726 : if (poBlock == nullptr)
115 : {
116 1 : return false;
117 : }
118 : }
119 :
// Recompute the source pointer from the block base: column offset inside the
// block plus the constant offset supplied by the caller, scaled to bytes.
120 388725 : no_reload_block:
121 : const GByte *pabySrcBlock =
122 932852 : static_cast<const GByte *>(poBlock->GetDataRef());
123 932852 : GPtrDiff_t iSrcOffset =
124 932852 : (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
125 932852 : pabySrcData = pabySrcBlock + iSrcOffset;
126 : }
127 :
128 : /* --------------------------------------------------------------------
129 : */
130 : /* Copy the maximum run of pixels. */
131 : /* --------------------------------------------------------------------
132 : */
133 :
// nIters = number of sampled pixels left in the current block (ceiling
// division), capped by the pixels the loop is still allowed to produce. It is
// 0 when this body is executed for the final pixel (nOuterLoopIters == 0); one
// pixel is still copied in that case.
134 932852 : const int nIters = std::min(
135 932852 : (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
136 : if (bSameDataType)
137 : {
// The current pixel is always copied, whatever the value of nIters.
138 932447 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
139 932447 : if (nIters > 1)
140 : {
141 : if (DATA_TYPE_SIZE == 1)
142 : {
// Byte case: copy the remaining nIters - 1 pixels with the strided byte
// helper. One stride is applied before the call and nIters - 2 after it, so
// both pointers end up on the last pixel copied.
143 276287 : pabySrcData += nIncSrcOffset;
144 276287 : pabyDstData += nPixelSpace;
145 276287 : GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
146 276287 : nPixelSpace, nIters - 1);
147 276287 : pabySrcData +=
148 276287 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
149 276287 : pabyDstData +=
150 276287 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
151 : }
152 : else
153 : {
154 4443828 : for (int i = 0; i < nIters - 1; i++)
155 : {
156 4245254 : pabySrcData += nIncSrcOffset;
157 4245254 : pabyDstData += nPixelSpace;
158 4245254 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
159 : }
160 : }
// Account for the nIters - 1 extra pixels copied in this pass.
161 474861 : iSrcX += nSrcXInc * (nIters - 1);
162 474861 : nOuterLoopIters -= nIters - 1;
163 : }
164 : }
165 : else
166 : {
167 : // Type to type conversion ...
// std::max(1, nIters) guarantees that one pixel is converted even when nIters
// is 0 (final pixel).
168 405 : GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
169 405 : eBufType, nPixelSpace, std::max(1, nIters));
170 405 : if (nIters > 1)
171 : {
// Leave the pointers and counters on the last pixel converted.
172 198 : pabySrcData +=
173 198 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
174 198 : pabyDstData +=
175 198 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
176 198 : iSrcX += nSrcXInc * (nIters - 1);
177 198 : nOuterLoopIters -= nIters - 1;
178 : }
179 : }
180 : }
181 :
182 : // Handle the last pixel separately so that iSrcX never goes beyond nRasterXSize - 1
// nOuterLoopIters is exactly 0 here when the final pixel is still pending:
// clamp its source column to the last raster column and jump back into the
// loop body. After that pass the loop condition leaves the counter negative,
// so this branch is not taken again and the function returns.
183 743818 : if (nOuterLoopIters == 0)
184 : {
185 330583 : const int nRasterXSize = poBand->GetXSize();
// The addition is done in 64 bits before clamping.
186 330583 : iSrcX =
187 661166 : static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
188 330583 : static_cast<GInt64>(nRasterXSize - 1)));
189 330583 : pabyDstData += nPixelSpace;
190 330583 : if (iSrcX < nEndBlockX)
191 : {
192 317993 : goto no_reload_block;
193 : }
194 12590 : goto reload_block;
195 : }
196 413235 : return true;
197 : }
198 :
199 : /************************************************************************/
200 : /* IRasterIO() */
201 : /* */
202 : /* Default internal implementation of RasterIO() ... utilizes */
203 : /* the Block access methods to satisfy the request. This would */
204 : /* normally only be overridden by formats with overviews. */
205 : /************************************************************************/
206 :
207 5718490 : CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
208 : int nXSize, int nYSize, void *pData,
209 : int nBufXSize, int nBufYSize,
210 : GDALDataType eBufType, GSpacing nPixelSpace,
211 : GSpacing nLineSpace,
212 : GDALRasterIOExtraArg *psExtraArg)
213 :
214 : {
215 5718490 : if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
216 : {
217 0 : CPLError(eFlushBlockErr, CPLE_AppDefined,
218 : "An error occurred while writing a dirty block "
219 : "from GDALRasterBand::IRasterIO");
220 0 : CPLErr eErr = eFlushBlockErr;
221 0 : eFlushBlockErr = CE_None;
222 0 : return eErr;
223 : }
224 5718490 : if (nBlockXSize <= 0 || nBlockYSize <= 0)
225 : {
226 2376 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
227 0 : return CE_Failure;
228 : }
229 :
230 5716120 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
231 5716410 : const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
232 5716300 : GByte dummyBlock[2] = {0, 0};
233 5716300 : GByte *pabySrcBlock =
234 : dummyBlock; /* to avoid Coverity warning about nullptr dereference */
235 5716300 : GDALRasterBlock *poBlock = nullptr;
236 5716300 : const bool bUseIntegerRequestCoords =
237 5756130 : (!psExtraArg->bFloatingPointWindowValidity ||
238 39825 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
239 16453 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
240 :
241 : /* ==================================================================== */
242 : /* A common case is the data requested with the destination */
243 : /* is packed, and the block width is the raster width. */
244 : /* ==================================================================== */
245 5634660 : if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
246 2954400 : nBlockXSize == GetXSize() && nBufXSize == nXSize &&
247 11351200 : nBufYSize == nYSize && bUseIntegerRequestCoords)
248 : {
249 2821460 : CPLErr eErr = CE_None;
250 2821460 : int nLBlockY = -1;
251 :
252 8267560 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
253 : {
254 5445050 : const int iSrcY = iBufYOff + nYOff;
255 :
256 5445050 : if (iSrcY < nLBlockY * nBlockYSize ||
257 5446220 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
258 : {
259 3073020 : nLBlockY = iSrcY / nBlockYSize;
260 3073020 : bool bJustInitialize =
261 100069 : eRWFlag == GF_Write && nXOff == 0 &&
262 3226900 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
263 53813 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
264 :
265 : // Is this a partial tile at right and/or bottom edges of
266 : // the raster, and that is going to be completely written?
267 : // If so, do not load it from storage, but zero it so that
268 : // the content outside of the validity area is initialized.
269 3073020 : bool bMemZeroBuffer = false;
270 100069 : if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
271 21969 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
272 3173180 : nYOff + nYSize == GetYSize() &&
273 89 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
274 : {
275 89 : bJustInitialize = true;
276 89 : bMemZeroBuffer = true;
277 : }
278 :
279 3073020 : if (poBlock)
280 252560 : poBlock->DropLock();
281 :
282 3073020 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
283 3073580 : poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
284 3076970 : if (poBlock == nullptr)
285 : {
286 1067 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
287 : nullptr)
288 : {
289 0 : CPLError(CE_Failure, CPLE_AppDefined,
290 : "GetBlockRef failed at X block offset %d, "
291 : "Y block offset %d%s",
292 : 0, nLBlockY,
293 0 : (nErrorCounter != CPLGetErrorCounter())
294 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
295 : : "");
296 : }
297 1067 : eErr = CE_Failure;
298 1067 : break;
299 : }
300 :
301 3075900 : if (eRWFlag == GF_Write)
302 100069 : poBlock->MarkDirty();
303 :
304 3075900 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
305 3075860 : if (bMemZeroBuffer)
306 : {
307 89 : memset(pabySrcBlock, 0,
308 89 : static_cast<GPtrDiff_t>(nBandDataSize) *
309 89 : nBlockXSize * nBlockYSize);
310 : }
311 : }
312 :
313 5447890 : const auto nSrcByteOffset =
314 5447890 : (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
315 5447890 : nBlockXSize +
316 5447890 : nXOff) *
317 5447890 : nBandDataSize;
318 :
319 5447890 : if (eDataType == eBufType)
320 : {
321 1792100 : if (eRWFlag == GF_Read)
322 1546070 : memcpy(static_cast<GByte *>(pData) +
323 1546070 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
324 1546070 : pabySrcBlock + nSrcByteOffset,
325 : static_cast<size_t>(nLineSpace));
326 : else
327 246031 : memcpy(pabySrcBlock + nSrcByteOffset,
328 246031 : static_cast<GByte *>(pData) +
329 246031 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
330 : static_cast<size_t>(nLineSpace));
331 : }
332 : else
333 : {
334 : // Type to type conversion.
335 3655790 : if (eRWFlag == GF_Read)
336 3634140 : GDALCopyWords64(
337 3634140 : pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
338 : static_cast<GByte *>(pData) +
339 3634140 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
340 : eBufType, static_cast<int>(nPixelSpace), nBufXSize);
341 : else
342 21645 : GDALCopyWords64(static_cast<GByte *>(pData) +
343 21645 : static_cast<GPtrDiff_t>(iBufYOff) *
344 : nLineSpace,
345 : eBufType, static_cast<int>(nPixelSpace),
346 21645 : pabySrcBlock + nSrcByteOffset, eDataType,
347 : nBandDataSize, nBufXSize);
348 : }
349 :
350 5506940 : if (psExtraArg->pfnProgress != nullptr &&
351 60834 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
352 : psExtraArg->pProgressData))
353 : {
354 5 : eErr = CE_Failure;
355 5 : break;
356 : }
357 : }
358 :
359 2823580 : if (poBlock)
360 2823250 : poBlock->DropLock();
361 :
362 2824210 : return eErr;
363 : }
364 :
365 : /* ==================================================================== */
366 : /* Do we have overviews that would be appropriate to satisfy */
367 : /* this request? */
368 : /* ==================================================================== */
369 2895080 : if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
370 : eRWFlag == GF_Read)
371 : {
372 : GDALRasterIOExtraArg sExtraArg;
373 2832 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
374 :
375 : const int nOverview =
376 2832 : GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
377 : nBufXSize, nBufYSize, &sExtraArg);
378 2832 : if (nOverview >= 0)
379 : {
380 2812 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
381 2812 : if (poOverviewBand == nullptr)
382 2812 : return CE_Failure;
383 :
384 2812 : return poOverviewBand->RasterIO(
385 : eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
386 2812 : nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
387 : }
388 : }
389 :
390 702836 : if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
391 0 : nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
392 3594380 : nLineSpace == nPixelSpace * nBufXSize &&
393 0 : CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
394 : {
395 0 : memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
396 0 : return CE_None;
397 : }
398 :
399 : /* ==================================================================== */
400 : /* The second case when we don't need subsample data but likely */
401 : /* need data type conversion. */
402 : /* ==================================================================== */
403 2891550 : if ( // nPixelSpace == nBufDataSize &&
404 2891550 : nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
405 : {
406 : #if DEBUG_VERBOSE
407 : printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
408 : nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
409 : #endif
410 :
411 : /* --------------------------------------------------------------------
412 : */
413 : /* Loop over buffer computing source locations. */
414 : /* --------------------------------------------------------------------
415 : */
416 : // Calculate starting values out of loop
417 2528660 : const int nLBlockXStart = nXOff / nBlockXSize;
418 2528660 : const int nXSpanEnd = nBufXSize + nXOff;
419 :
420 2528660 : int nYInc = 0;
421 5091740 : for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
422 2563080 : iBufYOff += nYInc, iSrcY += nYInc)
423 : {
424 2563110 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
425 : static_cast<GPtrDiff_t>(nLineSpace);
426 2563110 : int nLBlockY = iSrcY / nBlockYSize;
427 2563110 : int nLBlockX = nLBlockXStart;
428 2563110 : int iSrcX = nXOff;
429 5338090 : while (iSrcX < nXSpanEnd)
430 : {
431 2774990 : int nXSpan = nLBlockX * nBlockXSize;
432 2774990 : if (nXSpan < INT_MAX - nBlockXSize)
433 2775000 : nXSpan += nBlockXSize;
434 : else
435 0 : nXSpan = INT_MAX;
436 2774990 : const int nXRight = nXSpan;
437 2774990 : nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
438 2774990 : const size_t nXSpanSize =
439 2774990 : nXSpan * static_cast<size_t>(nPixelSpace);
440 :
441 2774990 : bool bJustInitialize =
442 2042280 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
443 37397 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
444 4842960 : nXOff <= nLBlockX * nBlockXSize &&
445 25691 : nXOff + nXSize >= nXRight;
446 :
447 : // Is this a partial tile at right and/or bottom edges of
448 : // the raster, and that is going to be completely written?
449 : // If so, do not load it from storage, but zero it so that
450 : // the content outside of the validity area is initialized.
451 2774990 : bool bMemZeroBuffer = false;
452 2042280 : if (eRWFlag == GF_Write && !bJustInitialize &&
453 2017830 : nXOff <= nLBlockX * nBlockXSize &&
454 2016210 : nYOff <= nLBlockY * nBlockYSize &&
455 12184 : (nXOff + nXSize >= nXRight ||
456 : // cppcheck-suppress knownConditionTrueFalse
457 4819990 : (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
458 12004 : (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
459 10765 : (nYOff + nYSize == GetYSize() &&
460 1978 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
461 : {
462 3217 : bJustInitialize = true;
463 3217 : bMemZeroBuffer = true;
464 : }
465 :
466 : /* --------------------------------------------------------------------
467 : */
468 : /* Ensure we have the appropriate block loaded. */
469 : /* --------------------------------------------------------------------
470 : */
471 2774990 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
472 2775050 : poBlock =
473 2774960 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
474 2775050 : if (!poBlock)
475 : {
476 74 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
477 : nullptr)
478 : {
479 0 : CPLError(CE_Failure, CPLE_AppDefined,
480 : "GetBlockRef failed at X block offset %d, "
481 : "Y block offset %d%s",
482 : nLBlockX, nLBlockY,
483 0 : (nErrorCounter != CPLGetErrorCounter())
484 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
485 : : "");
486 : }
487 74 : return (CE_Failure);
488 : }
489 :
490 2774970 : if (eRWFlag == GF_Write)
491 2042270 : poBlock->MarkDirty();
492 :
493 2774980 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
494 2774980 : if (bMemZeroBuffer)
495 : {
496 3217 : memset(pabySrcBlock, 0,
497 3217 : static_cast<GPtrDiff_t>(nBandDataSize) *
498 3217 : nBlockXSize * nBlockYSize);
499 : }
500 : /* --------------------------------------------------------------------
501 : */
502 : /* Copy over this chunk of data. */
503 : /* --------------------------------------------------------------------
504 : */
505 2774980 : GPtrDiff_t iSrcOffset =
506 2774980 : (static_cast<GPtrDiff_t>(iSrcX) -
507 2774980 : static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
508 2774980 : (static_cast<GPtrDiff_t>(iSrcY) -
509 2774980 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
510 2774980 : nBlockXSize) *
511 2774980 : nBandDataSize;
512 : // Fill up as many rows as possible for the loaded block.
513 5549970 : const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
514 2774980 : nBufYSize - iBufYOff);
515 58556300 : for (int k = 0; k < kmax; k++)
516 : {
517 55781400 : if (eDataType == eBufType && nPixelSpace == nBufDataSize)
518 : {
519 51864700 : if (eRWFlag == GF_Read)
520 47420900 : memcpy(static_cast<GByte *>(pData) + iBufOffset +
521 47420900 : static_cast<GPtrDiff_t>(k) * nLineSpace,
522 47420900 : pabySrcBlock + iSrcOffset, nXSpanSize);
523 : else
524 4443800 : memcpy(pabySrcBlock + iSrcOffset,
525 4443800 : static_cast<GByte *>(pData) + iBufOffset +
526 4443800 : static_cast<GPtrDiff_t>(k) * nLineSpace,
527 : nXSpanSize);
528 : }
529 : else
530 : {
531 : /* type to type conversion */
532 3916610 : if (eRWFlag == GF_Read)
533 3919320 : GDALCopyWords64(
534 3919320 : pabySrcBlock + iSrcOffset, eDataType,
535 : nBandDataSize,
536 3919320 : static_cast<GByte *>(pData) + iBufOffset +
537 3919320 : static_cast<GPtrDiff_t>(k) * nLineSpace,
538 : eBufType, static_cast<int>(nPixelSpace),
539 : nXSpan);
540 : else
541 0 : GDALCopyWords64(
542 0 : static_cast<GByte *>(pData) + iBufOffset +
543 0 : static_cast<GPtrDiff_t>(k) * nLineSpace,
544 : eBufType, static_cast<int>(nPixelSpace),
545 0 : pabySrcBlock + iSrcOffset, eDataType,
546 : nBandDataSize, nXSpan);
547 : }
548 :
549 55781300 : iSrcOffset +=
550 55781300 : static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
551 : }
552 :
553 : iBufOffset =
554 2774930 : CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
555 2774940 : nLBlockX++;
556 2774940 : iSrcX += nXSpan;
557 :
558 2774940 : poBlock->DropLock();
559 2774980 : poBlock = nullptr;
560 : }
561 :
562 : /* Compute the increment to go on a block boundary */
563 2563100 : nYInc = nBlockYSize - (iSrcY % nBlockYSize);
564 :
565 2564890 : if (psExtraArg->pfnProgress != nullptr &&
566 1792 : !psExtraArg->pfnProgress(
567 2564890 : 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
568 : psExtraArg->pProgressData))
569 : {
570 29 : return CE_Failure;
571 : }
572 : }
573 :
574 2528620 : return CE_None;
575 : }
576 :
577 : /* ==================================================================== */
578 : /* Loop reading required source blocks to satisfy output */
579 : /* request. This is the most general implementation. */
580 : /* ==================================================================== */
581 :
582 362891 : double dfXOff = nXOff;
583 362891 : double dfYOff = nYOff;
584 362891 : double dfXSize = nXSize;
585 362891 : double dfYSize = nYSize;
586 362891 : if (psExtraArg->bFloatingPointWindowValidity)
587 : {
588 28185 : dfXOff = psExtraArg->dfXOff;
589 28185 : dfYOff = psExtraArg->dfYOff;
590 28185 : dfXSize = psExtraArg->dfXSize;
591 28185 : dfYSize = psExtraArg->dfYSize;
592 : }
593 :
594 : /* -------------------------------------------------------------------- */
595 : /* Compute stepping increment. */
596 : /* -------------------------------------------------------------------- */
597 362891 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
598 362891 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
599 362891 : CPLErr eErr = CE_None;
600 :
601 362891 : if (eRWFlag == GF_Write)
602 : {
603 : /* --------------------------------------------------------------------
604 : */
605 : /* Write case */
606 : /* Loop over raster window computing source locations in the buffer.
607 : */
608 : /* --------------------------------------------------------------------
609 : */
610 166650 : GByte *pabyDstBlock = nullptr;
611 166650 : int nLBlockX = -1;
612 166650 : int nLBlockY = -1;
613 :
614 1259590 : for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
615 : {
616 1092940 : const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
617 :
618 12063600 : for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
619 : {
620 10970600 : const int iBufXOff =
621 10970600 : static_cast<int>((iDstX - nXOff) / dfSrcXInc);
622 10970600 : GPtrDiff_t iBufOffset =
623 10970600 : static_cast<GPtrDiff_t>(iBufYOff) *
624 : static_cast<GPtrDiff_t>(nLineSpace) +
625 10970600 : iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
626 :
627 : // FIXME: this code likely doesn't work if the dirty block gets
628 : // flushed to disk before being completely written.
629 : // In the meantime, bJustInitialize should probably be set to
630 : // FALSE even if it is not ideal performance wise, and for
631 : // lossy compression.
632 :
633 : /* --------------------------------------------------------------------
634 : */
635 : /* Ensure we have the appropriate block loaded. */
636 : /* --------------------------------------------------------------------
637 : */
638 10970600 : if (iDstX < nLBlockX * nBlockXSize ||
639 10721300 : iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
640 10264600 : iDstY < nLBlockY * nBlockYSize ||
641 10264600 : iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
642 : {
643 738642 : nLBlockX = iDstX / nBlockXSize;
644 738642 : nLBlockY = iDstY / nBlockYSize;
645 :
646 738642 : const bool bJustInitialize =
647 1065870 : nYOff <= nLBlockY * nBlockYSize &&
648 327231 : nYOff + nYSize - nBlockYSize >=
649 327231 : nLBlockY * nBlockYSize &&
650 1116140 : nXOff <= nLBlockX * nBlockXSize &&
651 50265 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
652 : /*bool bMemZeroBuffer = FALSE;
653 : if( !bJustInitialize &&
654 : nXOff <= nLBlockX * nBlockXSize &&
655 : nYOff <= nLBlockY * nBlockYSize &&
656 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
657 : (nXOff + nXSize == GetXSize() &&
658 : (nLBlockX+1) * nBlockXSize > GetXSize())) &&
659 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
660 : (nYOff + nYSize == GetYSize() &&
661 : (nLBlockY+1) * nBlockYSize > GetYSize())) )
662 : {
663 : bJustInitialize = TRUE;
664 : bMemZeroBuffer = TRUE;
665 : }*/
666 738642 : if (poBlock != nullptr)
667 571992 : poBlock->DropLock();
668 :
669 738642 : poBlock =
670 738642 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
671 738642 : if (poBlock == nullptr)
672 : {
673 0 : return (CE_Failure);
674 : }
675 :
676 738642 : poBlock->MarkDirty();
677 :
678 738642 : pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
679 : /*if( bMemZeroBuffer )
680 : {
681 : memset(pabyDstBlock, 0,
682 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
683 : * nBlockYSize);
684 : }*/
685 : }
686 :
687 : // To make Coverity happy. Should not happen by design.
688 10970600 : if (pabyDstBlock == nullptr)
689 : {
690 0 : CPLAssert(false);
691 : eErr = CE_Failure;
692 : break;
693 : }
694 :
695 : /* --------------------------------------------------------------------
696 : */
697 : /* Copy over this pixel of data. */
698 : /* --------------------------------------------------------------------
699 : */
700 10970600 : GPtrDiff_t iDstOffset =
701 10970600 : (static_cast<GPtrDiff_t>(iDstX) -
702 10970600 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
703 10970600 : (static_cast<GPtrDiff_t>(iDstY) -
704 10970600 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
705 10970600 : nBlockXSize) *
706 10970600 : nBandDataSize;
707 :
708 10970600 : if (eDataType == eBufType)
709 : {
710 10967500 : memcpy(pabyDstBlock + iDstOffset,
711 10967500 : static_cast<GByte *>(pData) + iBufOffset,
712 : nBandDataSize);
713 : }
714 : else
715 : {
716 : /* type to type conversion ... ouch, this is expensive way
717 : of handling single words */
718 3096 : GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset,
719 3096 : eBufType, 0, pabyDstBlock + iDstOffset,
720 : eDataType, 0, 1);
721 : }
722 : }
723 :
724 1092940 : if (psExtraArg->pfnProgress != nullptr &&
725 0 : !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
726 : psExtraArg->pProgressData))
727 : {
728 0 : eErr = CE_Failure;
729 0 : break;
730 : }
731 : }
732 : }
733 : else
734 : {
735 196241 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
736 : {
737 7638 : if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
738 2496 : psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
739 2494 : psExtraArg->eResampleAlg == GRIORA_Bilinear ||
740 5147 : psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
741 2465 : GetColorTable() != nullptr)
742 : {
743 0 : CPLError(CE_Warning, CPLE_NotSupported,
744 : "Resampling method not supported on paletted band. "
745 : "Falling back to nearest neighbour");
746 : }
747 2574 : else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
748 3 : GDALDataTypeIsComplex(eDataType))
749 : {
750 0 : CPLError(CE_Warning, CPLE_NotSupported,
751 : "Resampling method not supported on complex data type "
752 : "band. Falling back to nearest neighbour");
753 : }
754 : else
755 : {
756 2571 : return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
757 : pData, nBufXSize, nBufYSize, eBufType,
758 2571 : nPixelSpace, nLineSpace, psExtraArg);
759 : }
760 : }
761 :
762 193651 : int nLimitBlockY = 0;
763 193651 : const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
764 193651 : int nStartBlockX = -nBlockXSize;
765 193651 : const double EPS = 1e-10;
766 193651 : int nLBlockY = -1;
767 193651 : const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
768 193651 : const bool bIntegerXFactor =
769 170980 : bUseIntegerRequestCoords &&
770 265638 : static_cast<int>(dfSrcXInc) == dfSrcXInc &&
771 71987 : static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
772 :
773 : /* --------------------------------------------------------------------
774 : */
775 : /* Read case */
776 : /* Loop over buffer computing source locations. */
777 : /* --------------------------------------------------------------------
778 : */
779 1957320 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
780 : {
781 : // Add small epsilon to avoid some numeric precision issues.
782 1763680 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
783 1763680 : const int iSrcY = static_cast<int>(std::min(
784 1763680 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
785 :
786 1763680 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
787 : static_cast<GPtrDiff_t>(nLineSpace);
788 :
789 1763680 : if (iSrcY >= nLimitBlockY)
790 : {
791 235012 : nLBlockY = iSrcY / nBlockYSize;
792 235012 : nLimitBlockY = nLBlockY * nBlockYSize;
793 235012 : if (nLimitBlockY < INT_MAX - nBlockYSize)
794 235012 : nLimitBlockY += nBlockYSize;
795 : else
796 0 : nLimitBlockY = INT_MAX;
797 : // Make sure a new block is loaded.
798 235012 : nStartBlockX = -nBlockXSize;
799 : }
800 1528670 : else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
801 : {
802 : // Make sure a new block is loaded.
803 429795 : nStartBlockX = -nBlockXSize;
804 : }
805 :
806 1763680 : GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
807 1763680 : static_cast<GPtrDiff_t>(nBlockXSize);
808 :
809 1763680 : if (bIntegerXFactor)
810 : {
811 413236 : int iSrcX = static_cast<int>(dfSrcXStart);
812 413236 : const int nSrcXInc = static_cast<int>(dfSrcXInc);
813 413236 : GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
814 413236 : bool bRet = false;
815 413236 : if (bByteCopy)
816 : {
817 302849 : bRet = DownsamplingIntegerXFactor<true, 1>(
818 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
819 : static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
820 : GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
821 : }
822 110387 : else if (eDataType == eBufType)
823 : {
824 110182 : switch (nBandDataSize)
825 : {
826 110102 : case 2:
827 110102 : bRet = DownsamplingIntegerXFactor<true, 2>(
828 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
829 : pabyDstData, static_cast<int>(nPixelSpace),
830 : nBufXSize, eDataType, eDataType, nStartBlockX,
831 : nBlockXSize, poBlock, nLBlockY);
832 110102 : break;
833 22 : case 4:
834 22 : bRet = DownsamplingIntegerXFactor<true, 4>(
835 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
836 : pabyDstData, static_cast<int>(nPixelSpace),
837 : nBufXSize, eDataType, eDataType, nStartBlockX,
838 : nBlockXSize, poBlock, nLBlockY);
839 22 : break;
840 56 : case 8:
841 56 : bRet = DownsamplingIntegerXFactor<true, 8>(
842 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
843 : pabyDstData, static_cast<int>(nPixelSpace),
844 : nBufXSize, eDataType, eDataType, nStartBlockX,
845 : nBlockXSize, poBlock, nLBlockY);
846 56 : break;
847 2 : case 16:
848 2 : bRet = DownsamplingIntegerXFactor<true, 16>(
849 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
850 : pabyDstData, static_cast<int>(nPixelSpace),
851 : nBufXSize, eDataType, eDataType, nStartBlockX,
852 : nBlockXSize, poBlock, nLBlockY);
853 2 : break;
854 0 : default:
855 0 : CPLAssert(false);
856 : break;
857 : }
858 : }
859 : else
860 : {
861 205 : bRet = DownsamplingIntegerXFactor<false, 0>(
862 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
863 : static_cast<int>(nPixelSpace), nBufXSize, eDataType,
864 : eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
865 : }
866 413236 : if (!bRet)
867 1 : eErr = CE_Failure;
868 : }
869 : else
870 : {
871 1350450 : double dfSrcX = dfSrcXStart;
872 572584000 : for (int iBufXOff = 0; iBufXOff < nBufXSize;
873 571233000 : iBufXOff++, dfSrcX += dfSrcXInc)
874 : {
875 : // TODO?: try to avoid the clamping for most iterations
876 : const int iSrcX = static_cast<int>(
877 1142470000 : std::min(std::max(0.0, dfSrcX),
878 571233000 : static_cast<double>(nRasterXSize - 1)));
879 :
880 : /* --------------------------------------------------------------------
881 : */
882 : /* Ensure we have the appropriate block loaded. */
883 : /* --------------------------------------------------------------------
884 : */
885 571233000 : if (iSrcX >= nBlockXSize + nStartBlockX)
886 : {
887 1705570 : const int nLBlockX = iSrcX / nBlockXSize;
888 1705570 : nStartBlockX = nLBlockX * nBlockXSize;
889 :
890 1705570 : if (poBlock != nullptr)
891 1583910 : poBlock->DropLock();
892 :
893 1705570 : poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
894 1705570 : if (poBlock == nullptr)
895 : {
896 9 : eErr = CE_Failure;
897 9 : break;
898 : }
899 :
900 : pabySrcBlock =
901 1705560 : static_cast<GByte *>(poBlock->GetDataRef());
902 : }
903 571233000 : const GPtrDiff_t nDiffX =
904 571233000 : static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
905 :
906 : /* --------------------------------------------------------------------
907 : */
908 : /* Copy over this pixel of data. */
909 : /* --------------------------------------------------------------------
910 : */
911 :
912 571233000 : if (bByteCopy)
913 : {
914 517794000 : GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
915 517794000 : static_cast<GByte *>(pData)[iBufOffset] =
916 517794000 : pabySrcBlock[iSrcOffset];
917 : }
918 53439200 : else if (eDataType == eBufType)
919 : {
920 48225500 : GPtrDiff_t iSrcOffset =
921 48225500 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
922 48225500 : memcpy(static_cast<GByte *>(pData) + iBufOffset,
923 48225500 : pabySrcBlock + iSrcOffset, nBandDataSize);
924 : }
925 : else
926 : {
927 : // Type to type conversion ...
928 5213680 : GPtrDiff_t iSrcOffset =
929 5213680 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
930 5213680 : GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
931 : static_cast<GByte *>(pData) +
932 5213680 : iBufOffset,
933 : eBufType, 0, 1);
934 : }
935 :
936 571233000 : iBufOffset += static_cast<int>(nPixelSpace);
937 : }
938 : }
939 1763680 : if (eErr == CE_Failure)
940 11 : break;
941 :
942 1983380 : if (psExtraArg->pfnProgress != nullptr &&
943 219710 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
944 : psExtraArg->pProgressData))
945 : {
946 1 : eErr = CE_Failure;
947 1 : break;
948 : }
949 : }
950 : }
951 :
952 360301 : if (poBlock != nullptr)
953 360291 : poBlock->DropLock();
954 :
955 360301 : return eErr;
956 : }
957 :
958 : /************************************************************************/
959 : /* GDALRasterIOTransformer() */
960 : /************************************************************************/
961 :
// Parameters of the affine mapping applied by GDALRasterIOTransformer():
//   src = dst * ratio + offset   (destination -> source)
//   dst = (src - offset) / ratio (source -> destination)
// Members default to the identity mapping so that a default-constructed
// instance never exposes indeterminate values and never yields a zero
// divisor in the source -> destination direction.
struct GDALRasterIOTransformerStruct
{
    double dfXOff = 0.0;            // X offset added after scaling
    double dfYOff = 0.0;            // Y offset added after scaling
    double dfXRatioDstToSrc = 1.0;  // X scale from destination to source
    double dfYRatioDstToSrc = 1.0;  // Y scale from destination to source
};
969 :
970 6748 : static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
971 : int nPointCount, double *x, double *y,
972 : double * /* z */, int *panSuccess)
973 : {
974 6748 : GDALRasterIOTransformerStruct *psParams =
975 : static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
976 6748 : if (bDstToSrc)
977 : {
978 252996 : for (int i = 0; i < nPointCount; i++)
979 : {
980 246836 : x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
981 246836 : y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
982 246836 : panSuccess[i] = TRUE;
983 : }
984 : }
985 : else
986 : {
987 1176 : for (int i = 0; i < nPointCount; i++)
988 : {
989 588 : x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
990 588 : y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
991 588 : panSuccess[i] = TRUE;
992 : }
993 : }
994 6748 : return TRUE;
995 : }
996 :
997 : /************************************************************************/
998 : /* RasterIOResampled() */
999 : /************************************************************************/
1000 :
1001 : //! @cond Doxygen_Suppress
1002 2571 : CPLErr GDALRasterBand::RasterIOResampled(
1003 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1004 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1005 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1006 : {
1007 : // Determine if we use warping resampling or overview resampling
1008 : const bool bUseWarp =
1009 2571 : (GDALDataTypeIsComplex(eDataType) &&
1010 2728 : psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
1011 157 : psExtraArg->eResampleAlg != GRIORA_Mode);
1012 :
1013 2571 : double dfXOff = nXOff;
1014 2571 : double dfYOff = nYOff;
1015 2571 : double dfXSize = nXSize;
1016 2571 : double dfYSize = nYSize;
1017 2571 : if (psExtraArg->bFloatingPointWindowValidity)
1018 : {
1019 2114 : dfXOff = psExtraArg->dfXOff;
1020 2114 : dfYOff = psExtraArg->dfYOff;
1021 2114 : dfXSize = psExtraArg->dfXSize;
1022 2114 : dfYSize = psExtraArg->dfYSize;
1023 : }
1024 :
1025 2571 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1026 2571 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1027 :
1028 : // Determine the coordinates in the "virtual" output raster to see
1029 : // if there are not integers, in which case we will use them as a shift
1030 : // so that subwindow extracts give the exact same results as entire raster
1031 : // scaling.
1032 2571 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1033 2571 : bool bHasXOffVirtual = false;
1034 2571 : int nDestXOffVirtual = 0;
1035 2571 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1036 : {
1037 2245 : bHasXOffVirtual = true;
1038 2245 : dfXOff = nXOff;
1039 2245 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1040 : }
1041 :
1042 2571 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1043 2571 : bool bHasYOffVirtual = false;
1044 2571 : int nDestYOffVirtual = 0;
1045 2571 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1046 : {
1047 2239 : bHasYOffVirtual = true;
1048 2239 : dfYOff = nYOff;
1049 2239 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1050 : }
1051 :
1052 : // Create a MEM dataset that wraps the output buffer.
1053 : GDALDataset *poMEMDS;
1054 2571 : void *pTempBuffer = nullptr;
1055 2571 : GSpacing nPSMem = nPixelSpace;
1056 2571 : GSpacing nLSMem = nLineSpace;
1057 2571 : void *pDataMem = pData;
1058 2571 : GDALDataType eDTMem = eBufType;
1059 2571 : if (eBufType != eDataType)
1060 : {
1061 40 : nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1062 40 : nLSMem = nPSMem * nBufXSize;
1063 : pTempBuffer =
1064 40 : VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1065 40 : if (pTempBuffer == nullptr)
1066 0 : return CE_Failure;
1067 40 : pDataMem = pTempBuffer;
1068 40 : eDTMem = eDataType;
1069 : }
1070 :
1071 : poMEMDS =
1072 2571 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1073 : nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1074 2571 : GByte *pabyData = static_cast<GByte *>(pDataMem) -
1075 2571 : nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1076 2571 : GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1077 : poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1078 2571 : poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1079 :
1080 2571 : const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1081 2571 : const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
1082 2571 : if (pszNBITS)
1083 6 : GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
1084 6 : "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1085 :
1086 2571 : CPLErr eErr = CE_None;
1087 :
1088 : // Do the resampling.
1089 2571 : if (bUseWarp)
1090 : {
1091 149 : int bHasNoData = FALSE;
1092 149 : double dfNoDataValue = GetNoDataValue(&bHasNoData);
1093 :
1094 149 : VRTDatasetH hVRTDS = nullptr;
1095 149 : GDALRasterBandH hVRTBand = nullptr;
1096 149 : if (GetDataset() == nullptr)
1097 : {
1098 : /* Create VRT dataset that wraps the whole dataset */
1099 0 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1100 0 : VRTAddBand(hVRTDS, eDataType, nullptr);
1101 0 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1102 0 : VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1103 : 0, 0, nRasterXSize, nRasterYSize, nullptr,
1104 : VRT_NODATA_UNSET);
1105 :
1106 : /* Add a mask band if needed */
1107 0 : if (GetMaskFlags() != GMF_ALL_VALID)
1108 : {
1109 0 : GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1110 : VRTSourcedRasterBand *poVRTMaskBand =
1111 : reinterpret_cast<VRTSourcedRasterBand *>(
1112 : reinterpret_cast<GDALRasterBand *>(hVRTBand)
1113 0 : ->GetMaskBand());
1114 0 : poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1115 0 : nRasterYSize, 0, 0,
1116 0 : nRasterXSize, nRasterYSize);
1117 : }
1118 : }
1119 :
1120 149 : GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1121 149 : switch (psExtraArg->eResampleAlg)
1122 : {
1123 0 : case GRIORA_NearestNeighbour:
1124 0 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1125 0 : break;
1126 147 : case GRIORA_Bilinear:
1127 147 : psWarpOptions->eResampleAlg = GRA_Bilinear;
1128 147 : break;
1129 0 : case GRIORA_Cubic:
1130 0 : psWarpOptions->eResampleAlg = GRA_Cubic;
1131 0 : break;
1132 0 : case GRIORA_CubicSpline:
1133 0 : psWarpOptions->eResampleAlg = GRA_CubicSpline;
1134 0 : break;
1135 0 : case GRIORA_Lanczos:
1136 0 : psWarpOptions->eResampleAlg = GRA_Lanczos;
1137 0 : break;
1138 0 : case GRIORA_Average:
1139 0 : psWarpOptions->eResampleAlg = GRA_Average;
1140 0 : break;
1141 2 : case GRIORA_RMS:
1142 2 : psWarpOptions->eResampleAlg = GRA_RMS;
1143 2 : break;
1144 0 : case GRIORA_Mode:
1145 0 : psWarpOptions->eResampleAlg = GRA_Mode;
1146 0 : break;
1147 0 : default:
1148 0 : CPLAssert(false);
1149 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1150 : break;
1151 : }
1152 149 : psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1153 149 : psWarpOptions->hDstDS = poMEMDS;
1154 149 : psWarpOptions->nBandCount = 1;
1155 149 : int nSrcBandNumber = hVRTDS ? 1 : nBand;
1156 149 : int nDstBandNumber = 1;
1157 149 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1158 149 : psWarpOptions->panDstBands = &nDstBandNumber;
1159 298 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1160 149 : ? psExtraArg->pfnProgress
1161 : : GDALDummyProgress;
1162 149 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1163 149 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1164 149 : if (bHasNoData)
1165 : {
1166 0 : psWarpOptions->papszWarpOptions = CSLSetNameValue(
1167 : psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1168 0 : if (psWarpOptions->padfSrcNoDataReal == nullptr)
1169 : {
1170 0 : psWarpOptions->padfSrcNoDataReal =
1171 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1172 0 : psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1173 : }
1174 :
1175 0 : if (psWarpOptions->padfDstNoDataReal == nullptr)
1176 : {
1177 0 : psWarpOptions->padfDstNoDataReal =
1178 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1179 0 : psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1180 : }
1181 : }
1182 :
1183 : GDALRasterIOTransformerStruct sTransformer;
1184 149 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1185 149 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1186 149 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1187 149 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1188 149 : psWarpOptions->pTransformerArg = &sTransformer;
1189 :
1190 : GDALWarpOperationH hWarpOperation =
1191 149 : GDALCreateWarpOperation(psWarpOptions);
1192 149 : eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1193 : nDestYOffVirtual, nBufXSize, nBufYSize);
1194 149 : GDALDestroyWarpOperation(hWarpOperation);
1195 :
1196 149 : psWarpOptions->panSrcBands = nullptr;
1197 149 : psWarpOptions->panDstBands = nullptr;
1198 149 : GDALDestroyWarpOptions(psWarpOptions);
1199 :
1200 149 : if (hVRTDS)
1201 0 : GDALClose(hVRTDS);
1202 : }
1203 : else
1204 : {
1205 2422 : const char *pszResampling =
1206 2608 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1207 297 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1208 220 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1209 213 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1210 159 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1211 95 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1212 43 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1213 3 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1214 : : "UNKNOWN";
1215 :
1216 2422 : int nKernelRadius = 0;
1217 : GDALResampleFunction pfnResampleFunc =
1218 2422 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1219 2422 : CPLAssert(pfnResampleFunc);
1220 : GDALDataType eWrkDataType =
1221 2422 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1222 2422 : int nHasNoData = 0;
1223 2422 : double dfNoDataValue = GetNoDataValue(&nHasNoData);
1224 2422 : const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1225 2422 : if (!bHasNoData)
1226 2358 : dfNoDataValue = 0.0;
1227 :
1228 2422 : int nDstBlockXSize = nBufXSize;
1229 2422 : int nDstBlockYSize = nBufYSize;
1230 2422 : int nFullResXChunk = 0;
1231 2422 : int nFullResYChunk = 0;
1232 : while (true)
1233 : {
1234 2422 : nFullResXChunk =
1235 2422 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1236 2422 : nFullResYChunk =
1237 2422 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1238 2422 : if (nFullResXChunk > nRasterXSize)
1239 2233 : nFullResXChunk = nRasterXSize;
1240 2422 : if (nFullResYChunk > nRasterYSize)
1241 216 : nFullResYChunk = nRasterYSize;
1242 2422 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1243 2376 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1244 : 1024 * 1024))
1245 : break;
1246 : // When operating on the full width of a raster whose block width is
1247 : // the raster width, prefer doing chunks in height.
1248 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1249 : nDstBlockYSize > 1)
1250 0 : nDstBlockYSize /= 2;
1251 : /* Otherwise cut the maximal dimension */
1252 0 : else if (nDstBlockXSize > 1 &&
1253 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1254 0 : nDstBlockXSize /= 2;
1255 : else
1256 0 : nDstBlockYSize /= 2;
1257 : }
1258 :
1259 2422 : int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1260 2422 : int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1261 2422 : if (nOvrXFactor == 0)
1262 2024 : nOvrXFactor = 1;
1263 2422 : if (nOvrYFactor == 0)
1264 2023 : nOvrYFactor = 1;
1265 2422 : int nFullResXSizeQueried =
1266 2422 : nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1267 2422 : int nFullResYSizeQueried =
1268 2422 : nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1269 :
1270 2422 : if (nFullResXSizeQueried > nRasterXSize)
1271 2135 : nFullResXSizeQueried = nRasterXSize;
1272 2422 : if (nFullResYSizeQueried > nRasterYSize)
1273 129 : nFullResYSizeQueried = nRasterYSize;
1274 :
1275 : void *pChunk =
1276 2422 : VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1277 : nFullResXSizeQueried, nFullResYSizeQueried);
1278 2422 : GByte *pabyChunkNoDataMask = nullptr;
1279 :
1280 2422 : GDALRasterBand *poMaskBand = GetMaskBand();
1281 2422 : int l_nMaskFlags = GetMaskFlags();
1282 :
1283 2422 : bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1284 2422 : if (bUseNoDataMask)
1285 : {
1286 126 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1287 : nFullResXSizeQueried, nFullResYSizeQueried));
1288 : }
1289 2422 : if (pChunk == nullptr ||
1290 126 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1291 : {
1292 0 : GDALClose(poMEMDS);
1293 0 : CPLFree(pChunk);
1294 0 : CPLFree(pabyChunkNoDataMask);
1295 0 : VSIFree(pTempBuffer);
1296 0 : return CE_Failure;
1297 : }
1298 :
1299 2422 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1300 2422 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1301 2422 : int nBlocksDone = 0;
1302 :
1303 : int nDstYOff;
1304 4844 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1305 2422 : nDstYOff += nDstBlockYSize)
1306 : {
1307 : int nDstYCount;
1308 2422 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1309 2422 : nDstYCount = nDstBlockYSize;
1310 : else
1311 0 : nDstYCount = nBufYSize - nDstYOff;
1312 :
1313 2422 : int nChunkYOff =
1314 2422 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1315 2422 : int nChunkYOff2 = nYOff + 1 +
1316 2422 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1317 : dfYRatioDstToSrc));
1318 2422 : if (nChunkYOff2 > nRasterYSize)
1319 323 : nChunkYOff2 = nRasterYSize;
1320 2422 : int nYCount = nChunkYOff2 - nChunkYOff;
1321 2422 : CPLAssert(nYCount <= nFullResYChunk);
1322 :
1323 2422 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1324 2422 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1325 2422 : if (nChunkYOffQueried < 0)
1326 : {
1327 231 : nChunkYSizeQueried += nChunkYOffQueried;
1328 231 : nChunkYOffQueried = 0;
1329 : }
1330 2422 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1331 331 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1332 2422 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1333 :
1334 2422 : int nDstXOff = 0;
1335 4844 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1336 2422 : nDstXOff += nDstBlockXSize)
1337 : {
1338 2422 : int nDstXCount = 0;
1339 2422 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1340 2422 : nDstXCount = nDstBlockXSize;
1341 : else
1342 0 : nDstXCount = nBufXSize - nDstXOff;
1343 :
1344 2422 : int nChunkXOff =
1345 2422 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1346 2422 : int nChunkXOff2 =
1347 2422 : nXOff + 1 +
1348 2422 : static_cast<int>(
1349 2422 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1350 2422 : if (nChunkXOff2 > nRasterXSize)
1351 2234 : nChunkXOff2 = nRasterXSize;
1352 2422 : int nXCount = nChunkXOff2 - nChunkXOff;
1353 2422 : CPLAssert(nXCount <= nFullResXChunk);
1354 :
1355 2422 : int nChunkXOffQueried =
1356 2422 : nChunkXOff - nKernelRadius * nOvrXFactor;
1357 2422 : int nChunkXSizeQueried =
1358 2422 : nXCount + 2 * nKernelRadius * nOvrXFactor;
1359 2422 : if (nChunkXOffQueried < 0)
1360 : {
1361 2148 : nChunkXSizeQueried += nChunkXOffQueried;
1362 2148 : nChunkXOffQueried = 0;
1363 : }
1364 2422 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1365 2134 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1366 2422 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1367 :
1368 : // Read the source buffers.
1369 2422 : eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1370 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1371 : nChunkXSizeQueried, nChunkYSizeQueried,
1372 : eWrkDataType, 0, 0, nullptr);
1373 :
1374 2422 : bool bSkipResample = false;
1375 2422 : bool bNoDataMaskFullyOpaque = false;
1376 2422 : if (eErr == CE_None && bUseNoDataMask)
1377 : {
1378 126 : eErr = poMaskBand->RasterIO(
1379 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1380 : nChunkXSizeQueried, nChunkYSizeQueried,
1381 : pabyChunkNoDataMask, nChunkXSizeQueried,
1382 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1383 :
1384 : /* Optimizations if mask if fully opaque or transparent */
1385 126 : int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1386 126 : GByte bVal = pabyChunkNoDataMask[0];
1387 126 : int i = 1;
1388 241310 : for (; i < nPixels; i++)
1389 : {
1390 241261 : if (pabyChunkNoDataMask[i] != bVal)
1391 77 : break;
1392 : }
1393 126 : if (i == nPixels)
1394 : {
1395 49 : if (bVal == 0)
1396 : {
1397 712 : for (int j = 0; j < nDstYCount; j++)
1398 : {
1399 686 : GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0,
1400 : static_cast<GByte *>(pDataMem) +
1401 686 : nLSMem * (j + nDstYOff) +
1402 686 : nDstXOff * nPSMem,
1403 : eDTMem,
1404 : static_cast<int>(nPSMem),
1405 : nDstXCount);
1406 : }
1407 26 : bSkipResample = true;
1408 : }
1409 : else
1410 : {
1411 23 : bNoDataMaskFullyOpaque = true;
1412 : }
1413 : }
1414 : }
1415 :
1416 2422 : if (!bSkipResample && eErr == CE_None)
1417 : {
1418 2394 : const bool bPropagateNoData = false;
1419 2394 : void *pDstBuffer = nullptr;
1420 2394 : GDALDataType eDstBufferDataType = GDT_Unknown;
1421 : GDALRasterBand *poMEMBand =
1422 2394 : GDALRasterBand::FromHandle(hMEMBand);
1423 2394 : GDALOverviewResampleArgs args;
1424 2394 : args.eSrcDataType = eDataType;
1425 2394 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1426 2394 : args.nOvrXSize = poMEMBand->GetXSize();
1427 2394 : args.nOvrYSize = poMEMBand->GetYSize();
1428 2394 : args.nOvrNBITS = nNBITS;
1429 2394 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1430 2394 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1431 2394 : args.dfSrcXDelta =
1432 2394 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1433 2394 : args.dfSrcYDelta =
1434 2394 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1435 2394 : args.eWrkDataType = eWrkDataType;
1436 2394 : args.pabyChunkNodataMask =
1437 2394 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
1438 2394 : args.nChunkXOff =
1439 2394 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1440 2394 : args.nChunkXSize = nChunkXSizeQueried;
1441 2394 : args.nChunkYOff =
1442 2394 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1443 2394 : args.nChunkYSize = nChunkYSizeQueried;
1444 2394 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1445 2394 : args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
1446 2394 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1447 2394 : args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
1448 2394 : args.pszResampling = pszResampling;
1449 2394 : args.bHasNoData = bHasNoData;
1450 2394 : args.dfNoDataValue = dfNoDataValue;
1451 2394 : args.poColorTable = GetColorTable();
1452 2394 : args.bPropagateNoData = bPropagateNoData;
1453 2394 : eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
1454 : &eDstBufferDataType);
1455 2394 : if (eErr == CE_None)
1456 : {
1457 2394 : eErr = poMEMBand->RasterIO(
1458 : GF_Write, nDstXOff + nDestXOffVirtual,
1459 : nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1460 : pDstBuffer, nDstXCount, nDstYCount,
1461 : eDstBufferDataType, 0, 0, nullptr);
1462 : }
1463 2394 : CPLFree(pDstBuffer);
1464 : }
1465 :
1466 2422 : nBlocksDone++;
1467 2451 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1468 29 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1469 : "", psExtraArg->pProgressData))
1470 : {
1471 1 : eErr = CE_Failure;
1472 : }
1473 : }
1474 : }
1475 :
1476 2422 : CPLFree(pChunk);
1477 2422 : CPLFree(pabyChunkNoDataMask);
1478 : }
1479 :
1480 2571 : if (eBufType != eDataType)
1481 : {
1482 40 : CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1483 : GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1484 : pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1485 : nullptr));
1486 : }
1487 2571 : GDALClose(poMEMDS);
1488 2571 : VSIFree(pTempBuffer);
1489 :
1490 2571 : return eErr;
1491 : }
1492 :
1493 : /************************************************************************/
1494 : /* RasterIOResampled() */
1495 : /************************************************************************/
1496 :
1497 278 : CPLErr GDALDataset::RasterIOResampled(
1498 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1499 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1500 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1501 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1502 :
1503 : {
1504 : #if 0
1505 : // Determine if we use warping resampling or overview resampling
1506 : bool bUseWarp = false;
1507 : if( GDALDataTypeIsComplex( eDataType ) )
1508 : bUseWarp = true;
1509 : #endif
1510 :
1511 278 : double dfXOff = nXOff;
1512 278 : double dfYOff = nYOff;
1513 278 : double dfXSize = nXSize;
1514 278 : double dfYSize = nYSize;
1515 278 : if (psExtraArg->bFloatingPointWindowValidity)
1516 : {
1517 159 : dfXOff = psExtraArg->dfXOff;
1518 159 : dfYOff = psExtraArg->dfYOff;
1519 159 : dfXSize = psExtraArg->dfXSize;
1520 159 : dfYSize = psExtraArg->dfYSize;
1521 : }
1522 :
1523 278 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1524 278 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1525 :
1526 : // Determine the coordinates in the "virtual" output raster to see
1527 : // if there are not integers, in which case we will use them as a shift
1528 : // so that subwindow extracts give the exact same results as entire raster
1529 : // scaling.
1530 278 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1531 278 : bool bHasXOffVirtual = false;
1532 278 : int nDestXOffVirtual = 0;
1533 278 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1534 : {
1535 159 : bHasXOffVirtual = true;
1536 159 : dfXOff = nXOff;
1537 159 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1538 : }
1539 :
1540 278 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1541 278 : bool bHasYOffVirtual = false;
1542 278 : int nDestYOffVirtual = 0;
1543 278 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1544 : {
1545 123 : bHasYOffVirtual = true;
1546 123 : dfYOff = nYOff;
1547 123 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1548 : }
1549 :
1550 : // Create a MEM dataset that wraps the output buffer.
1551 : GDALDataset *poMEMDS =
1552 278 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1553 : nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1554 : GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1555 279 : CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
1556 297 : int nNBITS = 0;
1557 1243 : for (int i = 0; i < nBandCount; i++)
1558 : {
1559 968 : char szBuffer[32] = {'\0'};
1560 1927 : int nRet = CPLPrintPointer(
1561 : szBuffer,
1562 968 : static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
1563 968 : nLineSpace * nDestYOffVirtual + nBandSpace * i,
1564 : sizeof(szBuffer));
1565 959 : szBuffer[nRet] = 0;
1566 :
1567 959 : char szBuffer0[64] = {'\0'};
1568 959 : snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1569 :
1570 959 : char szBuffer1[64] = {'\0'};
1571 959 : snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
1572 : static_cast<GIntBig>(nPixelSpace));
1573 :
1574 959 : char szBuffer2[64] = {'\0'};
1575 959 : snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
1576 : static_cast<GIntBig>(nLineSpace));
1577 :
1578 959 : char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
1579 :
1580 959 : poMEMDS->AddBand(eBufType, apszOptions);
1581 :
1582 954 : GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1583 942 : papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
1584 : const char *pszNBITS =
1585 955 : poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1586 949 : if (pszNBITS)
1587 : {
1588 0 : nNBITS = atoi(pszNBITS);
1589 0 : poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1590 0 : "IMAGE_STRUCTURE");
1591 : }
1592 : }
1593 :
1594 275 : CPLErr eErr = CE_None;
1595 :
1596 : // TODO(schwehr): Why disabled? Why not just delete?
1597 : // Looks like this code was initially added as disable by copying
1598 : // from RasterIO here:
1599 : // https://trac.osgeo.org/gdal/changeset/29572
1600 : #if 0
1601 : // Do the resampling.
1602 : if( bUseWarp )
1603 : {
1604 : VRTDatasetH hVRTDS = nullptr;
1605 : GDALRasterBandH hVRTBand = nullptr;
1606 : if( GetDataset() == nullptr )
1607 : {
1608 : /* Create VRT dataset that wraps the whole dataset */
1609 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1610 : VRTAddBand( hVRTDS, eDataType, nullptr );
1611 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1612 : VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1613 : (GDALRasterBandH)this,
1614 : 0, 0,
1615 : nRasterXSize, nRasterYSize,
1616 : 0, 0,
1617 : nRasterXSize, nRasterYSize,
1618 : nullptr, VRT_NODATA_UNSET );
1619 :
1620 : /* Add a mask band if needed */
1621 : if( GetMaskFlags() != GMF_ALL_VALID )
1622 : {
1623 : ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1624 : VRTSourcedRasterBand* poVRTMaskBand =
1625 : (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1626 : poVRTMaskBand->
1627 : AddMaskBandSource( this,
1628 : 0, 0,
1629 : nRasterXSize, nRasterYSize,
1630 : 0, 0,
1631 : nRasterXSize, nRasterYSize);
1632 : }
1633 : }
1634 :
1635 : GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1636 : psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1637 : psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1638 : psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1639 : psWarpOptions->nBandCount = 1;
1640 : int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1641 : int nDstBandNumber = 1;
1642 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1643 : psWarpOptions->panDstBands = &nDstBandNumber;
1644 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1645 : psExtraArg->pfnProgress : GDALDummyProgress;
1646 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1647 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1648 : GDALRasterIOTransformerStruct sTransformer;
1649 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1650 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1651 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1652 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1653 : psWarpOptions->pTransformerArg = &sTransformer;
1654 :
1655 : GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1656 : eErr = GDALChunkAndWarpImage( hWarpOperation,
1657 : nDestXOffVirtual, nDestYOffVirtual,
1658 : nBufXSize, nBufYSize );
1659 : GDALDestroyWarpOperation( hWarpOperation );
1660 :
1661 : psWarpOptions->panSrcBands = nullptr;
1662 : psWarpOptions->panDstBands = nullptr;
1663 : GDALDestroyWarpOptions( psWarpOptions );
1664 :
1665 : if( hVRTDS )
1666 : GDALClose(hVRTDS);
1667 : }
1668 : else
1669 : #endif
1670 : {
1671 275 : const char *pszResampling =
1672 432 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1673 157 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1674 0 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1675 0 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1676 0 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1677 0 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1678 0 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1679 0 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1680 : : "UNKNOWN";
1681 :
1682 275 : GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1683 267 : GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1684 : int nBlockXSize, nBlockYSize;
1685 265 : poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1686 :
1687 : int nKernelRadius;
1688 : GDALResampleFunction pfnResampleFunc =
1689 270 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1690 269 : CPLAssert(pfnResampleFunc);
1691 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1692 : GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1693 : GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1694 : #endif
1695 : GDALDataType eWrkDataType =
1696 269 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1697 :
1698 265 : int nDstBlockXSize = nBufXSize;
1699 265 : int nDstBlockYSize = nBufYSize;
1700 : int nFullResXChunk, nFullResYChunk;
1701 : while (true)
1702 : {
1703 265 : nFullResXChunk =
1704 265 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1705 265 : nFullResYChunk =
1706 265 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1707 265 : if (nFullResXChunk > nRasterXSize)
1708 143 : nFullResXChunk = nRasterXSize;
1709 265 : if (nFullResYChunk > nRasterYSize)
1710 33 : nFullResYChunk = nRasterYSize;
1711 265 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1712 263 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1713 : 1024 * 1024))
1714 : break;
1715 : // When operating on the full width of a raster whose block width is
1716 : // the raster width, prefer doing chunks in height.
1717 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1718 : nDstBlockYSize > 1)
1719 0 : nDstBlockYSize /= 2;
1720 : /* Otherwise cut the maximal dimension */
1721 0 : else if (nDstBlockXSize > 1 &&
1722 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1723 0 : nDstBlockXSize /= 2;
1724 : else
1725 0 : nDstBlockYSize /= 2;
1726 : }
1727 :
1728 540 : int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1729 265 : static_cast<int>(0.5 + dfYRatioDstToSrc));
1730 275 : if (nOvrFactor == 0)
1731 95 : nOvrFactor = 1;
1732 275 : int nFullResXSizeQueried =
1733 275 : nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1734 275 : int nFullResYSizeQueried =
1735 275 : nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1736 :
1737 275 : if (nFullResXSizeQueried > nRasterXSize)
1738 162 : nFullResXSizeQueried = nRasterXSize;
1739 275 : if (nFullResYSizeQueried > nRasterYSize)
1740 36 : nFullResYSizeQueried = nRasterYSize;
1741 :
1742 275 : void *pChunk = VSI_MALLOC3_VERBOSE(
1743 : cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1744 : nBandCount),
1745 : nFullResXSizeQueried, nFullResYSizeQueried);
1746 282 : GByte *pabyChunkNoDataMask = nullptr;
1747 :
1748 282 : GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1749 280 : int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1750 :
1751 280 : bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1752 280 : if (bUseNoDataMask)
1753 : {
1754 55 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1755 : nFullResXSizeQueried, nFullResYSizeQueried));
1756 : }
1757 280 : if (pChunk == nullptr ||
1758 55 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1759 : {
1760 12 : GDALClose(poMEMDS);
1761 0 : CPLFree(pChunk);
1762 0 : CPLFree(pabyChunkNoDataMask);
1763 0 : CPLFree(papoDstBands);
1764 0 : return CE_Failure;
1765 : }
1766 :
1767 268 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1768 268 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1769 268 : int nBlocksDone = 0;
1770 :
1771 : int nDstYOff;
1772 555 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1773 287 : nDstYOff += nDstBlockYSize)
1774 : {
1775 : int nDstYCount;
1776 270 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1777 271 : nDstYCount = nDstBlockYSize;
1778 : else
1779 0 : nDstYCount = nBufYSize - nDstYOff;
1780 :
1781 270 : int nChunkYOff =
1782 270 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1783 270 : int nChunkYOff2 = nYOff + 1 +
1784 270 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1785 : dfYRatioDstToSrc));
1786 270 : if (nChunkYOff2 > nRasterYSize)
1787 56 : nChunkYOff2 = nRasterYSize;
1788 270 : int nYCount = nChunkYOff2 - nChunkYOff;
1789 270 : CPLAssert(nYCount <= nFullResYChunk);
1790 :
1791 270 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1792 270 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1793 270 : if (nChunkYOffQueried < 0)
1794 : {
1795 56 : nChunkYSizeQueried += nChunkYOffQueried;
1796 56 : nChunkYOffQueried = 0;
1797 : }
1798 270 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1799 66 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1800 270 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1801 :
1802 : int nDstXOff;
1803 553 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1804 283 : nDstXOff += nDstBlockXSize)
1805 : {
1806 : int nDstXCount;
1807 266 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1808 272 : nDstXCount = nDstBlockXSize;
1809 : else
1810 0 : nDstXCount = nBufXSize - nDstXOff;
1811 :
1812 266 : int nChunkXOff =
1813 266 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1814 266 : int nChunkXOff2 =
1815 266 : nXOff + 1 +
1816 266 : static_cast<int>(
1817 266 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1818 266 : if (nChunkXOff2 > nRasterXSize)
1819 146 : nChunkXOff2 = nRasterXSize;
1820 266 : int nXCount = nChunkXOff2 - nChunkXOff;
1821 266 : CPLAssert(nXCount <= nFullResXChunk);
1822 :
1823 266 : int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1824 266 : int nChunkXSizeQueried =
1825 266 : nXCount + 2 * nKernelRadius * nOvrFactor;
1826 266 : if (nChunkXOffQueried < 0)
1827 : {
1828 139 : nChunkXSizeQueried += nChunkXOffQueried;
1829 139 : nChunkXOffQueried = 0;
1830 : }
1831 266 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1832 147 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1833 266 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1834 :
1835 266 : bool bSkipResample = false;
1836 266 : bool bNoDataMaskFullyOpaque = false;
1837 266 : if (eErr == CE_None && bUseNoDataMask)
1838 : {
1839 55 : eErr = poMaskBand->RasterIO(
1840 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1841 : nChunkXSizeQueried, nChunkYSizeQueried,
1842 : pabyChunkNoDataMask, nChunkXSizeQueried,
1843 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1844 :
1845 : /* Optimizations if mask is fully opaque or transparent */
1846 55 : const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1847 55 : const GByte bVal = pabyChunkNoDataMask[0];
1848 55 : int i = 1; // Used after for.
1849 123794 : for (; i < nPixels; i++)
1850 : {
1851 123777 : if (pabyChunkNoDataMask[i] != bVal)
1852 38 : break;
1853 : }
1854 55 : if (i == nPixels)
1855 : {
1856 17 : if (bVal == 0)
1857 : {
1858 16 : GByte abyZero[16] = {0};
1859 64 : for (int iBand = 0; iBand < nBandCount; iBand++)
1860 : {
1861 2016 : for (int j = 0; j < nDstYCount; j++)
1862 : {
1863 1968 : GDALCopyWords64(
1864 : abyZero, GDT_Byte, 0,
1865 : static_cast<GByte *>(pData) +
1866 1968 : iBand * nBandSpace +
1867 1968 : nLineSpace * (j + nDstYOff) +
1868 1968 : nDstXOff * nPixelSpace,
1869 : eBufType, static_cast<int>(nPixelSpace),
1870 : nDstXCount);
1871 : }
1872 : }
1873 16 : bSkipResample = true;
1874 : }
1875 : else
1876 : {
1877 1 : bNoDataMaskFullyOpaque = true;
1878 : }
1879 : }
1880 : }
1881 :
1882 266 : if (!bSkipResample && eErr == CE_None)
1883 : {
1884 : /* Read the source buffers */
1885 250 : eErr = RasterIO(
1886 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1887 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1888 : nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1889 : nBandCount, panBandMap, 0, 0, 0, nullptr);
1890 : }
1891 :
1892 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1893 : if (pfnResampleFuncMultiBands && !bSkipResample &&
1894 : eErr == CE_None)
1895 : {
1896 : eErr = pfnResampleFuncMultiBands(
1897 : dfXRatioDstToSrc, dfYRatioDstToSrc,
1898 : dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1899 : dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1900 : eWrkDataType, (GByte *)pChunk, nBandCount,
1901 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1902 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1903 : nChunkXSizeQueried,
1904 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1905 : nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1906 : nDstXOff + nDestXOffVirtual + nDstXCount,
1907 : nDstYOff + nDestYOffVirtual,
1908 : nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1909 : pszResampling, FALSE /*bHasNoData*/,
1910 : 0.0 /* dfNoDataValue */, nullptr /* color table*/,
1911 : eDataType);
1912 : }
1913 : else
1914 : #endif
1915 : {
1916 : size_t nChunkBandOffset =
1917 282 : static_cast<size_t>(nChunkXSizeQueried) *
1918 282 : nChunkYSizeQueried *
1919 282 : GDALGetDataTypeSizeBytes(eWrkDataType);
1920 1205 : for (int i = 0;
1921 1205 : i < nBandCount && !bSkipResample && eErr == CE_None;
1922 : i++)
1923 : {
1924 922 : const bool bPropagateNoData = false;
1925 922 : void *pDstBuffer = nullptr;
1926 922 : GDALDataType eDstBufferDataType = GDT_Unknown;
1927 : GDALRasterBand *poMEMBand =
1928 922 : poMEMDS->GetRasterBand(i + 1);
1929 921 : GDALOverviewResampleArgs args;
1930 921 : args.eSrcDataType = eDataType;
1931 921 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1932 922 : args.nOvrXSize = poMEMBand->GetXSize();
1933 920 : args.nOvrYSize = poMEMBand->GetYSize();
1934 921 : args.nOvrNBITS = nNBITS;
1935 921 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1936 921 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1937 921 : args.dfSrcXDelta =
1938 921 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1939 921 : args.dfSrcYDelta =
1940 921 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1941 921 : args.eWrkDataType = eWrkDataType;
1942 921 : args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
1943 921 : ? nullptr
1944 : : pabyChunkNoDataMask;
1945 921 : args.nChunkXOff =
1946 921 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1947 921 : args.nChunkXSize = nChunkXSizeQueried;
1948 921 : args.nChunkYOff =
1949 921 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1950 921 : args.nChunkYSize = nChunkYSizeQueried;
1951 921 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1952 921 : args.nDstXOff2 =
1953 921 : nDstXOff + nDestXOffVirtual + nDstXCount;
1954 921 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1955 921 : args.nDstYOff2 =
1956 921 : nDstYOff + nDestYOffVirtual + nDstYCount;
1957 921 : args.pszResampling = pszResampling;
1958 921 : args.bHasNoData = false;
1959 921 : args.dfNoDataValue = 0.0;
1960 921 : args.poColorTable = nullptr;
1961 921 : args.bPropagateNoData = bPropagateNoData;
1962 :
1963 : eErr =
1964 1843 : pfnResampleFunc(args,
1965 921 : reinterpret_cast<GByte *>(pChunk) +
1966 921 : i * nChunkBandOffset,
1967 : &pDstBuffer, &eDstBufferDataType);
1968 922 : if (eErr == CE_None)
1969 : {
1970 922 : eErr = poMEMBand->RasterIO(
1971 : GF_Write, nDstXOff + nDestXOffVirtual,
1972 : nDstYOff + nDestYOffVirtual, nDstXCount,
1973 : nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1974 : eDstBufferDataType, 0, 0, nullptr);
1975 : }
1976 922 : CPLFree(pDstBuffer);
1977 : }
1978 : }
1979 :
1980 283 : nBlocksDone++;
1981 285 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1982 2 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1983 : "", psExtraArg->pProgressData))
1984 : {
1985 0 : eErr = CE_Failure;
1986 : }
1987 : }
1988 : }
1989 :
1990 285 : CPLFree(pChunk);
1991 284 : CPLFree(pabyChunkNoDataMask);
1992 : }
1993 :
1994 284 : CPLFree(papoDstBands);
1995 284 : GDALClose(poMEMDS);
1996 :
1997 284 : return eErr;
1998 : }
1999 :
2000 : //! @endcond
2001 :
2002 : /************************************************************************/
2003 : /* GDALSwapWords() */
2004 : /************************************************************************/
2005 :
2006 : /**
2007 : * Byte swap words in-place.
2008 : *
2009 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2010 : * a memory array. No assumption is made that the words being swapped are
2011 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2012 : * to determine if the current platform is big endian or little endian. Use
2013 : * the macros like CPL_SWAP32() to byte swap single values without the overhead
2014 : * of a function call.
2015 : *
2016 : * @param pData pointer to start of data buffer.
2017 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2018 : * @param nWordCount the number of words to be swapped in this call.
2019 : * @param nWordSkip the byte offset from the start of one word to the start of
2020 : * the next. For packed buffers this is the same as nWordSize.
2021 : */
2022 :
2023 439181 : void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
2024 : int nWordSkip)
2025 :
2026 : {
2027 439181 : if (nWordCount > 0)
2028 439181 : VALIDATE_POINTER0(pData, "GDALSwapWords");
2029 :
2030 439181 : GByte *pabyData = static_cast<GByte *>(pData);
2031 :
2032 439181 : switch (nWordSize)
2033 : {
2034 7234 : case 1:
2035 7234 : break;
2036 :
2037 418687 : case 2:
2038 418687 : CPLAssert(nWordSkip >= 2 || nWordCount == 1);
2039 289291000 : for (int i = 0; i < nWordCount; i++)
2040 : {
2041 288873000 : CPL_SWAP16PTR(pabyData);
2042 288873000 : pabyData += nWordSkip;
2043 : }
2044 418687 : break;
2045 :
2046 10689 : case 4:
2047 10689 : CPLAssert(nWordSkip >= 4 || nWordCount == 1);
2048 10689 : if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
2049 : {
2050 29148800 : for (int i = 0; i < nWordCount; i++)
2051 : {
2052 29138100 : *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
2053 : *reinterpret_cast<const GUInt32 *>(pabyData));
2054 29138100 : pabyData += nWordSkip;
2055 10686 : }
2056 : }
2057 : else
2058 : {
2059 9 : for (int i = 0; i < nWordCount; i++)
2060 : {
2061 6 : CPL_SWAP32PTR(pabyData);
2062 6 : pabyData += nWordSkip;
2063 : }
2064 : }
2065 10689 : break;
2066 :
2067 2571 : case 8:
2068 2571 : CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2069 2571 : if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2070 : {
2071 3359870 : for (int i = 0; i < nWordCount; i++)
2072 : {
2073 3357300 : *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
2074 : *reinterpret_cast<const GUInt64 *>(pabyData));
2075 3357300 : pabyData += nWordSkip;
2076 2570 : }
2077 : }
2078 : else
2079 : {
2080 3 : for (int i = 0; i < nWordCount; i++)
2081 : {
2082 2 : CPL_SWAP64PTR(pabyData);
2083 2 : pabyData += nWordSkip;
2084 : }
2085 : }
2086 2571 : break;
2087 :
2088 0 : default:
2089 0 : CPLAssert(false);
2090 : }
2091 : }
2092 :
2093 : /************************************************************************/
2094 : /* GDALSwapWordsEx() */
2095 : /************************************************************************/
2096 :
2097 : /**
2098 : * Byte swap words in-place.
2099 : *
2100 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2101 : * a memory array. No assumption is made that the words being swapped are
2102 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2103 : * to determine if the current platform is big endian or little endian. Use
2104 : * the macros like CPL_SWAP32() to byte swap single values without the overhead
2105 : * of a function call.
2106 : *
2107 : * @param pData pointer to start of data buffer.
2108 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2109 : * @param nWordCount the number of words to be swapped in this call.
2110 : * @param nWordSkip the byte offset from the start of one word to the start of
2111 : * the next. For packed buffers this is the same as nWordSize.
2112 : * @since GDAL 2.1
2113 : */
2114 6378 : void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
2115 : int nWordSkip)
2116 : {
2117 6378 : GByte *pabyData = static_cast<GByte *>(pData);
2118 12756 : while (nWordCount)
2119 : {
2120 : // Pick-up a multiple of 8 as max chunk size.
2121 6378 : const int nWordCountSmall =
2122 6378 : (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2123 6378 : GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2124 6378 : pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2125 6378 : nWordCount -= nWordCountSmall;
2126 : }
2127 6378 : }
2128 :
2129 : // Place the new GDALCopyWords helpers in an anonymous namespace
2130 : namespace
2131 : {
2132 :
2133 : /************************************************************************/
2134 : /* GDALCopyWordsT() */
2135 : /************************************************************************/
2136 : /**
2137 : * Template function, used to copy data from pSrcData into buffer
2138 : * pDstData, with stride nSrcPixelStride in the source data and
2139 : * stride nDstPixelStride in the destination data. This template can
2140 : * deal with the case where the input data type is real or complex and
2141 : * the output is real.
2142 : *
2143 : * @param pSrcData the source data buffer
2144 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2145 : * of interest.
2146 : * @param pDstData the destination buffer.
2147 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2148 : * interest.
2149 : * @param nWordCount the total number of pixel words to copy
2150 : *
2151 : * @code
2152 : * // Assume an input buffer of type GUInt16 named pBufferIn
2153 : * GByte *pBufferOut = new GByte[numBytesOut];
2154 : * GDALCopyWordsT<GUInt16, GByte>(pBufferIn, 2, pBufferOut, 1, numBytesOut);
2155 : * @endcode
2156 : * @note
2157 : * This is a private function, and should not be exposed outside of
2158 : * rasterio.cpp. External users should call the GDALCopyWords driver function.
2159 : */
2160 :
2161 : template <class Tin, class Tout>
2162 46723194 : static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2163 : int nSrcPixelStride,
2164 : Tout *const CPL_RESTRICT pDstData,
2165 : int nDstPixelStride,
2166 : GPtrDiff_t nWordCount)
2167 : {
2168 46723194 : decltype(nWordCount) nDstOffset = 0;
2169 :
2170 46723194 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2171 46723194 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2172 604242515 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2173 : {
2174 557520162 : const Tin tValue =
2175 557520162 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2176 557520162 : Tout *const pOutPixel =
2177 557520162 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2178 :
2179 557520162 : GDALCopyWord(tValue, *pOutPixel);
2180 :
2181 557519072 : nDstOffset += nDstPixelStride;
2182 : }
2183 46721925 : }
2184 :
2185 : template <class Tin, class Tout>
2186 38280644 : static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2187 : int nSrcPixelStride,
2188 : Tout *const CPL_RESTRICT pDstData,
2189 : int nDstPixelStride, GPtrDiff_t nWordCount)
2190 : {
2191 38280644 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2192 : nWordCount);
2193 38280735 : }
2194 :
2195 : template <class Tin, class Tout>
2196 195564 : static void inline GDALCopyWordsT_8atatime(
2197 : const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2198 : Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2199 : GPtrDiff_t nWordCount)
2200 : {
2201 195564 : decltype(nWordCount) nDstOffset = 0;
2202 :
2203 195564 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2204 195564 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2205 195564 : decltype(nWordCount) n = 0;
2206 195564 : if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2207 : nDstPixelStride == static_cast<int>(sizeof(Tout)))
2208 : {
2209 22744720 : for (; n < nWordCount - 7; n += 8)
2210 : {
2211 22546738 : const Tin *pInValues = reinterpret_cast<const Tin *>(
2212 22546738 : pSrcDataPtr + (n * nSrcPixelStride));
2213 22546738 : Tout *const pOutPixels =
2214 22546738 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2215 :
2216 22546738 : GDALCopy8Words(pInValues, pOutPixels);
2217 :
2218 22550058 : nDstOffset += 8 * nDstPixelStride;
2219 : }
2220 : }
2221 690296 : for (; n < nWordCount; n++)
2222 : {
2223 494739 : const Tin tValue =
2224 494739 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2225 494739 : Tout *const pOutPixel =
2226 494739 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2227 :
2228 494739 : GDALCopyWord(tValue, *pOutPixel);
2229 :
2230 491373 : nDstOffset += nDstPixelStride;
2231 : }
2232 195557 : }
2233 :
2234 : #ifdef HAVE_SSE2
2235 :
2236 : template <class Tout>
2237 39381 : void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2238 : int nSrcPixelStride,
2239 : Tout *const CPL_RESTRICT pDstData,
2240 : int nDstPixelStride, GPtrDiff_t nWordCount)
2241 : {
2242 : static_assert(std::is_integral<Tout>::value &&
2243 : sizeof(Tout) == sizeof(uint16_t),
2244 : "Bad Tout");
2245 39381 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2246 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2247 : {
2248 33330 : decltype(nWordCount) n = 0;
2249 33330 : const __m128i xmm_zero = _mm_setzero_si128();
2250 33330 : GByte *CPL_RESTRICT pabyDstDataPtr =
2251 : reinterpret_cast<GByte *>(pDstData);
2252 1501757 : for (; n < nWordCount - 15; n += 16)
2253 : {
2254 1468427 : __m128i xmm = _mm_loadu_si128(
2255 1468427 : reinterpret_cast<const __m128i *>(pSrcData + n));
2256 1468427 : __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2257 1468427 : __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2258 : _mm_storeu_si128(
2259 1468427 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2260 : _mm_storeu_si128(
2261 1468427 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2262 : }
2263 108789 : for (; n < nWordCount; n++)
2264 : {
2265 75459 : pDstData[n] = pSrcData[n];
2266 33330 : }
2267 : }
2268 : else
2269 : {
2270 6051 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2271 : nDstPixelStride, nWordCount);
2272 : }
2273 39381 : }
2274 :
2275 : template <>
2276 25764 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2277 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2278 : int nDstPixelStride, GPtrDiff_t nWordCount)
2279 : {
2280 25764 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2281 : nDstPixelStride, nWordCount);
2282 25764 : }
2283 :
2284 : template <>
2285 13617 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2286 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2287 : int nDstPixelStride, GPtrDiff_t nWordCount)
2288 : {
2289 13617 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2290 : nDstPixelStride, nWordCount);
2291 13617 : }
2292 :
2293 : template <class Tout>
2294 12300638 : void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2295 : int nSrcPixelStride,
2296 : Tout *const CPL_RESTRICT pDstData,
2297 : int nDstPixelStride, GPtrDiff_t nWordCount)
2298 : {
2299 : static_assert(std::is_integral<Tout>::value &&
2300 : sizeof(Tout) == sizeof(uint32_t),
2301 : "Bad Tout");
2302 12300638 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2303 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2304 : {
2305 6246608 : decltype(nWordCount) n = 0;
2306 6246608 : const __m128i xmm_zero = _mm_setzero_si128();
2307 6246608 : GByte *CPL_RESTRICT pabyDstDataPtr =
2308 : reinterpret_cast<GByte *>(pDstData);
2309 69407552 : for (; n < nWordCount - 15; n += 16)
2310 : {
2311 63288524 : __m128i xmm = _mm_loadu_si128(
2312 63288524 : reinterpret_cast<const __m128i *>(pSrcData + n));
2313 63302124 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2314 63271424 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2315 63156024 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2316 63067824 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2317 63011324 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2318 63160924 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2319 : _mm_storeu_si128(
2320 63160924 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2321 : _mm_storeu_si128(
2322 63160924 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2323 : _mm_storeu_si128(
2324 63160924 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2325 : _mm_storeu_si128(
2326 63160924 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2327 : }
2328 14307639 : for (; n < nWordCount; n++)
2329 : {
2330 8188681 : pDstData[n] = pSrcData[n];
2331 6118988 : }
2332 : }
2333 : else
2334 : {
2335 6054050 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2336 : nDstPixelStride, nWordCount);
2337 : }
2338 12170038 : }
2339 :
2340 : template <>
2341 438 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2342 : int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2343 : int nDstPixelStride, GPtrDiff_t nWordCount)
2344 : {
2345 438 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2346 : nDstPixelStride, nWordCount);
2347 438 : }
2348 :
2349 : template <>
2350 12300700 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2351 : int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2352 : int nDstPixelStride, GPtrDiff_t nWordCount)
2353 : {
2354 12300700 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2355 : nDstPixelStride, nWordCount);
2356 12306400 : }
2357 :
2358 : template <>
2359 2470650 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2360 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2361 : int nDstPixelStride, GPtrDiff_t nWordCount)
2362 : {
2363 2470650 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2364 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2365 : {
2366 111205 : decltype(nWordCount) n = 0;
2367 111205 : const __m128i xmm_zero = _mm_setzero_si128();
2368 111205 : GByte *CPL_RESTRICT pabyDstDataPtr =
2369 : reinterpret_cast<GByte *>(pDstData);
2370 3273020 : for (; n < nWordCount - 15; n += 16)
2371 : {
2372 3161820 : __m128i xmm = _mm_loadu_si128(
2373 3161820 : reinterpret_cast<const __m128i *>(pSrcData + n));
2374 3161820 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2375 3161820 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2376 3161820 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2377 3161820 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2378 3161820 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2379 3161820 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2380 3161820 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2381 3161820 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2382 3161820 : __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2383 3161820 : __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2384 3161820 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2385 : xmm0_f);
2386 : _mm_storeu_ps(
2387 3161820 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2388 : _mm_storeu_ps(
2389 3161820 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2390 : _mm_storeu_ps(
2391 3161820 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2392 : }
2393 472713 : for (; n < nWordCount; n++)
2394 : {
2395 361508 : pDstData[n] = pSrcData[n];
2396 111205 : }
2397 : }
2398 : else
2399 : {
2400 2359440 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2401 : nDstPixelStride, nWordCount);
2402 : }
2403 2470650 : }
2404 :
2405 : template <>
2406 147900 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2407 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2408 : int nDstPixelStride, GPtrDiff_t nWordCount)
2409 : {
2410 147900 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2411 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2412 : {
2413 124846 : decltype(nWordCount) n = 0;
2414 124846 : const __m128i xmm_zero = _mm_setzero_si128();
2415 124846 : GByte *CPL_RESTRICT pabyDstDataPtr =
2416 : reinterpret_cast<GByte *>(pDstData);
2417 1425860 : for (; n < nWordCount - 15; n += 16)
2418 : {
2419 1301020 : __m128i xmm = _mm_loadu_si128(
2420 1301020 : reinterpret_cast<const __m128i *>(pSrcData + n));
2421 1301020 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2422 1301020 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2423 1301020 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2424 1301020 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2425 1301020 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2426 1301020 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2427 :
2428 1301020 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2429 1301020 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2430 1301020 : __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2431 1301020 : __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2432 1301020 : xmm0 = _mm_srli_si128(xmm0, 8);
2433 1301020 : xmm1 = _mm_srli_si128(xmm1, 8);
2434 1301020 : xmm2 = _mm_srli_si128(xmm2, 8);
2435 1301020 : xmm3 = _mm_srli_si128(xmm3, 8);
2436 1301020 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2437 1301020 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2438 1301020 : __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2439 1301020 : __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2440 :
2441 1301020 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2442 : xmm0_low_d);
2443 : _mm_storeu_pd(
2444 1301020 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2445 : xmm0_high_d);
2446 : _mm_storeu_pd(
2447 1301020 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2448 : xmm1_low_d);
2449 : _mm_storeu_pd(
2450 1301020 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2451 : xmm1_high_d);
2452 : _mm_storeu_pd(
2453 1301020 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2454 : xmm2_low_d);
2455 : _mm_storeu_pd(
2456 1301020 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2457 : xmm2_high_d);
2458 : _mm_storeu_pd(
2459 1301020 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2460 : xmm3_low_d);
2461 : _mm_storeu_pd(
2462 1301020 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2463 : xmm3_high_d);
2464 : }
2465 238530 : for (; n < nWordCount; n++)
2466 : {
2467 113684 : pDstData[n] = pSrcData[n];
2468 124846 : }
2469 : }
2470 : else
2471 : {
2472 23054 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2473 : nDstPixelStride, nWordCount);
2474 : }
2475 147900 : }
2476 :
2477 : template <>
2478 6008 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2479 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2480 : int nDstPixelStride, GPtrDiff_t nWordCount)
2481 : {
2482 6008 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2483 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2484 : {
2485 5033 : decltype(nWordCount) n = 0;
2486 : // In SSE2, min_epu16 does not exist, so shift from
2487 : // UInt16 to SInt16 to be able to use min_epi16
2488 5033 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2489 5033 : const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2490 138473 : for (; n < nWordCount - 7; n += 8)
2491 : {
2492 133440 : __m128i xmm = _mm_loadu_si128(
2493 133440 : reinterpret_cast<const __m128i *>(pSrcData + n));
2494 133440 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2495 133440 : xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2496 133440 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2497 133440 : xmm = _mm_packus_epi16(xmm, xmm);
2498 133440 : GDALCopyXMMToInt64(xmm,
2499 133440 : reinterpret_cast<GPtrDiff_t *>(pDstData + n));
2500 : }
2501 16019 : for (; n < nWordCount; n++)
2502 : {
2503 10986 : pDstData[n] =
2504 10986 : pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2505 5033 : }
2506 : }
2507 : else
2508 : {
2509 975 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2510 : nDstPixelStride, nWordCount);
2511 : }
2512 6008 : }
2513 :
2514 : template <>
2515 21 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2516 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2517 : int nDstPixelStride, GPtrDiff_t nWordCount)
2518 : {
2519 21 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2520 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2521 : {
2522 15 : decltype(nWordCount) n = 0;
2523 : // In SSE2, min_epu16 does not exist, so shift from
2524 : // UInt16 to SInt16 to be able to use min_epi16
2525 15 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2526 15 : const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2527 31 : for (; n < nWordCount - 7; n += 8)
2528 : {
2529 16 : __m128i xmm = _mm_loadu_si128(
2530 16 : reinterpret_cast<const __m128i *>(pSrcData + n));
2531 16 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2532 16 : xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2533 16 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2534 16 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm);
2535 : }
2536 55 : for (; n < nWordCount; n++)
2537 : {
2538 40 : pDstData[n] =
2539 40 : pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2540 15 : }
2541 : }
2542 : else
2543 : {
2544 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2545 : nDstPixelStride, nWordCount);
2546 : }
2547 21 : }
2548 :
2549 : template <>
2550 412 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2551 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2552 : int nDstPixelStride, GPtrDiff_t nWordCount)
2553 : {
2554 412 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2555 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2556 : {
2557 406 : decltype(nWordCount) n = 0;
2558 406 : const __m128i xmm_zero = _mm_setzero_si128();
2559 406 : GByte *CPL_RESTRICT pabyDstDataPtr =
2560 : reinterpret_cast<GByte *>(pDstData);
2561 1500 : for (; n < nWordCount - 7; n += 8)
2562 : {
2563 1094 : __m128i xmm = _mm_loadu_si128(
2564 1094 : reinterpret_cast<const __m128i *>(pSrcData + n));
2565 1094 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2566 1094 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2567 1094 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2568 1094 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2569 1094 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2570 : xmm0_f);
2571 : _mm_storeu_ps(
2572 1094 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2573 : }
2574 1483 : for (; n < nWordCount; n++)
2575 : {
2576 1077 : pDstData[n] = pSrcData[n];
2577 406 : }
2578 : }
2579 : else
2580 : {
2581 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2582 : nDstPixelStride, nWordCount);
2583 : }
2584 412 : }
2585 :
2586 : template <>
2587 281 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2588 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2589 : int nDstPixelStride, GPtrDiff_t nWordCount)
2590 : {
2591 281 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2592 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2593 : {
2594 173 : decltype(nWordCount) n = 0;
2595 173 : const __m128i xmm_zero = _mm_setzero_si128();
2596 173 : GByte *CPL_RESTRICT pabyDstDataPtr =
2597 : reinterpret_cast<GByte *>(pDstData);
2598 221 : for (; n < nWordCount - 7; n += 8)
2599 : {
2600 48 : __m128i xmm = _mm_loadu_si128(
2601 48 : reinterpret_cast<const __m128i *>(pSrcData + n));
2602 48 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2603 48 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2604 :
2605 48 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2606 48 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2607 48 : xmm0 = _mm_srli_si128(xmm0, 8);
2608 48 : xmm1 = _mm_srli_si128(xmm1, 8);
2609 48 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2610 48 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2611 :
2612 48 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2613 : xmm0_low_d);
2614 : _mm_storeu_pd(
2615 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2616 : xmm0_high_d);
2617 : _mm_storeu_pd(
2618 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2619 : xmm1_low_d);
2620 : _mm_storeu_pd(
2621 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2622 : xmm1_high_d);
2623 : }
2624 433 : for (; n < nWordCount; n++)
2625 : {
2626 260 : pDstData[n] = pSrcData[n];
2627 173 : }
2628 : }
2629 : else
2630 : {
2631 108 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2632 : nDstPixelStride, nWordCount);
2633 : }
2634 281 : }
2635 :
2636 : template <>
2637 811 : void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
2638 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2639 : int nDstPixelStride, GPtrDiff_t nWordCount)
2640 : {
2641 811 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2642 : nDstPixelStride, nWordCount);
2643 811 : }
2644 :
2645 : #endif // HAVE_SSE2
2646 :
2647 : template <>
2648 117959 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2649 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2650 : int nDstPixelStride, GPtrDiff_t nWordCount)
2651 : {
2652 117959 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2653 : nDstPixelStride, nWordCount);
2654 117959 : }
2655 :
2656 : template <>
2657 15146 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2658 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2659 : int nDstPixelStride, GPtrDiff_t nWordCount)
2660 : {
2661 15146 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2662 : nDstPixelStride, nWordCount);
2663 15146 : }
2664 :
2665 : template <>
2666 61648 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2667 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2668 : int nDstPixelStride, GPtrDiff_t nWordCount)
2669 : {
2670 61648 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2671 : nDstPixelStride, nWordCount);
2672 61641 : }
2673 :
2674 : /************************************************************************/
2675 : /* GDALCopyWordsComplexT() */
2676 : /************************************************************************/
2677 : /**
2678 : * Template function, used to copy data from pSrcData into buffer
2679 : * pDstData, with stride nSrcPixelStride in the source data and
2680 : * stride nDstPixelStride in the destination data. Deals with the
2681 : * complex case, where input is complex and output is complex.
2682 : *
2683 : * @param pSrcData the source data buffer
2684 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2685 : * of interest.
2686 : * @param pDstData the destination buffer.
2687 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2688 : * interest.
2689 : * @param nWordCount the total number of pixel words to copy
2690 : *
2691 : */
2692 : template <class Tin, class Tout>
2693 125220 : inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
2694 : int nSrcPixelStride,
2695 : Tout *const CPL_RESTRICT pDstData,
2696 : int nDstPixelStride, GPtrDiff_t nWordCount)
2697 : {
2698 125220 : decltype(nWordCount) nDstOffset = 0;
2699 125220 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2700 125220 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2701 :
2702 7338017 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2703 : {
2704 7212792 : const Tin *const pPixelIn =
2705 7212792 : reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2706 7212792 : Tout *const pPixelOut =
2707 7212792 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2708 :
2709 7212792 : GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2710 7212792 : GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2711 :
2712 7212792 : nDstOffset += nDstPixelStride;
2713 : }
2714 125220 : }
2715 :
2716 : /************************************************************************/
2717 : /* GDALCopyWordsComplexOutT() */
2718 : /************************************************************************/
2719 : /**
2720 : * Template function, used to copy data from pSrcData into buffer
2721 : * pDstData, with stride nSrcPixelStride in the source data and
2722 : * stride nDstPixelStride in the destination data. Deals with the
2723 : * case where the value is real coming in, but complex going out.
2724 : *
2725 : * @param pSrcData the source data buffer
2726 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2727 : * of interest, in bytes.
2728 : * @param pDstData the destination buffer.
2729 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2730 : * interest, in bytes.
2731 : * @param nWordCount the total number of pixel words to copy
2732 : *
2733 : */
2734 : template <class Tin, class Tout>
2735 3314 : inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
2736 : int nSrcPixelStride,
2737 : Tout *const CPL_RESTRICT pDstData,
2738 : int nDstPixelStride, GPtrDiff_t nWordCount)
2739 : {
2740 3314 : decltype(nWordCount) nDstOffset = 0;
2741 :
2742 3314 : const Tout tOutZero = static_cast<Tout>(0);
2743 :
2744 3314 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2745 3314 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2746 :
2747 1112984 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2748 : {
2749 1109670 : const Tin tValue =
2750 1109670 : *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2751 1109670 : Tout *const pPixelOut =
2752 1109670 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2753 1109670 : GDALCopyWord(tValue, *pPixelOut);
2754 :
2755 1109670 : pPixelOut[1] = tOutZero;
2756 :
2757 1109670 : nDstOffset += nDstPixelStride;
2758 : }
2759 3314 : }
2760 :
2761 : /************************************************************************/
2762 : /* GDALCopyWordsFromT() */
2763 : /************************************************************************/
2764 : /**
2765 : * Template driver function. Given the input type T, call the appropriate
2766 : * GDALCopyWordsT function template for the desired output type. You should
2767 : * never call this function directly (call GDALCopyWords instead).
2768 : *
2769 : * @param pSrcData source data buffer
2770 : * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2771 : * @param bInComplex input is complex
2772 : * @param pDstData destination data buffer
2773 : * @param eDstType destination data type
2774 : * @param nDstPixelStride pixel stride in output buffer, in pixel words
2775 : * @param nWordCount number of pixel words to be copied
2776 : */
2777 : template <class T>
2778 53571778 : inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
2779 : int nSrcPixelStride, bool bInComplex,
2780 : void *CPL_RESTRICT pDstData,
2781 : GDALDataType eDstType, int nDstPixelStride,
2782 : GPtrDiff_t nWordCount)
2783 : {
2784 53571778 : switch (eDstType)
2785 : {
2786 4559427 : case GDT_Byte:
2787 4559427 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2788 : static_cast<unsigned char *>(pDstData),
2789 : nDstPixelStride, nWordCount);
2790 4559526 : break;
2791 529 : case GDT_Int8:
2792 529 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2793 : static_cast<signed char *>(pDstData),
2794 : nDstPixelStride, nWordCount);
2795 529 : break;
2796 101199 : case GDT_UInt16:
2797 101199 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2798 : static_cast<unsigned short *>(pDstData),
2799 : nDstPixelStride, nWordCount);
2800 101193 : break;
2801 4126466 : case GDT_Int16:
2802 4126466 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2803 : static_cast<short *>(pDstData), nDstPixelStride,
2804 : nWordCount);
2805 4126466 : break;
2806 4229 : case GDT_UInt32:
2807 4229 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2808 : static_cast<unsigned int *>(pDstData),
2809 : nDstPixelStride, nWordCount);
2810 4229 : break;
2811 25529423 : case GDT_Int32:
2812 25529423 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2813 : static_cast<int *>(pDstData), nDstPixelStride,
2814 : nWordCount);
2815 25534724 : break;
2816 631 : case GDT_UInt64:
2817 631 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2818 : static_cast<std::uint64_t *>(pDstData),
2819 : nDstPixelStride, nWordCount);
2820 631 : break;
2821 4224 : case GDT_Int64:
2822 4224 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2823 : static_cast<std::int64_t *>(pDstData),
2824 : nDstPixelStride, nWordCount);
2825 4224 : break;
2826 118 : case GDT_Float16:
2827 118 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2828 : static_cast<GFloat16 *>(pDstData), nDstPixelStride,
2829 : nWordCount);
2830 118 : break;
2831 3869406 : case GDT_Float32:
2832 3869406 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2833 : static_cast<float *>(pDstData), nDstPixelStride,
2834 : nWordCount);
2835 3869406 : break;
2836 15246636 : case GDT_Float64:
2837 15246636 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2838 : static_cast<double *>(pDstData), nDstPixelStride,
2839 : nWordCount);
2840 15246636 : break;
2841 122439 : case GDT_CInt16:
2842 122439 : if (bInComplex)
2843 : {
2844 121400 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2845 : static_cast<short *>(pDstData),
2846 : nDstPixelStride, nWordCount);
2847 : }
2848 : else // input is not complex, so we need to promote to a complex
2849 : // buffer
2850 : {
2851 1039 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2852 : static_cast<short *>(pDstData),
2853 : nDstPixelStride, nWordCount);
2854 : }
2855 122439 : break;
2856 838 : case GDT_CInt32:
2857 838 : if (bInComplex)
2858 : {
2859 421 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2860 : static_cast<int *>(pDstData),
2861 : nDstPixelStride, nWordCount);
2862 : }
2863 : else // input is not complex, so we need to promote to a complex
2864 : // buffer
2865 : {
2866 417 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2867 : static_cast<int *>(pDstData),
2868 : nDstPixelStride, nWordCount);
2869 : }
2870 838 : break;
2871 57 : case GDT_CFloat16:
2872 57 : if (bInComplex)
2873 : {
2874 16 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2875 : static_cast<GFloat16 *>(pDstData),
2876 : nDstPixelStride, nWordCount);
2877 : }
2878 : else // input is not complex, so we need to promote to a complex
2879 : // buffer
2880 : {
2881 41 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2882 : static_cast<GFloat16 *>(pDstData),
2883 : nDstPixelStride, nWordCount);
2884 : }
2885 57 : break;
2886 3186 : case GDT_CFloat32:
2887 3186 : if (bInComplex)
2888 : {
2889 2595 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2890 : static_cast<float *>(pDstData),
2891 : nDstPixelStride, nWordCount);
2892 : }
2893 : else // input is not complex, so we need to promote to a complex
2894 : // buffer
2895 : {
2896 591 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2897 : static_cast<float *>(pDstData),
2898 : nDstPixelStride, nWordCount);
2899 : }
2900 3186 : break;
2901 2014 : case GDT_CFloat64:
2902 2014 : if (bInComplex)
2903 : {
2904 788 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2905 : static_cast<double *>(pDstData),
2906 : nDstPixelStride, nWordCount);
2907 : }
2908 : else // input is not complex, so we need to promote to a complex
2909 : // buffer
2910 : {
2911 1226 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2912 : static_cast<double *>(pDstData),
2913 : nDstPixelStride, nWordCount);
2914 : }
2915 2014 : break;
2916 0 : case GDT_Unknown:
2917 : case GDT_TypeCount:
2918 0 : CPLAssert(false);
2919 : }
2920 53577172 : }
2921 :
2922 : } // end anonymous namespace
2923 :
2924 : /************************************************************************/
2925 : /* GDALReplicateWord() */
2926 : /************************************************************************/
2927 :
2928 : template <class T>
2929 522520 : inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
2930 : GPtrDiff_t nWordCount)
2931 : {
2932 522520 : const T valSet = *static_cast<const T *>(pDstData);
2933 522520 : if (nDstPixelStride == static_cast<int>(sizeof(T)))
2934 : {
2935 494174 : T *pDstPtr = static_cast<T *>(pDstData) + 1;
2936 19705837 : while (nWordCount >= 4)
2937 : {
2938 19211684 : nWordCount -= 4;
2939 19211684 : pDstPtr[0] = valSet;
2940 19211684 : pDstPtr[1] = valSet;
2941 19211684 : pDstPtr[2] = valSet;
2942 19211684 : pDstPtr[3] = valSet;
2943 19211684 : pDstPtr += 4;
2944 : }
2945 1259444 : while (nWordCount > 0)
2946 : {
2947 765270 : --nWordCount;
2948 765270 : *pDstPtr = valSet;
2949 765270 : pDstPtr++;
2950 : }
2951 : }
2952 : else
2953 : {
2954 28407 : GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2955 954322 : while (nWordCount > 0)
2956 : {
2957 925915 : --nWordCount;
2958 925915 : *reinterpret_cast<T *>(pabyDstPtr) = valSet;
2959 925915 : pabyDstPtr += nDstPixelStride;
2960 : }
2961 : }
2962 522520 : }
2963 :
2964 903812 : static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
2965 : GDALDataType eSrcType,
2966 : void *CPL_RESTRICT pDstData,
2967 : GDALDataType eDstType, int nDstPixelStride,
2968 : GPtrDiff_t nWordCount)
2969 : {
2970 : /* -----------------------------------------------------------------------
2971 : */
2972 : /* Special case when the source data is always the same value */
2973 : /* (for VRTSourcedRasterBand::IRasterIO and
2974 : * VRTDerivedRasterBand::IRasterIO*/
2975 : /* for example) */
2976 : /* -----------------------------------------------------------------------
2977 : */
2978 : // Let the general translation case do the necessary conversions
2979 : // on the first destination element.
2980 903812 : GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
2981 :
2982 : // Now copy the first element to the nWordCount - 1 following destination
2983 : // elements.
2984 901119 : nWordCount--;
2985 901119 : GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
2986 :
2987 901119 : switch (eDstType)
2988 : {
2989 378245 : case GDT_Byte:
2990 : case GDT_Int8:
2991 : {
2992 378245 : if (nDstPixelStride == 1)
2993 : {
2994 344493 : if (nWordCount > 0)
2995 344493 : memset(pabyDstWord,
2996 344493 : *reinterpret_cast<const GByte *>(pDstData),
2997 : nWordCount);
2998 : }
2999 : else
3000 : {
3001 33752 : GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
3002 5438530 : while (nWordCount > 0)
3003 : {
3004 5404780 : --nWordCount;
3005 5404780 : *pabyDstWord = valSet;
3006 5404780 : pabyDstWord += nDstPixelStride;
3007 : }
3008 : }
3009 378245 : break;
3010 : }
3011 :
3012 : #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
3013 : case enum_type: \
3014 : { \
3015 : GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
3016 : break; \
3017 : }
3018 :
3019 354 : CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3020 169653 : CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3021 56 : CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3022 294525 : CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3023 21 : CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3024 662 : CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3025 0 : CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16)
3026 52218 : CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3027 5089 : CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3028 :
3029 : #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
3030 : case enum_type: \
3031 : { \
3032 : c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \
3033 : c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \
3034 : while (nWordCount > 0) \
3035 : { \
3036 : --nWordCount; \
3037 : reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \
3038 : reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \
3039 : pabyDstWord += nDstPixelStride; \
3040 : } \
3041 : break; \
3042 : }
3043 :
3044 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3045 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3046 0 : CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16)
3047 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3048 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3049 :
3050 0 : case GDT_Unknown:
3051 : case GDT_TypeCount:
3052 0 : CPLAssert(false);
3053 : }
3054 905047 : }
3055 :
3056 : /************************************************************************/
3057 : /* GDALUnrolledCopy() */
3058 : /************************************************************************/
3059 :
3060 : template <class T, int srcStride, int dstStride>
3061 5329960 : static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3062 : const T *CPL_RESTRICT pSrc,
3063 : GPtrDiff_t nIters)
3064 : {
3065 5329960 : if (nIters >= 16)
3066 : {
3067 138921769 : for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3068 : {
3069 133722348 : pDest[0 * dstStride] = pSrc[0 * srcStride];
3070 133722348 : pDest[1 * dstStride] = pSrc[1 * srcStride];
3071 133722348 : pDest[2 * dstStride] = pSrc[2 * srcStride];
3072 133722348 : pDest[3 * dstStride] = pSrc[3 * srcStride];
3073 133722348 : pDest[4 * dstStride] = pSrc[4 * srcStride];
3074 133722348 : pDest[5 * dstStride] = pSrc[5 * srcStride];
3075 133722348 : pDest[6 * dstStride] = pSrc[6 * srcStride];
3076 133722348 : pDest[7 * dstStride] = pSrc[7 * srcStride];
3077 133722348 : pDest[8 * dstStride] = pSrc[8 * srcStride];
3078 133722348 : pDest[9 * dstStride] = pSrc[9 * srcStride];
3079 133722348 : pDest[10 * dstStride] = pSrc[10 * srcStride];
3080 133722348 : pDest[11 * dstStride] = pSrc[11 * srcStride];
3081 133722348 : pDest[12 * dstStride] = pSrc[12 * srcStride];
3082 133722348 : pDest[13 * dstStride] = pSrc[13 * srcStride];
3083 133722348 : pDest[14 * dstStride] = pSrc[14 * srcStride];
3084 133722348 : pDest[15 * dstStride] = pSrc[15 * srcStride];
3085 133722348 : pDest += 16 * dstStride;
3086 133722348 : pSrc += 16 * srcStride;
3087 : }
3088 5199389 : nIters = nIters % 16;
3089 : }
3090 7582860 : for (GPtrDiff_t i = 0; i < nIters; i++)
3091 : {
3092 2252905 : pDest[i * dstStride] = *pSrc;
3093 2252905 : pSrc += srcStride;
3094 : }
3095 5329960 : }
3096 :
3097 : template <class T, int srcStride, int dstStride>
3098 5323871 : static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3099 : const T *CPL_RESTRICT pSrc,
3100 : GPtrDiff_t nIters)
3101 : {
3102 5323871 : GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
3103 5323882 : }
3104 :
3105 : #ifdef HAVE_SSE2
3106 :
3107 : template <>
3108 304503 : void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3109 : const GByte *CPL_RESTRICT pSrc,
3110 : GPtrDiff_t nIters)
3111 : {
3112 304503 : decltype(nIters) i = 0;
3113 304503 : if (nIters > 16)
3114 : {
3115 146327 : const __m128i xmm_mask = _mm_set1_epi16(0xff);
3116 : // If we were sure that there would always be 1 trailing byte, we could
3117 : // check against nIters - 15
3118 2552310 : for (; i < nIters - 16; i += 16)
3119 : {
3120 : __m128i xmm0 =
3121 2405980 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3122 : __m128i xmm1 =
3123 4811970 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3124 : // Set higher 8bit of each int16 packed word to 0
3125 2405980 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3126 2405980 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3127 : // Pack int16 to uint8 and merge back both vector
3128 2405980 : xmm0 = _mm_packus_epi16(xmm0, xmm1);
3129 :
3130 : // Store result
3131 2405980 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3132 :
3133 2405980 : pSrc += 2 * 16;
3134 : }
3135 : }
3136 3883900 : for (; i < nIters; i++)
3137 : {
3138 3579400 : pDest[i] = *pSrc;
3139 3579400 : pSrc += 2;
3140 : }
3141 304503 : }
3142 :
3143 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3144 :
3145 : template <>
3146 191200 : void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3147 : const GByte *CPL_RESTRICT pSrc,
3148 : GPtrDiff_t nIters)
3149 : {
3150 191200 : if (nIters > 16 && CPLHaveRuntimeSSSE3())
3151 : {
3152 185100 : GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3153 : }
3154 : else
3155 : {
3156 6100 : GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
3157 : }
3158 191200 : }
3159 :
3160 : #endif
3161 :
3162 : template <>
3163 105200 : void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3164 : const GByte *CPL_RESTRICT pSrc,
3165 : GPtrDiff_t nIters)
3166 : {
3167 105200 : decltype(nIters) i = 0;
3168 105200 : if (nIters > 16)
3169 : {
3170 99907 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
3171 : // If we were sure that there would always be 3 trailing bytes, we could
3172 : // check against nIters - 15
3173 8826390 : for (; i < nIters - 16; i += 16)
3174 : {
3175 : __m128i xmm0 =
3176 8726220 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3177 : __m128i xmm1 =
3178 8726220 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3179 : __m128i xmm2 =
3180 8726220 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3181 : __m128i xmm3 =
3182 17452400 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3183 : // Set higher 24bit of each int32 packed word to 0
3184 8726220 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3185 8726220 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3186 8726220 : xmm2 = _mm_and_si128(xmm2, xmm_mask);
3187 8726220 : xmm3 = _mm_and_si128(xmm3, xmm_mask);
3188 : // Pack int32 to int16
3189 8726330 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
3190 8726390 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
3191 : // Pack int16 to uint8
3192 8726480 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
3193 :
3194 : // Store result
3195 8726480 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3196 :
3197 8726480 : pSrc += 4 * 16;
3198 : }
3199 : }
3200 1118720 : for (; i < nIters; i++)
3201 : {
3202 1013260 : pDest[i] = *pSrc;
3203 1013260 : pSrc += 4;
3204 : }
3205 105461 : }
3206 : #endif // HAVE_SSE2
3207 :
3208 : /************************************************************************/
3209 : /* GDALFastCopy() */
3210 : /************************************************************************/
3211 :
3212 : template <class T>
3213 39816400 : static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3214 : const T *CPL_RESTRICT pSrc, int nSrcStride,
3215 : GPtrDiff_t nIters)
3216 : {
3217 39816400 : constexpr int sizeofT = static_cast<int>(sizeof(T));
3218 39816400 : if (nIters == 1)
3219 : {
3220 22302360 : *pDest = *pSrc;
3221 : }
3222 17514007 : else if (nDestStride == sizeofT)
3223 : {
3224 12258985 : if (nSrcStride == sizeofT)
3225 : {
3226 11516466 : memcpy(pDest, pSrc, nIters * sizeof(T));
3227 : }
3228 742546 : else if (nSrcStride == 2 * sizeofT)
3229 : {
3230 307456 : GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3231 : }
3232 435090 : else if (nSrcStride == 3 * sizeofT)
3233 : {
3234 296508 : GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3235 : }
3236 138582 : else if (nSrcStride == 4 * sizeofT)
3237 : {
3238 134068 : GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3239 : }
3240 : else
3241 : {
3242 12992220 : while (nIters-- > 0)
3243 : {
3244 12987630 : *pDest = *pSrc;
3245 12987630 : pSrc += nSrcStride / sizeofT;
3246 12987630 : pDest++;
3247 : }
3248 : }
3249 : }
3250 5255022 : else if (nSrcStride == sizeofT)
3251 : {
3252 5246158 : if (nDestStride == 2 * sizeofT)
3253 : {
3254 131205 : GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3255 : }
3256 5114953 : else if (nDestStride == 3 * sizeofT)
3257 : {
3258 4412163 : GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3259 : }
3260 702796 : else if (nDestStride == 4 * sizeofT)
3261 : {
3262 643372 : GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3263 : }
3264 : else
3265 : {
3266 12632600 : while (nIters-- > 0)
3267 : {
3268 12573180 : *pDest = *pSrc;
3269 12573180 : pSrc++;
3270 12573180 : pDest += nDestStride / sizeofT;
3271 : }
3272 : }
3273 : }
3274 : else
3275 : {
3276 1107757 : while (nIters-- > 0)
3277 : {
3278 1098896 : *pDest = *pSrc;
3279 1098896 : pSrc += nSrcStride / sizeofT;
3280 1098896 : pDest += nDestStride / sizeofT;
3281 : }
3282 : }
3283 39816400 : }
3284 :
3285 : /************************************************************************/
3286 : /* GDALFastCopyByte() */
3287 : /************************************************************************/
3288 :
3289 276287 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
3290 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3291 : int nDstPixelStride, GPtrDiff_t nWordCount)
3292 : {
3293 276287 : GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
3294 : nWordCount);
3295 276287 : }
3296 :
3297 : /************************************************************************/
3298 : /* GDALCopyWords() */
3299 : /************************************************************************/
3300 :
3301 : /**
3302 : * Copy pixel words from buffer to buffer.
3303 : *
3304 : * @see GDALCopyWords64()
3305 : */
3306 87109800 : void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
3307 : GDALDataType eSrcType, int nSrcPixelStride,
3308 : void *CPL_RESTRICT pDstData,
3309 : GDALDataType eDstType, int nDstPixelStride,
3310 : int nWordCount)
3311 : {
3312 87109800 : GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
3313 : nDstPixelStride, nWordCount);
3314 87111000 : }
3315 :
3316 : /************************************************************************/
3317 : /* GDALCopyWords64() */
3318 : /************************************************************************/
3319 :
3320 : /**
3321 : * Copy pixel words from buffer to buffer.
3322 : *
3323 : * This function is used to copy pixel word values from one memory buffer
3324 : * to another, with support for conversion between data types, and differing
3325 : * step factors. The data type conversion is done using the following
3326 : * rules:
3327 : * <ul>
3328 : * <li>Values assigned to a lower range integer type are clipped. For
3329 : * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3330 : * less the 0 to be set to 0, and values larger than 255 to be set to 255.
3331 : * </li>
3332 : * <li>
3333 : * Assignment from floating point to integer rounds to closest integer.
3334 : * +Infinity is mapped to the largest integer. -Infinity is mapped to the
3335 : * smallest integer. NaN is mapped to 0.
3336 : * </li>
3337 : * <li>
3338 : * Assignment from non-complex to complex will result in the imaginary part
3339 : * being set to zero on output.
3340 : * </li>
3341 : * <li> Assignment from complex to
3342 : * non-complex will result in the complex portion being lost and the real
3343 : * component being preserved (<i>not magnitude!</i>).
3344 : * </li>
3345 : * </ul>
3346 : *
3347 : * No assumptions are made about the source or destination words occurring
3348 : * on word boundaries. It is assumed that all values are in native machine
3349 : * byte order.
3350 : *
3351 : * @param pSrcData Pointer to source data to be converted.
3352 : * @param eSrcType the source data type (see GDALDataType enum)
3353 : * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3354 : * in bytes
3355 : * @param pDstData Pointer to buffer where destination data should go
3356 : * @param eDstType the destination data type (see GDALDataType enum)
3357 : * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3358 : * words), in bytes
3359 : * @param nWordCount number of words to be copied
3360 : *
3361 : * @note
3362 : * When adding a new data type to GDAL, you must do the following to
3363 : * support it properly within the GDALCopyWords function:
3364 : * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3365 : * This should invoke the appropriate GDALCopyWordsFromT wrapper.
3366 : * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3367 : * This should call the appropriate GDALCopyWordsT template.
3368 : * 3. If appropriate, overload the appropriate CopyWord template in the
3369 : * above namespace. This will ensure that any conversion issues are
3370 : * handled (cases like the float -> int32 case, where the min/max)
3371 : * values are subject to roundoff error.
3372 : */
3373 :
3374 108331000 : void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
3375 : GDALDataType eSrcType, int nSrcPixelStride,
3376 : void *CPL_RESTRICT pDstData,
3377 : GDALDataType eDstType, int nDstPixelStride,
3378 : GPtrDiff_t nWordCount)
3379 :
3380 : {
3381 : // On platforms where alignment matters, be careful
3382 108331000 : const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
3383 108328000 : const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
3384 108318000 : if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0))
3385 : {
3386 2 : CPLError(CE_Failure, CPLE_NotSupported,
3387 : "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
3388 : "argument");
3389 2 : return;
3390 : }
3391 108318000 : if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
3392 60263000 : ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
3393 60278600 : (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 ||
3394 60276800 : (nSrcPixelStride % nSrcDataTypeSize) != 0 ||
3395 60276500 : (nDstPixelStride % nDstDataTypeSize) != 0))
3396 : {
3397 905 : if (eSrcType == eDstType)
3398 : {
3399 34800 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3400 : {
3401 34000 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3402 : static_cast<const GByte *>(pSrcData) +
3403 34000 : nSrcPixelStride * i,
3404 : nDstDataTypeSize);
3405 : }
3406 : }
3407 : else
3408 : {
3409 210 : const auto getAlignedPtr = [](GByte *ptr, int align)
3410 : {
3411 : return ptr +
3412 210 : ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) %
3413 210 : align);
3414 : };
3415 :
3416 : // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3417 : // be sure to get correctly aligned pointer.
3418 105 : constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
3419 : GByte abySrcBuffer[2 * SIZEOF_CFLOAT64];
3420 : GByte abyDstBuffer[2 * SIZEOF_CFLOAT64];
3421 : GByte *pabySrcBuffer =
3422 105 : getAlignedPtr(abySrcBuffer, nSrcDataTypeSize);
3423 : GByte *pabyDstBuffer =
3424 105 : getAlignedPtr(abyDstBuffer, nDstDataTypeSize);
3425 3360 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3426 : {
3427 3255 : memcpy(pabySrcBuffer,
3428 : static_cast<const GByte *>(pSrcData) +
3429 3255 : nSrcPixelStride * i,
3430 : nSrcDataTypeSize);
3431 3255 : GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer,
3432 : eDstType, 0, 1);
3433 3255 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3434 : pabyDstBuffer, nDstDataTypeSize);
3435 : }
3436 : }
3437 905 : return;
3438 : }
3439 :
3440 : // Deal with the case where we're replicating a single word into the
3441 : // provided buffer
3442 108317000 : if (nSrcPixelStride == 0 && nWordCount > 1)
3443 : {
3444 903923 : GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
3445 : nDstPixelStride, nWordCount);
3446 904877 : return;
3447 : }
3448 :
3449 107413000 : if (eSrcType == eDstType)
3450 : {
3451 54017300 : if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
3452 : {
3453 18581000 : GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride,
3454 : static_cast<const GByte *>(pSrcData), nSrcPixelStride,
3455 : nWordCount);
3456 18579400 : return;
3457 : }
3458 :
3459 35436400 : if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 &&
3460 20965400 : (nDstPixelStride % 2) == 0)
3461 : {
3462 20965300 : GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
3463 : static_cast<const short *>(pSrcData), nSrcPixelStride,
3464 : nWordCount);
3465 20964600 : return;
3466 : }
3467 :
3468 14471100 : if (nWordCount == 1)
3469 : {
3470 : #if defined(CSA_BUILD) || defined(__COVERITY__)
3471 : // Avoid false positives...
3472 : memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3473 : #else
3474 14056600 : if (nSrcDataTypeSize == 2)
3475 0 : memcpy(pDstData, pSrcData, 2);
3476 14056600 : else if (nSrcDataTypeSize == 4)
3477 14014000 : memcpy(pDstData, pSrcData, 4);
3478 42660 : else if (nSrcDataTypeSize == 8)
3479 26143 : memcpy(pDstData, pSrcData, 8);
3480 : else /* if( eSrcType == GDT_CFloat64 ) */
3481 16517 : memcpy(pDstData, pSrcData, 16);
3482 : #endif
3483 14056600 : return;
3484 : }
3485 :
3486 : // Let memcpy() handle the case where we're copying a packed buffer
3487 : // of pixels.
3488 414418 : if (nSrcPixelStride == nDstPixelStride)
3489 : {
3490 260159 : if (nSrcPixelStride == nSrcDataTypeSize)
3491 : {
3492 257984 : memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
3493 257984 : return;
3494 : }
3495 : }
3496 : }
3497 :
3498 : // Handle the more general case -- deals with conversion of data types
3499 : // directly.
3500 53552400 : switch (eSrcType)
3501 : {
3502 14963900 : case GDT_Byte:
3503 14963900 : GDALCopyWordsFromT<unsigned char>(
3504 : static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
3505 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3506 14967300 : break;
3507 1015 : case GDT_Int8:
3508 1015 : GDALCopyWordsFromT<signed char>(
3509 : static_cast<const signed char *>(pSrcData), nSrcPixelStride,
3510 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3511 1015 : break;
3512 53142 : case GDT_UInt16:
3513 53142 : GDALCopyWordsFromT<unsigned short>(
3514 : static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
3515 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3516 53142 : break;
3517 4543350 : case GDT_Int16:
3518 4543350 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3519 : nSrcPixelStride, false, pDstData,
3520 : eDstType, nDstPixelStride, nWordCount);
3521 4543340 : break;
3522 6772 : case GDT_UInt32:
3523 6772 : GDALCopyWordsFromT<unsigned int>(
3524 : static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
3525 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3526 6772 : break;
3527 12254700 : case GDT_Int32:
3528 12254700 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3529 : nSrcPixelStride, false, pDstData, eDstType,
3530 : nDstPixelStride, nWordCount);
3531 12254700 : break;
3532 1465 : case GDT_UInt64:
3533 1465 : GDALCopyWordsFromT<std::uint64_t>(
3534 : static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
3535 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3536 1465 : break;
3537 7309 : case GDT_Int64:
3538 7309 : GDALCopyWordsFromT<std::int64_t>(
3539 : static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
3540 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3541 7309 : break;
3542 505 : case GDT_Float16:
3543 505 : GDALCopyWordsFromT<GFloat16>(
3544 : static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, false,
3545 : pDstData, eDstType, nDstPixelStride, nWordCount);
3546 505 : break;
3547 320075 : case GDT_Float32:
3548 320075 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3549 : nSrcPixelStride, false, pDstData,
3550 : eDstType, nDstPixelStride, nWordCount);
3551 320073 : break;
3552 20678300 : case GDT_Float64:
3553 20678300 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3554 : nSrcPixelStride, false, pDstData,
3555 : eDstType, nDstPixelStride, nWordCount);
3556 20678500 : break;
3557 566969 : case GDT_CInt16:
3558 566969 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3559 : nSrcPixelStride, true, pDstData, eDstType,
3560 : nDstPixelStride, nWordCount);
3561 566969 : break;
3562 405 : case GDT_CInt32:
3563 405 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3564 : nSrcPixelStride, true, pDstData, eDstType,
3565 : nDstPixelStride, nWordCount);
3566 405 : break;
3567 212 : case GDT_CFloat16:
3568 212 : GDALCopyWordsFromT<GFloat16>(
3569 : static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, true,
3570 : pDstData, eDstType, nDstPixelStride, nWordCount);
3571 212 : break;
3572 1371 : case GDT_CFloat32:
3573 1371 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3574 : nSrcPixelStride, true, pDstData, eDstType,
3575 : nDstPixelStride, nWordCount);
3576 1371 : break;
3577 172579 : case GDT_CFloat64:
3578 172579 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3579 : nSrcPixelStride, true, pDstData,
3580 : eDstType, nDstPixelStride, nWordCount);
3581 172579 : break;
3582 0 : case GDT_Unknown:
3583 : case GDT_TypeCount:
3584 0 : CPLAssert(false);
3585 : }
3586 : }
3587 :
3588 : /************************************************************************/
3589 : /* GDALCopyBits() */
3590 : /************************************************************************/
3591 :
3592 : /**
3593 : * Bitwise word copying.
3594 : *
3595 : * A function for moving sets of partial bytes around. Loosely
3596 : * speaking this is a bitwise analog to GDALCopyWords().
3597 : *
3598 : * It copies nStepCount "words" where each word is nBitCount bits long.
3599 : * The nSrcStep and nDstStep are the number of bits from the start of one
3600 : * word to the next (same as nBitCount if they are packed). The nSrcOffset
3601 : * and nDstOffset are the offset into the source and destination buffers
3602 : * to start at, also measured in bits.
3603 : *
3604 : * All bit offsets are assumed to start from the high order bit in a byte
3605 : * (i.e. most significant bit first). Currently this function is not very
3606 : * optimized, but it may be improved for some common cases in the future
3607 : * as needed.
3608 : *
3609 : * @param pabySrcData the source data buffer.
3610 : * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3611 : * first word to copy.
3612 : * @param nSrcStep the offset in bits from the start one source word to the
3613 : * start of the next.
3614 : * @param pabyDstData the destination data buffer.
3615 : * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3616 : * first word to copy over.
3617 : * @param nDstStep the offset in bits from the start one word to the
3618 : * start of the next.
3619 : * @param nBitCount the number of bits in a word to be copied.
3620 : * @param nStepCount the number of words to copy.
3621 : */
3622 :
3623 0 : void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3624 : GByte *pabyDstData, int nDstOffset, int nDstStep,
3625 : int nBitCount, int nStepCount)
3626 :
3627 : {
3628 0 : VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
3629 :
3630 0 : for (int iStep = 0; iStep < nStepCount; iStep++)
3631 : {
3632 0 : for (int iBit = 0; iBit < nBitCount; iBit++)
3633 : {
3634 0 : if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
3635 0 : pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
3636 : else
3637 0 : pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
3638 :
3639 0 : nSrcOffset++;
3640 0 : nDstOffset++;
3641 : }
3642 :
3643 0 : nSrcOffset += (nSrcStep - nBitCount);
3644 0 : nDstOffset += (nDstStep - nBitCount);
3645 : }
3646 : }
3647 :
3648 : /************************************************************************/
3649 : /* GDALGetBestOverviewLevel() */
3650 : /* */
3651 : /* Returns the best overview level to satisfy the query or -1 if none */
3652 : /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */
3653 : /* returning a valid overview level */
3654 : /************************************************************************/
3655 :
3656 0 : int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
3657 : int &nXSize, int &nYSize, int nBufXSize,
3658 : int nBufYSize)
3659 : {
3660 0 : return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
3661 0 : nBufXSize, nBufYSize, nullptr);
3662 : }
3663 :
3664 322828 : int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
3665 : int &nYOff, int &nXSize, int &nYSize,
3666 : int nBufXSize, int nBufYSize,
3667 : GDALRasterIOExtraArg *psExtraArg)
3668 : {
3669 : /* -------------------------------------------------------------------- */
3670 : /* Compute the desired downsampling factor. It is */
3671 : /* based on the least reduced axis, and represents the number */
3672 : /* of source pixels to one destination pixel. */
3673 : /* -------------------------------------------------------------------- */
3674 322828 : const double dfDesiredDownsamplingFactor =
3675 322828 : ((nXSize / static_cast<double>(nBufXSize)) <
3676 160491 : (nYSize / static_cast<double>(nBufYSize)) ||
3677 : nBufYSize == 1)
3678 354204 : ? nXSize / static_cast<double>(nBufXSize)
3679 129115 : : nYSize / static_cast<double>(nBufYSize);
3680 :
3681 : /* -------------------------------------------------------------------- */
3682 : /* Find the overview level that largest downsampling factor (most */
3683 : /* downsampled) that is still less than (or only a little more) */
3684 : /* downsampled than the request. */
3685 : /* -------------------------------------------------------------------- */
3686 322828 : const int nOverviewCount = poBand->GetOverviewCount();
3687 322828 : GDALRasterBand *poBestOverview = nullptr;
3688 322828 : double dfBestDownsamplingFactor = 0;
3689 322828 : int nBestOverviewLevel = -1;
3690 :
3691 : const char *pszOversampligThreshold =
3692 322828 : CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
3693 :
3694 : // Note: keep this logic for overview selection in sync between
3695 : // gdalwarp_lib.cpp and rasterio.cpp
3696 : // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
3697 : const double dfOversamplingThreshold =
3698 645647 : pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
3699 322819 : : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
3700 645638 : ? 1.0
3701 322828 : : 1.2;
3702 325519 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3703 : {
3704 5529 : GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
3705 11058 : if (poOverview == nullptr ||
3706 11057 : poOverview->GetXSize() > poBand->GetXSize() ||
3707 5528 : poOverview->GetYSize() > poBand->GetYSize())
3708 : {
3709 1 : continue;
3710 : }
3711 :
3712 : // Compute downsampling factor of this overview
3713 : const double dfDownsamplingFactor = std::min(
3714 5528 : poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
3715 11056 : poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
3716 :
3717 : // Is it nearly the requested factor and better (lower) than
3718 : // the current best factor?
3719 : // Use an epsilon because of numerical instability.
3720 5528 : constexpr double EPSILON = 1e-1;
3721 5636 : if (dfDownsamplingFactor >=
3722 5528 : dfDesiredDownsamplingFactor * dfOversamplingThreshold +
3723 5420 : EPSILON ||
3724 : dfDownsamplingFactor <= dfBestDownsamplingFactor)
3725 : {
3726 108 : continue;
3727 : }
3728 :
3729 : // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3730 5420 : const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
3731 :
3732 5420 : if (pszResampling != nullptr &&
3733 71 : STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
3734 16 : continue;
3735 :
3736 : // OK, this is our new best overview.
3737 5404 : poBestOverview = poOverview;
3738 5404 : nBestOverviewLevel = iOverview;
3739 5404 : dfBestDownsamplingFactor = dfDownsamplingFactor;
3740 :
3741 5404 : if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
3742 : EPSILON)
3743 : {
3744 2838 : break;
3745 : }
3746 : }
3747 :
3748 : /* -------------------------------------------------------------------- */
3749 : /* If we didn't find an overview that helps us, just return */
3750 : /* indicating failure and the full resolution image will be used. */
3751 : /* -------------------------------------------------------------------- */
3752 322828 : if (nBestOverviewLevel < 0)
3753 319922 : return -1;
3754 :
3755 : /* -------------------------------------------------------------------- */
3756 : /* Recompute the source window in terms of the selected */
3757 : /* overview. */
3758 : /* -------------------------------------------------------------------- */
3759 : const double dfXFactor =
3760 2906 : poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
3761 : const double dfYFactor =
3762 2906 : poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
3763 2906 : CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
3764 : poBestOverview->GetYSize());
3765 :
3766 8718 : const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
3767 2906 : static_cast<int>(nXOff / dfXFactor + 0.5));
3768 8718 : const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
3769 2906 : static_cast<int>(nYOff / dfYFactor + 0.5));
3770 2906 : int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
3771 2906 : int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
3772 2906 : if (nOXOff + nOXSize > poBestOverview->GetXSize())
3773 0 : nOXSize = poBestOverview->GetXSize() - nOXOff;
3774 2906 : if (nOYOff + nOYSize > poBestOverview->GetYSize())
3775 2 : nOYSize = poBestOverview->GetYSize() - nOYOff;
3776 :
3777 2906 : if (psExtraArg)
3778 : {
3779 2906 : if (psExtraArg->bFloatingPointWindowValidity)
3780 : {
3781 45 : psExtraArg->dfXOff /= dfXFactor;
3782 45 : psExtraArg->dfXSize /= dfXFactor;
3783 45 : psExtraArg->dfYOff /= dfYFactor;
3784 45 : psExtraArg->dfYSize /= dfYFactor;
3785 : }
3786 2861 : else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
3787 : {
3788 16 : psExtraArg->bFloatingPointWindowValidity = true;
3789 16 : psExtraArg->dfXOff = nXOff / dfXFactor;
3790 16 : psExtraArg->dfXSize = nXSize / dfXFactor;
3791 16 : psExtraArg->dfYOff = nYOff / dfYFactor;
3792 16 : psExtraArg->dfYSize = nYSize / dfYFactor;
3793 : }
3794 : }
3795 :
3796 2906 : nXOff = nOXOff;
3797 2906 : nYOff = nOYOff;
3798 2906 : nXSize = nOXSize;
3799 2906 : nYSize = nOYSize;
3800 :
3801 2906 : return nBestOverviewLevel;
3802 : }
3803 :
3804 : /************************************************************************/
3805 : /* OverviewRasterIO() */
3806 : /* */
3807 : /* Special work function to utilize available overviews to */
3808 : /* more efficiently satisfy downsampled requests. It will */
3809 : /* return CE_Failure if there are no appropriate overviews */
3810 : /* available but it doesn't emit any error messages. */
3811 : /************************************************************************/
3812 :
3813 : //! @cond Doxygen_Suppress
3814 2 : CPLErr GDALRasterBand::OverviewRasterIO(
3815 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3816 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3817 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
3818 :
3819 : {
3820 : GDALRasterIOExtraArg sExtraArg;
3821 2 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3822 :
3823 2 : const int nOverview = GDALBandGetBestOverviewLevel2(
3824 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg);
3825 2 : if (nOverview < 0)
3826 1 : return CE_Failure;
3827 :
3828 : /* -------------------------------------------------------------------- */
3829 : /* Recast the call in terms of the new raster layer. */
3830 : /* -------------------------------------------------------------------- */
3831 1 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
3832 1 : if (poOverviewBand == nullptr)
3833 0 : return CE_Failure;
3834 :
3835 1 : return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
3836 : pData, nBufXSize, nBufYSize, eBufType,
3837 1 : nPixelSpace, nLineSpace, &sExtraArg);
3838 : }
3839 :
3840 : /************************************************************************/
3841 : /* TryOverviewRasterIO() */
3842 : /************************************************************************/
3843 :
3844 161946 : CPLErr GDALRasterBand::TryOverviewRasterIO(
3845 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3846 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3847 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
3848 : int *pbTried)
3849 : {
3850 161946 : int nXOffMod = nXOff;
3851 161946 : int nYOffMod = nYOff;
3852 161946 : int nXSizeMod = nXSize;
3853 161946 : int nYSizeMod = nYSize;
3854 : GDALRasterIOExtraArg sExtraArg;
3855 :
3856 161946 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3857 :
3858 161946 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3859 : this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize,
3860 : &sExtraArg);
3861 :
3862 161946 : if (iOvrLevel >= 0)
3863 : {
3864 49 : GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel);
3865 49 : if (poOverviewBand)
3866 : {
3867 49 : *pbTried = TRUE;
3868 49 : return poOverviewBand->RasterIO(
3869 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData,
3870 : nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
3871 49 : &sExtraArg);
3872 : }
3873 : }
3874 :
3875 161897 : *pbTried = FALSE;
3876 161897 : return CE_None;
3877 : }
3878 :
3879 : /************************************************************************/
3880 : /* TryOverviewRasterIO() */
3881 : /************************************************************************/
3882 :
3883 158041 : CPLErr GDALDataset::TryOverviewRasterIO(
3884 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3885 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3886 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
3887 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
3888 : int *pbTried)
3889 : {
3890 158041 : int nXOffMod = nXOff;
3891 158041 : int nYOffMod = nYOff;
3892 158041 : int nXSizeMod = nXSize;
3893 158041 : int nYSizeMod = nYSize;
3894 : GDALRasterIOExtraArg sExtraArg;
3895 158041 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3896 :
3897 316082 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3898 158041 : papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
3899 : nBufYSize, &sExtraArg);
3900 :
3901 158080 : if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr &&
3902 39 : papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr)
3903 : {
3904 39 : *pbTried = TRUE;
3905 39 : return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO(
3906 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize,
3907 : nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace,
3908 39 : nLineSpace, nBandSpace, &sExtraArg);
3909 : }
3910 : else
3911 : {
3912 158002 : *pbTried = FALSE;
3913 158002 : return CE_None;
3914 : }
3915 : }
3916 :
3917 : /************************************************************************/
3918 : /* GetBestOverviewLevel() */
3919 : /* */
3920 : /* Returns the best overview level to satisfy the query or -1 if none */
3921 : /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */
3922 : /* overview level */
3923 : /************************************************************************/
3924 :
3925 4 : static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
3926 : int &nYOff, int &nXSize, int &nYSize,
3927 : int nBufXSize, int nBufYSize,
3928 : int nBandCount,
3929 : const int *panBandMap,
3930 : GDALRasterIOExtraArg *psExtraArg)
3931 : {
3932 4 : int nOverviewCount = 0;
3933 4 : GDALRasterBand *poFirstBand = nullptr;
3934 :
3935 : /* -------------------------------------------------------------------- */
3936 : /* Check that all bands have the same number of overviews and */
3937 : /* that they have all the same size and block dimensions */
3938 : /* -------------------------------------------------------------------- */
3939 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
3940 : {
3941 8 : GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
3942 8 : if (poBand == nullptr)
3943 0 : return -1;
3944 8 : if (iBand == 0)
3945 : {
3946 4 : poFirstBand = poBand;
3947 4 : nOverviewCount = poBand->GetOverviewCount();
3948 : }
3949 4 : else if (nOverviewCount != poBand->GetOverviewCount())
3950 : {
3951 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3952 : "mismatched overview count, use std method.");
3953 0 : return -1;
3954 : }
3955 : else
3956 : {
3957 4 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3958 : {
3959 0 : GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
3960 : GDALRasterBand *poOvrFirstBand =
3961 0 : poFirstBand->GetOverview(iOverview);
3962 0 : if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
3963 0 : continue;
3964 :
3965 0 : if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3966 0 : poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
3967 : {
3968 0 : CPLDebug("GDAL",
3969 : "GDALDataset::GetBestOverviewLevel() ... "
3970 : "mismatched overview sizes, use std method.");
3971 0 : return -1;
3972 : }
3973 0 : int nBlockXSizeFirst = 0;
3974 0 : int nBlockYSizeFirst = 0;
3975 0 : poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
3976 : &nBlockYSizeFirst);
3977 :
3978 0 : int nBlockXSizeCurrent = 0;
3979 0 : int nBlockYSizeCurrent = 0;
3980 0 : poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
3981 : &nBlockYSizeCurrent);
3982 :
3983 0 : if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3984 0 : nBlockYSizeFirst != nBlockYSizeCurrent)
3985 : {
3986 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3987 : "mismatched block sizes, use std method.");
3988 0 : return -1;
3989 : }
3990 : }
3991 : }
3992 : }
3993 4 : if (poFirstBand == nullptr)
3994 0 : return -1;
3995 :
3996 4 : return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
3997 : nYSize, nBufXSize, nBufYSize,
3998 4 : psExtraArg);
3999 : }
4000 :
4001 : /************************************************************************/
4002 : /* BlockBasedRasterIO() */
4003 : /* */
4004 : /* This convenience function implements a dataset level */
4005 : /* RasterIO() interface based on calling down to fetch blocks, */
4006 : /* much like the GDALRasterBand::IRasterIO(), but it handles */
4007 : /* all bands at once, so that a format driver that handles a */
4008 : /* request for different bands of the same block efficiently */
4009 : /* (i.e. without re-reading interleaved data) will do so efficiently. */
4010 : /* */
4011 : /* This method is intended to be called by an overridden */
4012 : /* IRasterIO() method in the driver specific GDALDataset */
4013 : /* derived class. */
4014 : /* */
4015 : /* Default internal implementation of RasterIO() ... utilizes */
4016 : /* the Block access methods to satisfy the request. This would */
4017 : /* normally only be overridden by formats with overviews. */
4018 : /* */
4019 : /* To keep things relatively simple, this method does not */
4020 : /* currently take advantage of some special cases addressed in */
4021 : /* GDALRasterBand::IRasterIO(), so it is likely best to only */
4022 : /* call it when you know it will help. That is in cases where */
4023 : /* data is at 1:1 to the buffer, and you know the driver is */
4024 : /* implementing interleaved IO efficiently on a block by block */
4025 : /* basis. Overviews will be used when possible. */
4026 : /************************************************************************/
4027 :
4028 63626 : CPLErr GDALDataset::BlockBasedRasterIO(
4029 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
4030 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
4031 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
4032 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
4033 :
4034 : {
4035 63626 : CPLAssert(nullptr != pData);
4036 :
4037 63626 : GByte **papabySrcBlock = nullptr;
4038 63626 : GDALRasterBlock *poBlock = nullptr;
4039 63626 : GDALRasterBlock **papoBlocks = nullptr;
4040 63626 : int nLBlockX = -1;
4041 63626 : int nLBlockY = -1;
4042 : int iBufYOff;
4043 : int iBufXOff;
4044 63626 : int nBlockXSize = 1;
4045 63626 : int nBlockYSize = 1;
4046 63626 : CPLErr eErr = CE_None;
4047 63626 : GDALDataType eDataType = GDT_Byte;
4048 :
4049 63626 : const bool bUseIntegerRequestCoords =
4050 64063 : (!psExtraArg->bFloatingPointWindowValidity ||
4051 437 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
4052 435 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
4053 :
4054 : /* -------------------------------------------------------------------- */
4055 : /* Ensure that all bands share a common block size and data type. */
4056 : /* -------------------------------------------------------------------- */
4057 301190 : for (int iBand = 0; iBand < nBandCount; iBand++)
4058 : {
4059 237564 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4060 :
4061 237562 : if (iBand == 0)
4062 : {
4063 63624 : poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4064 63625 : eDataType = poBand->GetRasterDataType();
4065 : }
4066 : else
4067 : {
4068 173938 : int nThisBlockXSize = 0;
4069 173938 : int nThisBlockYSize = 0;
4070 173938 : poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
4071 173939 : if (nThisBlockXSize != nBlockXSize ||
4072 173939 : nThisBlockYSize != nBlockYSize)
4073 : {
4074 1 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4075 : "mismatched block sizes, use std method.");
4076 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4077 : pData, nBufXSize, nBufYSize, eBufType,
4078 : nBandCount, panBandMap, nPixelSpace,
4079 0 : nLineSpace, nBandSpace, psExtraArg);
4080 : }
4081 :
4082 173938 : if (eDataType != poBand->GetRasterDataType() &&
4083 0 : (nXSize != nBufXSize || nYSize != nBufYSize))
4084 : {
4085 0 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4086 : "mismatched band data types, use std method.");
4087 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4088 : pData, nBufXSize, nBufYSize, eBufType,
4089 : nBandCount, panBandMap, nPixelSpace,
4090 0 : nLineSpace, nBandSpace, psExtraArg);
4091 : }
4092 : }
4093 : }
4094 :
4095 : /* ==================================================================== */
4096 : /* In this special case at full resolution we step through in */
4097 : /* blocks, turning the request over to the per-band */
4098 : /* IRasterIO(), but ensuring that all bands of one block are */
4099 : /* called before proceeding to the next. */
4100 : /* ==================================================================== */
4101 :
4102 63626 : if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
4103 : {
4104 : GDALRasterIOExtraArg sDummyExtraArg;
4105 63622 : INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4106 :
4107 63622 : int nChunkYSize = 0;
4108 63622 : int nChunkXSize = 0;
4109 :
4110 223001 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
4111 : {
4112 160409 : const int nChunkYOff = iBufYOff + nYOff;
4113 160409 : nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4114 160409 : if (nChunkYOff + nChunkYSize > nYOff + nYSize)
4115 58864 : nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4116 :
4117 844670 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
4118 : {
4119 685283 : const int nChunkXOff = iBufXOff + nXOff;
4120 685283 : nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4121 685283 : if (nChunkXOff + nChunkXSize > nXOff + nXSize)
4122 75810 : nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4123 :
4124 685283 : GByte *pabyChunkData =
4125 685283 : static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
4126 685283 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4127 :
4128 3327460 : for (int iBand = 0; iBand < nBandCount; iBand++)
4129 : {
4130 2643200 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4131 :
4132 5286370 : eErr = poBand->IRasterIO(
4133 : eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
4134 : nChunkYSize,
4135 2643200 : pabyChunkData +
4136 2643200 : static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4137 : nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
4138 2643200 : nLineSpace, &sDummyExtraArg);
4139 2643170 : if (eErr != CE_None)
4140 987 : return eErr;
4141 : }
4142 : }
4143 :
4144 179649 : if (psExtraArg->pfnProgress != nullptr &&
4145 20262 : !psExtraArg->pfnProgress(
4146 179649 : 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
4147 : nBufYSize,
4148 : "", psExtraArg->pProgressData))
4149 : {
4150 5 : return CE_Failure;
4151 : }
4152 : }
4153 :
4154 62592 : return CE_None;
4155 : }
4156 :
4157 : /* Below code is not compatible with that case. It would need a complete */
4158 : /* separate code like done in GDALRasterBand::IRasterIO. */
4159 4 : if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4160 : {
4161 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4162 : nBufXSize, nBufYSize, eBufType, nBandCount,
4163 : panBandMap, nPixelSpace, nLineSpace,
4164 0 : nBandSpace, psExtraArg);
4165 : }
4166 :
4167 : /* We could have a smarter implementation, but that will do for now */
4168 4 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4169 0 : (nBufXSize != nXSize || nBufYSize != nYSize))
4170 : {
4171 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4172 : nBufXSize, nBufYSize, eBufType, nBandCount,
4173 : panBandMap, nPixelSpace, nLineSpace,
4174 0 : nBandSpace, psExtraArg);
4175 : }
4176 :
4177 : /* ==================================================================== */
4178 : /* Loop reading required source blocks to satisfy output */
4179 : /* request. This is the most general implementation. */
4180 : /* ==================================================================== */
4181 :
4182 4 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
4183 :
4184 : papabySrcBlock =
4185 4 : static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
4186 : papoBlocks =
4187 4 : static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));
4188 :
4189 : /* -------------------------------------------------------------------- */
4190 : /* Select an overview level if appropriate. */
4191 : /* -------------------------------------------------------------------- */
4192 :
4193 : GDALRasterIOExtraArg sExtraArg;
4194 4 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4195 4 : const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
4196 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
4197 : panBandMap, &sExtraArg);
4198 4 : if (nOverviewLevel >= 0)
4199 : {
4200 2 : GetRasterBand(panBandMap[0])
4201 2 : ->GetOverview(nOverviewLevel)
4202 2 : ->GetBlockSize(&nBlockXSize, &nBlockYSize);
4203 : }
4204 :
4205 4 : double dfXOff = nXOff;
4206 4 : double dfYOff = nYOff;
4207 4 : double dfXSize = nXSize;
4208 4 : double dfYSize = nYSize;
4209 4 : if (sExtraArg.bFloatingPointWindowValidity)
4210 : {
4211 2 : dfXOff = sExtraArg.dfXOff;
4212 2 : dfYOff = sExtraArg.dfYOff;
4213 2 : dfXSize = sExtraArg.dfXSize;
4214 2 : dfYSize = sExtraArg.dfYSize;
4215 : }
4216 :
4217 : /* -------------------------------------------------------------------- */
4218 : /* Compute stepping increment. */
4219 : /* -------------------------------------------------------------------- */
4220 4 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
4221 4 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
4222 :
4223 4 : constexpr double EPS = 1e-10;
4224 : /* -------------------------------------------------------------------- */
4225 : /* Loop over buffer computing source locations. */
4226 : /* -------------------------------------------------------------------- */
4227 36 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
4228 : {
4229 : GPtrDiff_t iSrcOffset;
4230 :
4231 : // Add small epsilon to avoid some numeric precision issues.
4232 32 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4233 32 : const int iSrcY = static_cast<int>(std::min(
4234 32 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
4235 :
4236 32 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
4237 : static_cast<GPtrDiff_t>(nLineSpace);
4238 :
4239 302 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
4240 : {
4241 270 : const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4242 270 : const int iSrcX = static_cast<int>(std::min(
4243 270 : std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1)));
4244 :
4245 : // FIXME: this code likely doesn't work if the dirty block gets
4246 : // flushed to disk before being completely written. In the meantime,
4247 : // bJustInitialize should probably be set to FALSE even if it is not
4248 : // ideal performance wise, and for lossy compression
4249 :
4250 : /* --------------------------------------------------------------------
4251 : */
4252 : /* Ensure we have the appropriate block loaded. */
4253 : /* --------------------------------------------------------------------
4254 : */
4255 270 : if (iSrcX < nLBlockX * nBlockXSize ||
4256 270 : iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
4257 266 : iSrcY < nLBlockY * nBlockYSize ||
4258 266 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
4259 : {
4260 4 : nLBlockX = iSrcX / nBlockXSize;
4261 4 : nLBlockY = iSrcY / nBlockYSize;
4262 :
4263 4 : const bool bJustInitialize =
4264 0 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
4265 0 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
4266 4 : nXOff <= nLBlockX * nBlockXSize &&
4267 0 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4268 : /*bool bMemZeroBuffer = FALSE;
4269 : if( eRWFlag == GF_Write && !bJustInitialize &&
4270 : nXOff <= nLBlockX * nBlockXSize &&
4271 : nYOff <= nLBlockY * nBlockYSize &&
4272 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4273 : (nXOff + nXSize == GetRasterXSize() &&
4274 : (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4275 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4276 : (nYOff + nYSize == GetRasterYSize() &&
4277 : (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4278 : {
4279 : bJustInitialize = TRUE;
4280 : bMemZeroBuffer = TRUE;
4281 : }*/
4282 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4283 : {
4284 8 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4285 8 : if (nOverviewLevel >= 0)
4286 2 : poBand = poBand->GetOverview(nOverviewLevel);
4287 16 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
4288 8 : bJustInitialize);
4289 8 : if (poBlock == nullptr)
4290 : {
4291 0 : eErr = CE_Failure;
4292 0 : goto CleanupAndReturn;
4293 : }
4294 :
4295 8 : if (eRWFlag == GF_Write)
4296 0 : poBlock->MarkDirty();
4297 :
4298 8 : if (papoBlocks[iBand] != nullptr)
4299 0 : papoBlocks[iBand]->DropLock();
4300 :
4301 8 : papoBlocks[iBand] = poBlock;
4302 :
4303 8 : papabySrcBlock[iBand] =
4304 8 : static_cast<GByte *>(poBlock->GetDataRef());
4305 : /*if( bMemZeroBuffer )
4306 : {
4307 : memset(papabySrcBlock[iBand], 0,
4308 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
4309 : * nBlockYSize);
4310 : }*/
4311 : }
4312 : }
4313 :
4314 : /* --------------------------------------------------------------------
4315 : */
4316 : /* Copy over this pixel of data. */
4317 : /* --------------------------------------------------------------------
4318 : */
4319 270 : iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
4320 270 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
4321 270 : (static_cast<GPtrDiff_t>(iSrcY) -
4322 270 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
4323 270 : nBlockXSize) *
4324 270 : nBandDataSize;
4325 :
4326 980 : for (int iBand = 0; iBand < nBandCount; iBand++)
4327 : {
4328 710 : GByte *pabySrcBlock = papabySrcBlock[iBand];
4329 710 : GPtrDiff_t iBandBufOffset =
4330 710 : iBufOffset + static_cast<GPtrDiff_t>(iBand) *
4331 : static_cast<GPtrDiff_t>(nBandSpace);
4332 :
4333 710 : if (eDataType == eBufType)
4334 : {
4335 710 : if (eRWFlag == GF_Read)
4336 710 : memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
4337 710 : pabySrcBlock + iSrcOffset, nBandDataSize);
4338 : else
4339 0 : memcpy(pabySrcBlock + iSrcOffset,
4340 : static_cast<const GByte *>(pData) +
4341 0 : iBandBufOffset,
4342 : nBandDataSize);
4343 : }
4344 : else
4345 : {
4346 : /* type to type conversion ... ouch, this is expensive way
4347 : of handling single words */
4348 :
4349 0 : if (eRWFlag == GF_Read)
4350 0 : GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
4351 : static_cast<GByte *>(pData) +
4352 0 : iBandBufOffset,
4353 : eBufType, 0, 1);
4354 : else
4355 0 : GDALCopyWords64(static_cast<const GByte *>(pData) +
4356 0 : iBandBufOffset,
4357 0 : eBufType, 0, pabySrcBlock + iSrcOffset,
4358 : eDataType, 0, 1);
4359 : }
4360 : }
4361 :
4362 270 : iBufOffset += static_cast<int>(nPixelSpace);
4363 : }
4364 : }
4365 :
4366 : /* -------------------------------------------------------------------- */
4367 : /* CleanupAndReturn. */
4368 : /* -------------------------------------------------------------------- */
4369 4 : CleanupAndReturn:
4370 4 : CPLFree(papabySrcBlock);
4371 4 : if (papoBlocks != nullptr)
4372 : {
4373 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4374 : {
4375 8 : if (papoBlocks[iBand] != nullptr)
4376 8 : papoBlocks[iBand]->DropLock();
4377 : }
4378 4 : CPLFree(papoBlocks);
4379 : }
4380 :
4381 4 : return eErr;
4382 : }
4383 :
4384 : //! @endcond
4385 :
4386 : /************************************************************************/
4387 : /* GDALCopyWholeRasterGetSwathSize() */
4388 : /************************************************************************/
4389 :
4390 2943 : static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
4391 : GDALRasterBand *poDstPrototypeBand,
4392 : int nBandCount,
4393 : int bDstIsCompressed,
4394 : int bInterleave, int *pnSwathCols,
4395 : int *pnSwathLines)
4396 : {
4397 2943 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4398 2943 : int nSrcBlockXSize = 0;
4399 2943 : int nSrcBlockYSize = 0;
4400 2943 : int nBlockXSize = 0;
4401 2943 : int nBlockYSize = 0;
4402 :
4403 2943 : int nXSize = poSrcPrototypeBand->GetXSize();
4404 2943 : int nYSize = poSrcPrototypeBand->GetYSize();
4405 :
4406 2943 : poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
4407 2943 : poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4408 :
4409 2943 : const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4410 2943 : const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4411 :
4412 2943 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4413 2943 : if (bInterleave)
4414 1361 : nPixelSize *= nBandCount;
4415 :
4416 : // aim for one row of blocks. Do not settle for less.
4417 2943 : int nSwathCols = nXSize;
4418 2943 : int nSwathLines = nMaxBlockYSize;
4419 :
4420 : const char *pszSrcCompression =
4421 2943 : poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4422 2943 : if (pszSrcCompression == nullptr)
4423 : {
4424 2917 : auto poSrcDS = poSrcPrototypeBand->GetDataset();
4425 2917 : if (poSrcDS)
4426 : pszSrcCompression =
4427 2911 : poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4428 : }
4429 :
4430 : /* -------------------------------------------------------------------- */
4431 : /* What will our swath size be? */
4432 : /* -------------------------------------------------------------------- */
4433 : // When writing interleaved data in a compressed format, we want to be sure
4434 : // that each block will only be written once, so the swath size must not be
4435 : // greater than the block cache.
4436 2943 : const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4437 : int nTargetSwathSize;
4438 2943 : if (pszSwathSize != nullptr)
4439 0 : nTargetSwathSize = static_cast<int>(
4440 0 : std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4441 : else
4442 : {
4443 : // As a default, take one 1/4 of the cache size.
4444 2943 : nTargetSwathSize = static_cast<int>(
4445 2943 : std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4446 :
4447 : // but if the minimum idal swath buf size is less, then go for it to
4448 : // avoid unnecessarily abusing RAM usage.
4449 : // but try to use 10 MB at least.
4450 2943 : GIntBig nIdealSwathBufSize =
4451 2943 : static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4452 2943 : int nMinTargetSwathSize = 10 * 1000 * 1000;
4453 :
4454 2943 : if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
4455 2943 : GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
4456 : {
4457 2 : nMinTargetSwathSize = nTargetSwathSize;
4458 : }
4459 :
4460 2943 : if (nIdealSwathBufSize < nTargetSwathSize &&
4461 2933 : nIdealSwathBufSize < nMinTargetSwathSize)
4462 : {
4463 2930 : nIdealSwathBufSize = nMinTargetSwathSize;
4464 : }
4465 :
4466 2943 : if (pszSrcCompression != nullptr &&
4467 157 : EQUAL(pszSrcCompression, "JPEG2000") &&
4468 0 : (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4469 0 : (nSrcBlockYSize % nBlockYSize) == 0)))
4470 : {
4471 2 : nIdealSwathBufSize =
4472 4 : std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4473 2 : nSrcBlockYSize * nPixelSize);
4474 : }
4475 2943 : if (nTargetSwathSize > nIdealSwathBufSize)
4476 2929 : nTargetSwathSize = static_cast<int>(
4477 2929 : std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4478 : }
4479 :
4480 2943 : if (nTargetSwathSize < 1000000)
4481 8 : nTargetSwathSize = 1000000;
4482 :
4483 : /* But let's check that */
4484 3151 : if (bDstIsCompressed && bInterleave &&
4485 208 : nTargetSwathSize > GDALGetCacheMax64())
4486 : {
4487 0 : CPLError(CE_Warning, CPLE_AppDefined,
4488 : "When translating into a compressed interleave format, "
4489 : "the block cache size (" CPL_FRMT_GIB ") "
4490 : "should be at least the size of the swath (%d) "
4491 : "(GDAL_SWATH_SIZE config. option)",
4492 : GDALGetCacheMax64(), nTargetSwathSize);
4493 : }
4494 :
4495 : #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4496 : #define ROUND_TO(x, y) (((x) / (y)) * (y))
4497 :
4498 : // if both input and output datasets are tiled, that the tile dimensions
4499 : // are "compatible", try to stick to a swath dimension that is a multiple
4500 : // of input and output block dimensions.
4501 2943 : if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4502 33 : IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4503 33 : IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4504 33 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4505 33 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4506 : {
4507 33 : if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
4508 33 : nPixelSize <=
4509 33 : static_cast<GIntBig>(nTargetSwathSize))
4510 : {
4511 33 : nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4512 33 : nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4513 33 : if (nSwathCols == 0)
4514 0 : nSwathCols = nMaxBlockXSize;
4515 33 : if (nSwathCols > nXSize)
4516 31 : nSwathCols = nXSize;
4517 33 : nSwathLines = nMaxBlockYSize;
4518 :
4519 33 : if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4520 33 : static_cast<GIntBig>(nTargetSwathSize))
4521 : {
4522 0 : nSwathCols = nXSize;
4523 0 : nSwathLines = nBlockYSize;
4524 : }
4525 : }
4526 : }
4527 :
4528 2943 : const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4529 2943 : const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4530 2943 : if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
4531 : {
4532 1 : nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4533 1 : if (nSwathLines == 0)
4534 1 : nSwathLines = 1;
4535 :
4536 1 : CPLDebug(
4537 : "GDAL",
4538 : "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4539 : "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4540 : "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4541 1 : nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
4542 : }
4543 : // If we are processing single scans, try to handle several at once.
4544 : // If we are handling swaths already, only grow the swath if a row
4545 : // of blocks is substantially less than our target buffer size.
4546 2942 : else if (nSwathLines == 1 ||
4547 2441 : nMemoryPerCol * nSwathLines <
4548 2441 : static_cast<GIntBig>(nTargetSwathSize) / 10)
4549 : {
4550 2915 : nSwathLines = std::min(
4551 : nYSize,
4552 2915 : std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4553 :
4554 : /* If possible try to align to source and target block height */
4555 2915 : if ((nSwathLines % nMaxBlockYSize) != 0 &&
4556 987 : nSwathLines > nMaxBlockYSize &&
4557 987 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4558 959 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4559 177 : nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4560 : }
4561 :
4562 2943 : if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4563 0 : (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4564 0 : IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
4565 : {
4566 : // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4567 2 : if (nSwathLines < nSrcBlockYSize)
4568 : {
4569 0 : nSwathLines = nSrcBlockYSize;
4570 :
4571 : // Number of pixels that can be read/write simultaneously.
4572 0 : nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4573 0 : nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4574 0 : if (nSwathCols == 0)
4575 0 : nSwathCols = nSrcBlockXSize;
4576 0 : if (nSwathCols > nXSize)
4577 0 : nSwathCols = nXSize;
4578 :
4579 0 : CPLDebug(
4580 : "GDAL",
4581 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4582 : "too high block, "
4583 : "use partial width at one time");
4584 : }
4585 2 : else if ((nSwathLines % nSrcBlockYSize) != 0)
4586 : {
4587 : /* Round on a multiple of nSrcBlockYSize */
4588 0 : nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4589 0 : CPLDebug(
4590 : "GDAL",
4591 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4592 : "round nSwathLines to block height : %d",
4593 : nSwathLines);
4594 : }
4595 : }
4596 2941 : else if (bDstIsCompressed)
4597 : {
4598 374 : if (nSwathLines < nBlockYSize)
4599 : {
4600 142 : nSwathLines = nBlockYSize;
4601 :
4602 : // Number of pixels that can be read/write simultaneously.
4603 142 : nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4604 142 : nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4605 142 : if (nSwathCols == 0)
4606 0 : nSwathCols = nBlockXSize;
4607 142 : if (nSwathCols > nXSize)
4608 142 : nSwathCols = nXSize;
4609 :
4610 142 : CPLDebug(
4611 : "GDAL",
4612 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4613 : "too high block, "
4614 : "use partial width at one time");
4615 : }
4616 232 : else if ((nSwathLines % nBlockYSize) != 0)
4617 : {
4618 : // Round on a multiple of nBlockYSize.
4619 9 : nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4620 9 : CPLDebug(
4621 : "GDAL",
4622 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4623 : "round nSwathLines to block height : %d",
4624 : nSwathLines);
4625 : }
4626 : }
4627 :
4628 2943 : *pnSwathCols = nSwathCols;
4629 2943 : *pnSwathLines = nSwathLines;
4630 2943 : }
4631 :
4632 : /************************************************************************/
4633 : /* GDALDatasetCopyWholeRaster() */
4634 : /************************************************************************/
4635 :
4636 : /**
4637 : * \brief Copy all dataset raster data.
4638 : *
4639 : * This function copies the complete raster contents of one dataset to
4640 : * another similarly configured dataset. The source and destination
4641 : * dataset must have the same number of bands, and the same width
4642 : * and height. The bands do not have to have the same data type.
4643 : *
4644 : * This function is primarily intended to support implementation of
4645 : * driver specific CreateCopy() functions. It implements efficient copying,
4646 : * in particular "chunking" the copy in substantial blocks and, if appropriate,
4647 : * performing the transfer in a pixel interleaved fashion.
4648 : *
4649 : * Currently the only papszOptions value supported are :
4650 : * <ul>
4651 : * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
4652 : * write access pattern (this does not modify the layout of the destination
4653 : * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
4654 : * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
4655 : * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
4656 : * (GDAL >= 2.2)</li>
4657 : * </ul>
4658 : * More options may be supported in the future.
4659 : *
4660 : * @param hSrcDS the source dataset
4661 : * @param hDstDS the destination dataset
4662 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4663 : * @param pfnProgress progress reporting function.
4664 : * @param pProgressData callback data for progress function.
4665 : *
4666 : * @return CE_None on success, or CE_Failure on failure.
4667 : */
4668 :
4669 2910 : CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
4670 : GDALDatasetH hDstDS,
4671 : CSLConstList papszOptions,
4672 : GDALProgressFunc pfnProgress,
4673 : void *pProgressData)
4674 :
4675 : {
4676 2910 : VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4677 2910 : VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4678 :
4679 2910 : GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4680 2910 : GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4681 :
4682 2910 : if (pfnProgress == nullptr)
4683 3 : pfnProgress = GDALDummyProgress;
4684 :
4685 : /* -------------------------------------------------------------------- */
4686 : /* Confirm the datasets match in size and band counts. */
4687 : /* -------------------------------------------------------------------- */
4688 2910 : const int nXSize = poDstDS->GetRasterXSize();
4689 2910 : const int nYSize = poDstDS->GetRasterYSize();
4690 2910 : const int nBandCount = poDstDS->GetRasterCount();
4691 :
4692 2910 : if (poSrcDS->GetRasterXSize() != nXSize ||
4693 5820 : poSrcDS->GetRasterYSize() != nYSize ||
4694 2910 : poSrcDS->GetRasterCount() != nBandCount)
4695 : {
4696 0 : CPLError(CE_Failure, CPLE_AppDefined,
4697 : "Input and output dataset sizes or band counts do not\n"
4698 : "match in GDALDatasetCopyWholeRaster()");
4699 0 : return CE_Failure;
4700 : }
4701 :
4702 : /* -------------------------------------------------------------------- */
4703 : /* Report preliminary (0) progress. */
4704 : /* -------------------------------------------------------------------- */
4705 2910 : if (!pfnProgress(0.0, nullptr, pProgressData))
4706 : {
4707 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4708 : "User terminated CreateCopy()");
4709 1 : return CE_Failure;
4710 : }
4711 :
4712 : /* -------------------------------------------------------------------- */
4713 : /* Get our prototype band, and assume the others are similarly */
4714 : /* configured. */
4715 : /* -------------------------------------------------------------------- */
4716 2909 : if (nBandCount == 0)
4717 0 : return CE_None;
4718 :
4719 2909 : GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4720 2909 : GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4721 2909 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4722 :
4723 : /* -------------------------------------------------------------------- */
4724 : /* Do we want to try and do the operation in a pixel */
4725 : /* interleaved fashion? */
4726 : /* -------------------------------------------------------------------- */
4727 2909 : bool bInterleave = false;
4728 : const char *pszInterleave =
4729 2909 : poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4730 2909 : if (pszInterleave != nullptr &&
4731 1142 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4732 175 : bInterleave = true;
4733 :
4734 2909 : pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4735 2909 : if (pszInterleave != nullptr &&
4736 2101 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4737 1314 : bInterleave = true;
4738 :
4739 2909 : pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
4740 2909 : if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
4741 5 : bInterleave = true;
4742 2904 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
4743 7 : bInterleave = false;
4744 : // attributes is specific to the TileDB driver
4745 2897 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
4746 4 : bInterleave = true;
4747 2893 : else if (pszInterleave != nullptr)
4748 : {
4749 0 : CPLError(CE_Warning, CPLE_NotSupported,
4750 : "Unsupported value for option INTERLEAVE");
4751 : }
4752 :
4753 : // If the destination is compressed, we must try to write blocks just once,
4754 : // to save disk space (GTiff case for example), and to avoid data loss
4755 : // (JPEG compression for example).
4756 2909 : bool bDstIsCompressed = false;
4757 : const char *pszDstCompressed =
4758 2909 : CSLFetchNameValue(papszOptions, "COMPRESSED");
4759 2909 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4760 352 : bDstIsCompressed = true;
4761 :
4762 : /* -------------------------------------------------------------------- */
4763 : /* What will our swath size be? */
4764 : /* -------------------------------------------------------------------- */
4765 :
4766 2909 : int nSwathCols = 0;
4767 2909 : int nSwathLines = 0;
4768 2909 : GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
4769 : nBandCount, bDstIsCompressed, bInterleave,
4770 : &nSwathCols, &nSwathLines);
4771 :
4772 2909 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4773 2909 : if (bInterleave)
4774 1361 : nPixelSize *= nBandCount;
4775 :
4776 2909 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
4777 2909 : if (pSwathBuf == nullptr)
4778 : {
4779 0 : return CE_Failure;
4780 : }
4781 :
4782 2909 : CPLDebug("GDAL",
4783 : "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4784 : nSwathCols, nSwathLines, static_cast<int>(bInterleave));
4785 :
4786 : // Advise the source raster that we are going to read it completely
4787 : // Note: this might already have been done by GDALCreateCopy() in the
4788 : // likely case this function is indirectly called by it
4789 2909 : poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
4790 2909 : nullptr, nullptr);
4791 :
4792 : /* ==================================================================== */
4793 : /* Band oriented (uninterleaved) case. */
4794 : /* ==================================================================== */
4795 2909 : CPLErr eErr = CE_None;
4796 : const bool bCheckHoles =
4797 2909 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
4798 :
4799 2909 : if (!bInterleave)
4800 : {
4801 : GDALRasterIOExtraArg sExtraArg;
4802 1548 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4803 1548 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4804 :
4805 4644 : const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
4806 1548 : DIV_ROUND_UP(nYSize, nSwathLines) *
4807 1548 : DIV_ROUND_UP(nXSize, nSwathCols);
4808 1548 : GIntBig nBlocksDone = 0;
4809 :
4810 3798 : for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
4811 : {
4812 2250 : int nBand = iBand + 1;
4813 :
4814 4652 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4815 : {
4816 2402 : int nThisLines = nSwathLines;
4817 :
4818 2402 : if (iY + nThisLines > nYSize)
4819 268 : nThisLines = nYSize - iY;
4820 :
4821 4804 : for (int iX = 0; iX < nXSize && eErr == CE_None;
4822 2402 : iX += nSwathCols)
4823 : {
4824 2402 : int nThisCols = nSwathCols;
4825 :
4826 2402 : if (iX + nThisCols > nXSize)
4827 0 : nThisCols = nXSize - iX;
4828 :
4829 2402 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4830 2402 : if (bCheckHoles)
4831 : {
4832 : nStatus = poSrcDS->GetRasterBand(nBand)
4833 973 : ->GetDataCoverageStatus(
4834 : iX, iY, nThisCols, nThisLines,
4835 : GDAL_DATA_COVERAGE_STATUS_DATA);
4836 : }
4837 2402 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4838 : {
4839 2398 : sExtraArg.pfnProgress = GDALScaledProgress;
4840 4796 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4841 2398 : nBlocksDone / static_cast<double>(nTotalBlocks),
4842 2398 : (nBlocksDone + 0.5) /
4843 2398 : static_cast<double>(nTotalBlocks),
4844 : pfnProgress, pProgressData);
4845 2398 : if (sExtraArg.pProgressData == nullptr)
4846 1421 : sExtraArg.pfnProgress = nullptr;
4847 :
4848 2398 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4849 : nThisLines, pSwathBuf,
4850 : nThisCols, nThisLines, eDT, 1,
4851 : &nBand, 0, 0, 0, &sExtraArg);
4852 :
4853 2398 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4854 :
4855 2398 : if (eErr == CE_None)
4856 2394 : eErr = poDstDS->RasterIO(
4857 : GF_Write, iX, iY, nThisCols, nThisLines,
4858 : pSwathBuf, nThisCols, nThisLines, eDT, 1,
4859 : &nBand, 0, 0, 0, nullptr);
4860 : }
4861 :
4862 2402 : nBlocksDone++;
4863 4765 : if (eErr == CE_None &&
4864 2363 : !pfnProgress(nBlocksDone /
4865 2363 : static_cast<double>(nTotalBlocks),
4866 : nullptr, pProgressData))
4867 : {
4868 2 : eErr = CE_Failure;
4869 2 : CPLError(CE_Failure, CPLE_UserInterrupt,
4870 : "User terminated CreateCopy()");
4871 : }
4872 : }
4873 : }
4874 : }
4875 : }
4876 :
4877 : /* ==================================================================== */
4878 : /* Pixel interleaved case. */
4879 : /* ==================================================================== */
4880 : else /* if( bInterleave ) */
4881 : {
4882 : GDALRasterIOExtraArg sExtraArg;
4883 1361 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4884 1361 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4885 :
4886 1361 : const GIntBig nTotalBlocks =
4887 1361 : static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4888 1361 : DIV_ROUND_UP(nXSize, nSwathCols);
4889 1361 : GIntBig nBlocksDone = 0;
4890 :
4891 2953 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4892 : {
4893 1592 : int nThisLines = nSwathLines;
4894 :
4895 1592 : if (iY + nThisLines > nYSize)
4896 209 : nThisLines = nYSize - iY;
4897 :
4898 3189 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
4899 : {
4900 1597 : int nThisCols = nSwathCols;
4901 :
4902 1597 : if (iX + nThisCols > nXSize)
4903 3 : nThisCols = nXSize - iX;
4904 :
4905 1597 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4906 1597 : if (bCheckHoles)
4907 : {
4908 1366 : nStatus = 0;
4909 1419 : for (int iBand = 0; iBand < nBandCount; iBand++)
4910 : {
4911 1400 : nStatus |= poSrcDS->GetRasterBand(iBand + 1)
4912 1400 : ->GetDataCoverageStatus(
4913 : iX, iY, nThisCols, nThisLines,
4914 : GDAL_DATA_COVERAGE_STATUS_DATA);
4915 1400 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4916 1347 : break;
4917 : }
4918 : }
4919 1597 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4920 : {
4921 1578 : sExtraArg.pfnProgress = GDALScaledProgress;
4922 3156 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4923 1578 : nBlocksDone / static_cast<double>(nTotalBlocks),
4924 1578 : (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4925 : pfnProgress, pProgressData);
4926 1578 : if (sExtraArg.pProgressData == nullptr)
4927 344 : sExtraArg.pfnProgress = nullptr;
4928 :
4929 1578 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4930 : nThisLines, pSwathBuf, nThisCols,
4931 : nThisLines, eDT, nBandCount,
4932 : nullptr, 0, 0, 0, &sExtraArg);
4933 :
4934 1578 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4935 :
4936 1578 : if (eErr == CE_None)
4937 1577 : eErr = poDstDS->RasterIO(
4938 : GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
4939 : nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
4940 : 0, 0, nullptr);
4941 : }
4942 :
4943 1597 : nBlocksDone++;
4944 3190 : if (eErr == CE_None &&
4945 1593 : !pfnProgress(nBlocksDone /
4946 1593 : static_cast<double>(nTotalBlocks),
4947 : nullptr, pProgressData))
4948 : {
4949 1 : eErr = CE_Failure;
4950 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4951 : "User terminated CreateCopy()");
4952 : }
4953 : }
4954 : }
4955 : }
4956 :
4957 : /* -------------------------------------------------------------------- */
4958 : /* Cleanup */
4959 : /* -------------------------------------------------------------------- */
4960 2909 : CPLFree(pSwathBuf);
4961 :
4962 2909 : return eErr;
4963 : }
4964 :
4965 : /************************************************************************/
4966 : /* GDALRasterBandCopyWholeRaster() */
4967 : /************************************************************************/
4968 :
4969 : /**
4970 : * \brief Copy a whole raster band
4971 : *
4972 : * This function copies the complete raster contents of one band to
4973 : * another similarly configured band. The source and destination
4974 : * bands must have the same width and height. The bands do not have
4975 : * to have the same data type.
4976 : *
4977 : * It implements efficient copying, in particular "chunking" the copy in
4978 : * substantial blocks.
4979 : *
4980 : * Currently the only papszOptions value supported are :
4981 : * <ul>
4982 : * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4983 : * achieve best compression.</li>
4984 : * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4985 : * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li>
4986 : * </ul>
4987 : *
4988 : * @param hSrcBand the source band
4989 : * @param hDstBand the destination band
4990 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4991 : * @param pfnProgress progress reporting function.
4992 : * @param pProgressData callback data for progress function.
4993 : *
4994 : * @return CE_None on success, or CE_Failure on failure.
4995 : */
4996 :
4997 34 : CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
4998 : GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
4999 : const char *const *const papszOptions, GDALProgressFunc pfnProgress,
5000 : void *pProgressData)
5001 :
5002 : {
5003 34 : VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5004 34 : VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5005 :
5006 34 : GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
5007 34 : GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
5008 34 : CPLErr eErr = CE_None;
5009 :
5010 34 : if (pfnProgress == nullptr)
5011 11 : pfnProgress = GDALDummyProgress;
5012 :
5013 : /* -------------------------------------------------------------------- */
5014 : /* Confirm the datasets match in size and band counts. */
5015 : /* -------------------------------------------------------------------- */
5016 34 : int nXSize = poSrcBand->GetXSize();
5017 34 : int nYSize = poSrcBand->GetYSize();
5018 :
5019 34 : if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
5020 : {
5021 0 : CPLError(CE_Failure, CPLE_AppDefined,
5022 : "Input and output band sizes do not\n"
5023 : "match in GDALRasterBandCopyWholeRaster()");
5024 0 : return CE_Failure;
5025 : }
5026 :
5027 : /* -------------------------------------------------------------------- */
5028 : /* Report preliminary (0) progress. */
5029 : /* -------------------------------------------------------------------- */
5030 34 : if (!pfnProgress(0.0, nullptr, pProgressData))
5031 : {
5032 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5033 : "User terminated CreateCopy()");
5034 0 : return CE_Failure;
5035 : }
5036 :
5037 34 : GDALDataType eDT = poDstBand->GetRasterDataType();
5038 :
5039 : // If the destination is compressed, we must try to write blocks just once,
5040 : // to save disk space (GTiff case for example), and to avoid data loss
5041 : // (JPEG compression for example).
5042 34 : bool bDstIsCompressed = false;
5043 : const char *pszDstCompressed =
5044 34 : CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
5045 34 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5046 22 : bDstIsCompressed = true;
5047 :
5048 : /* -------------------------------------------------------------------- */
5049 : /* What will our swath size be? */
5050 : /* -------------------------------------------------------------------- */
5051 :
5052 34 : int nSwathCols = 0;
5053 34 : int nSwathLines = 0;
5054 34 : GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
5055 : FALSE, &nSwathCols, &nSwathLines);
5056 :
5057 34 : const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5058 :
5059 34 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5060 34 : if (pSwathBuf == nullptr)
5061 : {
5062 0 : return CE_Failure;
5063 : }
5064 :
5065 34 : CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5066 : nSwathCols, nSwathLines);
5067 :
5068 : const bool bCheckHoles =
5069 34 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5070 :
5071 : // Advise the source raster that we are going to read it completely
5072 34 : poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
5073 :
5074 : /* ==================================================================== */
5075 : /* Band oriented (uninterleaved) case. */
5076 : /* ==================================================================== */
5077 :
5078 82 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5079 : {
5080 48 : int nThisLines = nSwathLines;
5081 :
5082 48 : if (iY + nThisLines > nYSize)
5083 8 : nThisLines = nYSize - iY;
5084 :
5085 96 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5086 : {
5087 48 : int nThisCols = nSwathCols;
5088 :
5089 48 : if (iX + nThisCols > nXSize)
5090 0 : nThisCols = nXSize - iX;
5091 :
5092 48 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5093 48 : if (bCheckHoles)
5094 : {
5095 0 : nStatus = poSrcBand->GetDataCoverageStatus(
5096 : iX, iY, nThisCols, nThisLines,
5097 : GDAL_DATA_COVERAGE_STATUS_DATA);
5098 : }
5099 48 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5100 : {
5101 48 : eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5102 : nThisLines, pSwathBuf, nThisCols,
5103 : nThisLines, eDT, 0, 0, nullptr);
5104 :
5105 48 : if (eErr == CE_None)
5106 48 : eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5107 : nThisLines, pSwathBuf, nThisCols,
5108 : nThisLines, eDT, 0, 0, nullptr);
5109 : }
5110 :
5111 96 : if (eErr == CE_None &&
5112 48 : !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
5113 : nullptr, pProgressData))
5114 : {
5115 0 : eErr = CE_Failure;
5116 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5117 : "User terminated CreateCopy()");
5118 : }
5119 : }
5120 : }
5121 :
5122 : /* -------------------------------------------------------------------- */
5123 : /* Cleanup */
5124 : /* -------------------------------------------------------------------- */
5125 34 : CPLFree(pSwathBuf);
5126 :
5127 34 : return eErr;
5128 : }
5129 :
5130 : /************************************************************************/
5131 : /* GDALCopyRasterIOExtraArg () */
5132 : /************************************************************************/
5133 :
5134 323368 : void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5135 : GDALRasterIOExtraArg *psSrcArg)
5136 : {
5137 323368 : INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5138 323368 : if (psSrcArg)
5139 : {
5140 323368 : psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5141 323368 : psDestArg->pfnProgress = psSrcArg->pfnProgress;
5142 323368 : psDestArg->pProgressData = psSrcArg->pProgressData;
5143 323368 : psDestArg->bFloatingPointWindowValidity =
5144 323368 : psSrcArg->bFloatingPointWindowValidity;
5145 323368 : if (psSrcArg->bFloatingPointWindowValidity)
5146 : {
5147 3116 : psDestArg->dfXOff = psSrcArg->dfXOff;
5148 3116 : psDestArg->dfYOff = psSrcArg->dfYOff;
5149 3116 : psDestArg->dfXSize = psSrcArg->dfXSize;
5150 3116 : psDestArg->dfYSize = psSrcArg->dfYSize;
5151 : }
5152 : }
5153 323368 : }
5154 :
5155 : /************************************************************************/
5156 : /* HasOnlyNoData() */
5157 : /************************************************************************/
5158 :
/** Return whether value matches noDataValue (plain equality). */
template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
{
    return value == noDataValue;
}

/** Float overload: a NaN nodata value matches any NaN sample. */
template <> bool IsEqualToNoData<float>(float value, float noDataValue)
{
    return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
}

/** Double overload: a NaN nodata value matches any NaN sample. */
template <> bool IsEqualToNoData<double>(double value, double noDataValue)
{
    return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
}

/** Check whether every sample of a window is equal to the nodata value.
 *
 * The window is made of nHeight lines of nWidth pixels, each pixel being
 * nComponents consecutive samples of type T. Consecutive lines are
 * nLineStride pixels apart (so nLineStride * nComponents samples apart).
 *
 * @param pBuffer first sample of the window.
 * @param noDataValue value every sample is compared against, using
 *                    IsEqualToNoData() (NaN-aware for float/double).
 * @param nWidth window width in pixels.
 * @param nHeight window height in lines.
 * @param nLineStride distance between line starts, in pixels.
 * @param nComponents number of samples per pixel.
 * @return true if all samples match (vacuously true for an empty window),
 *         false as soon as one sample differs.
 */
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // An empty window contains no sample that could differ. Returning early
    // is required for safety: the corner probes below compute nWidth - 1 and
    // nHeight - 1, which would wrap around for a zero dimension and read
    // outside of the buffer.
    if (nWidth == 0 || nHeight == 0 || nComponents == 0)
        return true;

    // Fast test: check the 4 corners and the middle pixel.
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        if (!(IsEqualToNoData(pBuffer[iBand], noDataValue) &&
              IsEqualToNoData(pBuffer[(nWidth - 1) * nComponents + iBand],
                              noDataValue) &&
              IsEqualToNoData(
                  pBuffer[((nHeight - 1) / 2 * nLineStride + (nWidth - 1) / 2) *
                              nComponents +
                          iBand],
                  noDataValue) &&
              IsEqualToNoData(
                  pBuffer[(nHeight - 1) * nLineStride * nComponents + iBand],
                  noDataValue) &&
              IsEqualToNoData(
                  pBuffer[((nHeight - 1) * nLineStride + nWidth - 1) *
                              nComponents +
                          iBand],
                  noDataValue)))
        {
            return false;
        }
    }

    // Test all pixels. Only the first nWidth pixels of each line belong to
    // the window; the rest of the stride is skipped.
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
        for (size_t iX = 0; iX < nWidth * nComponents; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
5217 :
5218 : /************************************************************************/
5219 : /* GDALBufferHasOnlyNoData() */
5220 : /************************************************************************/
5221 :
5222 38310 : bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
5223 : size_t nWidth, size_t nHeight, size_t nLineStride,
5224 : size_t nComponents, int nBitsPerSample,
5225 : GDALBufferSampleFormat nSampleFormat)
5226 : {
5227 : // In the case where the nodata is 0, we can compare several bytes at
5228 : // once. Select the largest natural integer type for the architecture.
5229 : #if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5230 : // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5231 : typedef std::uint64_t WordType;
5232 : #else
5233 : typedef std::uint32_t WordType;
5234 : #endif
5235 38310 : if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
5236 : // Do not use this optimized code path for floating point numbers,
5237 : // as it can't detect negative zero.
5238 : nSampleFormat != GSF_FLOATING_POINT)
5239 : {
5240 26150 : const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
5241 26150 : const size_t nSize =
5242 26150 : (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
5243 26150 : size_t i = 0;
5244 : const size_t nInitialIters =
5245 52300 : std::min(sizeof(WordType) -
5246 26150 : static_cast<size_t>(
5247 : reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5248 : sizeof(WordType)),
5249 26150 : nSize);
5250 217715 : for (; i < nInitialIters; i++)
5251 : {
5252 195563 : if (pabyBuffer[i])
5253 3998 : return false;
5254 : }
5255 16452000 : for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
5256 : {
5257 16436900 : if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
5258 7086 : return false;
5259 : }
5260 52415 : for (; i < nSize; i++)
5261 : {
5262 37354 : if (pabyBuffer[i])
5263 5 : return false;
5264 : }
5265 15061 : return true;
5266 : }
5267 :
5268 12160 : if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
5269 : {
5270 22264 : return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
5271 11132 : HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
5272 11132 : static_cast<uint8_t>(dfNoDataValue), nWidth,
5273 11132 : nHeight, nLineStride, nComponents);
5274 : }
5275 1028 : if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
5276 : {
5277 : // Use unsigned implementation by converting the nodatavalue to
5278 : // unsigned
5279 63 : return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
5280 31 : HasOnlyNoDataT(
5281 : static_cast<const uint8_t *>(pBuffer),
5282 31 : static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
5283 32 : nWidth, nHeight, nLineStride, nComponents);
5284 : }
5285 996 : if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
5286 : {
5287 21 : return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
5288 10 : HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
5289 10 : static_cast<uint16_t>(dfNoDataValue), nWidth,
5290 11 : nHeight, nLineStride, nComponents);
5291 : }
5292 985 : if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
5293 : {
5294 : // Use unsigned implementation by converting the nodatavalue to
5295 : // unsigned
5296 109 : return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
5297 54 : HasOnlyNoDataT(
5298 : static_cast<const uint16_t *>(pBuffer),
5299 54 : static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
5300 55 : nWidth, nHeight, nLineStride, nComponents);
5301 : }
5302 930 : if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
5303 : {
5304 73 : return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
5305 36 : HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
5306 : static_cast<uint32_t>(dfNoDataValue), nWidth,
5307 37 : nHeight, nLineStride, nComponents);
5308 : }
5309 893 : if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
5310 : {
5311 : // Use unsigned implementation by converting the nodatavalue to
5312 : // unsigned
5313 19 : return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
5314 9 : HasOnlyNoDataT(
5315 : static_cast<const uint32_t *>(pBuffer),
5316 9 : static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
5317 10 : nWidth, nHeight, nLineStride, nComponents);
5318 : }
5319 883 : if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
5320 : {
5321 56 : return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
5322 28 : HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
5323 : static_cast<uint64_t>(dfNoDataValue), nWidth,
5324 28 : nHeight, nLineStride, nComponents);
5325 : }
5326 855 : if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
5327 : {
5328 : // Use unsigned implementation by converting the nodatavalue to
5329 : // unsigned
5330 0 : return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
5331 0 : HasOnlyNoDataT(
5332 : static_cast<const uint64_t *>(pBuffer),
5333 0 : static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
5334 0 : nWidth, nHeight, nLineStride, nComponents);
5335 : }
5336 855 : if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
5337 : {
5338 689 : return (std::isnan(dfNoDataValue) ||
5339 1377 : GDALIsValueInRange<float>(dfNoDataValue)) &&
5340 688 : HasOnlyNoDataT(static_cast<const float *>(pBuffer),
5341 : static_cast<float>(dfNoDataValue), nWidth,
5342 689 : nHeight, nLineStride, nComponents);
5343 : }
5344 166 : if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
5345 : {
5346 166 : return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
5347 : dfNoDataValue, nWidth, nHeight, nLineStride,
5348 166 : nComponents);
5349 : }
5350 0 : return false;
5351 : }
5352 :
5353 : #ifdef HAVE_SSE2
5354 :
5355 : /************************************************************************/
5356 : /* GDALDeinterleave3Byte() */
5357 : /************************************************************************/
5358 :
5359 : #if defined(__GNUC__) && !defined(__clang__)
5360 : __attribute__((optimize("no-tree-vectorize")))
5361 : #endif
5362 : static void
5363 71273 : GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5364 : GByte *CPL_RESTRICT pabyDest0,
5365 : GByte *CPL_RESTRICT pabyDest1,
5366 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5367 : #ifdef USE_NEON_OPTIMIZATIONS
5368 : {
5369 : return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5370 : nIters);
5371 : }
5372 : #else
5373 : {
5374 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5375 71273 : if (CPLHaveRuntimeSSSE3())
5376 : {
5377 71286 : return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5378 71278 : pabyDest2, nIters);
5379 : }
5380 : #endif
5381 :
5382 2 : size_t i = 0;
5383 2 : if (((reinterpret_cast<uintptr_t>(pabySrc) |
5384 2 : reinterpret_cast<uintptr_t>(pabyDest0) |
5385 2 : reinterpret_cast<uintptr_t>(pabyDest1) |
5386 2 : reinterpret_cast<uintptr_t>(pabyDest2)) %
5387 : sizeof(unsigned int)) == 0)
5388 : {
5389 : // Slightly better than GCC autovectorizer
5390 17 : for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
5391 : {
5392 15 : unsigned int word0 =
5393 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
5394 15 : unsigned int word1 =
5395 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
5396 15 : unsigned int word2 =
5397 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
5398 15 : reinterpret_cast<unsigned int *>(pabyDest0)[j] =
5399 15 : (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
5400 15 : ((word2 >> 8) << 24);
5401 15 : reinterpret_cast<unsigned int *>(pabyDest1)[j] =
5402 15 : ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
5403 15 : (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
5404 15 : pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
5405 15 : pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
5406 15 : pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
5407 15 : pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
5408 : }
5409 : }
5410 : #if defined(__clang__)
5411 : #pragma clang loop vectorize(disable)
5412 : #endif
5413 3 : for (; i < nIters; ++i)
5414 : {
5415 1 : pabyDest0[i] = pabySrc[3 * i + 0];
5416 1 : pabyDest1[i] = pabySrc[3 * i + 1];
5417 1 : pabyDest2[i] = pabySrc[3 * i + 2];
5418 : }
5419 : }
5420 : #endif
5421 :
5422 : /************************************************************************/
5423 : /* GDALDeinterleave4Byte() */
5424 : /************************************************************************/
5425 :
5426 : #if !defined(__GNUC__) || defined(__clang__)
5427 :
5428 : /************************************************************************/
5429 : /* deinterleave() */
5430 : /************************************************************************/
5431 :
5432 : template <bool SHIFT, bool MASK>
5433 : inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
5434 : __m128i &xmm2_ori, __m128i &xmm3_ori)
5435 : {
5436 : // Set higher 24bit of each int32 packed word to 0
5437 : if (SHIFT)
5438 : {
5439 : xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
5440 : xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
5441 : xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
5442 : xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
5443 : }
5444 : __m128i xmm0;
5445 : __m128i xmm1;
5446 : __m128i xmm2;
5447 : __m128i xmm3;
5448 : if (MASK)
5449 : {
5450 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
5451 : xmm0 = _mm_and_si128(xmm0_ori, xmm_mask);
5452 : xmm1 = _mm_and_si128(xmm1_ori, xmm_mask);
5453 : xmm2 = _mm_and_si128(xmm2_ori, xmm_mask);
5454 : xmm3 = _mm_and_si128(xmm3_ori, xmm_mask);
5455 : }
5456 : else
5457 : {
5458 : xmm0 = xmm0_ori;
5459 : xmm1 = xmm1_ori;
5460 : xmm2 = xmm2_ori;
5461 : xmm3 = xmm3_ori;
5462 : }
5463 : // Pack int32 to int16
5464 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
5465 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
5466 : // Pack int16 to uint8
5467 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
5468 : return xmm0;
5469 : }
5470 :
5471 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5472 : GByte *CPL_RESTRICT pabyDest0,
5473 : GByte *CPL_RESTRICT pabyDest1,
5474 : GByte *CPL_RESTRICT pabyDest2,
5475 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5476 : #ifdef USE_NEON_OPTIMIZATIONS
5477 : {
5478 : return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5479 : pabyDest3, nIters);
5480 : }
5481 : #else
5482 : {
5483 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5484 : if (CPLHaveRuntimeSSSE3())
5485 : {
5486 : return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5487 : pabyDest2, pabyDest3, nIters);
5488 : }
5489 : #endif
5490 :
5491 : // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
5492 : // do something slightly better.
5493 : size_t i = 0;
5494 : for (; i + 15 < nIters; i += 16)
5495 : {
5496 : __m128i xmm0_ori = _mm_loadu_si128(
5497 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
5498 : __m128i xmm1_ori = _mm_loadu_si128(
5499 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
5500 : __m128i xmm2_ori = _mm_loadu_si128(
5501 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
5502 : __m128i xmm3_ori = _mm_loadu_si128(
5503 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
5504 :
5505 : _mm_storeu_si128(
5506 : reinterpret_cast<__m128i *>(pabyDest0 + i),
5507 : deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5508 : _mm_storeu_si128(
5509 : reinterpret_cast<__m128i *>(pabyDest1 + i),
5510 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5511 : _mm_storeu_si128(
5512 : reinterpret_cast<__m128i *>(pabyDest2 + i),
5513 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5514 : _mm_storeu_si128(
5515 : reinterpret_cast<__m128i *>(pabyDest3 + i),
5516 : deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5517 : }
5518 :
5519 : #if defined(__clang__)
5520 : #pragma clang loop vectorize(disable)
5521 : #endif
5522 : for (; i < nIters; ++i)
5523 : {
5524 : pabyDest0[i] = pabySrc[4 * i + 0];
5525 : pabyDest1[i] = pabySrc[4 * i + 1];
5526 : pabyDest2[i] = pabySrc[4 * i + 2];
5527 : pabyDest3[i] = pabySrc[4 * i + 3];
5528 : }
5529 : }
5530 : #endif
5531 : #else
5532 : // GCC autovectorizer does an excellent job
5533 53107 : __attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
5534 : const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
5535 : GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
5536 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5537 : {
5538 524428000 : for (size_t i = 0; i < nIters; ++i)
5539 : {
5540 524375000 : pabyDest0[i] = pabySrc[4 * i + 0];
5541 524375000 : pabyDest1[i] = pabySrc[4 * i + 1];
5542 524375000 : pabyDest2[i] = pabySrc[4 * i + 2];
5543 524375000 : pabyDest3[i] = pabySrc[4 * i + 3];
5544 : }
5545 53107 : }
5546 : #endif
5547 :
5548 : #else
5549 :
5550 : /************************************************************************/
5551 : /* GDALDeinterleave3Byte() */
5552 : /************************************************************************/
5553 :
5554 : // TODO: Enabling below could help on non-Intel architectures where GCC knows
5555 : // how to auto-vectorize
5556 : // #if defined(__GNUC__)
5557 : //__attribute__((optimize("tree-vectorize")))
5558 : // #endif
5559 : static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5560 : GByte *CPL_RESTRICT pabyDest0,
5561 : GByte *CPL_RESTRICT pabyDest1,
5562 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5563 : {
5564 : for (size_t i = 0; i < nIters; ++i)
5565 : {
5566 : pabyDest0[i] = pabySrc[3 * i + 0];
5567 : pabyDest1[i] = pabySrc[3 * i + 1];
5568 : pabyDest2[i] = pabySrc[3 * i + 2];
5569 : }
5570 : }
5571 :
5572 : /************************************************************************/
5573 : /* GDALDeinterleave4Byte() */
5574 : /************************************************************************/
5575 :
5576 : // TODO: Enabling below could help on non-Intel architectures where gcc knows
5577 : // how to auto-vectorize
5578 : // #if defined(__GNUC__)
5579 : //__attribute__((optimize("tree-vectorize")))
5580 : // #endif
5581 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5582 : GByte *CPL_RESTRICT pabyDest0,
5583 : GByte *CPL_RESTRICT pabyDest1,
5584 : GByte *CPL_RESTRICT pabyDest2,
5585 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5586 : {
5587 : for (size_t i = 0; i < nIters; ++i)
5588 : {
5589 : pabyDest0[i] = pabySrc[4 * i + 0];
5590 : pabyDest1[i] = pabySrc[4 * i + 1];
5591 : pabyDest2[i] = pabySrc[4 * i + 2];
5592 : pabyDest3[i] = pabySrc[4 * i + 3];
5593 : }
5594 : }
5595 :
5596 : #endif
5597 :
5598 : /************************************************************************/
5599 : /* GDALDeinterleave() */
5600 : /************************************************************************/
5601 :
5602 : /*! Copy values from a pixel-interleave buffer to multiple per-component
5603 : buffers.
5604 :
5605 : In pseudo-code
5606 : \verbatim
5607 : for(size_t i = 0; i < nIters; ++i)
5608 : for(int iComp = 0; iComp < nComponents; iComp++ )
5609 : ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
5610 : \endverbatim
5611 :
5612 : The implementation is optimized for a few cases, like de-interleaving
5613 : of 3 or 4-components Byte buffers.
5614 :
5615 : \since GDAL 3.6
5616 : */
5617 125137 : void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
5618 : int nComponents, void **ppDestBuffer,
5619 : GDALDataType eDestDT, size_t nIters)
5620 : {
5621 125137 : if (eSourceDT == eDestDT)
5622 : {
5623 125115 : if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8)
5624 : {
5625 124382 : if (nComponents == 3)
5626 : {
5627 71276 : const GByte *CPL_RESTRICT pabySrc =
5628 : static_cast<const GByte *>(pSourceBuffer);
5629 71276 : GByte *CPL_RESTRICT pabyDest0 =
5630 : static_cast<GByte *>(ppDestBuffer[0]);
5631 71276 : GByte *CPL_RESTRICT pabyDest1 =
5632 : static_cast<GByte *>(ppDestBuffer[1]);
5633 71276 : GByte *CPL_RESTRICT pabyDest2 =
5634 : static_cast<GByte *>(ppDestBuffer[2]);
5635 71276 : GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5636 : nIters);
5637 71282 : return;
5638 : }
5639 53106 : else if (nComponents == 4)
5640 : {
5641 53107 : const GByte *CPL_RESTRICT pabySrc =
5642 : static_cast<const GByte *>(pSourceBuffer);
5643 53107 : GByte *CPL_RESTRICT pabyDest0 =
5644 : static_cast<GByte *>(ppDestBuffer[0]);
5645 53107 : GByte *CPL_RESTRICT pabyDest1 =
5646 : static_cast<GByte *>(ppDestBuffer[1]);
5647 53107 : GByte *CPL_RESTRICT pabyDest2 =
5648 : static_cast<GByte *>(ppDestBuffer[2]);
5649 53107 : GByte *CPL_RESTRICT pabyDest3 =
5650 : static_cast<GByte *>(ppDestBuffer[3]);
5651 53107 : GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5652 : pabyDest3, nIters);
5653 53107 : return;
5654 0 : }
5655 : }
5656 : #if ((defined(__GNUC__) && !defined(__clang__)) || \
5657 : defined(__INTEL_CLANG_COMPILER)) && \
5658 : defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
5659 1466 : else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
5660 733 : CPLHaveRuntimeSSSE3())
5661 : {
5662 733 : if (nComponents == 3)
5663 : {
5664 239 : const GUInt16 *CPL_RESTRICT panSrc =
5665 : static_cast<const GUInt16 *>(pSourceBuffer);
5666 239 : GUInt16 *CPL_RESTRICT panDest0 =
5667 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5668 239 : GUInt16 *CPL_RESTRICT panDest1 =
5669 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5670 239 : GUInt16 *CPL_RESTRICT panDest2 =
5671 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5672 239 : GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
5673 : panDest2, nIters);
5674 239 : return;
5675 : }
5676 : #if !defined(__INTEL_CLANG_COMPILER)
5677 : // ICC autovectorizer doesn't do a good job, at least with icx
5678 : // 2022.1.0.20220316
5679 494 : else if (nComponents == 4)
5680 : {
5681 494 : const GUInt16 *CPL_RESTRICT panSrc =
5682 : static_cast<const GUInt16 *>(pSourceBuffer);
5683 494 : GUInt16 *CPL_RESTRICT panDest0 =
5684 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5685 494 : GUInt16 *CPL_RESTRICT panDest1 =
5686 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5687 494 : GUInt16 *CPL_RESTRICT panDest2 =
5688 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5689 494 : GUInt16 *CPL_RESTRICT panDest3 =
5690 : static_cast<GUInt16 *>(ppDestBuffer[3]);
5691 494 : GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
5692 : panDest2, panDest3, nIters);
5693 494 : return;
5694 : }
5695 : #endif
5696 : }
5697 : #endif
5698 : }
5699 :
5700 21 : const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
5701 22 : const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
5702 87 : for (int iComp = 0; iComp < nComponents; iComp++)
5703 : {
5704 65 : GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
5705 65 : iComp * nSourceDTSize,
5706 : eSourceDT, nComponents * nSourceDTSize,
5707 65 : ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
5708 : }
5709 : }
5710 :
5711 : /************************************************************************/
5712 : /* GDALTranspose2DSingleToSingle() */
5713 : /************************************************************************/
5714 : /**
5715 : * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way.
5716 : *
5717 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5718 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5719 : * @param nSrcWidth Width of pSrc array.
5720 : * @param nSrcHeight Height of pSrc array.
5721 : */
5722 :
5723 : template <class DST, class SRC>
5724 145 : void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc,
5725 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5726 : size_t nSrcHeight)
5727 : {
5728 145 : constexpr size_t blocksize = 32;
5729 315 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5730 : {
5731 170 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5732 390 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5733 : {
5734 : // transpose the block beginning at [i,j]
5735 220 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5736 2509 : for (size_t k = i; k < max_k; ++k)
5737 : {
5738 41017 : for (size_t l = j; l < max_l; ++l)
5739 : {
5740 38728 : GDALCopyWord(pSrc[l + k * nSrcWidth],
5741 38728 : pDst[k + l * nSrcHeight]);
5742 : }
5743 : }
5744 : }
5745 : }
5746 145 : }
5747 :
5748 : /************************************************************************/
5749 : /* GDALTranspose2DComplexToComplex() */
5750 : /************************************************************************/
5751 : /**
5752 : * Transpose a 2D array of complex values into an array of complex values,
5753 : * in a efficient (cache-oblivious) way.
5754 : *
5755 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5756 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5757 : * @param nSrcWidth Width of pSrc array.
5758 : * @param nSrcHeight Height of pSrc array.
5759 : */
5760 : template <class DST, class SRC>
5761 25 : void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc,
5762 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5763 : size_t nSrcHeight)
5764 : {
5765 25 : constexpr size_t blocksize = 32;
5766 50 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5767 : {
5768 25 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5769 50 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5770 : {
5771 : // transpose the block beginning at [i,j]
5772 25 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5773 75 : for (size_t k = i; k < max_k; ++k)
5774 : {
5775 200 : for (size_t l = j; l < max_l; ++l)
5776 : {
5777 150 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5778 150 : pDst[2 * (k + l * nSrcHeight) + 0]);
5779 150 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1],
5780 150 : pDst[2 * (k + l * nSrcHeight) + 1]);
5781 : }
5782 : }
5783 : }
5784 : }
5785 25 : }
5786 :
5787 : /************************************************************************/
5788 : /* GDALTranspose2DComplexToSingle() */
5789 : /************************************************************************/
5790 : /**
5791 : * Transpose a 2D array of complex values into an array of non-complex values,
5792 : * in an efficient, cache-friendly way (the array is walked in 32x32 tiles).
5793 : * Only component 0 of each source element is converted; component 1 is not read.
5794 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5795 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5796 : * @param nSrcWidth Width of pSrc array, in complex elements.
5797 : * @param nSrcHeight Height of pSrc array, in complex elements.
5798 : */
5799 : template <class DST, class SRC>
5800 55 : void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc,
5801 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5802 : size_t nSrcHeight)
5803 : {
5804 55 : constexpr size_t blocksize = 32; // tile edge length, in elements
5805 110 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5806 : {
5807 55 : const size_t max_k = std::min(i + blocksize, nSrcHeight); // clamp the last (partial) row of tiles
5808 110 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5809 : {
5810 : // transpose the block beginning at [i,j]
5811 55 : const size_t max_l = std::min(j + blocksize, nSrcWidth); // clamp the last (partial) column of tiles
5812 165 : for (size_t k = i; k < max_k; ++k)
5813 : {
5814 440 : for (size_t l = j; l < max_l; ++l)
5815 : {
5816 330 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5817 330 : pDst[k + l * nSrcHeight]); // source has 2 words per element, destination has 1
5818 : }
5819 : }
5820 : }
5821 : }
5822 55 : }
5823 :
5824 : /************************************************************************/
5825 : /* GDALTranspose2DSingleToComplex() */
5826 : /************************************************************************/
5827 : /**
5828 : * Transpose a 2D array of non-complex values into an array of complex values,
5829 : * in an efficient, cache-friendly way (the array is walked in 32x32 tiles).
5830 : * The source value goes to component 0 of the output element; component 1 is set to 0.
5831 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5832 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5833 : * @param nSrcWidth Width of pSrc array.
5834 : * @param nSrcHeight Height of pSrc array.
5835 : */
5836 : template <class DST, class SRC>
5837 55 : void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc,
5838 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5839 : size_t nSrcHeight)
5840 : {
5841 55 : constexpr size_t blocksize = 32; // tile edge length, in elements
5842 110 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5843 : {
5844 55 : const size_t max_k = std::min(i + blocksize, nSrcHeight); // clamp the last (partial) row of tiles
5845 110 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5846 : {
5847 : // transpose the block beginning at [i,j]
5848 55 : const size_t max_l = std::min(j + blocksize, nSrcWidth); // clamp the last (partial) column of tiles
5849 165 : for (size_t k = i; k < max_k; ++k)
5850 : {
5851 440 : for (size_t l = j; l < max_l; ++l)
5852 : {
5853 330 : GDALCopyWord(pSrc[l + k * nSrcWidth],
5854 330 : pDst[2 * (k + l * nSrcHeight) + 0]); // converted value -> component 0
5855 330 : pDst[2 * (k + l * nSrcHeight) + 1] = 0; // component 1 is zero-filled
5856 : }
5857 : }
5858 : }
5859 : }
5860 55 : }
5861 :
5862 : /************************************************************************/
5863 : /* GDALTranspose2D() */
5864 : /************************************************************************/
5865 : // Dispatch on the runtime source type eSrcType to the typed transpose helper matching DST / DST_IS_COMPLEX.
5866 : template <class DST, bool DST_IS_COMPLEX>
5867 280 : static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst,
5868 : size_t nSrcWidth, size_t nSrcHeight)
5869 : { // First macro: non-complex source of storage type SRC_TYPE (one word per element).
5870 : #define CALL_GDALTranspose2D_internal(SRC_TYPE) \
5871 : do \
5872 : { \
5873 : if constexpr (DST_IS_COMPLEX) \
5874 : { \
5875 : GDALTranspose2DSingleToComplex( \
5876 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5877 : nSrcHeight); \
5878 : } \
5879 : else \
5880 : { \
5881 : GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \
5882 : pDst, nSrcWidth, nSrcHeight); \
5883 : } \
5884 : } while (0)
5885 : // Second macro: complex source whose components have storage type SRC_TYPE (two words per element).
5886 : #define CALL_GDALTranspose2DComplex_internal(SRC_TYPE) \
5887 : do \
5888 : { \
5889 : if constexpr (DST_IS_COMPLEX) \
5890 : { \
5891 : GDALTranspose2DComplexToComplex( \
5892 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5893 : nSrcHeight); \
5894 : } \
5895 : else \
5896 : { \
5897 : GDALTranspose2DComplexToSingle( \
5898 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5899 : nSrcHeight); \
5900 : } \
5901 : } while (0)
5902 : // Map each GDALDataType to the C++ type pSrc is reinterpreted as.
5903 : // clang-format off
5904 280 : switch (eSrcType)
5905 : {
5906 16 : case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t); break;
5907 15 : case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t); break;
5908 24 : case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t); break;
5909 16 : case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t); break;
5910 24 : case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t); break;
5911 16 : case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t); break;
5912 16 : case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t); break;
5913 16 : case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t); break;
5914 16 : case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16); break;
5915 17 : case GDT_Float32: CALL_GDALTranspose2D_internal(float); break;
5916 24 : case GDT_Float64: CALL_GDALTranspose2D_internal(double); break;
5917 16 : case GDT_CInt16: CALL_GDALTranspose2DComplex_internal(int16_t); break;
5918 16 : case GDT_CInt32: CALL_GDALTranspose2DComplex_internal(int32_t); break;
5919 16 : case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break;
5920 16 : case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break;
5921 16 : case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break;
5922 0 : case GDT_Unknown: // no storage type to dispatch on: pDst is left untouched
5923 : case GDT_TypeCount:
5924 0 : break;
5925 : }
5926 : // clang-format on
5927 :
5928 : #undef CALL_GDALTranspose2D_internal
5929 : #undef CALL_GDALTranspose2DComplex_internal
5930 280 : }
5931 :
5932 : /************************************************************************/
5933 : /* GDALInterleave2Byte() */
5934 : /************************************************************************/
5935 : // Interleave two consecutive runs of nIters bytes: pDst[2 * i + c] = pSrc[i + c * nIters] for c in {0, 1}.
5936 : #if defined(HAVE_SSE2) && \
5937 : (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
5938 : // Explicit SSE2 variant, compiled for non-GNUC compilers and for the Intel clang-based compiler.
5939 : // ICC autovectorizer doesn't do a good job at generating good SSE code,
5940 : // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
5941 : #if defined(__GNUC__)
5942 : __attribute__((noinline))
5943 : #endif
5944 : static void
5945 : GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
5946 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
5947 : {
5948 : size_t i = 0;
5949 : constexpr size_t VALS_PER_ITER = 16; // bytes taken from each run per SSE iteration
5950 : for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
5951 : {
5952 : __m128i xmm0 =
5953 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i)); // 16 bytes of the first run
5954 : __m128i xmm1 = _mm_loadu_si128(
5955 : reinterpret_cast<__m128i const *>(pSrc + i + nIters)); // 16 bytes of the second run
5956 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
5957 : _mm_unpacklo_epi8(xmm0, xmm1)); // interleaved pairs 0 to 7
5958 : _mm_storeu_si128(
5959 : reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
5960 : _mm_unpackhi_epi8(xmm0, xmm1)); // interleaved pairs 8 to 15
5961 : }
5962 : #if defined(__clang__)
5963 : #pragma clang loop vectorize(disable)
5964 : #endif
5965 : for (; i < nIters; ++i) // scalar tail: fewer than VALS_PER_ITER values remain
5966 : {
5967 : pDst[2 * i + 0] = pSrc[i + 0 * nIters];
5968 : pDst[2 * i + 1] = pSrc[i + 1 * nIters];
5969 : }
5970 : }
5971 :
5972 : #else
5973 : // Plain-loop variant; the attributes and pragmas below ask the compiler to auto-vectorize it.
5974 : #if defined(__GNUC__) && !defined(__clang__)
5975 : __attribute__((optimize("tree-vectorize")))
5976 : #endif
5977 : #if defined(__GNUC__)
5978 : __attribute__((noinline))
5979 : #endif
5980 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
5981 : // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
5982 : #pragma clang diagnostic push
5983 : #pragma clang diagnostic ignored "-Wpass-failed"
5984 : #endif
5985 : static void
5986 4 : GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
5987 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
5988 : {
5989 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
5990 : #pragma clang loop vectorize(enable)
5991 : #endif
5992 44 : for (size_t i = 0; i < nIters; ++i)
5993 : {
5994 40 : pDst[2 * i + 0] = pSrc[i + 0 * nIters]; // value i of the first run
5995 40 : pDst[2 * i + 1] = pSrc[i + 1 * nIters]; // value i of the second run
5996 : }
5997 4 : }
5998 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
5999 : #pragma clang diagnostic pop
6000 : #endif
6001 :
6002 : #endif
6003 :
6004 : /************************************************************************/
6005 : /* GDALInterleave4Byte() */
6006 : /************************************************************************/
6007 : // Interleave four consecutive runs of nIters bytes: pDst[4 * i + c] = pSrc[i + c * nIters] for c in {0, 1, 2, 3}.
6008 : #if defined(HAVE_SSE2) && \
6009 : (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
6010 : // Explicit SSE2 variant, compiled for non-GNUC compilers and for the Intel clang-based compiler.
6011 : // ICC autovectorizer doesn't do a good job at generating good SSE code,
6012 : // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
6013 : #if defined(__GNUC__)
6014 : __attribute__((noinline))
6015 : #endif
6016 : static void
6017 : GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6018 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
6019 : {
6020 : size_t i = 0;
6021 : constexpr size_t VALS_PER_ITER = 16; // bytes taken from each run per SSE iteration
6022 : for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
6023 : {
6024 : __m128i xmm0 = _mm_loadu_si128(
6025 : reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
6026 : __m128i xmm1 = _mm_loadu_si128(
6027 : reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
6028 : __m128i xmm2 = _mm_loadu_si128(
6029 : reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
6030 : __m128i xmm3 = _mm_loadu_si128(
6031 : reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
6032 : auto tmp0 = _mm_unpacklo_epi8(
6033 : xmm0,
6034 : xmm1); // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
6035 : auto tmp1 = _mm_unpackhi_epi8(
6036 : xmm0,
6037 : xmm1); // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
6038 : auto tmp2 = _mm_unpacklo_epi8(
6039 : xmm2,
6040 : xmm3); // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
6041 : auto tmp3 = _mm_unpackhi_epi8(
6042 : xmm2,
6043 : xmm3); // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
6044 : auto tmp2_0 = _mm_unpacklo_epi16(
6045 : tmp0,
6046 : tmp2); // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
6047 : auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2); // same layout, for values 4 to 7
6048 : auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3); // same layout, for values 8 to 11
6049 : auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3); // same layout, for values 12 to 15
6050 : _mm_storeu_si128(
6051 : reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
6052 : tmp2_0);
6053 : _mm_storeu_si128(
6054 : reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
6055 : tmp2_1);
6056 : _mm_storeu_si128(
6057 : reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
6058 : tmp2_2);
6059 : _mm_storeu_si128(
6060 : reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
6061 : tmp2_3);
6062 : }
6063 : #if defined(__clang__)
6064 : #pragma clang loop vectorize(disable)
6065 : #endif
6066 : for (; i < nIters; ++i) // scalar tail: fewer than VALS_PER_ITER values remain
6067 : {
6068 : pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6069 : pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6070 : pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6071 : pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6072 : }
6073 : }
6074 :
6075 : #else
6076 : // Plain-loop variant; the attributes and pragmas below ask the compiler to auto-vectorize it.
6077 : #if defined(__GNUC__) && !defined(__clang__)
6078 : __attribute__((optimize("tree-vectorize")))
6079 : #endif
6080 : #if defined(__GNUC__)
6081 : __attribute__((noinline))
6082 : #endif
6083 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6084 : // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6085 : #pragma clang diagnostic push
6086 : #pragma clang diagnostic ignored "-Wpass-failed"
6087 : #endif
6088 : static void
6089 2 : GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6090 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
6091 : {
6092 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6093 : #pragma clang loop vectorize(enable)
6094 : #endif
6095 36 : for (size_t i = 0; i < nIters; ++i)
6096 : {
6097 34 : pDst[4 * i + 0] = pSrc[i + 0 * nIters]; // value i of the first run
6098 34 : pDst[4 * i + 1] = pSrc[i + 1 * nIters]; // value i of the second run
6099 34 : pDst[4 * i + 2] = pSrc[i + 2 * nIters]; // value i of the third run
6100 34 : pDst[4 * i + 3] = pSrc[i + 3 * nIters]; // value i of the fourth run
6101 : }
6102 2 : }
6103 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6104 : #pragma clang diagnostic pop
6105 : #endif
6106 :
6107 : #endif
6108 :
6109 : /************************************************************************/
6110 : /* GDALTranspose2D() */
6111 : /************************************************************************/
6112 :
6113 : /**
6114 : * Transpose a 2D array in an efficient (cache-friendly) way, converting each
6115 : * value from eSrcType to eDstType. Nothing is written if either type is GDT_Unknown or GDT_TypeCount.
6116 : * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight.
6117 : * @param eSrcType Data type of pSrc.
6118 : * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth.
6119 : * @param eDstType Data type of pDst.
6120 : * @param nSrcWidth Width of pSrc array.
6121 : * @param nSrcHeight Height of pSrc array.
6122 : * @since GDAL 3.11
6123 : */
6124 :
6125 305 : void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst,
6126 : GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight)
6127 : {
6128 305 : if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8)) // 8-bit to same 8-bit type: handled as raw bytes
6129 : {
6130 25 : if (nSrcHeight == 2) // 2 rows: transposing is interleaving 2 runs of nSrcWidth bytes
6131 : {
6132 4 : GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc),
6133 : static_cast<uint8_t *>(pDst), nSrcWidth);
6134 4 : return;
6135 : }
6136 21 : if (nSrcHeight == 4) // 4 rows: transposing is interleaving 4 runs of nSrcWidth bytes
6137 : {
6138 2 : GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc),
6139 : static_cast<uint8_t *>(pDst), nSrcWidth);
6140 2 : return;
6141 : }
6142 : #if (defined(HAVE_SSSE3_AT_COMPILE_TIME) && \
6143 : (defined(__x86_64) || defined(_M_X64)))
6144 19 : if (CPLHaveRuntimeSSSE3()) // x86-64: the SSSE3 routine is used only when the runtime check passes
6145 : {
6146 19 : GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6147 : static_cast<uint8_t *>(pDst), nSrcWidth,
6148 : nSrcHeight);
6149 19 : return;
6150 : }
6151 : #elif defined(USE_NEON_OPTIMIZATIONS)
6152 : { // NEON build: same routine, called without a runtime check
6153 : GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6154 : static_cast<uint8_t *>(pDst), nSrcWidth,
6155 : nSrcHeight);
6156 : return;
6157 : }
6158 : #endif
6159 : } // no byte fast path taken: continue with the generic typed path below
6160 :
6161 : #define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX) \
6162 : GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>( \
6163 : pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight)
6164 : // Select the destination storage type here; the template then dispatches on eSrcType.
6165 : // clang-format off
6166 280 : switch (eDstType)
6167 : {
6168 15 : case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t, false); break;
6169 15 : case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t, false); break;
6170 24 : case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t, false); break;
6171 16 : case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t, false); break;
6172 24 : case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t, false); break;
6173 16 : case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t, false); break;
6174 16 : case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t, false); break;
6175 16 : case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t, false); break;
6176 16 : case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16, false); break;
6177 17 : case GDT_Float32: CALL_GDALTranspose2D_internal(float, false); break;
6178 25 : case GDT_Float64: CALL_GDALTranspose2D_internal(double, false); break;
6179 16 : case GDT_CInt16: CALL_GDALTranspose2D_internal(int16_t, true); break;
6180 16 : case GDT_CInt32: CALL_GDALTranspose2D_internal(int32_t, true); break;
6181 16 : case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break;
6182 16 : case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break;
6183 16 : case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break;
6184 0 : case GDT_Unknown: // no storage type to dispatch on: pDst is left untouched
6185 : case GDT_TypeCount:
6186 0 : break;
6187 : }
6188 : // clang-format on
6189 :
6190 : #undef CALL_GDALTranspose2D_internal
6191 : }
6192 :
6193 : /************************************************************************/
6194 : /* ExtractBitAndConvertTo255() */
6195 : /************************************************************************/
6196 :
6197 : #if defined(__GNUC__) || defined(_MSC_VER)
6198 : // Returns 255 if bit nBit of byVal is set, 0 otherwise, without branching: the bit is shifted up to bit 7, then spread over the byte by the signed >> 7.
6199 : // Signedness of char implementation dependent, so be explicit. Assumes 2-complement integer types and sign extension of right shifting
6200 : // GCC guarantees such:
6201 : // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
6202 156490 : static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6203 : {
6204 156490 : return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >>
6205 156490 : 7);
6206 : }
6207 : #else
6208 : // Portable way: test the bit and select 255 or 0 explicitly.
6209 : static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6210 : {
6211 : return (byVal & (1 << nBit)) ? 255 : 0;
6212 : }
6213 : #endif
6214 :
6215 : /************************************************************************/
6216 : /* ExpandEightPackedBitsToByteAt255() */
6217 : /************************************************************************/
6218 : // Expand the 8 bits of byVal, most significant bit first, into 8 output bytes each equal to 0 or 255.
6219 19377 : static inline void ExpandEightPackedBitsToByteAt255(GByte byVal,
6220 : GByte abyOutput[8])
6221 : {
6222 19377 : abyOutput[0] = ExtractBitAndConvertTo255(byVal, 7); // bit 7 (most significant) comes out first
6223 19377 : abyOutput[1] = ExtractBitAndConvertTo255(byVal, 6);
6224 19377 : abyOutput[2] = ExtractBitAndConvertTo255(byVal, 5);
6225 19377 : abyOutput[3] = ExtractBitAndConvertTo255(byVal, 4);
6226 19377 : abyOutput[4] = ExtractBitAndConvertTo255(byVal, 3);
6227 19377 : abyOutput[5] = ExtractBitAndConvertTo255(byVal, 2);
6228 19377 : abyOutput[6] = ExtractBitAndConvertTo255(byVal, 1);
6229 19377 : abyOutput[7] = ExtractBitAndConvertTo255(byVal, 0); // bit 0 (least significant) comes out last
6230 19377 : }
6231 :
6232 : /************************************************************************/
6233 : /* GDALExpandPackedBitsToByteAt0Or255() */
6234 : /************************************************************************/
6235 :
6236 : /** Expand packed-bits (ordered from most-significant bit to least one)
6237 : into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6238 : at 1 to a byte at 255.
6239 :
6240 : The function does (in a possibly more optimized way) the following:
6241 : \code{.cpp}
6242 : for (size_t i = 0; i < nInputBits; ++i )
6243 : {
6244 : pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0;
6245 : }
6246 : \endcode
6247 :
6248 : @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6249 : @param pabyOutput Output array of nInputBits bytes.
6250 : @param nInputBits Number of valid bits in pabyInput.
6251 :
6252 : @since 3.11
6253 : */
6254 :
6255 44405 : void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput,
6256 : GByte *CPL_RESTRICT pabyOutput,
6257 : size_t nInputBits)
6258 : {
6259 44405 : const size_t nInputWholeBytes = nInputBits / 8; // input bytes in which all 8 bits are valid
6260 44405 : size_t iByte = 0; // index of the next input byte to expand
6261 :
6262 : #ifdef HAVE_SSE2
6263 : // Mask to isolate each bit (byte 0 of the register holds 0x80, so the most significant bit comes out first)
6264 44405 : const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4,
6265 : 8, 16, 32, 64, -128);
6266 44405 : const __m128i zero = _mm_setzero_si128();
6267 44405 : const __m128i all_ones = _mm_set1_epi8(-1);
6268 : #ifdef __SSSE3__
6269 : const __m128i dispatch_two_bytes =
6270 : _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0); // shuffle indices: byte 0 into lanes 0-7, byte 1 into lanes 8-15
6271 : #endif
6272 44405 : constexpr size_t SSE_REG_SIZE = sizeof(bit_mask);
6273 132354 : for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE) // one register of input bytes per iteration
6274 : {
6275 87949 : __m128i reg_ori = _mm_loadu_si128(
6276 87949 : reinterpret_cast<const __m128i *>(pabyInput + iByte));
6277 :
6278 87949 : constexpr int NUM_PROCESSED_BYTES_PER_REG = 2; // each output register holds the expansion of 2 input bytes
6279 791541 : for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k)
6280 : {
6281 : // Given reg_ori = (A, B, ... 14 other bytes ...),
6282 : // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B)
6283 : #ifdef __SSSE3__
6284 : __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes);
6285 : #else
6286 703592 : __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori); // SSE2-only route: three successive duplications
6287 703592 : reg = _mm_unpacklo_epi16(reg, reg);
6288 703592 : reg = _mm_unpacklo_epi32(reg, reg);
6289 : #endif
6290 :
6291 : // Test if bits of interest are set
6292 703592 : reg = _mm_and_si128(reg, bit_mask);
6293 :
6294 : // Now test if those bits are set, by comparing to zero. So the
6295 : // result will be that bytes where bits are set will be at 0, and
6296 : // ones where they are cleared will be at 0xFF. So the inverse of
6297 : // the end result we want!
6298 703592 : reg = _mm_cmpeq_epi8(reg, zero);
6299 :
6300 : // Invert the result
6301 703592 : reg = _mm_andnot_si128(reg, all_ones);
6302 :
6303 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg);
6304 :
6305 703592 : pabyOutput += SSE_REG_SIZE;
6306 :
6307 : // Right-shift of 2 bytes
6308 703592 : reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG);
6309 : }
6310 : }
6311 :
6312 : #endif // HAVE_SSE2
6313 :
6314 63782 : for (; iByte < nInputWholeBytes; ++iByte) // whole bytes not consumed by the SSE2 loop (all of them without SSE2)
6315 : {
6316 19377 : ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput);
6317 19377 : pabyOutput += 8;
6318 : }
6319 45879 : for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) // leftover bits of the final partial byte, most significant first
6320 : {
6321 1474 : *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit);
6322 1474 : ++pabyOutput;
6323 : }
6324 44405 : }
6325 :
6326 : /************************************************************************/
6327 : /* ExpandEightPackedBitsToByteAt1() */
6328 : /************************************************************************/
6329 : // Expand the 8 bits of byVal, most significant bit first, into 8 output bytes each equal to 0 or 1.
6330 136113 : static inline void ExpandEightPackedBitsToByteAt1(GByte byVal,
6331 : GByte abyOutput[8])
6332 : {
6333 136113 : abyOutput[0] = (byVal >> 7) & 0x1; // bit 7 (most significant) comes out first
6334 136113 : abyOutput[1] = (byVal >> 6) & 0x1;
6335 136113 : abyOutput[2] = (byVal >> 5) & 0x1;
6336 136113 : abyOutput[3] = (byVal >> 4) & 0x1;
6337 136113 : abyOutput[4] = (byVal >> 3) & 0x1;
6338 136113 : abyOutput[5] = (byVal >> 2) & 0x1;
6339 136113 : abyOutput[6] = (byVal >> 1) & 0x1;
6340 136113 : abyOutput[7] = (byVal >> 0) & 0x1; // bit 0 (least significant) comes out last
6341 136113 : }
6342 :
6343 : /************************************************************************/
6344 : /* GDALExpandPackedBitsToByteAt0Or1() */
6345 : /************************************************************************/
6346 :
6347 : /** Expand packed-bits (ordered from most-significant bit to least one)
6348 : into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6349 : at 1 to a byte at 1.
6350 :
6351 : The function does (in a possibly more optimized way) the following:
6352 : \code{.cpp}
6353 : for (size_t i = 0; i < nInputBits; ++i )
6354 : {
6355 : pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
6356 : }
6357 : \endcode
6358 :
6359 : @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6360 : @param pabyOutput Output array of nInputBits bytes.
6361 : @param nInputBits Number of valid bits in pabyInput.
6362 :
6363 : @since 3.11
6364 : */
6365 :
6366 7041 : void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput,
6367 : GByte *CPL_RESTRICT pabyOutput,
6368 : size_t nInputBits)
6369 : {
6370 7041 : const size_t nInputWholeBytes = nInputBits / 8; // input bytes in which all 8 bits are valid
6371 7041 : size_t iByte = 0; // index of the next input byte to expand
6372 143154 : for (; iByte < nInputWholeBytes; ++iByte) // each whole input byte yields 8 output bytes
6373 : {
6374 136113 : ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput);
6375 136113 : pabyOutput += 8;
6376 : }
6377 18902 : for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit) // leftover bits of the final partial byte, most significant first
6378 : {
6379 11861 : *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1;
6380 11861 : ++pabyOutput;
6381 : }
6382 7041 : }
|