Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Contains default implementation of GDALRasterBand::IRasterIO()
5 : * and supporting functions of broader utility.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1998, Frank Warmerdam
10 : * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11 : *
12 : * SPDX-License-Identifier: MIT
13 : ****************************************************************************/
14 :
15 : #include "cpl_port.h"
16 : #include "gdal.h"
17 : #include "gdal_priv.h"
18 :
19 : #include <cassert>
20 : #include <climits>
21 : #include <cmath>
22 : #include <cstddef>
23 : #include <cstdio>
24 : #include <cstdlib>
25 : #include <cstring>
26 :
27 : #include <algorithm>
28 : #include <limits>
29 : #include <stdexcept>
30 : #include <type_traits>
31 :
32 : #include "cpl_conv.h"
33 : #include "cpl_cpu_features.h"
34 : #include "cpl_error.h"
35 : #include "cpl_float.h"
36 : #include "cpl_progress.h"
37 : #include "cpl_string.h"
38 : #include "cpl_vsi.h"
39 : #include "gdal_priv_templates.hpp"
40 : #include "gdal_vrt.h"
41 : #include "gdalwarper.h"
42 : #include "memdataset.h"
43 : #include "vrtdataset.h"
44 :
45 : #if defined(__x86_64) || defined(_M_X64)
46 : #include <emmintrin.h>
47 : #define HAVE_SSE2
48 : #elif defined(USE_NEON_OPTIMIZATIONS)
49 : #include "include_sse2neon.h"
50 : #define HAVE_SSE2
51 : #endif
52 :
53 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
54 : #include "rasterio_ssse3.h"
55 : #ifdef __SSSE3__
56 : #include <tmmintrin.h>
57 : #endif
58 : #endif
59 :
// Forward declaration of a file-local helper; its definition is not part of
// this section of the file. In this section it is called from
// DownsamplingIntegerXFactor() (1-byte, same-data-type case) with a source
// stride, a destination stride and a count of words to copy.
60 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
61 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
62 : int nDstPixelStride, GPtrDiff_t nWordCount);
63 :
64 : /************************************************************************/
65 : /* DownsamplingIntegerXFactor() */
66 : /************************************************************************/
67 :
// Copies nBufXSize pixels into pabyDstData from the cached blocks of poBand
// located in block row nLBlockY, starting at source column iSrcX and
// advancing nSrcXInc source columns per output pixel.
//
// Template parameters:
//   bSameDataType   when true, the pixel size is DATA_TYPE_SIZE and pixels
//                   are copied with memcpy() (or GDALFastCopyByte() when
//                   DATA_TYPE_SIZE == 1); when false, the pixel size comes
//                   from GDALGetDataTypeSizeBytes(eDataType) and pixels are
//                   converted with GDALCopyWords64().
//   DATA_TYPE_SIZE  size in bytes of one pixel, only used when bSameDataType
//                   is true.
//
// Parameters:
//   poBand         band whose blocks are fetched with GetLockedBlockRef().
//   iSrcX          source column of the first pixel to copy.
//   nSrcXInc       source column increment between two output pixels.
//   iSrcOffsetCst  constant pixel offset added to (iSrcX - nStartBlockX)
//                  when locating a pixel inside the block data.
//   pabyDstData    destination of the first output pixel.
//   nPixelSpace    byte distance between two output pixels.
//   nBufXSize      number of output pixels to produce.
//   eDataType      source data type (used when bSameDataType is false).
//   eBufType       destination data type (used when bSameDataType is false).
//   nStartBlockX   [in,out] first column of the block referenced by poBlock;
//                  updated each time another block is loaded.
//   nBlockXSize    block width in pixels.
//   poBlock        [in,out] currently locked block. Its lock is dropped when
//                  another block is needed. On success the last block used is
//                  left locked in poBlock; on failure poBlock is nullptr.
//   nLBlockY       block row index passed to GetLockedBlockRef().
//
// Returns false if GetLockedBlockRef() returns nullptr, true otherwise.
68 : template <bool bSameDataType, int DATA_TYPE_SIZE>
69 695677 : static bool DownsamplingIntegerXFactor(
70 : GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
71 : GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
72 : GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
73 : int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
74 : {
75 695677 : const int nBandDataSize =
76 : bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
// The final pixel is not counted here: it is handled after the loop.
77 695677 : int nOuterLoopIters = nBufXSize - 1;
// Byte distance between two consecutive sampled source pixels.
78 695677 : const int nIncSrcOffset = nSrcXInc * nBandDataSize;
79 : const GByte *CPL_RESTRICT pabySrcData;
80 695677 : int nEndBlockX = nBlockXSize + nStartBlockX;
81 :
// The first pixel is processed by jumping straight into the loop body,
// bypassing the loop condition: reuse the block already referenced by
// poBlock if iSrcX falls before its end column, otherwise load a block.
82 695677 : if (iSrcX < nEndBlockX)
83 : {
84 294999 : CPLAssert(poBlock);
85 294999 : goto no_reload_block;
86 : }
87 400678 : goto reload_block;
88 :
89 : // Don't do the last iteration in the loop, as iSrcX might go beyond
90 : // nRasterXSize - 1
91 1264772 : while (--nOuterLoopIters >= 1)
92 : {
93 201834 : iSrcX += nSrcXInc;
94 201834 : pabySrcData += nIncSrcOffset;
95 201834 : pabyDstData += nPixelSpace;
96 :
97 : /* --------------------------------------------------------------------
98 : */
99 : /* Ensure we have the appropriate block loaded. */
100 : /* --------------------------------------------------------------------
101 : */
102 201834 : if (iSrcX >= nEndBlockX)
103 : {
104 201834 : reload_block:
105 : {
106 615102 : const int nLBlockX = iSrcX / nBlockXSize;
107 615102 : nStartBlockX = nLBlockX * nBlockXSize;
108 615102 : nEndBlockX = nStartBlockX + nBlockXSize;
109 :
// Release the previously locked block before locking the next one.
110 615102 : if (poBlock != nullptr)
111 341314 : poBlock->DropLock();
112 :
113 615102 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
114 615102 : if (poBlock == nullptr)
115 : {
116 1 : return false;
117 : }
118 : }
119 :
// Recompute the source pointer from the block data of poBlock. This label
// is also the entry point when the wanted column is in the current block.
120 615101 : no_reload_block:
121 : const GByte *pabySrcBlock =
122 1264772 : static_cast<const GByte *>(poBlock->GetDataRef());
123 1264772 : GPtrDiff_t iSrcOffset =
124 1264772 : (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
125 1264772 : pabySrcData = pabySrcBlock + iSrcOffset;
126 : }
127 :
128 : /* --------------------------------------------------------------------
129 : */
130 : /* Copy the maximum run of pixels. */
131 : /* --------------------------------------------------------------------
132 : */
133 :
// Number of sampled pixels left before nEndBlockX (rounded-up division by
// nSrcXInc), capped by the number of remaining outer iterations.
134 1264772 : const int nIters = std::min(
135 1264772 : (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
136 : if (bSameDataType)
137 : {
// The current pixel is always copied, even when nIters is <= 1.
138 1264367 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
139 1264367 : if (nIters > 1)
140 : {
141 : if (DATA_TYPE_SIZE == 1)
142 : {
// Copy the nIters - 1 following pixels in one call, starting one pixel
// further; then advance by nIters - 2 more so that both pointers end up
// on the last pixel copied (nIters - 1 steps in total).
143 326246 : pabySrcData += nIncSrcOffset;
144 326246 : pabyDstData += nPixelSpace;
145 326246 : GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
146 326246 : nPixelSpace, nIters - 1);
147 326246 : pabySrcData +=
148 326246 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
149 326246 : pabyDstData +=
150 326246 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
151 : }
152 : else
153 : {
154 4395158 : for (int i = 0; i < nIters - 1; i++)
155 : {
156 4197064 : pabySrcData += nIncSrcOffset;
157 4197064 : pabyDstData += nPixelSpace;
158 4197064 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
159 : }
160 : }
// Account for the extra pixels consumed within this outer iteration.
161 524340 : iSrcX += nSrcXInc * (nIters - 1);
162 524340 : nOuterLoopIters -= nIters - 1;
163 : }
164 : }
165 : else
166 : {
167 : // Type to type conversion ...
// nIters is 0 on the final-pixel pass (nOuterLoopIters == 0), hence the
// std::max() so that at least the current pixel is converted.
168 405 : GDALCopyWords64(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
169 405 : eBufType, nPixelSpace, std::max(1, nIters));
170 405 : if (nIters > 1)
171 : {
172 198 : pabySrcData +=
173 198 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
174 198 : pabyDstData +=
175 198 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
176 198 : iSrcX += nSrcXInc * (nIters - 1);
177 198 : nOuterLoopIters -= nIters - 1;
178 : }
179 : }
180 : }
181 :
182 : // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
// The source column is advanced using 64-bit arithmetic and clamped to
// nRasterXSize - 1, then the loop body is re-entered once through a label.
// After that pass the loop condition decrements nOuterLoopIters below 0,
// so this branch is not taken a second time.
183 1062938 : if (nOuterLoopIters == 0)
184 : {
185 367262 : const int nRasterXSize = poBand->GetXSize();
186 367262 : iSrcX =
187 734524 : static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
188 367262 : static_cast<GInt64>(nRasterXSize - 1)));
189 367262 : pabyDstData += nPixelSpace;
190 367262 : if (iSrcX < nEndBlockX)
191 : {
192 354672 : goto no_reload_block;
193 : }
194 12590 : goto reload_block;
195 : }
196 695676 : return true;
197 : }
198 :
199 : /************************************************************************/
200 : /* IRasterIO() */
201 : /* */
202 : /* Default internal implementation of RasterIO() ... utilizes */
203 : /* the Block access methods to satisfy the request. This would */
204 : /* normally only be overridden by formats with overviews. */
205 : /************************************************************************/
206 :
207 6019240 : CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
208 : int nXSize, int nYSize, void *pData,
209 : int nBufXSize, int nBufYSize,
210 : GDALDataType eBufType, GSpacing nPixelSpace,
211 : GSpacing nLineSpace,
212 : GDALRasterIOExtraArg *psExtraArg)
213 :
214 : {
215 6019240 : if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
216 : {
217 0 : CPLError(eFlushBlockErr, CPLE_AppDefined,
218 : "An error occurred while writing a dirty block "
219 : "from GDALRasterBand::IRasterIO");
220 0 : CPLErr eErr = eFlushBlockErr;
221 0 : eFlushBlockErr = CE_None;
222 0 : return eErr;
223 : }
224 6019240 : if (nBlockXSize <= 0 || nBlockYSize <= 0)
225 : {
226 68 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
227 0 : return CE_Failure;
228 : }
229 :
230 6019170 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
231 6019170 : const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
232 6019180 : GByte dummyBlock[2] = {0, 0};
233 6019180 : GByte *pabySrcBlock =
234 : dummyBlock; /* to avoid Coverity warning about nullptr dereference */
235 6019180 : GDALRasterBlock *poBlock = nullptr;
236 6019180 : const bool bUseIntegerRequestCoords =
237 6264990 : (!psExtraArg->bFloatingPointWindowValidity ||
238 245815 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
239 222431 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
240 :
241 : /* ==================================================================== */
242 : /* A common case is the data requested with the destination */
243 : /* is packed, and the block width is the raster width. */
244 : /* ==================================================================== */
245 5941490 : if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
246 3096660 : nBlockXSize == GetXSize() && nBufXSize == nXSize &&
247 11960700 : nBufYSize == nYSize && bUseIntegerRequestCoords)
248 : {
249 2984890 : CPLErr eErr = CE_None;
250 2984890 : int nLBlockY = -1;
251 :
252 8955340 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
253 : {
254 5971420 : const int iSrcY = iBufYOff + nYOff;
255 :
256 5971420 : if (iSrcY < nLBlockY * nBlockYSize ||
257 5970990 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
258 : {
259 3246140 : nLBlockY = iSrcY / nBlockYSize;
260 3246140 : bool bJustInitialize =
261 295352 : eRWFlag == GF_Write && nXOff == 0 &&
262 3598550 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
263 57057 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
264 :
265 : // Is this a partial tile at right and/or bottom edges of
266 : // the raster, and that is going to be completely written?
267 : // If so, do not load it from storage, but zero it so that
268 : // the content outside of the validity area is initialized.
269 3246140 : bool bMemZeroBuffer = false;
270 295352 : if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
271 23813 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
272 3541580 : nYOff + nYSize == GetYSize() &&
273 89 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
274 : {
275 89 : bJustInitialize = true;
276 89 : bMemZeroBuffer = true;
277 : }
278 :
279 3246140 : if (poBlock)
280 261243 : poBlock->DropLock();
281 :
282 3246140 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
283 3246130 : poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
284 3246270 : if (poBlock == nullptr)
285 : {
286 1070 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
287 : nullptr)
288 : {
289 0 : CPLError(CE_Failure, CPLE_AppDefined,
290 : "GetBlockRef failed at X block offset %d, "
291 : "Y block offset %d%s",
292 : 0, nLBlockY,
293 0 : (nErrorCounter != CPLGetErrorCounter())
294 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
295 : : "");
296 : }
297 1070 : eErr = CE_Failure;
298 1070 : break;
299 : }
300 :
301 3245200 : if (eRWFlag == GF_Write)
302 295352 : poBlock->MarkDirty();
303 :
304 3245200 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
305 3245160 : if (bMemZeroBuffer)
306 : {
307 89 : memset(pabySrcBlock, 0,
308 89 : static_cast<GPtrDiff_t>(nBandDataSize) *
309 89 : nBlockXSize * nBlockYSize);
310 : }
311 : }
312 :
313 5970430 : const auto nSrcByteOffset =
314 5970430 : (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
315 5970430 : nBlockXSize +
316 5970430 : nXOff) *
317 5970430 : nBandDataSize;
318 :
319 5970430 : if (eDataType == eBufType)
320 : {
321 2335410 : if (eRWFlag == GF_Read)
322 1864910 : memcpy(static_cast<GByte *>(pData) +
323 1864910 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
324 1864910 : pabySrcBlock + nSrcByteOffset,
325 : static_cast<size_t>(nLineSpace));
326 : else
327 470500 : memcpy(pabySrcBlock + nSrcByteOffset,
328 470500 : static_cast<GByte *>(pData) +
329 470500 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
330 : static_cast<size_t>(nLineSpace));
331 : }
332 : else
333 : {
334 : // Type to type conversion.
335 3635020 : if (eRWFlag == GF_Read)
336 3613650 : GDALCopyWords64(
337 3613650 : pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
338 : static_cast<GByte *>(pData) +
339 3613650 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
340 : eBufType, static_cast<int>(nPixelSpace), nBufXSize);
341 : else
342 21371 : GDALCopyWords64(static_cast<GByte *>(pData) +
343 21371 : static_cast<GPtrDiff_t>(iBufYOff) *
344 : nLineSpace,
345 : eBufType, static_cast<int>(nPixelSpace),
346 21371 : pabySrcBlock + nSrcByteOffset, eDataType,
347 : nBandDataSize, nBufXSize);
348 : }
349 :
350 6043290 : if (psExtraArg->pfnProgress != nullptr &&
351 72839 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
352 : psExtraArg->pProgressData))
353 : {
354 5 : eErr = CE_Failure;
355 5 : break;
356 : }
357 : }
358 :
359 2985000 : if (poBlock)
360 2983920 : poBlock->DropLock();
361 :
362 2984970 : return eErr;
363 : }
364 :
365 : /* ==================================================================== */
366 : /* Do we have overviews that would be appropriate to satisfy */
367 : /* this request? */
368 : /* ==================================================================== */
369 3034300 : if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
370 : eRWFlag == GF_Read)
371 : {
372 : GDALRasterIOExtraArg sExtraArg;
373 2902 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
374 :
375 : const int nOverview =
376 2902 : GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
377 : nBufXSize, nBufYSize, &sExtraArg);
378 2902 : if (nOverview >= 0)
379 : {
380 2827 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
381 2827 : if (poOverviewBand == nullptr)
382 2827 : return CE_Failure;
383 :
384 2827 : return poOverviewBand->RasterIO(
385 : eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
386 2827 : nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
387 : }
388 : }
389 :
390 842693 : if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
391 6 : nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
392 3874140 : nLineSpace == nPixelSpace * nBufXSize &&
393 6 : CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
394 : {
395 0 : memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
396 0 : return CE_None;
397 : }
398 :
399 : /* ==================================================================== */
400 : /* The second case when we don't need subsample data but likely */
401 : /* need data type conversion. */
402 : /* ==================================================================== */
403 3031440 : if ( // nPixelSpace == nBufDataSize &&
404 3031440 : nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
405 : {
406 : #if DEBUG_VERBOSE
407 : printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
408 : nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
409 : #endif
410 :
411 : /* --------------------------------------------------------------------
412 : */
413 : /* Loop over buffer computing source locations. */
414 : /* --------------------------------------------------------------------
415 : */
416 : // Calculate starting values out of loop
417 2466250 : const int nLBlockXStart = nXOff / nBlockXSize;
418 2466250 : const int nXSpanEnd = nBufXSize + nXOff;
419 :
420 2466250 : int nYInc = 0;
421 4971200 : for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
422 2504950 : iBufYOff += nYInc, iSrcY += nYInc)
423 : {
424 2505020 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
425 : static_cast<GPtrDiff_t>(nLineSpace);
426 2505020 : int nLBlockY = iSrcY / nBlockYSize;
427 2505020 : int nLBlockX = nLBlockXStart;
428 2505020 : int iSrcX = nXOff;
429 5228480 : while (iSrcX < nXSpanEnd)
430 : {
431 2723510 : int nXSpan = nLBlockX * nBlockXSize;
432 2723510 : if (nXSpan < INT_MAX - nBlockXSize)
433 2723510 : nXSpan += nBlockXSize;
434 : else
435 0 : nXSpan = INT_MAX;
436 2723510 : const int nXRight = nXSpan;
437 2723510 : nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
438 2723510 : const size_t nXSpanSize =
439 2723510 : nXSpan * static_cast<size_t>(nPixelSpace);
440 :
441 2723510 : bool bJustInitialize =
442 2042150 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
443 37242 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
444 4791240 : nXOff <= nLBlockX * nBlockXSize &&
445 25577 : nXOff + nXSize >= nXRight;
446 :
447 : // Is this a partial tile at right and/or bottom edges of
448 : // the raster, and that is going to be completely written?
449 : // If so, do not load it from storage, but zero it so that
450 : // the content outside of the validity area is initialized.
451 2723510 : bool bMemZeroBuffer = false;
452 2042150 : if (eRWFlag == GF_Write && !bJustInitialize &&
453 2017810 : nXOff <= nLBlockX * nBlockXSize &&
454 2016180 : nYOff <= nLBlockY * nBlockYSize &&
455 12139 : (nXOff + nXSize >= nXRight ||
456 : // cppcheck-suppress knownConditionTrueFalse
457 4768360 : (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
458 11959 : (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
459 10737 : (nYOff + nYSize == GetYSize() &&
460 1949 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
461 : {
462 3171 : bJustInitialize = true;
463 3171 : bMemZeroBuffer = true;
464 : }
465 :
466 : /* --------------------------------------------------------------------
467 : */
468 : /* Ensure we have the appropriate block loaded. */
469 : /* --------------------------------------------------------------------
470 : */
471 2723510 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
472 2723540 : poBlock =
473 2723520 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
474 2723540 : if (!poBlock)
475 : {
476 71 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
477 : nullptr)
478 : {
479 0 : CPLError(CE_Failure, CPLE_AppDefined,
480 : "GetBlockRef failed at X block offset %d, "
481 : "Y block offset %d%s",
482 : nLBlockX, nLBlockY,
483 0 : (nErrorCounter != CPLGetErrorCounter())
484 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
485 : : "");
486 : }
487 71 : return (CE_Failure);
488 : }
489 :
490 2723470 : if (eRWFlag == GF_Write)
491 2042150 : poBlock->MarkDirty();
492 :
493 2723470 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
494 2723470 : if (bMemZeroBuffer)
495 : {
496 3171 : memset(pabySrcBlock, 0,
497 3171 : static_cast<GPtrDiff_t>(nBandDataSize) *
498 3171 : nBlockXSize * nBlockYSize);
499 : }
500 : /* --------------------------------------------------------------------
501 : */
502 : /* Copy over this chunk of data. */
503 : /* --------------------------------------------------------------------
504 : */
505 2723470 : GPtrDiff_t iSrcOffset =
506 2723470 : (static_cast<GPtrDiff_t>(iSrcX) -
507 2723470 : static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
508 2723470 : (static_cast<GPtrDiff_t>(iSrcY) -
509 2723470 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
510 2723470 : nBlockXSize) *
511 2723470 : nBandDataSize;
512 : // Fill up as many rows as possible for the loaded block.
513 5446930 : const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
514 2723470 : nBufYSize - iBufYOff);
515 58612700 : for (int k = 0; k < kmax; k++)
516 : {
517 55889300 : if (eDataType == eBufType && nPixelSpace == nBufDataSize)
518 : {
519 51932700 : if (eRWFlag == GF_Read)
520 47489900 : memcpy(static_cast<GByte *>(pData) + iBufOffset +
521 47489900 : static_cast<GPtrDiff_t>(k) * nLineSpace,
522 47489900 : pabySrcBlock + iSrcOffset, nXSpanSize);
523 : else
524 4442790 : memcpy(pabySrcBlock + iSrcOffset,
525 4442790 : static_cast<GByte *>(pData) + iBufOffset +
526 4442790 : static_cast<GPtrDiff_t>(k) * nLineSpace,
527 : nXSpanSize);
528 : }
529 : else
530 : {
531 : /* type to type conversion */
532 3956590 : if (eRWFlag == GF_Read)
533 3897200 : GDALCopyWords64(
534 3897200 : pabySrcBlock + iSrcOffset, eDataType,
535 : nBandDataSize,
536 3897200 : static_cast<GByte *>(pData) + iBufOffset +
537 3897200 : static_cast<GPtrDiff_t>(k) * nLineSpace,
538 : eBufType, static_cast<int>(nPixelSpace),
539 : nXSpan);
540 : else
541 59398 : GDALCopyWords64(
542 59398 : static_cast<GByte *>(pData) + iBufOffset +
543 59398 : static_cast<GPtrDiff_t>(k) * nLineSpace,
544 : eBufType, static_cast<int>(nPixelSpace),
545 59398 : pabySrcBlock + iSrcOffset, eDataType,
546 : nBandDataSize, nXSpan);
547 : }
548 :
549 55889200 : iSrcOffset +=
550 55889200 : static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
551 : }
552 :
553 : iBufOffset =
554 2723460 : CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
555 2723460 : nLBlockX++;
556 2723460 : iSrcX += nXSpan;
557 :
558 2723460 : poBlock->DropLock();
559 2723460 : poBlock = nullptr;
560 : }
561 :
562 : /* Compute the increment to go on a block boundary */
563 2504970 : nYInc = nBlockYSize - (iSrcY % nBlockYSize);
564 :
565 2506820 : if (psExtraArg->pfnProgress != nullptr &&
566 1849 : !psExtraArg->pfnProgress(
567 2506820 : 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
568 : psExtraArg->pProgressData))
569 : {
570 12 : return CE_Failure;
571 : }
572 : }
573 :
574 2466190 : return CE_None;
575 : }
576 :
577 : /* ==================================================================== */
578 : /* Loop reading required source blocks to satisfy output */
579 : /* request. This is the most general implementation. */
580 : /* ==================================================================== */
581 :
582 565185 : double dfXOff = nXOff;
583 565185 : double dfYOff = nYOff;
584 565185 : double dfXSize = nXSize;
585 565185 : double dfYSize = nYSize;
586 565185 : if (psExtraArg->bFloatingPointWindowValidity)
587 : {
588 230468 : dfXOff = psExtraArg->dfXOff;
589 230468 : dfYOff = psExtraArg->dfYOff;
590 230468 : dfXSize = psExtraArg->dfXSize;
591 230468 : dfYSize = psExtraArg->dfYSize;
592 : }
593 :
594 : /* -------------------------------------------------------------------- */
595 : /* Compute stepping increment. */
596 : /* -------------------------------------------------------------------- */
597 565185 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
598 565185 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
599 565185 : CPLErr eErr = CE_None;
600 :
601 565185 : if (eRWFlag == GF_Write)
602 : {
603 : /* --------------------------------------------------------------------
604 : */
605 : /* Write case */
606 : /* Loop over raster window computing source locations in the buffer.
607 : */
608 : /* --------------------------------------------------------------------
609 : */
610 166651 : GByte *pabyDstBlock = nullptr;
611 166651 : int nLBlockX = -1;
612 166651 : int nLBlockY = -1;
613 :
614 1259990 : for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
615 : {
616 1093340 : const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
617 :
618 12384000 : for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
619 : {
620 11290600 : const int iBufXOff =
621 11290600 : static_cast<int>((iDstX - nXOff) / dfSrcXInc);
622 11290600 : GPtrDiff_t iBufOffset =
623 11290600 : static_cast<GPtrDiff_t>(iBufYOff) *
624 : static_cast<GPtrDiff_t>(nLineSpace) +
625 11290600 : iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
626 :
627 : // FIXME: this code likely doesn't work if the dirty block gets
628 : // flushed to disk before being completely written.
629 : // In the meantime, bJustInitialize should probably be set to
630 : // FALSE even if it is not ideal performance wise, and for
631 : // lossy compression.
632 :
633 : /* --------------------------------------------------------------------
634 : */
635 : /* Ensure we have the appropriate block loaded. */
636 : /* --------------------------------------------------------------------
637 : */
638 11290600 : if (iDstX < nLBlockX * nBlockXSize ||
639 11041300 : iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
640 10584600 : iDstY < nLBlockY * nBlockYSize ||
641 10584600 : iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
642 : {
643 738682 : nLBlockX = iDstX / nBlockXSize;
644 738682 : nLBlockY = iDstY / nBlockYSize;
645 :
646 738682 : const bool bJustInitialize =
647 1065950 : nYOff <= nLBlockY * nBlockYSize &&
648 327271 : nYOff + nYSize - nBlockYSize >=
649 327271 : nLBlockY * nBlockYSize &&
650 1116260 : nXOff <= nLBlockX * nBlockXSize &&
651 50305 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
652 : /*bool bMemZeroBuffer = FALSE;
653 : if( !bJustInitialize &&
654 : nXOff <= nLBlockX * nBlockXSize &&
655 : nYOff <= nLBlockY * nBlockYSize &&
656 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
657 : (nXOff + nXSize == GetXSize() &&
658 : (nLBlockX+1) * nBlockXSize > GetXSize())) &&
659 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
660 : (nYOff + nYSize == GetYSize() &&
661 : (nLBlockY+1) * nBlockYSize > GetYSize())) )
662 : {
663 : bJustInitialize = TRUE;
664 : bMemZeroBuffer = TRUE;
665 : }*/
666 738682 : if (poBlock != nullptr)
667 572031 : poBlock->DropLock();
668 :
669 738682 : poBlock =
670 738682 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
671 738682 : if (poBlock == nullptr)
672 : {
673 0 : return (CE_Failure);
674 : }
675 :
676 738682 : poBlock->MarkDirty();
677 :
678 738682 : pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
679 : /*if( bMemZeroBuffer )
680 : {
681 : memset(pabyDstBlock, 0,
682 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
683 : * nBlockYSize);
684 : }*/
685 : }
686 :
687 : // To make Coverity happy. Should not happen by design.
688 11290600 : if (pabyDstBlock == nullptr)
689 : {
690 0 : CPLAssert(false);
691 : eErr = CE_Failure;
692 : break;
693 : }
694 :
695 : /* --------------------------------------------------------------------
696 : */
697 : /* Copy over this pixel of data. */
698 : /* --------------------------------------------------------------------
699 : */
700 11290600 : GPtrDiff_t iDstOffset =
701 11290600 : (static_cast<GPtrDiff_t>(iDstX) -
702 11290600 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
703 11290600 : (static_cast<GPtrDiff_t>(iDstY) -
704 11290600 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
705 11290600 : nBlockXSize) *
706 11290600 : nBandDataSize;
707 :
708 11290600 : if (eDataType == eBufType)
709 : {
710 11287500 : memcpy(pabyDstBlock + iDstOffset,
711 11287500 : static_cast<GByte *>(pData) + iBufOffset,
712 : nBandDataSize);
713 : }
714 : else
715 : {
716 : /* type to type conversion ... ouch, this is expensive way
717 : of handling single words */
718 3096 : GDALCopyWords64(static_cast<GByte *>(pData) + iBufOffset,
719 3096 : eBufType, 0, pabyDstBlock + iDstOffset,
720 : eDataType, 0, 1);
721 : }
722 : }
723 :
724 1093340 : if (psExtraArg->pfnProgress != nullptr &&
725 0 : !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
726 : psExtraArg->pProgressData))
727 : {
728 0 : eErr = CE_Failure;
729 0 : break;
730 : }
731 : }
732 : }
733 : else
734 : {
735 398534 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
736 : {
737 8601 : if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
738 2513 : psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
739 2511 : psExtraArg->eResampleAlg == GRIORA_Bilinear ||
740 6093 : psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
741 2925 : GetColorTable() != nullptr)
742 : {
743 0 : CPLError(CE_Warning, CPLE_NotSupported,
744 : "Resampling method not supported on paletted band. "
745 : "Falling back to nearest neighbour");
746 : }
747 3047 : else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
748 3 : GDALDataTypeIsComplex(eDataType))
749 : {
750 0 : CPLError(CE_Warning, CPLE_NotSupported,
751 : "Resampling method not supported on complex data type "
752 : "band. Falling back to nearest neighbour");
753 : }
754 : else
755 : {
756 3044 : return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
757 : pData, nBufXSize, nBufYSize, eBufType,
758 3045 : nPixelSpace, nLineSpace, psExtraArg);
759 : }
760 : }
761 :
762 395483 : int nLimitBlockY = 0;
763 395483 : const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
764 395483 : int nStartBlockX = -nBlockXSize;
765 395483 : const double EPS = 1e-10;
766 395483 : int nLBlockY = -1;
767 395483 : const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
768 395483 : const bool bIntegerXFactor =
769 372806 : bUseIntegerRequestCoords &&
770 669271 : static_cast<int>(dfSrcXInc) == dfSrcXInc &&
771 273788 : static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
772 :
773 : /* --------------------------------------------------------------------
774 : */
775 : /* Read case */
776 : /* Loop over buffer computing source locations. */
777 : /* --------------------------------------------------------------------
778 : */
779 2451410 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
780 : {
781 : // Add small epsilon to avoid some numeric precision issues.
782 2055940 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
783 2055940 : const int iSrcY = static_cast<int>(std::min(
784 2055940 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
785 :
786 2055940 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
787 : static_cast<GPtrDiff_t>(nLineSpace);
788 :
789 2055940 : if (iSrcY >= nLimitBlockY)
790 : {
791 433624 : nLBlockY = iSrcY / nBlockYSize;
792 433624 : nLimitBlockY = nLBlockY * nBlockYSize;
793 433624 : if (nLimitBlockY < INT_MAX - nBlockYSize)
794 433624 : nLimitBlockY += nBlockYSize;
795 : else
796 0 : nLimitBlockY = INT_MAX;
797 : // Make sure a new block is loaded.
798 433624 : nStartBlockX = -nBlockXSize;
799 : }
800 1622320 : else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
801 : {
802 : // Make sure a new block is loaded.
803 441987 : nStartBlockX = -nBlockXSize;
804 : }
805 :
806 2055940 : GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
807 2055940 : static_cast<GPtrDiff_t>(nBlockXSize);
808 :
809 2055940 : if (bIntegerXFactor)
810 : {
811 695677 : int iSrcX = static_cast<int>(dfSrcXStart);
812 695677 : const int nSrcXInc = static_cast<int>(dfSrcXInc);
813 695677 : GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
814 695677 : bool bRet = false;
815 695677 : if (bByteCopy)
816 : {
817 585768 : bRet = DownsamplingIntegerXFactor<true, 1>(
818 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
819 : static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
820 : GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
821 : }
822 109909 : else if (eDataType == eBufType)
823 : {
824 109704 : switch (nBandDataSize)
825 : {
826 109624 : case 2:
827 109624 : bRet = DownsamplingIntegerXFactor<true, 2>(
828 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
829 : pabyDstData, static_cast<int>(nPixelSpace),
830 : nBufXSize, eDataType, eDataType, nStartBlockX,
831 : nBlockXSize, poBlock, nLBlockY);
832 109624 : break;
833 22 : case 4:
834 22 : bRet = DownsamplingIntegerXFactor<true, 4>(
835 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
836 : pabyDstData, static_cast<int>(nPixelSpace),
837 : nBufXSize, eDataType, eDataType, nStartBlockX,
838 : nBlockXSize, poBlock, nLBlockY);
839 22 : break;
840 56 : case 8:
841 56 : bRet = DownsamplingIntegerXFactor<true, 8>(
842 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
843 : pabyDstData, static_cast<int>(nPixelSpace),
844 : nBufXSize, eDataType, eDataType, nStartBlockX,
845 : nBlockXSize, poBlock, nLBlockY);
846 56 : break;
847 2 : case 16:
848 2 : bRet = DownsamplingIntegerXFactor<true, 16>(
849 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
850 : pabyDstData, static_cast<int>(nPixelSpace),
851 : nBufXSize, eDataType, eDataType, nStartBlockX,
852 : nBlockXSize, poBlock, nLBlockY);
853 2 : break;
854 0 : default:
855 0 : CPLAssert(false);
856 : break;
857 : }
858 : }
859 : else
860 : {
861 205 : bRet = DownsamplingIntegerXFactor<false, 0>(
862 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
863 : static_cast<int>(nPixelSpace), nBufXSize, eDataType,
864 : eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
865 : }
866 695677 : if (!bRet)
867 1 : eErr = CE_Failure;
868 : }
869 : else
870 : {
871 1360260 : double dfSrcX = dfSrcXStart;
872 582293000 : for (int iBufXOff = 0; iBufXOff < nBufXSize;
873 580933000 : iBufXOff++, dfSrcX += dfSrcXInc)
874 : {
875 : // TODO?: try to avoid the clamping for most iterations
876 : const int iSrcX = static_cast<int>(
877 1161870000 : std::min(std::max(0.0, dfSrcX),
878 580933000 : static_cast<double>(nRasterXSize - 1)));
879 :
880 : /* --------------------------------------------------------------------
881 : */
882 : /* Ensure we have the appropriate block loaded. */
883 : /* --------------------------------------------------------------------
884 : */
885 580933000 : if (iSrcX >= nBlockXSize + nStartBlockX)
886 : {
887 1702800 : const int nLBlockX = iSrcX / nBlockXSize;
888 1702800 : nStartBlockX = nLBlockX * nBlockXSize;
889 :
890 1702800 : if (poBlock != nullptr)
891 1581100 : poBlock->DropLock();
892 :
893 1702800 : poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
894 1702800 : if (poBlock == nullptr)
895 : {
896 9 : eErr = CE_Failure;
897 9 : break;
898 : }
899 :
900 : pabySrcBlock =
901 1702790 : static_cast<GByte *>(poBlock->GetDataRef());
902 : }
903 580933000 : const GPtrDiff_t nDiffX =
904 580933000 : static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
905 :
906 : /* --------------------------------------------------------------------
907 : */
908 : /* Copy over this pixel of data. */
909 : /* --------------------------------------------------------------------
910 : */
911 :
912 580933000 : if (bByteCopy)
913 : {
914 527231000 : GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
915 527231000 : static_cast<GByte *>(pData)[iBufOffset] =
916 527231000 : pabySrcBlock[iSrcOffset];
917 : }
918 53701600 : else if (eDataType == eBufType)
919 : {
920 48225600 : GPtrDiff_t iSrcOffset =
921 48225600 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
922 48225600 : memcpy(static_cast<GByte *>(pData) + iBufOffset,
923 48225600 : pabySrcBlock + iSrcOffset, nBandDataSize);
924 : }
925 : else
926 : {
927 : // Type to type conversion ...
928 5476050 : GPtrDiff_t iSrcOffset =
929 5476050 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
930 5476050 : GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
931 : static_cast<GByte *>(pData) +
932 5476050 : iBufOffset,
933 : eBufType, 0, 1);
934 : }
935 :
936 580933000 : iBufOffset += static_cast<int>(nPixelSpace);
937 : }
938 : }
939 2055940 : if (eErr == CE_Failure)
940 11 : break;
941 :
942 2287020 : if (psExtraArg->pfnProgress != nullptr &&
943 231086 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
944 : psExtraArg->pProgressData))
945 : {
946 1 : eErr = CE_Failure;
947 1 : break;
948 : }
949 : }
950 : }
951 :
952 562134 : if (poBlock != nullptr)
953 562124 : poBlock->DropLock();
954 :
955 562134 : return eErr;
956 : }
957 :
958 : /************************************************************************/
959 : /* GDALRasterIOTransformer() */
960 : /************************************************************************/
961 :
// Parameters of the affine mapping applied by GDALRasterIOTransformer()
// between destination (buffer) coordinates and source (raster) coordinates:
//   src = dst * ratio + offset
// Members default to the identity mapping so that a default-constructed
// instance never holds indeterminate values.
struct GDALRasterIOTransformerStruct
{
    double dfXOff = 0.0;            // Offset added to X after scaling.
    double dfYOff = 0.0;            // Offset added to Y after scaling.
    double dfXRatioDstToSrc = 1.0;  // X scale factor, destination -> source.
    double dfYRatioDstToSrc = 1.0;  // Y scale factor, destination -> source.
};
969 :
970 6748 : static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
971 : int nPointCount, double *x, double *y,
972 : double * /* z */, int *panSuccess)
973 : {
974 6748 : GDALRasterIOTransformerStruct *psParams =
975 : static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
976 6748 : if (bDstToSrc)
977 : {
978 252996 : for (int i = 0; i < nPointCount; i++)
979 : {
980 246836 : x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
981 246836 : y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
982 246836 : panSuccess[i] = TRUE;
983 : }
984 : }
985 : else
986 : {
987 1176 : for (int i = 0; i < nPointCount; i++)
988 : {
989 588 : x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
990 588 : y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
991 588 : panSuccess[i] = TRUE;
992 : }
993 : }
994 6748 : return TRUE;
995 : }
996 :
997 : /************************************************************************/
998 : /* RasterIOResampled() */
999 : /************************************************************************/
1000 :
1001 : //! @cond Doxygen_Suppress
1002 3045 : CPLErr GDALRasterBand::RasterIOResampled(
1003 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1004 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1005 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1006 : {
1007 : // Determine if we use warping resampling or overview resampling
1008 : const bool bUseWarp =
1009 3045 : (GDALDataTypeIsComplex(eDataType) &&
1010 3202 : psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
1011 157 : psExtraArg->eResampleAlg != GRIORA_Mode);
1012 :
1013 3045 : double dfXOff = nXOff;
1014 3045 : double dfYOff = nYOff;
1015 3045 : double dfXSize = nXSize;
1016 3045 : double dfYSize = nYSize;
1017 3045 : if (psExtraArg->bFloatingPointWindowValidity)
1018 : {
1019 2586 : dfXOff = psExtraArg->dfXOff;
1020 2586 : dfYOff = psExtraArg->dfYOff;
1021 2586 : dfXSize = psExtraArg->dfXSize;
1022 2586 : dfYSize = psExtraArg->dfYSize;
1023 : }
1024 :
1025 3045 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1026 3045 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1027 :
1028 : // Determine the coordinates in the "virtual" output raster to see
1029 : // if there are not integers, in which case we will use them as a shift
1030 : // so that subwindow extracts give the exact same results as entire raster
1031 : // scaling.
1032 3045 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1033 3045 : bool bHasXOffVirtual = false;
1034 3045 : int nDestXOffVirtual = 0;
1035 3045 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1036 : {
1037 2717 : bHasXOffVirtual = true;
1038 2717 : dfXOff = nXOff;
1039 2717 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1040 : }
1041 :
1042 3045 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1043 3045 : bool bHasYOffVirtual = false;
1044 3045 : int nDestYOffVirtual = 0;
1045 3045 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1046 : {
1047 2712 : bHasYOffVirtual = true;
1048 2712 : dfYOff = nYOff;
1049 2712 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1050 : }
1051 :
1052 : // Create a MEM dataset that wraps the output buffer.
1053 : GDALDataset *poMEMDS;
1054 3045 : void *pTempBuffer = nullptr;
1055 3045 : GSpacing nPSMem = nPixelSpace;
1056 3045 : GSpacing nLSMem = nLineSpace;
1057 3045 : void *pDataMem = pData;
1058 3045 : GDALDataType eDTMem = eBufType;
1059 3045 : if (eBufType != eDataType)
1060 : {
1061 40 : nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1062 40 : nLSMem = nPSMem * nBufXSize;
1063 : pTempBuffer =
1064 40 : VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1065 40 : if (pTempBuffer == nullptr)
1066 0 : return CE_Failure;
1067 40 : pDataMem = pTempBuffer;
1068 40 : eDTMem = eDataType;
1069 : }
1070 :
1071 : poMEMDS =
1072 3045 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1073 : nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1074 3045 : GByte *pabyData = static_cast<GByte *>(pDataMem) -
1075 3045 : nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1076 3045 : GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1077 : poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1078 3045 : poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1079 :
1080 3045 : const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1081 3045 : const int nNBITS = pszNBITS ? atoi(pszNBITS) : 0;
1082 3045 : if (pszNBITS)
1083 6 : GDALRasterBand::FromHandle(hMEMBand)->SetMetadataItem(
1084 6 : "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1085 :
1086 3044 : CPLErr eErr = CE_None;
1087 :
1088 : // Do the resampling.
1089 3044 : if (bUseWarp)
1090 : {
1091 149 : int bHasNoData = FALSE;
1092 149 : double dfNoDataValue = GetNoDataValue(&bHasNoData);
1093 :
1094 149 : VRTDatasetH hVRTDS = nullptr;
1095 149 : GDALRasterBandH hVRTBand = nullptr;
1096 149 : if (GetDataset() == nullptr)
1097 : {
1098 : /* Create VRT dataset that wraps the whole dataset */
1099 0 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1100 0 : VRTAddBand(hVRTDS, eDataType, nullptr);
1101 0 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1102 0 : VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1103 : 0, 0, nRasterXSize, nRasterYSize, nullptr,
1104 : VRT_NODATA_UNSET);
1105 :
1106 : /* Add a mask band if needed */
1107 0 : if (GetMaskFlags() != GMF_ALL_VALID)
1108 : {
1109 0 : GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1110 : VRTSourcedRasterBand *poVRTMaskBand =
1111 : reinterpret_cast<VRTSourcedRasterBand *>(
1112 : reinterpret_cast<GDALRasterBand *>(hVRTBand)
1113 0 : ->GetMaskBand());
1114 0 : poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1115 0 : nRasterYSize, 0, 0,
1116 0 : nRasterXSize, nRasterYSize);
1117 : }
1118 : }
1119 :
1120 149 : GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1121 149 : switch (psExtraArg->eResampleAlg)
1122 : {
1123 0 : case GRIORA_NearestNeighbour:
1124 0 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1125 0 : break;
1126 147 : case GRIORA_Bilinear:
1127 147 : psWarpOptions->eResampleAlg = GRA_Bilinear;
1128 147 : break;
1129 0 : case GRIORA_Cubic:
1130 0 : psWarpOptions->eResampleAlg = GRA_Cubic;
1131 0 : break;
1132 0 : case GRIORA_CubicSpline:
1133 0 : psWarpOptions->eResampleAlg = GRA_CubicSpline;
1134 0 : break;
1135 0 : case GRIORA_Lanczos:
1136 0 : psWarpOptions->eResampleAlg = GRA_Lanczos;
1137 0 : break;
1138 0 : case GRIORA_Average:
1139 0 : psWarpOptions->eResampleAlg = GRA_Average;
1140 0 : break;
1141 2 : case GRIORA_RMS:
1142 2 : psWarpOptions->eResampleAlg = GRA_RMS;
1143 2 : break;
1144 0 : case GRIORA_Mode:
1145 0 : psWarpOptions->eResampleAlg = GRA_Mode;
1146 0 : break;
1147 0 : default:
1148 0 : CPLAssert(false);
1149 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1150 : break;
1151 : }
1152 149 : psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1153 149 : psWarpOptions->hDstDS = poMEMDS;
1154 149 : psWarpOptions->nBandCount = 1;
1155 149 : int nSrcBandNumber = hVRTDS ? 1 : nBand;
1156 149 : int nDstBandNumber = 1;
1157 149 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1158 149 : psWarpOptions->panDstBands = &nDstBandNumber;
1159 298 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1160 149 : ? psExtraArg->pfnProgress
1161 : : GDALDummyProgress;
1162 149 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1163 149 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1164 149 : if (bHasNoData)
1165 : {
1166 0 : psWarpOptions->papszWarpOptions = CSLSetNameValue(
1167 : psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1168 0 : if (psWarpOptions->padfSrcNoDataReal == nullptr)
1169 : {
1170 0 : psWarpOptions->padfSrcNoDataReal =
1171 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1172 0 : psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1173 : }
1174 :
1175 0 : if (psWarpOptions->padfDstNoDataReal == nullptr)
1176 : {
1177 0 : psWarpOptions->padfDstNoDataReal =
1178 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1179 0 : psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1180 : }
1181 : }
1182 :
1183 : GDALRasterIOTransformerStruct sTransformer;
1184 149 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1185 149 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1186 149 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1187 149 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1188 149 : psWarpOptions->pTransformerArg = &sTransformer;
1189 :
1190 : GDALWarpOperationH hWarpOperation =
1191 149 : GDALCreateWarpOperation(psWarpOptions);
1192 149 : eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1193 : nDestYOffVirtual, nBufXSize, nBufYSize);
1194 149 : GDALDestroyWarpOperation(hWarpOperation);
1195 :
1196 149 : psWarpOptions->panSrcBands = nullptr;
1197 149 : psWarpOptions->panDstBands = nullptr;
1198 149 : GDALDestroyWarpOptions(psWarpOptions);
1199 :
1200 149 : if (hVRTDS)
1201 0 : GDALClose(hVRTDS);
1202 : }
1203 : else
1204 : {
1205 2895 : const char *pszResampling =
1206 3551 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1207 780 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1208 246 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1209 239 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1210 172 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1211 95 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1212 43 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1213 3 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1214 : : "UNKNOWN";
1215 :
1216 2895 : int nKernelRadius = 0;
1217 : GDALResampleFunction pfnResampleFunc =
1218 2895 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1219 2896 : CPLAssert(pfnResampleFunc);
1220 : GDALDataType eWrkDataType =
1221 2896 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1222 2895 : int nHasNoData = 0;
1223 2895 : double dfNoDataValue = GetNoDataValue(&nHasNoData);
1224 2896 : const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1225 2896 : if (!bHasNoData)
1226 2806 : dfNoDataValue = 0.0;
1227 :
1228 2896 : int nDstBlockXSize = nBufXSize;
1229 2896 : int nDstBlockYSize = nBufYSize;
1230 2896 : int nFullResXChunk = 0;
1231 2896 : int nFullResYChunk = 0;
1232 : while (true)
1233 : {
1234 2907 : nFullResXChunk =
1235 2907 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1236 2907 : nFullResYChunk =
1237 2907 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1238 2907 : if (nFullResXChunk > nRasterXSize)
1239 2664 : nFullResXChunk = nRasterXSize;
1240 2907 : if (nFullResYChunk > nRasterYSize)
1241 265 : nFullResYChunk = nRasterYSize;
1242 2907 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1243 2853 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1244 : 1024 * 1024))
1245 : break;
1246 : // When operating on the full width of a raster whose block width is
1247 : // the raster width, prefer doing chunks in height.
1248 11 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1249 : nDstBlockYSize > 1)
1250 0 : nDstBlockYSize /= 2;
1251 : /* Otherwise cut the maximal dimension */
1252 11 : else if (nDstBlockXSize > 1 &&
1253 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1254 11 : nDstBlockXSize /= 2;
1255 : else
1256 0 : nDstBlockYSize /= 2;
1257 : }
1258 :
1259 2896 : int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1260 2896 : int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1261 2896 : if (nOvrXFactor == 0)
1262 2029 : nOvrXFactor = 1;
1263 2896 : if (nOvrYFactor == 0)
1264 2028 : nOvrYFactor = 1;
1265 2896 : int nFullResXSizeQueried =
1266 2896 : nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1267 2896 : int nFullResYSizeQueried =
1268 2896 : nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1269 :
1270 2896 : if (nFullResXSizeQueried > nRasterXSize)
1271 2555 : nFullResXSizeQueried = nRasterXSize;
1272 2896 : if (nFullResYSizeQueried > nRasterYSize)
1273 154 : nFullResYSizeQueried = nRasterYSize;
1274 :
1275 : void *pChunk =
1276 2896 : VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1277 : nFullResXSizeQueried, nFullResYSizeQueried);
1278 2896 : GByte *pabyChunkNoDataMask = nullptr;
1279 :
1280 2896 : GDALRasterBand *poMaskBand = GetMaskBand();
1281 2895 : int l_nMaskFlags = GetMaskFlags();
1282 :
1283 2895 : bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1284 2895 : if (bUseNoDataMask)
1285 : {
1286 157 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1287 : nFullResXSizeQueried, nFullResYSizeQueried));
1288 : }
1289 2896 : if (pChunk == nullptr ||
1290 158 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1291 : {
1292 1 : GDALClose(poMEMDS);
1293 0 : CPLFree(pChunk);
1294 0 : CPLFree(pabyChunkNoDataMask);
1295 0 : VSIFree(pTempBuffer);
1296 0 : return CE_Failure;
1297 : }
1298 :
1299 2895 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1300 2895 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1301 2895 : int nBlocksDone = 0;
1302 :
1303 : int nDstYOff;
1304 5791 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1305 2896 : nDstYOff += nDstBlockYSize)
1306 : {
1307 : int nDstYCount;
1308 2896 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1309 2895 : nDstYCount = nDstBlockYSize;
1310 : else
1311 1 : nDstYCount = nBufYSize - nDstYOff;
1312 :
1313 2896 : int nChunkYOff =
1314 2896 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1315 2896 : int nChunkYOff2 = nYOff + 1 +
1316 2896 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1317 : dfYRatioDstToSrc));
1318 2896 : if (nChunkYOff2 > nRasterYSize)
1319 377 : nChunkYOff2 = nRasterYSize;
1320 2896 : int nYCount = nChunkYOff2 - nChunkYOff;
1321 2896 : CPLAssert(nYCount <= nFullResYChunk);
1322 :
1323 2896 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1324 2896 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1325 2896 : if (nChunkYOffQueried < 0)
1326 : {
1327 277 : nChunkYSizeQueried += nChunkYOffQueried;
1328 277 : nChunkYOffQueried = 0;
1329 : }
1330 2896 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1331 380 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1332 2896 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1333 :
1334 2896 : int nDstXOff = 0;
1335 5792 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1336 2896 : nDstXOff += nDstBlockXSize)
1337 : {
1338 2896 : int nDstXCount = 0;
1339 2896 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1340 2895 : nDstXCount = nDstBlockXSize;
1341 : else
1342 1 : nDstXCount = nBufXSize - nDstXOff;
1343 :
1344 2896 : int nChunkXOff =
1345 2896 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1346 2896 : int nChunkXOff2 =
1347 2896 : nXOff + 1 +
1348 2896 : static_cast<int>(
1349 2896 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1350 2896 : if (nChunkXOff2 > nRasterXSize)
1351 2676 : nChunkXOff2 = nRasterXSize;
1352 2896 : int nXCount = nChunkXOff2 - nChunkXOff;
1353 2896 : CPLAssert(nXCount <= nFullResXChunk);
1354 :
1355 2896 : int nChunkXOffQueried =
1356 2896 : nChunkXOff - nKernelRadius * nOvrXFactor;
1357 2896 : int nChunkXSizeQueried =
1358 2896 : nXCount + 2 * nKernelRadius * nOvrXFactor;
1359 2896 : if (nChunkXOffQueried < 0)
1360 : {
1361 2580 : nChunkXSizeQueried += nChunkXOffQueried;
1362 2580 : nChunkXOffQueried = 0;
1363 : }
1364 2896 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1365 2567 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1366 2896 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1367 :
1368 : // Read the source buffers.
1369 2896 : eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1370 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1371 : nChunkXSizeQueried, nChunkYSizeQueried,
1372 : eWrkDataType, 0, 0, nullptr);
1373 :
1374 2896 : bool bSkipResample = false;
1375 2896 : bool bNoDataMaskFullyOpaque = false;
1376 2896 : if (eErr == CE_None && bUseNoDataMask)
1377 : {
1378 158 : eErr = poMaskBand->RasterIO(
1379 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1380 : nChunkXSizeQueried, nChunkYSizeQueried,
1381 : pabyChunkNoDataMask, nChunkXSizeQueried,
1382 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1383 :
1384 : /* Optimizations if mask if fully opaque or transparent */
1385 158 : int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1386 158 : GByte bVal = pabyChunkNoDataMask[0];
1387 158 : int i = 1;
1388 3751460 : for (; i < nPixels; i++)
1389 : {
1390 3751410 : if (pabyChunkNoDataMask[i] != bVal)
1391 104 : break;
1392 : }
1393 158 : if (i == nPixels)
1394 : {
1395 54 : if (bVal == 0)
1396 : {
1397 712 : for (int j = 0; j < nDstYCount; j++)
1398 : {
1399 686 : GDALCopyWords64(&dfNoDataValue, GDT_Float64, 0,
1400 : static_cast<GByte *>(pDataMem) +
1401 686 : nLSMem * (j + nDstYOff) +
1402 686 : nDstXOff * nPSMem,
1403 : eDTMem,
1404 : static_cast<int>(nPSMem),
1405 : nDstXCount);
1406 : }
1407 26 : bSkipResample = true;
1408 : }
1409 : else
1410 : {
1411 28 : bNoDataMaskFullyOpaque = true;
1412 : }
1413 : }
1414 : }
1415 :
1416 2896 : if (!bSkipResample && eErr == CE_None)
1417 : {
1418 2867 : const bool bPropagateNoData = false;
1419 2867 : void *pDstBuffer = nullptr;
1420 2867 : GDALDataType eDstBufferDataType = GDT_Unknown;
1421 : GDALRasterBand *poMEMBand =
1422 2867 : GDALRasterBand::FromHandle(hMEMBand);
1423 2867 : GDALOverviewResampleArgs args;
1424 2867 : args.eSrcDataType = eDataType;
1425 2867 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1426 2867 : args.nOvrXSize = poMEMBand->GetXSize();
1427 2867 : args.nOvrYSize = poMEMBand->GetYSize();
1428 2867 : args.nOvrNBITS = nNBITS;
1429 2867 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1430 2867 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1431 2867 : args.dfSrcXDelta =
1432 2867 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1433 2867 : args.dfSrcYDelta =
1434 2867 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1435 2867 : args.eWrkDataType = eWrkDataType;
1436 2867 : args.pabyChunkNodataMask =
1437 2867 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask;
1438 2867 : args.nChunkXOff =
1439 2867 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1440 2867 : args.nChunkXSize = nChunkXSizeQueried;
1441 2867 : args.nChunkYOff =
1442 2867 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1443 2867 : args.nChunkYSize = nChunkYSizeQueried;
1444 2867 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1445 2867 : args.nDstXOff2 = nDstXOff + nDestXOffVirtual + nDstXCount;
1446 2867 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1447 2867 : args.nDstYOff2 = nDstYOff + nDestYOffVirtual + nDstYCount;
1448 2867 : args.pszResampling = pszResampling;
1449 2867 : args.bHasNoData = bHasNoData;
1450 2867 : args.dfNoDataValue = dfNoDataValue;
1451 2867 : args.poColorTable = GetColorTable();
1452 2867 : args.bPropagateNoData = bPropagateNoData;
1453 2867 : eErr = pfnResampleFunc(args, pChunk, &pDstBuffer,
1454 : &eDstBufferDataType);
1455 2867 : if (eErr == CE_None)
1456 : {
1457 2867 : eErr = poMEMBand->RasterIO(
1458 : GF_Write, nDstXOff + nDestXOffVirtual,
1459 : nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1460 : pDstBuffer, nDstXCount, nDstYCount,
1461 : eDstBufferDataType, 0, 0, nullptr);
1462 : }
1463 2867 : CPLFree(pDstBuffer);
1464 : }
1465 :
1466 2896 : nBlocksDone++;
1467 3321 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1468 425 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1469 : "", psExtraArg->pProgressData))
1470 : {
1471 1 : eErr = CE_Failure;
1472 : }
1473 : }
1474 : }
1475 :
1476 2895 : CPLFree(pChunk);
1477 2896 : CPLFree(pabyChunkNoDataMask);
1478 : }
1479 :
1480 3045 : if (eBufType != eDataType)
1481 : {
1482 40 : CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1483 : GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1484 : pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1485 : nullptr));
1486 : }
1487 3045 : GDALClose(poMEMDS);
1488 3045 : VSIFree(pTempBuffer);
1489 :
1490 3045 : return eErr;
1491 : }
1492 :
1493 : /************************************************************************/
1494 : /* RasterIOResampled() */
1495 : /************************************************************************/
1496 :
1497 757 : CPLErr GDALDataset::RasterIOResampled(
1498 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1499 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1500 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
1501 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
1502 :
1503 : {
1504 : #if 0
1505 : // Determine if we use warping resampling or overview resampling
1506 : bool bUseWarp = false;
1507 : if( GDALDataTypeIsComplex( eDataType ) )
1508 : bUseWarp = true;
1509 : #endif
1510 :
1511 757 : double dfXOff = nXOff;
1512 757 : double dfYOff = nYOff;
1513 757 : double dfXSize = nXSize;
1514 757 : double dfYSize = nYSize;
1515 757 : if (psExtraArg->bFloatingPointWindowValidity)
1516 : {
1517 636 : dfXOff = psExtraArg->dfXOff;
1518 636 : dfYOff = psExtraArg->dfYOff;
1519 636 : dfXSize = psExtraArg->dfXSize;
1520 636 : dfYSize = psExtraArg->dfYSize;
1521 : }
1522 :
1523 757 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1524 757 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1525 :
1526 : // Determine the coordinates in the "virtual" output raster to see
1527 : // if there are not integers, in which case we will use them as a shift
1528 : // so that subwindow extracts give the exact same results as entire raster
1529 : // scaling.
1530 757 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1531 757 : bool bHasXOffVirtual = false;
1532 757 : int nDestXOffVirtual = 0;
1533 757 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1534 : {
1535 623 : bHasXOffVirtual = true;
1536 623 : dfXOff = nXOff;
1537 623 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1538 : }
1539 :
1540 757 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1541 757 : bool bHasYOffVirtual = false;
1542 757 : int nDestYOffVirtual = 0;
1543 757 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1544 : {
1545 583 : bHasYOffVirtual = true;
1546 583 : dfYOff = nYOff;
1547 583 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1548 : }
1549 :
1550 : // Create a MEM dataset that wraps the output buffer.
1551 : GDALDataset *poMEMDS =
1552 757 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1553 : nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1554 : GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1555 752 : CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
1556 761 : int nNBITS = 0;
1557 2336 : for (int i = 0; i < nBandCount; i++)
1558 : {
1559 1585 : char szBuffer[32] = {'\0'};
1560 3181 : int nRet = CPLPrintPointer(
1561 : szBuffer,
1562 1585 : static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
1563 1585 : nLineSpace * nDestYOffVirtual + nBandSpace * i,
1564 : sizeof(szBuffer));
1565 1596 : szBuffer[nRet] = 0;
1566 :
1567 1596 : char szBuffer0[64] = {'\0'};
1568 1596 : snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1569 :
1570 1596 : char szBuffer1[64] = {'\0'};
1571 1596 : snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
1572 : static_cast<GIntBig>(nPixelSpace));
1573 :
1574 1596 : char szBuffer2[64] = {'\0'};
1575 1596 : snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
1576 : static_cast<GIntBig>(nLineSpace));
1577 :
1578 1596 : char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
1579 :
1580 1596 : poMEMDS->AddBand(eBufType, apszOptions);
1581 :
1582 1591 : GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1583 1582 : papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
1584 : const char *pszNBITS =
1585 1575 : poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1586 1576 : if (pszNBITS)
1587 : {
1588 0 : nNBITS = atoi(pszNBITS);
1589 0 : poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1590 0 : "IMAGE_STRUCTURE");
1591 : }
1592 : }
1593 :
1594 751 : CPLErr eErr = CE_None;
1595 :
1596 : // TODO(schwehr): Why disabled? Why not just delete?
1597 : // Looks like this code was initially added as disable by copying
1598 : // from RasterIO here:
1599 : // https://trac.osgeo.org/gdal/changeset/29572
1600 : #if 0
1601 : // Do the resampling.
1602 : if( bUseWarp )
1603 : {
1604 : VRTDatasetH hVRTDS = nullptr;
1605 : GDALRasterBandH hVRTBand = nullptr;
1606 : if( GetDataset() == nullptr )
1607 : {
1608 : /* Create VRT dataset that wraps the whole dataset */
1609 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1610 : VRTAddBand( hVRTDS, eDataType, nullptr );
1611 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1612 : VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1613 : (GDALRasterBandH)this,
1614 : 0, 0,
1615 : nRasterXSize, nRasterYSize,
1616 : 0, 0,
1617 : nRasterXSize, nRasterYSize,
1618 : nullptr, VRT_NODATA_UNSET );
1619 :
1620 : /* Add a mask band if needed */
1621 : if( GetMaskFlags() != GMF_ALL_VALID )
1622 : {
1623 : ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1624 : VRTSourcedRasterBand* poVRTMaskBand =
1625 : (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1626 : poVRTMaskBand->
1627 : AddMaskBandSource( this,
1628 : 0, 0,
1629 : nRasterXSize, nRasterYSize,
1630 : 0, 0,
1631 : nRasterXSize, nRasterYSize);
1632 : }
1633 : }
1634 :
1635 : GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1636 : psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1637 : psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1638 : psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1639 : psWarpOptions->nBandCount = 1;
1640 : int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1641 : int nDstBandNumber = 1;
1642 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1643 : psWarpOptions->panDstBands = &nDstBandNumber;
1644 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1645 : psExtraArg->pfnProgress : GDALDummyProgress;
1646 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1647 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1648 : GDALRasterIOTransformerStruct sTransformer;
1649 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1650 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1651 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1652 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1653 : psWarpOptions->pTransformerArg = &sTransformer;
1654 :
1655 : GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1656 : eErr = GDALChunkAndWarpImage( hWarpOperation,
1657 : nDestXOffVirtual, nDestYOffVirtual,
1658 : nBufXSize, nBufYSize );
1659 : GDALDestroyWarpOperation( hWarpOperation );
1660 :
1661 : psWarpOptions->panSrcBands = nullptr;
1662 : psWarpOptions->panDstBands = nullptr;
1663 : GDALDestroyWarpOptions( psWarpOptions );
1664 :
1665 : if( hVRTDS )
1666 : GDALClose(hVRTDS);
1667 : }
1668 : else
1669 : #endif
1670 : {
1671 751 : const char *pszResampling =
1672 1385 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1673 634 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1674 0 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1675 0 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1676 0 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1677 0 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1678 0 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1679 0 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1680 : : "UNKNOWN";
1681 :
1682 751 : GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1683 740 : GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1684 : int nBlockXSize, nBlockYSize;
1685 745 : poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1686 :
1687 : int nKernelRadius;
1688 : GDALResampleFunction pfnResampleFunc =
1689 739 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1690 743 : CPLAssert(pfnResampleFunc);
1691 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1692 : GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1693 : GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1694 : #endif
1695 : GDALDataType eWrkDataType =
1696 743 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1697 :
1698 736 : int nDstBlockXSize = nBufXSize;
1699 736 : int nDstBlockYSize = nBufYSize;
1700 : int nFullResXChunk, nFullResYChunk;
1701 : while (true)
1702 : {
1703 736 : nFullResXChunk =
1704 736 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1705 736 : nFullResYChunk =
1706 736 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1707 736 : if (nFullResXChunk > nRasterXSize)
1708 557 : nFullResXChunk = nRasterXSize;
1709 736 : if (nFullResYChunk > nRasterYSize)
1710 43 : nFullResYChunk = nRasterYSize;
1711 736 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1712 734 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1713 : 1024 * 1024))
1714 : break;
1715 : // When operating on the full width of a raster whose block width is
1716 : // the raster width, prefer doing chunks in height.
1717 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1718 : nDstBlockYSize > 1)
1719 0 : nDstBlockYSize /= 2;
1720 : /* Otherwise cut the maximal dimension */
1721 0 : else if (nDstBlockXSize > 1 &&
1722 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1723 0 : nDstBlockXSize /= 2;
1724 : else
1725 0 : nDstBlockYSize /= 2;
1726 : }
1727 :
1728 1477 : int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1729 736 : static_cast<int>(0.5 + dfYRatioDstToSrc));
1730 741 : if (nOvrFactor == 0)
1731 94 : nOvrFactor = 1;
1732 741 : int nFullResXSizeQueried =
1733 741 : nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1734 741 : int nFullResYSizeQueried =
1735 741 : nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1736 :
1737 741 : if (nFullResXSizeQueried > nRasterXSize)
1738 587 : nFullResXSizeQueried = nRasterXSize;
1739 741 : if (nFullResYSizeQueried > nRasterYSize)
1740 46 : nFullResYSizeQueried = nRasterYSize;
1741 :
1742 741 : void *pChunk = VSI_MALLOC3_VERBOSE(
1743 : cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1744 : nBandCount),
1745 : nFullResXSizeQueried, nFullResYSizeQueried);
1746 754 : GByte *pabyChunkNoDataMask = nullptr;
1747 :
1748 754 : GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1749 754 : int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1750 :
1751 756 : bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1752 756 : if (bUseNoDataMask)
1753 : {
1754 489 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1755 : nFullResXSizeQueried, nFullResYSizeQueried));
1756 : }
1757 756 : if (pChunk == nullptr ||
1758 489 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1759 : {
1760 17 : GDALClose(poMEMDS);
1761 0 : CPLFree(pChunk);
1762 0 : CPLFree(pabyChunkNoDataMask);
1763 0 : CPLFree(papoDstBands);
1764 0 : return CE_Failure;
1765 : }
1766 :
1767 739 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1768 739 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1769 739 : int nBlocksDone = 0;
1770 :
1771 : int nDstYOff;
1772 1499 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1773 760 : nDstYOff += nDstBlockYSize)
1774 : {
1775 : int nDstYCount;
1776 739 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1777 739 : nDstYCount = nDstBlockYSize;
1778 : else
1779 0 : nDstYCount = nBufYSize - nDstYOff;
1780 :
1781 739 : int nChunkYOff =
1782 739 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1783 739 : int nChunkYOff2 = nYOff + 1 +
1784 739 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1785 : dfYRatioDstToSrc));
1786 739 : if (nChunkYOff2 > nRasterYSize)
1787 89 : nChunkYOff2 = nRasterYSize;
1788 739 : int nYCount = nChunkYOff2 - nChunkYOff;
1789 739 : CPLAssert(nYCount <= nFullResYChunk);
1790 :
1791 739 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1792 739 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1793 739 : if (nChunkYOffQueried < 0)
1794 : {
1795 92 : nChunkYSizeQueried += nChunkYOffQueried;
1796 92 : nChunkYOffQueried = 0;
1797 : }
1798 739 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1799 107 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1800 739 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1801 :
1802 : int nDstXOff;
1803 1498 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1804 759 : nDstXOff += nDstBlockXSize)
1805 : {
1806 : int nDstXCount;
1807 738 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1808 736 : nDstXCount = nDstBlockXSize;
1809 : else
1810 2 : nDstXCount = nBufXSize - nDstXOff;
1811 :
1812 738 : int nChunkXOff =
1813 738 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1814 738 : int nChunkXOff2 =
1815 738 : nXOff + 1 +
1816 738 : static_cast<int>(
1817 738 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1818 738 : if (nChunkXOff2 > nRasterXSize)
1819 588 : nChunkXOff2 = nRasterXSize;
1820 738 : int nXCount = nChunkXOff2 - nChunkXOff;
1821 738 : CPLAssert(nXCount <= nFullResXChunk);
1822 :
1823 738 : int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1824 738 : int nChunkXSizeQueried =
1825 738 : nXCount + 2 * nKernelRadius * nOvrFactor;
1826 738 : if (nChunkXOffQueried < 0)
1827 : {
1828 574 : nChunkXSizeQueried += nChunkXOffQueried;
1829 574 : nChunkXOffQueried = 0;
1830 : }
1831 738 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1832 595 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1833 738 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1834 :
1835 738 : bool bSkipResample = false;
1836 738 : bool bNoDataMaskFullyOpaque = false;
1837 738 : if (eErr == CE_None && bUseNoDataMask)
1838 : {
1839 489 : eErr = poMaskBand->RasterIO(
1840 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1841 : nChunkXSizeQueried, nChunkYSizeQueried,
1842 : pabyChunkNoDataMask, nChunkXSizeQueried,
1843 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1844 :
1845 : /* Optimizations if mask is fully opaque or transparent */
1846 489 : const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1847 489 : const GByte bVal = pabyChunkNoDataMask[0];
1848 489 : int i = 1; // Used after for.
1849 12776300 : for (; i < nPixels; i++)
1850 : {
1851 12775900 : if (pabyChunkNoDataMask[i] != bVal)
1852 72 : break;
1853 : }
1854 489 : if (i == nPixels)
1855 : {
1856 417 : if (bVal == 0)
1857 : {
1858 373 : GByte abyZero[16] = {0};
1859 780 : for (int iBand = 0; iBand < nBandCount; iBand++)
1860 : {
1861 3499 : for (int j = 0; j < nDstYCount; j++)
1862 : {
1863 3092 : GDALCopyWords64(
1864 : abyZero, GDT_Byte, 0,
1865 : static_cast<GByte *>(pData) +
1866 3092 : iBand * nBandSpace +
1867 3092 : nLineSpace * (j + nDstYOff) +
1868 3092 : nDstXOff * nPixelSpace,
1869 : eBufType, static_cast<int>(nPixelSpace),
1870 : nDstXCount);
1871 : }
1872 : }
1873 373 : bSkipResample = true;
1874 : }
1875 : else
1876 : {
1877 44 : bNoDataMaskFullyOpaque = true;
1878 : }
1879 : }
1880 : }
1881 :
1882 738 : if (!bSkipResample && eErr == CE_None)
1883 : {
1884 : /* Read the source buffers */
1885 368 : eErr = RasterIO(
1886 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1887 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1888 : nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1889 : nBandCount, panBandMap, 0, 0, 0, nullptr);
1890 : }
1891 :
1892 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1893 : if (pfnResampleFuncMultiBands && !bSkipResample &&
1894 : eErr == CE_None)
1895 : {
1896 : eErr = pfnResampleFuncMultiBands(
1897 : dfXRatioDstToSrc, dfYRatioDstToSrc,
1898 : dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1899 : dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1900 : eWrkDataType, (GByte *)pChunk, nBandCount,
1901 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1902 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1903 : nChunkXSizeQueried,
1904 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1905 : nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1906 : nDstXOff + nDestXOffVirtual + nDstXCount,
1907 : nDstYOff + nDestYOffVirtual,
1908 : nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1909 : pszResampling, FALSE /*bHasNoData*/,
1910 : 0.0 /* dfNoDataValue */, nullptr /* color table*/,
1911 : eDataType);
1912 : }
1913 : else
1914 : #endif
1915 : {
1916 : size_t nChunkBandOffset =
1917 751 : static_cast<size_t>(nChunkXSizeQueried) *
1918 751 : nChunkYSizeQueried *
1919 751 : GDALGetDataTypeSizeBytes(eWrkDataType);
1920 1949 : for (int i = 0;
1921 1949 : i < nBandCount && !bSkipResample && eErr == CE_None;
1922 : i++)
1923 : {
1924 1190 : const bool bPropagateNoData = false;
1925 1190 : void *pDstBuffer = nullptr;
1926 1190 : GDALDataType eDstBufferDataType = GDT_Unknown;
1927 : GDALRasterBand *poMEMBand =
1928 1190 : poMEMDS->GetRasterBand(i + 1);
1929 1191 : GDALOverviewResampleArgs args;
1930 1191 : args.eSrcDataType = eDataType;
1931 1191 : args.eOvrDataType = poMEMBand->GetRasterDataType();
1932 1191 : args.nOvrXSize = poMEMBand->GetXSize();
1933 1189 : args.nOvrYSize = poMEMBand->GetYSize();
1934 1186 : args.nOvrNBITS = nNBITS;
1935 1186 : args.dfXRatioDstToSrc = dfXRatioDstToSrc;
1936 1186 : args.dfYRatioDstToSrc = dfYRatioDstToSrc;
1937 1186 : args.dfSrcXDelta =
1938 1186 : dfXOff - nXOff; /* == 0 if bHasXOffVirtual */
1939 1186 : args.dfSrcYDelta =
1940 1186 : dfYOff - nYOff; /* == 0 if bHasYOffVirtual */
1941 1186 : args.eWrkDataType = eWrkDataType;
1942 1186 : args.pabyChunkNodataMask = bNoDataMaskFullyOpaque
1943 1186 : ? nullptr
1944 : : pabyChunkNoDataMask;
1945 1186 : args.nChunkXOff =
1946 1186 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff);
1947 1186 : args.nChunkXSize = nChunkXSizeQueried;
1948 1186 : args.nChunkYOff =
1949 1186 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff);
1950 1186 : args.nChunkYSize = nChunkYSizeQueried;
1951 1186 : args.nDstXOff = nDstXOff + nDestXOffVirtual;
1952 1186 : args.nDstXOff2 =
1953 1186 : nDstXOff + nDestXOffVirtual + nDstXCount;
1954 1186 : args.nDstYOff = nDstYOff + nDestYOffVirtual;
1955 1186 : args.nDstYOff2 =
1956 1186 : nDstYOff + nDestYOffVirtual + nDstYCount;
1957 1186 : args.pszResampling = pszResampling;
1958 1186 : args.bHasNoData = false;
1959 1186 : args.dfNoDataValue = 0.0;
1960 1186 : args.poColorTable = nullptr;
1961 1186 : args.bPropagateNoData = bPropagateNoData;
1962 :
1963 : eErr =
1964 2378 : pfnResampleFunc(args,
1965 1186 : reinterpret_cast<GByte *>(pChunk) +
1966 1186 : i * nChunkBandOffset,
1967 : &pDstBuffer, &eDstBufferDataType);
1968 1192 : if (eErr == CE_None)
1969 : {
1970 1192 : eErr = poMEMBand->RasterIO(
1971 : GF_Write, nDstXOff + nDestXOffVirtual,
1972 : nDstYOff + nDestYOffVirtual, nDstXCount,
1973 : nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1974 : eDstBufferDataType, 0, 0, nullptr);
1975 : }
1976 1192 : CPLFree(pDstBuffer);
1977 : }
1978 : }
1979 :
1980 759 : nBlocksDone++;
1981 1148 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1982 389 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1983 : "", psExtraArg->pProgressData))
1984 : {
1985 0 : eErr = CE_Failure;
1986 : }
1987 : }
1988 : }
1989 :
1990 760 : CPLFree(pChunk);
1991 758 : CPLFree(pabyChunkNoDataMask);
1992 : }
1993 :
1994 758 : CPLFree(papoDstBands);
1995 758 : GDALClose(poMEMDS);
1996 :
1997 758 : return eErr;
1998 : }
1999 :
2000 : //! @endcond
2001 :
2002 : /************************************************************************/
2003 : /* GDALSwapWords() */
2004 : /************************************************************************/
2005 :
2006 : /**
2007 : * Byte swap words in-place.
2008 : *
2009 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2010 : * a memory array. No assumption is made that the words being swapped are
2011 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2012 : * to determine if the current platform is big endian or little endian. Use
2013 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
2014 : * of a function call.
2015 : *
2016 : * @param pData pointer to start of data buffer.
2017 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2018 : * @param nWordCount the number of words to be swapped in this call.
2019 : * @param nWordSkip the byte offset from the start of one word to the start of
2020 : * the next. For packed buffers this is the same as nWordSize.
2021 : */
2022 :
2023 497157 : void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
2024 : int nWordSkip)
2025 :
2026 : {
2027 497157 : if (nWordCount > 0)
2028 497157 : VALIDATE_POINTER0(pData, "GDALSwapWords");
2029 :
2030 497157 : GByte *pabyData = static_cast<GByte *>(pData);
2031 :
2032 497157 : switch (nWordSize)
2033 : {
2034 7234 : case 1:
2035 7234 : break;
2036 :
2037 476923 : case 2:
2038 476923 : CPLAssert(nWordSkip >= 2 || nWordCount == 1);
2039 228064000 : for (int i = 0; i < nWordCount; i++)
2040 : {
2041 227587000 : CPL_SWAP16PTR(pabyData);
2042 227587000 : pabyData += nWordSkip;
2043 : }
2044 476923 : break;
2045 :
2046 10514 : case 4:
2047 10514 : CPLAssert(nWordSkip >= 4 || nWordCount == 1);
2048 10514 : if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
2049 : {
2050 29139300 : for (int i = 0; i < nWordCount; i++)
2051 : {
2052 29128800 : *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
2053 : *reinterpret_cast<const GUInt32 *>(pabyData));
2054 29128800 : pabyData += nWordSkip;
2055 10511 : }
2056 : }
2057 : else
2058 : {
2059 9 : for (int i = 0; i < nWordCount; i++)
2060 : {
2061 6 : CPL_SWAP32PTR(pabyData);
2062 6 : pabyData += nWordSkip;
2063 : }
2064 : }
2065 10514 : break;
2066 :
2067 2486 : case 8:
2068 2486 : CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2069 2486 : if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2070 : {
2071 3358160 : for (int i = 0; i < nWordCount; i++)
2072 : {
2073 3355680 : *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
2074 : *reinterpret_cast<const GUInt64 *>(pabyData));
2075 3355680 : pabyData += nWordSkip;
2076 2485 : }
2077 : }
2078 : else
2079 : {
2080 3 : for (int i = 0; i < nWordCount; i++)
2081 : {
2082 2 : CPL_SWAP64PTR(pabyData);
2083 2 : pabyData += nWordSkip;
2084 : }
2085 : }
2086 2486 : break;
2087 :
2088 0 : default:
2089 0 : CPLAssert(false);
2090 : }
2091 : }
2092 :
2093 : /************************************************************************/
2094 : /* GDALSwapWordsEx() */
2095 : /************************************************************************/
2096 :
2097 : /**
2098 : * Byte swap words in-place.
2099 : *
2100 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2101 : * a memory array. No assumption is made that the words being swapped are
2102 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2103 : * to determine if the current platform is big endian or little endian. Use
2104 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
2105 : * of a function call.
2106 : *
2107 : * @param pData pointer to start of data buffer.
2108 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2109 : * @param nWordCount the number of words to be swapped in this call.
2110 : * @param nWordSkip the byte offset from the start of one word to the start of
2111 : * the next. For packed buffers this is the same as nWordSize.
2112 : * @since GDAL 2.1
2113 : */
2114 6118 : void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
2115 : int nWordSkip)
2116 : {
2117 6118 : GByte *pabyData = static_cast<GByte *>(pData);
2118 12236 : while (nWordCount)
2119 : {
2120 : // Pick-up a multiple of 8 as max chunk size.
2121 6118 : const int nWordCountSmall =
2122 6118 : (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2123 6118 : GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2124 6118 : pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2125 6118 : nWordCount -= nWordCountSmall;
2126 : }
2127 6118 : }
2128 :
2129 : // Place the new GDALCopyWords helpers in an anonymous namespace
2130 : namespace
2131 : {
2132 :
2133 : /************************************************************************/
2134 : /* GDALCopyWordsT() */
2135 : /************************************************************************/
2136 : /**
2137 : * Template function, used to copy data from pSrcData into buffer
2138 : * pDstData, with stride nSrcPixelStride in the source data and
2139 : * stride nDstPixelStride in the destination data. This template can
2140 : * deal with the case where the input data type is real or complex and
2141 : * the output is real.
2142 : *
2143 : * @param pSrcData the source data buffer
2144 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2145 : * of interest.
2146 : * @param pDstData the destination buffer.
2147 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2148 : * interest.
2149 : * @param nWordCount the total number of pixel words to copy
2150 : *
2151 : * @code
2152 : * // Assume an input buffer of type GUInt16 named pBufferIn
2153 : * GByte *pBufferOut = new GByte[numBytesOut];
2154 : * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut);
2155 : * @endcode
2156 : * @note
2157 : * This is a private function, and should not be exposed outside of
2158 : * rasterio.cpp. External users should call the GDALCopyWords driver function.
2159 : */
2160 :
2161 : template <class Tin, class Tout>
2162 46935824 : static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2163 : int nSrcPixelStride,
2164 : Tout *const CPL_RESTRICT pDstData,
2165 : int nDstPixelStride,
2166 : GPtrDiff_t nWordCount)
2167 : {
2168 46935824 : decltype(nWordCount) nDstOffset = 0;
2169 :
2170 46935824 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2171 46935824 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2172 597284721 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2173 : {
2174 550346517 : const Tin tValue =
2175 550346517 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2176 550346517 : Tout *const pOutPixel =
2177 550346517 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2178 :
2179 550346517 : GDALCopyWord(tValue, *pOutPixel);
2180 :
2181 550349637 : nDstOffset += nDstPixelStride;
2182 : }
2183 46939056 : }
2184 :
2185 : template <class Tin, class Tout>
2186 37991671 : static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2187 : int nSrcPixelStride,
2188 : Tout *const CPL_RESTRICT pDstData,
2189 : int nDstPixelStride, GPtrDiff_t nWordCount)
2190 : {
2191 37991671 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2192 : nWordCount);
2193 37991761 : }
2194 :
2195 : template <class Tin, class Tout>
2196 270689 : static void inline GDALCopyWordsT_8atatime(
2197 : const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2198 : Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2199 : GPtrDiff_t nWordCount)
2200 : {
2201 270689 : decltype(nWordCount) nDstOffset = 0;
2202 :
2203 270689 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2204 270689 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2205 270689 : decltype(nWordCount) n = 0;
2206 270689 : if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2207 : nDstPixelStride == static_cast<int>(sizeof(Tout)))
2208 : {
2209 25827774 : for (; n < nWordCount - 7; n += 8)
2210 : {
2211 25565694 : const Tin *pInValues = reinterpret_cast<const Tin *>(
2212 25565694 : pSrcDataPtr + (n * nSrcPixelStride));
2213 25565694 : Tout *const pOutPixels =
2214 25565694 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2215 :
2216 25565694 : GDALCopy8Words(pInValues, pOutPixels);
2217 :
2218 25560314 : nDstOffset += 8 * nDstPixelStride;
2219 : }
2220 : }
2221 786659 : for (; n < nWordCount; n++)
2222 : {
2223 515995 : const Tin tValue =
2224 515995 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2225 515995 : Tout *const pOutPixel =
2226 515995 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2227 :
2228 515995 : GDALCopyWord(tValue, *pOutPixel);
2229 :
2230 521339 : nDstOffset += nDstPixelStride;
2231 : }
2232 270664 : }
2233 :
2234 : #ifdef HAVE_SSE2
2235 :
2236 : template <class Tout>
2237 38881 : void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2238 : int nSrcPixelStride,
2239 : Tout *const CPL_RESTRICT pDstData,
2240 : int nDstPixelStride, GPtrDiff_t nWordCount)
2241 : {
2242 : static_assert(std::is_integral<Tout>::value &&
2243 : sizeof(Tout) == sizeof(uint16_t),
2244 : "Bad Tout");
2245 38881 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2246 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2247 : {
2248 32530 : decltype(nWordCount) n = 0;
2249 32530 : const __m128i xmm_zero = _mm_setzero_si128();
2250 32530 : GByte *CPL_RESTRICT pabyDstDataPtr =
2251 : reinterpret_cast<GByte *>(pDstData);
2252 1412288 : for (; n < nWordCount - 15; n += 16)
2253 : {
2254 1379758 : __m128i xmm = _mm_loadu_si128(
2255 1379758 : reinterpret_cast<const __m128i *>(pSrcData + n));
2256 1379758 : __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2257 1379758 : __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2258 : _mm_storeu_si128(
2259 1379758 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2260 : _mm_storeu_si128(
2261 1379758 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2262 : }
2263 106649 : for (; n < nWordCount; n++)
2264 : {
2265 74119 : pDstData[n] = pSrcData[n];
2266 32530 : }
2267 : }
2268 : else
2269 : {
2270 6351 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2271 : nDstPixelStride, nWordCount);
2272 : }
2273 38881 : }
2274 :
2275 : template <>
2276 26545 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2277 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2278 : int nDstPixelStride, GPtrDiff_t nWordCount)
2279 : {
2280 26545 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2281 : nDstPixelStride, nWordCount);
2282 26545 : }
2283 :
2284 : template <>
2285 12336 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2286 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2287 : int nDstPixelStride, GPtrDiff_t nWordCount)
2288 : {
2289 12336 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2290 : nDstPixelStride, nWordCount);
2291 12336 : }
2292 :
2293 : template <class Tout>
2294 12842068 : void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2295 : int nSrcPixelStride,
2296 : Tout *const CPL_RESTRICT pDstData,
2297 : int nDstPixelStride, GPtrDiff_t nWordCount)
2298 : {
2299 : static_assert(std::is_integral<Tout>::value &&
2300 : sizeof(Tout) == sizeof(uint32_t),
2301 : "Bad Tout");
2302 12842068 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2303 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2304 : {
2305 6286768 : decltype(nWordCount) n = 0;
2306 6286768 : const __m128i xmm_zero = _mm_setzero_si128();
2307 6286768 : GByte *CPL_RESTRICT pabyDstDataPtr =
2308 : reinterpret_cast<GByte *>(pDstData);
2309 70542610 : for (; n < nWordCount - 15; n += 16)
2310 : {
2311 64469252 : __m128i xmm = _mm_loadu_si128(
2312 64469252 : reinterpret_cast<const __m128i *>(pSrcData + n));
2313 64475452 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2314 64509952 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2315 64494352 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2316 64330852 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2317 64267552 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2318 64255852 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2319 : _mm_storeu_si128(
2320 64255852 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2321 : _mm_storeu_si128(
2322 64255852 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2323 : _mm_storeu_si128(
2324 64255852 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2325 : _mm_storeu_si128(
2326 64255852 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2327 : }
2328 14231699 : for (; n < nWordCount; n++)
2329 : {
2330 8158301 : pDstData[n] = pSrcData[n];
2331 6073358 : }
2332 : }
2333 : else
2334 : {
2335 6555350 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2336 : nDstPixelStride, nWordCount);
2337 : }
2338 12630768 : }
2339 :
2340 : template <>
2341 468 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2342 : int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2343 : int nDstPixelStride, GPtrDiff_t nWordCount)
2344 : {
2345 468 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2346 : nDstPixelStride, nWordCount);
2347 468 : }
2348 :
2349 : template <>
2350 12844100 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2351 : int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2352 : int nDstPixelStride, GPtrDiff_t nWordCount)
2353 : {
2354 12844100 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2355 : nDstPixelStride, nWordCount);
2356 12849000 : }
2357 :
2358 : template <>
2359 2471350 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2360 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2361 : int nDstPixelStride, GPtrDiff_t nWordCount)
2362 : {
2363 2471350 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2364 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2365 : {
2366 111905 : decltype(nWordCount) n = 0;
2367 111905 : const __m128i xmm_zero = _mm_setzero_si128();
2368 111905 : GByte *CPL_RESTRICT pabyDstDataPtr =
2369 : reinterpret_cast<GByte *>(pDstData);
2370 3259990 : for (; n < nWordCount - 15; n += 16)
2371 : {
2372 3148080 : __m128i xmm = _mm_loadu_si128(
2373 3148080 : reinterpret_cast<const __m128i *>(pSrcData + n));
2374 3148080 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2375 3148080 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2376 3148080 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2377 3148080 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2378 3148080 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2379 3148080 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2380 3148080 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2381 3148080 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2382 3148080 : __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2383 3148080 : __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2384 3148080 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2385 : xmm0_f);
2386 : _mm_storeu_ps(
2387 3148080 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2388 : _mm_storeu_ps(
2389 3148080 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2390 : _mm_storeu_ps(
2391 3148080 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2392 : }
2393 478444 : for (; n < nWordCount; n++)
2394 : {
2395 366539 : pDstData[n] = pSrcData[n];
2396 111905 : }
2397 : }
2398 : else
2399 : {
2400 2359440 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2401 : nDstPixelStride, nWordCount);
2402 : }
2403 2471350 : }
2404 :
2405 : template <>
2406 146754 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2407 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2408 : int nDstPixelStride, GPtrDiff_t nWordCount)
2409 : {
2410 146754 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2411 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2412 : {
2413 123698 : decltype(nWordCount) n = 0;
2414 123698 : const __m128i xmm_zero = _mm_setzero_si128();
2415 123698 : GByte *CPL_RESTRICT pabyDstDataPtr =
2416 : reinterpret_cast<GByte *>(pDstData);
2417 1422030 : for (; n < nWordCount - 15; n += 16)
2418 : {
2419 1298330 : __m128i xmm = _mm_loadu_si128(
2420 1298330 : reinterpret_cast<const __m128i *>(pSrcData + n));
2421 1298330 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2422 1298330 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2423 1298330 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2424 1298330 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2425 1298330 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2426 1298330 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2427 :
2428 1298330 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2429 1298330 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2430 1298330 : __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2431 1298330 : __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2432 1298330 : xmm0 = _mm_srli_si128(xmm0, 8);
2433 1298330 : xmm1 = _mm_srli_si128(xmm1, 8);
2434 1298330 : xmm2 = _mm_srli_si128(xmm2, 8);
2435 1298330 : xmm3 = _mm_srli_si128(xmm3, 8);
2436 1298330 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2437 1298330 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2438 1298330 : __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2439 1298330 : __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2440 :
2441 1298330 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2442 : xmm0_low_d);
2443 : _mm_storeu_pd(
2444 1298330 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2445 : xmm0_high_d);
2446 : _mm_storeu_pd(
2447 1298330 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2448 : xmm1_low_d);
2449 : _mm_storeu_pd(
2450 1298330 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2451 : xmm1_high_d);
2452 : _mm_storeu_pd(
2453 1298330 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2454 : xmm2_low_d);
2455 : _mm_storeu_pd(
2456 1298330 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2457 : xmm2_high_d);
2458 : _mm_storeu_pd(
2459 1298330 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2460 : xmm3_low_d);
2461 : _mm_storeu_pd(
2462 1298330 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2463 : xmm3_high_d);
2464 : }
2465 233373 : for (; n < nWordCount; n++)
2466 : {
2467 109675 : pDstData[n] = pSrcData[n];
2468 123698 : }
2469 : }
2470 : else
2471 : {
2472 23056 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2473 : nDstPixelStride, nWordCount);
2474 : }
2475 146754 : }
2476 :
2477 : template <>
2478 6040 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2479 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2480 : int nDstPixelStride, GPtrDiff_t nWordCount)
2481 : {
2482 6040 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2483 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2484 : {
2485 5065 : decltype(nWordCount) n = 0;
2486 : // In SSE2, min_epu16 does not exist, so shift from
2487 : // UInt16 to SInt16 to be able to use min_epi16
2488 5065 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2489 5065 : const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2490 138561 : for (; n < nWordCount - 7; n += 8)
2491 : {
2492 133496 : __m128i xmm = _mm_loadu_si128(
2493 133496 : reinterpret_cast<const __m128i *>(pSrcData + n));
2494 133496 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2495 133496 : xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2496 133496 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2497 133496 : xmm = _mm_packus_epi16(xmm, xmm);
2498 133496 : GDALCopyXMMToInt64(xmm,
2499 133496 : reinterpret_cast<GPtrDiff_t *>(pDstData + n));
2500 : }
2501 16083 : for (; n < nWordCount; n++)
2502 : {
2503 11018 : pDstData[n] =
2504 11018 : pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2505 5065 : }
2506 : }
2507 : else
2508 : {
2509 975 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2510 : nDstPixelStride, nWordCount);
2511 : }
2512 6040 : }
2513 :
2514 : template <>
2515 49 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2516 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2517 : int nDstPixelStride, GPtrDiff_t nWordCount)
2518 : {
2519 49 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2520 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2521 : {
2522 43 : decltype(nWordCount) n = 0;
2523 : // In SSE2, min_epu16 does not exist, so shift from
2524 : // UInt16 to SInt16 to be able to use min_epi16
2525 43 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2526 43 : const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2527 115 : for (; n < nWordCount - 7; n += 8)
2528 : {
2529 72 : __m128i xmm = _mm_loadu_si128(
2530 72 : reinterpret_cast<const __m128i *>(pSrcData + n));
2531 72 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2532 72 : xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2533 72 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2534 72 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm);
2535 : }
2536 111 : for (; n < nWordCount; n++)
2537 : {
2538 68 : pDstData[n] =
2539 68 : pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2540 43 : }
2541 : }
2542 : else
2543 : {
2544 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2545 : nDstPixelStride, nWordCount);
2546 : }
2547 49 : }
2548 :
2549 : template <>
2550 342 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2551 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2552 : int nDstPixelStride, GPtrDiff_t nWordCount)
2553 : {
2554 342 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2555 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2556 : {
2557 336 : decltype(nWordCount) n = 0;
2558 336 : const __m128i xmm_zero = _mm_setzero_si128();
2559 336 : GByte *CPL_RESTRICT pabyDstDataPtr =
2560 : reinterpret_cast<GByte *>(pDstData);
2561 1286 : for (; n < nWordCount - 7; n += 8)
2562 : {
2563 950 : __m128i xmm = _mm_loadu_si128(
2564 950 : reinterpret_cast<const __m128i *>(pSrcData + n));
2565 950 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2566 950 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2567 950 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2568 950 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2569 950 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2570 : xmm0_f);
2571 : _mm_storeu_ps(
2572 950 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2573 : }
2574 1043 : for (; n < nWordCount; n++)
2575 : {
2576 707 : pDstData[n] = pSrcData[n];
2577 336 : }
2578 : }
2579 : else
2580 : {
2581 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2582 : nDstPixelStride, nWordCount);
2583 : }
2584 342 : }
2585 :
2586 : template <>
2587 373 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2588 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2589 : int nDstPixelStride, GPtrDiff_t nWordCount)
2590 : {
2591 373 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2592 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2593 : {
2594 262 : decltype(nWordCount) n = 0;
2595 262 : const __m128i xmm_zero = _mm_setzero_si128();
2596 262 : GByte *CPL_RESTRICT pabyDstDataPtr =
2597 : reinterpret_cast<GByte *>(pDstData);
2598 507 : for (; n < nWordCount - 7; n += 8)
2599 : {
2600 245 : __m128i xmm = _mm_loadu_si128(
2601 245 : reinterpret_cast<const __m128i *>(pSrcData + n));
2602 245 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2603 245 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2604 :
2605 245 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2606 245 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2607 245 : xmm0 = _mm_srli_si128(xmm0, 8);
2608 245 : xmm1 = _mm_srli_si128(xmm1, 8);
2609 245 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2610 245 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2611 :
2612 245 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2613 : xmm0_low_d);
2614 : _mm_storeu_pd(
2615 245 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2616 : xmm0_high_d);
2617 : _mm_storeu_pd(
2618 245 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2619 : xmm1_low_d);
2620 : _mm_storeu_pd(
2621 245 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2622 : xmm1_high_d);
2623 : }
2624 832 : for (; n < nWordCount; n++)
2625 : {
2626 570 : pDstData[n] = pSrcData[n];
2627 262 : }
2628 : }
2629 : else
2630 : {
2631 111 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2632 : nDstPixelStride, nWordCount);
2633 : }
2634 373 : }
2635 :
2636 : template <>
2637 3091 : void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
2638 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2639 : int nDstPixelStride, GPtrDiff_t nWordCount)
2640 : {
2641 3091 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2642 : nDstPixelStride, nWordCount);
2643 3091 : }
2644 :
2645 : #endif // HAVE_SSE2
2646 :
2647 : template <>
2648 190103 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2649 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2650 : int nDstPixelStride, GPtrDiff_t nWordCount)
2651 : {
2652 190103 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2653 : nDstPixelStride, nWordCount);
2654 190103 : }
2655 :
2656 : template <>
2657 15796 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2658 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2659 : int nDstPixelStride, GPtrDiff_t nWordCount)
2660 : {
2661 15796 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2662 : nDstPixelStride, nWordCount);
2663 15796 : }
2664 :
2665 : template <>
2666 61703 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2667 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2668 : int nDstPixelStride, GPtrDiff_t nWordCount)
2669 : {
2670 61703 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2671 : nDstPixelStride, nWordCount);
2672 61700 : }
2673 :
2674 : /************************************************************************/
2675 : /* GDALCopyWordsComplexT() */
2676 : /************************************************************************/
2677 : /**
2678 : * Template function, used to copy data from pSrcData into buffer
2679 : * pDstData, with stride nSrcPixelStride in the source data and
2680 : * stride nDstPixelStride in the destination data. Deals with the
2681 : * complex case, where input is complex and output is complex.
2682 : *
2683 : * @param pSrcData the source data buffer
2684 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2685 : * of interest.
2686 : * @param pDstData the destination buffer.
2687 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2688 : * interest.
2689 : * @param nWordCount the total number of pixel words to copy
2690 : *
2691 : */
2692 : template <class Tin, class Tout>
2693 96717 : inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
2694 : int nSrcPixelStride,
2695 : Tout *const CPL_RESTRICT pDstData,
2696 : int nDstPixelStride, GPtrDiff_t nWordCount)
2697 : {
2698 96717 : decltype(nWordCount) nDstOffset = 0;
2699 96717 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2700 96717 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2701 :
2702 5241771 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2703 : {
2704 5145049 : const Tin *const pPixelIn =
2705 5145049 : reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2706 5145049 : Tout *const pPixelOut =
2707 5145049 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2708 :
2709 5145049 : GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2710 5145049 : GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2711 :
2712 5145049 : nDstOffset += nDstPixelStride;
2713 : }
2714 96717 : }
2715 :
2716 : /************************************************************************/
2717 : /* GDALCopyWordsComplexOutT() */
2718 : /************************************************************************/
2719 : /**
2720 : * Template function, used to copy data from pSrcData into buffer
2721 : * pDstData, with stride nSrcPixelStride in the source data and
2722 : * stride nDstPixelStride in the destination data. Deals with the
2723 : * case where the value is real coming in, but complex going out.
2724 : *
2725 : * @param pSrcData the source data buffer
2726 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2727 : * of interest, in bytes.
2728 : * @param pDstData the destination buffer.
2729 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2730 : * interest, in bytes.
2731 : * @param nWordCount the total number of pixel words to copy
2732 : *
2733 : */
2734 : template <class Tin, class Tout>
2735 3877 : inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
2736 : int nSrcPixelStride,
2737 : Tout *const CPL_RESTRICT pDstData,
2738 : int nDstPixelStride, GPtrDiff_t nWordCount)
2739 : {
2740 3877 : decltype(nWordCount) nDstOffset = 0;
2741 :
2742 3877 : const Tout tOutZero = static_cast<Tout>(0);
2743 :
2744 3877 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2745 3877 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2746 :
2747 1099414 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2748 : {
2749 1095537 : const Tin tValue =
2750 1095537 : *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2751 1095537 : Tout *const pPixelOut =
2752 1095537 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2753 1095537 : GDALCopyWord(tValue, *pPixelOut);
2754 :
2755 1095537 : pPixelOut[1] = tOutZero;
2756 :
2757 1095537 : nDstOffset += nDstPixelStride;
2758 : }
2759 3877 : }
2760 :
2761 : /************************************************************************/
2762 : /* GDALCopyWordsFromT() */
2763 : /************************************************************************/
2764 : /**
2765 : * Template driver function. Given the input type T, call the appropriate
2766 : * GDALCopyWordsT function template for the desired output type. You should
2767 : * never call this function directly (call GDALCopyWords instead).
2768 : *
2769 : * @param pSrcData source data buffer
2770 : * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2771 : * @param bInComplex input is complex
2772 : * @param pDstData destination data buffer
2773 : * @param eDstType destination data type
2774 : * @param nDstPixelStride pixel stride in output buffer, in pixel words
2775 : * @param nWordCount number of pixel words to be copied
2776 : */
2777 : template <class T>
2778 53872056 : inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
2779 : int nSrcPixelStride, bool bInComplex,
2780 : void *CPL_RESTRICT pDstData,
2781 : GDALDataType eDstType, int nDstPixelStride,
2782 : GPtrDiff_t nWordCount)
2783 : {
2784 53872056 : switch (eDstType)
2785 : {
2786 4580243 : case GDT_Byte:
2787 4580243 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2788 : static_cast<unsigned char *>(pDstData),
2789 : nDstPixelStride, nWordCount);
2790 4580301 : break;
2791 809 : case GDT_Int8:
2792 809 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2793 : static_cast<signed char *>(pDstData),
2794 : nDstPixelStride, nWordCount);
2795 809 : break;
2796 105082 : case GDT_UInt16:
2797 105082 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2798 : static_cast<unsigned short *>(pDstData),
2799 : nDstPixelStride, nWordCount);
2800 105076 : break;
2801 4127203 : case GDT_Int16:
2802 4127203 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2803 : static_cast<short *>(pDstData), nDstPixelStride,
2804 : nWordCount);
2805 4127203 : break;
2806 9486 : case GDT_UInt32:
2807 9486 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2808 : static_cast<unsigned int *>(pDstData),
2809 : nDstPixelStride, nWordCount);
2810 9486 : break;
2811 26049925 : case GDT_Int32:
2812 26049925 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2813 : static_cast<int *>(pDstData), nDstPixelStride,
2814 : nWordCount);
2815 26055626 : break;
2816 856 : case GDT_UInt64:
2817 856 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2818 : static_cast<std::uint64_t *>(pDstData),
2819 : nDstPixelStride, nWordCount);
2820 856 : break;
2821 5170 : case GDT_Int64:
2822 5170 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2823 : static_cast<std::int64_t *>(pDstData),
2824 : nDstPixelStride, nWordCount);
2825 5170 : break;
2826 942 : case GDT_Float16:
2827 942 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2828 : static_cast<GFloat16 *>(pDstData), nDstPixelStride,
2829 : nWordCount);
2830 942 : break;
2831 3695809 : case GDT_Float32:
2832 3695809 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2833 : static_cast<float *>(pDstData), nDstPixelStride,
2834 : nWordCount);
2835 3695809 : break;
2836 15194189 : case GDT_Float64:
2837 15194189 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2838 : static_cast<double *>(pDstData), nDstPixelStride,
2839 : nWordCount);
2840 15194249 : break;
2841 94123 : case GDT_CInt16:
2842 94123 : if (bInComplex)
2843 : {
2844 92870 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2845 : static_cast<short *>(pDstData),
2846 : nDstPixelStride, nWordCount);
2847 : }
2848 : else // input is not complex, so we need to promote to a complex
2849 : // buffer
2850 : {
2851 1253 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2852 : static_cast<short *>(pDstData),
2853 : nDstPixelStride, nWordCount);
2854 : }
2855 94123 : break;
2856 1052 : case GDT_CInt32:
2857 1052 : if (bInComplex)
2858 : {
2859 421 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2860 : static_cast<int *>(pDstData),
2861 : nDstPixelStride, nWordCount);
2862 : }
2863 : else // input is not complex, so we need to promote to a complex
2864 : // buffer
2865 : {
2866 631 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2867 : static_cast<int *>(pDstData),
2868 : nDstPixelStride, nWordCount);
2869 : }
2870 1052 : break;
2871 281 : case GDT_CFloat16:
2872 281 : if (bInComplex)
2873 : {
2874 16 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2875 : static_cast<GFloat16 *>(pDstData),
2876 : nDstPixelStride, nWordCount);
2877 : }
2878 : else // input is not complex, so we need to promote to a complex
2879 : // buffer
2880 : {
2881 265 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2882 : static_cast<GFloat16 *>(pDstData),
2883 : nDstPixelStride, nWordCount);
2884 : }
2885 281 : break;
2886 3359 : case GDT_CFloat32:
2887 3359 : if (bInComplex)
2888 : {
2889 2564 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2890 : static_cast<float *>(pDstData),
2891 : nDstPixelStride, nWordCount);
2892 : }
2893 : else // input is not complex, so we need to promote to a complex
2894 : // buffer
2895 : {
2896 795 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2897 : static_cast<float *>(pDstData),
2898 : nDstPixelStride, nWordCount);
2899 : }
2900 3359 : break;
2901 1779 : case GDT_CFloat64:
2902 1779 : if (bInComplex)
2903 : {
2904 846 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2905 : static_cast<double *>(pDstData),
2906 : nDstPixelStride, nWordCount);
2907 : }
2908 : else // input is not complex, so we need to promote to a complex
2909 : // buffer
2910 : {
2911 933 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2912 : static_cast<double *>(pDstData),
2913 : nDstPixelStride, nWordCount);
2914 : }
2915 1779 : break;
2916 0 : case GDT_Unknown:
2917 : case GDT_TypeCount:
2918 0 : CPLAssert(false);
2919 : }
2920 53877811 : }
2921 :
2922 : } // end anonymous namespace
2923 :
2924 : /************************************************************************/
2925 : /* GDALReplicateWord() */
2926 : /************************************************************************/
2927 :
2928 : template <class T>
2929 530285 : inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
2930 : GPtrDiff_t nWordCount)
2931 : {
2932 530285 : const T valSet = *static_cast<const T *>(pDstData);
2933 530285 : if (nDstPixelStride == static_cast<int>(sizeof(T)))
2934 : {
2935 500546 : T *pDstPtr = static_cast<T *>(pDstData) + 1;
2936 20668583 : while (nWordCount >= 4)
2937 : {
2938 20168093 : nWordCount -= 4;
2939 20168093 : pDstPtr[0] = valSet;
2940 20168093 : pDstPtr[1] = valSet;
2941 20168093 : pDstPtr[2] = valSet;
2942 20168093 : pDstPtr[3] = valSet;
2943 20168093 : pDstPtr += 4;
2944 : }
2945 1269777 : while (nWordCount > 0)
2946 : {
2947 769231 : --nWordCount;
2948 769231 : *pDstPtr = valSet;
2949 769231 : pDstPtr++;
2950 : }
2951 : }
2952 : else
2953 : {
2954 29751 : GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2955 1040338 : while (nWordCount > 0)
2956 : {
2957 1010587 : --nWordCount;
2958 1010587 : *reinterpret_cast<T *>(pabyDstPtr) = valSet;
2959 1010587 : pabyDstPtr += nDstPixelStride;
2960 : }
2961 : }
2962 530285 : }
2963 :
2964 912783 : static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
2965 : GDALDataType eSrcType,
2966 : void *CPL_RESTRICT pDstData,
2967 : GDALDataType eDstType, int nDstPixelStride,
2968 : GPtrDiff_t nWordCount)
2969 : {
2970 : /* -----------------------------------------------------------------------
2971 : */
2972 : /* Special case when the source data is always the same value */
2973 : /* (for VRTSourcedRasterBand::IRasterIO and
2974 : * VRTDerivedRasterBand::IRasterIO*/
2975 : /* for example) */
2976 : /* -----------------------------------------------------------------------
2977 : */
2978 : // Let the general translation case do the necessary conversions
2979 : // on the first destination element.
2980 912783 : GDALCopyWords64(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
2981 :
2982 : // Now copy the first element to the nWordCount - 1 following destination
2983 : // elements.
2984 912810 : nWordCount--;
2985 912810 : GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
2986 :
2987 912810 : switch (eDstType)
2988 : {
2989 382426 : case GDT_Byte:
2990 : case GDT_Int8:
2991 : {
2992 382426 : if (nDstPixelStride == 1)
2993 : {
2994 342121 : if (nWordCount > 0)
2995 342121 : memset(pabyDstWord,
2996 342121 : *reinterpret_cast<const GByte *>(pDstData),
2997 : nWordCount);
2998 : }
2999 : else
3000 : {
3001 40305 : GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
3002 23942200 : while (nWordCount > 0)
3003 : {
3004 23901900 : --nWordCount;
3005 23901900 : *pabyDstWord = valSet;
3006 23901900 : pabyDstWord += nDstPixelStride;
3007 : }
3008 : }
3009 382426 : break;
3010 : }
3011 :
3012 : #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
3013 : case enum_type: \
3014 : { \
3015 : GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
3016 : break; \
3017 : }
3018 :
3019 1723 : CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
3020 169649 : CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
3021 56 : CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
3022 300220 : CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
3023 21 : CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
3024 1024 : CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
3025 0 : CASE_DUPLICATE_SIMPLE(GDT_Float16, GFloat16)
3026 52459 : CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
3027 5120 : CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
3028 :
3029 : #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
3030 : case enum_type: \
3031 : { \
3032 : c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \
3033 : c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \
3034 : while (nWordCount > 0) \
3035 : { \
3036 : --nWordCount; \
3037 : reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \
3038 : reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \
3039 : pabyDstWord += nDstPixelStride; \
3040 : } \
3041 : break; \
3042 : }
3043 :
3044 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
3045 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
3046 0 : CASE_DUPLICATE_COMPLEX(GDT_CFloat16, GFloat16)
3047 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
3048 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
3049 :
3050 0 : case GDT_Unknown:
3051 : case GDT_TypeCount:
3052 0 : CPLAssert(false);
3053 : }
3054 912744 : }
3055 :
3056 : /************************************************************************/
3057 : /* GDALUnrolledCopy() */
3058 : /************************************************************************/
3059 :
3060 : template <class T, int srcStride, int dstStride>
3061 3135680 : static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3062 : const T *CPL_RESTRICT pSrc,
3063 : GPtrDiff_t nIters)
3064 : {
3065 3135680 : if (nIters >= 16)
3066 : {
3067 135333105 : for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3068 : {
3069 132324519 : pDest[0 * dstStride] = pSrc[0 * srcStride];
3070 132324519 : pDest[1 * dstStride] = pSrc[1 * srcStride];
3071 132324519 : pDest[2 * dstStride] = pSrc[2 * srcStride];
3072 132324519 : pDest[3 * dstStride] = pSrc[3 * srcStride];
3073 132324519 : pDest[4 * dstStride] = pSrc[4 * srcStride];
3074 132324519 : pDest[5 * dstStride] = pSrc[5 * srcStride];
3075 132324519 : pDest[6 * dstStride] = pSrc[6 * srcStride];
3076 132324519 : pDest[7 * dstStride] = pSrc[7 * srcStride];
3077 132324519 : pDest[8 * dstStride] = pSrc[8 * srcStride];
3078 132324519 : pDest[9 * dstStride] = pSrc[9 * srcStride];
3079 132324519 : pDest[10 * dstStride] = pSrc[10 * srcStride];
3080 132324519 : pDest[11 * dstStride] = pSrc[11 * srcStride];
3081 132324519 : pDest[12 * dstStride] = pSrc[12 * srcStride];
3082 132324519 : pDest[13 * dstStride] = pSrc[13 * srcStride];
3083 132324519 : pDest[14 * dstStride] = pSrc[14 * srcStride];
3084 132324519 : pDest[15 * dstStride] = pSrc[15 * srcStride];
3085 132324519 : pDest += 16 * dstStride;
3086 132324519 : pSrc += 16 * srcStride;
3087 : }
3088 3008585 : nIters = nIters % 16;
3089 : }
3090 5377278 : for (GPtrDiff_t i = 0; i < nIters; i++)
3091 : {
3092 2241600 : pDest[i * dstStride] = *pSrc;
3093 2241600 : pSrc += srcStride;
3094 : }
3095 3135680 : }
3096 :
3097 : template <class T, int srcStride, int dstStride>
3098 3129579 : static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3099 : const T *CPL_RESTRICT pSrc,
3100 : GPtrDiff_t nIters)
3101 : {
3102 3129579 : GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
3103 3129596 : }
3104 :
3105 : #ifdef HAVE_SSE2
3106 :
3107 : template <>
3108 352916 : void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3109 : const GByte *CPL_RESTRICT pSrc,
3110 : GPtrDiff_t nIters)
3111 : {
3112 352916 : decltype(nIters) i = 0;
3113 352916 : if (nIters > 16)
3114 : {
3115 194663 : const __m128i xmm_mask = _mm_set1_epi16(0xff);
3116 : // If we were sure that there would always be 1 trailing byte, we could
3117 : // check against nIters - 15
3118 2988090 : for (; i < nIters - 16; i += 16)
3119 : {
3120 : __m128i xmm0 =
3121 2793430 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3122 : __m128i xmm1 =
3123 5586860 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3124 : // Set higher 8bit of each int16 packed word to 0
3125 2793430 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3126 2793430 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3127 : // Pack int16 to uint8 and merge back both vector
3128 2793430 : xmm0 = _mm_packus_epi16(xmm0, xmm1);
3129 :
3130 : // Store result
3131 2793430 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3132 :
3133 2793430 : pSrc += 2 * 16;
3134 : }
3135 : }
3136 4619880 : for (; i < nIters; i++)
3137 : {
3138 4266960 : pDest[i] = *pSrc;
3139 4266960 : pSrc += 2;
3140 : }
3141 352916 : }
3142 :
3143 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3144 :
3145 : template <>
3146 191860 : void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3147 : const GByte *CPL_RESTRICT pSrc,
3148 : GPtrDiff_t nIters)
3149 : {
3150 191860 : if (nIters > 16 && CPLHaveRuntimeSSSE3())
3151 : {
3152 185760 : GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3153 : }
3154 : else
3155 : {
3156 6100 : GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
3157 : }
3158 191860 : }
3159 :
3160 : #endif
3161 :
3162 : template <>
3163 106241 : void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3164 : const GByte *CPL_RESTRICT pSrc,
3165 : GPtrDiff_t nIters)
3166 : {
3167 106241 : decltype(nIters) i = 0;
3168 106241 : if (nIters > 16)
3169 : {
3170 100948 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
3171 : // If we were sure that there would always be 3 trailing bytes, we could
3172 : // check against nIters - 15
3173 9914600 : for (; i < nIters - 16; i += 16)
3174 : {
3175 : __m128i xmm0 =
3176 9813290 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3177 : __m128i xmm1 =
3178 9813290 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3179 : __m128i xmm2 =
3180 9813290 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3181 : __m128i xmm3 =
3182 19626600 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3183 : // Set higher 24bit of each int32 packed word to 0
3184 9813290 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3185 9813290 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3186 9813290 : xmm2 = _mm_and_si128(xmm2, xmm_mask);
3187 9813290 : xmm3 = _mm_and_si128(xmm3, xmm_mask);
3188 : // Pack int32 to int16
3189 9813590 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
3190 9813510 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
3191 : // Pack int16 to uint8
3192 9813650 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
3193 :
3194 : // Store result
3195 9813650 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3196 :
3197 9813650 : pSrc += 4 * 16;
3198 : }
3199 : }
3200 1136160 : for (; i < nIters; i++)
3201 : {
3202 1029550 : pDest[i] = *pSrc;
3203 1029550 : pSrc += 4;
3204 : }
3205 106604 : }
3206 : #endif // HAVE_SSE2
3207 :
3208 : /************************************************************************/
3209 : /* GDALFastCopy() */
3210 : /************************************************************************/
3211 :
3212 : template <class T>
3213 40035200 : static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3214 : const T *CPL_RESTRICT pSrc, int nSrcStride,
3215 : GPtrDiff_t nIters)
3216 : {
3217 40035200 : constexpr int sizeofT = static_cast<int>(sizeof(T));
3218 40035200 : if (nIters == 1)
3219 : {
3220 22098510 : *pDest = *pSrc;
3221 : }
3222 17936753 : else if (nDestStride == sizeofT)
3223 : {
3224 14839784 : if (nSrcStride == sizeofT)
3225 : {
3226 14073987 : memcpy(pDest, pSrc, nIters * sizeof(T));
3227 : }
3228 765756 : else if (nSrcStride == 2 * sizeofT)
3229 : {
3230 355869 : GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3231 : }
3232 409887 : else if (nSrcStride == 3 * sizeofT)
3233 : {
3234 288432 : GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3235 : }
3236 121455 : else if (nSrcStride == 4 * sizeofT)
3237 : {
3238 110223 : GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3239 : }
3240 : else
3241 : {
3242 12966620 : while (nIters-- > 0)
3243 : {
3244 12955430 : *pDest = *pSrc;
3245 12955430 : pSrc += nSrcStride / sizeofT;
3246 12955430 : pDest++;
3247 : }
3248 : }
3249 : }
3250 3096979 : else if (nSrcStride == sizeofT)
3251 : {
3252 3091245 : if (nDestStride == 2 * sizeofT)
3253 : {
3254 158659 : GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3255 : }
3256 2932580 : else if (nDestStride == 3 * sizeofT)
3257 : {
3258 2144782 : GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3259 : }
3260 787802 : else if (nDestStride == 4 * sizeofT)
3261 : {
3262 722632 : GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3263 : }
3264 : else
3265 : {
3266 12883710 : while (nIters-- > 0)
3267 : {
3268 12818500 : *pDest = *pSrc;
3269 12818500 : pSrc++;
3270 12818500 : pDest += nDestStride / sizeofT;
3271 : }
3272 : }
3273 : }
3274 : else
3275 : {
3276 1212836 : while (nIters-- > 0)
3277 : {
3278 1207102 : *pDest = *pSrc;
3279 1207102 : pSrc += nSrcStride / sizeofT;
3280 1207102 : pDest += nDestStride / sizeofT;
3281 : }
3282 : }
3283 40034700 : }
3284 :
3285 : /************************************************************************/
3286 : /* GDALFastCopyByte() */
3287 : /************************************************************************/
3288 :
3289 326246 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
3290 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3291 : int nDstPixelStride, GPtrDiff_t nWordCount)
3292 : {
3293 326246 : GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
3294 : nWordCount);
3295 326246 : }
3296 :
3297 : /************************************************************************/
3298 : /* GDALCopyWords() */
3299 : /************************************************************************/
3300 :
3301 : /**
3302 : * Copy pixel words from buffer to buffer.
3303 : *
3304 : * @see GDALCopyWords64()
3305 : */
3306 86959800 : void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
3307 : GDALDataType eSrcType, int nSrcPixelStride,
3308 : void *CPL_RESTRICT pDstData,
3309 : GDALDataType eDstType, int nDstPixelStride,
3310 : int nWordCount)
3311 : {
3312 86959800 : GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
3313 : nDstPixelStride, nWordCount);
3314 86958200 : }
3315 :
3316 : /************************************************************************/
3317 : /* GDALCopyWords64() */
3318 : /************************************************************************/
3319 :
3320 : /**
3321 : * Copy pixel words from buffer to buffer.
3322 : *
3323 : * This function is used to copy pixel word values from one memory buffer
3324 : * to another, with support for conversion between data types, and differing
3325 : * step factors. The data type conversion is done using the following
3326 : * rules:
3327 : * <ul>
3328 : * <li>Values assigned to a lower range integer type are clipped. For
3329 : * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3330 : * less than 0 to be set to 0, and values larger than 255 to be set to 255.
3331 : * </li>
3332 : * <li>
3333 : * Assignment from floating point to integer rounds to closest integer.
3334 : * +Infinity is mapped to the largest integer. -Infinity is mapped to the
3335 : * smallest integer. NaN is mapped to 0.
3336 : * </li>
3337 : * <li>
3338 : * Assignment from non-complex to complex will result in the imaginary part
3339 : * being set to zero on output.
3340 : * </li>
3341 : * <li> Assignment from complex to
3342 : * non-complex will result in the complex portion being lost and the real
3343 : * component being preserved (<i>not magnitude!</i>).
3344 : * </li>
3345 : * </ul>
3346 : *
3347 : * No assumptions are made about the source or destination words occurring
3348 : * on word boundaries. It is assumed that all values are in native machine
3349 : * byte order.
3350 : *
3351 : * @param pSrcData Pointer to source data to be converted.
3352 : * @param eSrcType the source data type (see GDALDataType enum)
3353 : * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3354 : * in bytes
3355 : * @param pDstData Pointer to buffer where destination data should go
3356 : * @param eDstType the destination data type (see GDALDataType enum)
3357 : * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3358 : * words), in bytes
3359 : * @param nWordCount number of words to be copied
3360 : *
3361 : * @note
3362 : * When adding a new data type to GDAL, you must do the following to
3363 : * support it properly within the GDALCopyWords function:
3364 : * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3365 : * This should invoke the appropriate GDALCopyWordsFromT wrapper.
3366 : * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3367 : * This should call the appropriate GDALCopyWordsT template.
3368 : * 3. If appropriate, overload the appropriate CopyWord template in the
3369 : * above namespace. This will ensure that any conversion issues are
3370 : * handled (cases like the float -> int32 case, where the min/max
3371 : * values are subject to roundoff error).
3372 : */
3373 :
3374 108600000 : void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
3375 : GDALDataType eSrcType, int nSrcPixelStride,
3376 : void *CPL_RESTRICT pDstData,
3377 : GDALDataType eDstType, int nDstPixelStride,
3378 : GPtrDiff_t nWordCount)
3379 :
3380 : {
3381 : // On platforms where alignment matters, be careful
3382 108600000 : const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
3383 108595000 : const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
3384 108601000 : if (CPL_UNLIKELY(nSrcDataTypeSize == 0 || nDstDataTypeSize == 0))
3385 : {
3386 2 : CPLError(CE_Failure, CPLE_NotSupported,
3387 : "GDALCopyWords64(): unsupported GDT_Unknown/GDT_TypeCount "
3388 : "argument");
3389 2 : return;
3390 : }
3391 108601000 : if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
3392 58399000 : ((reinterpret_cast<uintptr_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
3393 58401200 : (reinterpret_cast<uintptr_t>(pDstData) % nDstDataTypeSize) != 0 ||
3394 58400600 : (nSrcPixelStride % nSrcDataTypeSize) != 0 ||
3395 58400100 : (nDstPixelStride % nDstDataTypeSize) != 0))
3396 : {
3397 905 : if (eSrcType == eDstType)
3398 : {
3399 34800 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3400 : {
3401 34000 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3402 : static_cast<const GByte *>(pSrcData) +
3403 34000 : nSrcPixelStride * i,
3404 : nDstDataTypeSize);
3405 : }
3406 : }
3407 : else
3408 : {
3409 210 : const auto getAlignedPtr = [](GByte *ptr, int align)
3410 : {
3411 : return ptr +
3412 210 : ((align - (reinterpret_cast<uintptr_t>(ptr) % align)) %
3413 210 : align);
3414 : };
3415 :
3416 : // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3417 : // be sure to get correctly aligned pointer.
3418 105 : constexpr size_t SIZEOF_CFLOAT64 = 2 * sizeof(double);
3419 : GByte abySrcBuffer[2 * SIZEOF_CFLOAT64];
3420 : GByte abyDstBuffer[2 * SIZEOF_CFLOAT64];
3421 : GByte *pabySrcBuffer =
3422 105 : getAlignedPtr(abySrcBuffer, nSrcDataTypeSize);
3423 : GByte *pabyDstBuffer =
3424 105 : getAlignedPtr(abyDstBuffer, nDstDataTypeSize);
3425 3360 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3426 : {
3427 3255 : memcpy(pabySrcBuffer,
3428 : static_cast<const GByte *>(pSrcData) +
3429 3255 : nSrcPixelStride * i,
3430 : nSrcDataTypeSize);
3431 3255 : GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer,
3432 : eDstType, 0, 1);
3433 3255 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3434 : pabyDstBuffer, nDstDataTypeSize);
3435 : }
3436 : }
3437 905 : return;
3438 : }
3439 :
3440 : // Deal with the case where we're replicating a single word into the
3441 : // provided buffer
3442 108600000 : if (nSrcPixelStride == 0 && nWordCount > 1)
3443 : {
3444 912795 : GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
3445 : nDstPixelStride, nWordCount);
3446 912772 : return;
3447 : }
3448 :
3449 107688000 : if (eSrcType == eDstType)
3450 : {
3451 53953300 : if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
3452 : {
3453 19047800 : GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride,
3454 : static_cast<const GByte *>(pSrcData), nSrcPixelStride,
3455 : nWordCount);
3456 19046200 : return;
3457 : }
3458 :
3459 34905500 : if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 &&
3460 20668400 : (nDstPixelStride % 2) == 0)
3461 : {
3462 20668400 : GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
3463 : static_cast<const short *>(pSrcData), nSrcPixelStride,
3464 : nWordCount);
3465 20668200 : return;
3466 : }
3467 :
3468 14237100 : if (nWordCount == 1)
3469 : {
3470 : #if defined(CSA_BUILD) || defined(__COVERITY__)
3471 : // Avoid false positives...
3472 : memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3473 : #else
3474 13852500 : if (nSrcDataTypeSize == 2)
3475 0 : memcpy(pDstData, pSrcData, 2);
3476 13852500 : else if (nSrcDataTypeSize == 4)
3477 13809100 : memcpy(pDstData, pSrcData, 4);
3478 43451 : else if (nSrcDataTypeSize == 8)
3479 26931 : memcpy(pDstData, pSrcData, 8);
3480 : else /* if( eSrcType == GDT_CFloat64 ) */
3481 16520 : memcpy(pDstData, pSrcData, 16);
3482 : #endif
3483 13852500 : return;
3484 : }
3485 :
3486 : // Let memcpy() handle the case where we're copying a packed buffer
3487 : // of pixels.
3488 384627 : if (nSrcPixelStride == nDstPixelStride)
3489 : {
3490 256838 : if (nSrcPixelStride == nSrcDataTypeSize)
3491 : {
3492 256767 : memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
3493 256767 : return;
3494 : }
3495 : }
3496 : }
3497 :
3498 : // Handle the more general case -- deals with conversion of data types
3499 : // directly.
3500 53862200 : switch (eSrcType)
3501 : {
3502 15506500 : case GDT_Byte:
3503 15506500 : GDALCopyWordsFromT<unsigned char>(
3504 : static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
3505 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3506 15509500 : break;
3507 1254 : case GDT_Int8:
3508 1254 : GDALCopyWordsFromT<signed char>(
3509 : static_cast<const signed char *>(pSrcData), nSrcPixelStride,
3510 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3511 1254 : break;
3512 53350 : case GDT_UInt16:
3513 53350 : GDALCopyWordsFromT<unsigned short>(
3514 : static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
3515 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3516 53350 : break;
3517 4350250 : case GDT_Int16:
3518 4350250 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3519 : nSrcPixelStride, false, pDstData,
3520 : eDstType, nDstPixelStride, nWordCount);
3521 4350270 : break;
3522 7094 : case GDT_UInt32:
3523 7094 : GDALCopyWordsFromT<unsigned int>(
3524 : static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
3525 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3526 7094 : break;
3527 12255000 : case GDT_Int32:
3528 12255000 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3529 : nSrcPixelStride, false, pDstData, eDstType,
3530 : nDstPixelStride, nWordCount);
3531 12255000 : break;
3532 1663 : case GDT_UInt64:
3533 1663 : GDALCopyWordsFromT<std::uint64_t>(
3534 : static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
3535 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3536 1663 : break;
3537 10994 : case GDT_Int64:
3538 10994 : GDALCopyWordsFromT<std::int64_t>(
3539 : static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
3540 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3541 10994 : break;
3542 1169 : case GDT_Float16:
3543 1169 : GDALCopyWordsFromT<GFloat16>(
3544 : static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, false,
3545 : pDstData, eDstType, nDstPixelStride, nWordCount);
3546 1169 : break;
3547 395108 : case GDT_Float32:
3548 395108 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3549 : nSrcPixelStride, false, pDstData,
3550 : eDstType, nDstPixelStride, nWordCount);
3551 395104 : break;
3552 20634800 : case GDT_Float64:
3553 20634800 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3554 : nSrcPixelStride, false, pDstData,
3555 : eDstType, nDstPixelStride, nWordCount);
3556 20634800 : break;
3557 478156 : case GDT_CInt16:
3558 478156 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3559 : nSrcPixelStride, true, pDstData, eDstType,
3560 : nDstPixelStride, nWordCount);
3561 478156 : break;
3562 571 : case GDT_CInt32:
3563 571 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3564 : nSrcPixelStride, true, pDstData, eDstType,
3565 : nDstPixelStride, nWordCount);
3566 571 : break;
3567 436 : case GDT_CFloat16:
3568 436 : GDALCopyWordsFromT<GFloat16>(
3569 : static_cast<const GFloat16 *>(pSrcData), nSrcPixelStride, true,
3570 : pDstData, eDstType, nDstPixelStride, nWordCount);
3571 436 : break;
3572 1577 : case GDT_CFloat32:
3573 1577 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3574 : nSrcPixelStride, true, pDstData, eDstType,
3575 : nDstPixelStride, nWordCount);
3576 1577 : break;
3577 174239 : case GDT_CFloat64:
3578 174239 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3579 : nSrcPixelStride, true, pDstData,
3580 : eDstType, nDstPixelStride, nWordCount);
3581 174239 : break;
3582 0 : case GDT_Unknown:
3583 : case GDT_TypeCount:
3584 0 : CPLAssert(false);
3585 : }
3586 : }
3587 :
3588 : /************************************************************************/
3589 : /* GDALCopyBits() */
3590 : /************************************************************************/
3591 :
3592 : /**
3593 : * Bitwise word copying.
3594 : *
3595 : * A function for moving sets of partial bytes around. Loosely
3596 : * speaking this is a bitwise analog to GDALCopyWords().
3597 : *
3598 : * It copies nStepCount "words" where each word is nBitCount bits long.
3599 : * The nSrcStep and nDstStep are the number of bits from the start of one
3600 : * word to the next (same as nBitCount if they are packed). The nSrcOffset
3601 : * and nDstOffset are the offset into the source and destination buffers
3602 : * to start at, also measured in bits.
3603 : *
3604 : * All bit offsets are assumed to start from the high order bit in a byte
3605 : * (i.e. most significant bit first). Currently this function is not very
3606 : * optimized, but it may be improved for some common cases in the future
3607 : * as needed.
3608 : *
3609 : * @param pabySrcData the source data buffer.
3610 : * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3611 : * first word to copy.
3612 : * @param nSrcStep the offset in bits from the start of one source word to the
3613 : * start of the next.
3614 : * @param pabyDstData the destination data buffer.
3615 : * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3616 : * first word to copy over.
3617 : * @param nDstStep the offset in bits from the start of one destination word to the
3618 : * start of the next.
3619 : * @param nBitCount the number of bits in a word to be copied.
3620 : * @param nStepCount the number of words to copy.
3621 : */
3622 :
3623 0 : void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3624 : GByte *pabyDstData, int nDstOffset, int nDstStep,
3625 : int nBitCount, int nStepCount)
3626 :
3627 : {
3628 0 : VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
3629 :
3630 0 : for (int iStep = 0; iStep < nStepCount; iStep++)
3631 : {
3632 0 : for (int iBit = 0; iBit < nBitCount; iBit++)
3633 : {
3634 0 : if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
3635 0 : pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
3636 : else
3637 0 : pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
3638 :
3639 0 : nSrcOffset++;
3640 0 : nDstOffset++;
3641 : }
3642 :
3643 0 : nSrcOffset += (nSrcStep - nBitCount);
3644 0 : nDstOffset += (nDstStep - nBitCount);
3645 : }
3646 : }
3647 :
3648 : /************************************************************************/
3649 : /* GDALGetBestOverviewLevel() */
3650 : /* */
3651 : /* Returns the best overview level to satisfy the query or -1 if none */
3652 : /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */
3653 : /* returning a valid overview level */
3654 : /************************************************************************/
3655 :
3656 0 : int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
3657 : int &nXSize, int &nYSize, int nBufXSize,
3658 : int nBufYSize)
3659 : {
3660 0 : return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
3661 0 : nBufXSize, nBufYSize, nullptr);
3662 : }
3663 :
3664 523804 : int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
3665 : int &nYOff, int &nXSize, int &nYSize,
3666 : int nBufXSize, int nBufYSize,
3667 : GDALRasterIOExtraArg *psExtraArg)
3668 : {
3669 523804 : if (psExtraArg != nullptr && psExtraArg->nVersion > 1 &&
3670 523804 : psExtraArg->bUseOnlyThisScale)
3671 109 : return -1;
3672 : /* -------------------------------------------------------------------- */
3673 : /* Compute the desired downsampling factor. It is */
3674 : /* based on the least reduced axis, and represents the number */
3675 : /* of source pixels to one destination pixel. */
3676 : /* -------------------------------------------------------------------- */
3677 523695 : const double dfDesiredDownsamplingFactor =
3678 523695 : ((nXSize / static_cast<double>(nBufXSize)) <
3679 361357 : (nYSize / static_cast<double>(nBufYSize)) ||
3680 : nBufYSize == 1)
3681 752080 : ? nXSize / static_cast<double>(nBufXSize)
3682 132972 : : nYSize / static_cast<double>(nBufYSize);
3683 :
3684 : /* -------------------------------------------------------------------- */
3685 : /* Find the overview level that largest downsampling factor (most */
3686 : /* downsampled) that is still less than (or only a little more) */
3687 : /* downsampled than the request. */
3688 : /* -------------------------------------------------------------------- */
3689 523695 : const int nOverviewCount = poBand->GetOverviewCount();
3690 523695 : GDALRasterBand *poBestOverview = nullptr;
3691 523695 : double dfBestDownsamplingFactor = 0;
3692 523695 : int nBestOverviewLevel = -1;
3693 :
3694 : const char *pszOversampligThreshold =
3695 523695 : CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
3696 :
3697 : // Note: keep this logic for overview selection in sync between
3698 : // gdalwarp_lib.cpp and rasterio.cpp
3699 : // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
3700 : const double dfOversamplingThreshold =
3701 1047380 : pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
3702 523686 : : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
3703 1047370 : ? 1.0
3704 523695 : : 1.2;
3705 526390 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3706 : {
3707 5547 : GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
3708 11094 : if (poOverview == nullptr ||
3709 11093 : poOverview->GetXSize() > poBand->GetXSize() ||
3710 5546 : poOverview->GetYSize() > poBand->GetYSize())
3711 : {
3712 1 : continue;
3713 : }
3714 :
3715 : // Compute downsampling factor of this overview
3716 : const double dfDownsamplingFactor = std::min(
3717 5546 : poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
3718 11092 : poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
3719 :
3720 : // Is it nearly the requested factor and better (lower) than
3721 : // the current best factor?
3722 : // Use an epsilon because of numerical instability.
3723 5546 : constexpr double EPSILON = 1e-1;
3724 5654 : if (dfDownsamplingFactor >=
3725 5546 : dfDesiredDownsamplingFactor * dfOversamplingThreshold +
3726 5438 : EPSILON ||
3727 : dfDownsamplingFactor <= dfBestDownsamplingFactor)
3728 : {
3729 108 : continue;
3730 : }
3731 :
3732 : // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3733 5438 : const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
3734 :
3735 5438 : if (pszResampling != nullptr &&
3736 71 : STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
3737 16 : continue;
3738 :
3739 : // OK, this is our new best overview.
3740 5422 : poBestOverview = poOverview;
3741 5422 : nBestOverviewLevel = iOverview;
3742 5422 : dfBestDownsamplingFactor = dfDownsamplingFactor;
3743 :
3744 5422 : if (std::abs(dfDesiredDownsamplingFactor - dfDownsamplingFactor) <
3745 : EPSILON)
3746 : {
3747 2852 : break;
3748 : }
3749 : }
3750 :
3751 : /* -------------------------------------------------------------------- */
3752 : /* If we didn't find an overview that helps us, just return */
3753 : /* indicating failure and the full resolution image will be used. */
3754 : /* -------------------------------------------------------------------- */
3755 523695 : if (nBestOverviewLevel < 0)
3756 520771 : return -1;
3757 :
3758 : /* -------------------------------------------------------------------- */
3759 : /* Recompute the source window in terms of the selected */
3760 : /* overview. */
3761 : /* -------------------------------------------------------------------- */
3762 : const double dfXFactor =
3763 2924 : poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
3764 : const double dfYFactor =
3765 2924 : poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
3766 2924 : CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
3767 : poBestOverview->GetYSize());
3768 :
3769 8772 : const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
3770 2924 : static_cast<int>(nXOff / dfXFactor + 0.5));
3771 8772 : const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
3772 2924 : static_cast<int>(nYOff / dfYFactor + 0.5));
3773 2924 : int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
3774 2924 : int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
3775 2924 : if (nOXOff + nOXSize > poBestOverview->GetXSize())
3776 0 : nOXSize = poBestOverview->GetXSize() - nOXOff;
3777 2924 : if (nOYOff + nOYSize > poBestOverview->GetYSize())
3778 2 : nOYSize = poBestOverview->GetYSize() - nOYOff;
3779 :
3780 2924 : if (psExtraArg)
3781 : {
3782 2924 : if (psExtraArg->bFloatingPointWindowValidity)
3783 : {
3784 50 : psExtraArg->dfXOff /= dfXFactor;
3785 50 : psExtraArg->dfXSize /= dfXFactor;
3786 50 : psExtraArg->dfYOff /= dfYFactor;
3787 50 : psExtraArg->dfYSize /= dfYFactor;
3788 : }
3789 2874 : else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
3790 : {
3791 16 : psExtraArg->bFloatingPointWindowValidity = true;
3792 16 : psExtraArg->dfXOff = nXOff / dfXFactor;
3793 16 : psExtraArg->dfXSize = nXSize / dfXFactor;
3794 16 : psExtraArg->dfYOff = nYOff / dfYFactor;
3795 16 : psExtraArg->dfYSize = nYSize / dfYFactor;
3796 : }
3797 : }
3798 :
3799 2924 : nXOff = nOXOff;
3800 2924 : nYOff = nOYOff;
3801 2924 : nXSize = nOXSize;
3802 2924 : nYSize = nOYSize;
3803 :
3804 2924 : return nBestOverviewLevel;
3805 : }
3806 :
3807 : /************************************************************************/
3808 : /* OverviewRasterIO() */
3809 : /* */
3810 : /* Special work function to utilize available overviews to */
3811 : /* more efficiently satisfy downsampled requests. It will */
3812 : /* return CE_Failure if there are no appropriate overviews */
3813 : /* available but it doesn't emit any error messages. */
3814 : /************************************************************************/
3815 :
3816 : //! @cond Doxygen_Suppress
3817 2 : CPLErr GDALRasterBand::OverviewRasterIO(
3818 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3819 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3820 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
3821 :
3822 : {
3823 : GDALRasterIOExtraArg sExtraArg;
3824 2 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3825 :
3826 2 : const int nOverview = GDALBandGetBestOverviewLevel2(
3827 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg);
3828 2 : if (nOverview < 0)
3829 1 : return CE_Failure;
3830 :
3831 : /* -------------------------------------------------------------------- */
3832 : /* Recast the call in terms of the new raster layer. */
3833 : /* -------------------------------------------------------------------- */
3834 1 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
3835 1 : if (poOverviewBand == nullptr)
3836 0 : return CE_Failure;
3837 :
3838 1 : return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
3839 : pData, nBufXSize, nBufYSize, eBufType,
3840 1 : nPixelSpace, nLineSpace, &sExtraArg);
3841 : }
3842 :
3843 : /************************************************************************/
3844 : /* TryOverviewRasterIO() */
3845 : /************************************************************************/
3846 :
3847 362416 : CPLErr GDALRasterBand::TryOverviewRasterIO(
3848 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3849 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3850 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
3851 : int *pbTried)
3852 : {
3853 362416 : int nXOffMod = nXOff;
3854 362416 : int nYOffMod = nYOff;
3855 362416 : int nXSizeMod = nXSize;
3856 362416 : int nYSizeMod = nYSize;
3857 : GDALRasterIOExtraArg sExtraArg;
3858 :
3859 362416 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3860 :
3861 362416 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3862 : this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize,
3863 : &sExtraArg);
3864 :
3865 362416 : if (iOvrLevel >= 0)
3866 : {
3867 50 : GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel);
3868 50 : if (poOverviewBand)
3869 : {
3870 50 : *pbTried = TRUE;
3871 50 : return poOverviewBand->RasterIO(
3872 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData,
3873 : nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
3874 50 : &sExtraArg);
3875 : }
3876 : }
3877 :
3878 362366 : *pbTried = FALSE;
3879 362366 : return CE_None;
3880 : }
3881 :
3882 : /************************************************************************/
3883 : /* TryOverviewRasterIO() */
3884 : /************************************************************************/
3885 :
3886 158477 : CPLErr GDALDataset::TryOverviewRasterIO(
3887 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3888 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3889 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
3890 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg,
3891 : int *pbTried)
3892 : {
3893 158477 : int nXOffMod = nXOff;
3894 158477 : int nYOffMod = nYOff;
3895 158477 : int nXSizeMod = nXSize;
3896 158477 : int nYSizeMod = nYSize;
3897 : GDALRasterIOExtraArg sExtraArg;
3898 158477 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3899 :
3900 316954 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3901 158477 : papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
3902 : nBufYSize, &sExtraArg);
3903 :
3904 158518 : if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr &&
3905 41 : papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr)
3906 : {
3907 41 : *pbTried = TRUE;
3908 41 : return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO(
3909 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize,
3910 : nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace,
3911 41 : nLineSpace, nBandSpace, &sExtraArg);
3912 : }
3913 : else
3914 : {
3915 158436 : *pbTried = FALSE;
3916 158436 : return CE_None;
3917 : }
3918 : }
3919 :
3920 : /************************************************************************/
3921 : /* GetBestOverviewLevel() */
3922 : /* */
3923 : /* Returns the best overview level to satisfy the query or -1 if none */
3924 : /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */
3925 : /* overview level */
3926 : /************************************************************************/
3927 :
3928 4 : static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
3929 : int &nYOff, int &nXSize, int &nYSize,
3930 : int nBufXSize, int nBufYSize,
3931 : int nBandCount,
3932 : const int *panBandMap,
3933 : GDALRasterIOExtraArg *psExtraArg)
3934 : {
3935 4 : int nOverviewCount = 0;
3936 4 : GDALRasterBand *poFirstBand = nullptr;
3937 :
3938 : /* -------------------------------------------------------------------- */
3939 : /* Check that all bands have the same number of overviews and */
3940 : /* that they have all the same size and block dimensions */
3941 : /* -------------------------------------------------------------------- */
3942 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
3943 : {
3944 8 : GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
3945 8 : if (poBand == nullptr)
3946 0 : return -1;
3947 8 : if (iBand == 0)
3948 : {
3949 4 : poFirstBand = poBand;
3950 4 : nOverviewCount = poBand->GetOverviewCount();
3951 : }
3952 4 : else if (nOverviewCount != poBand->GetOverviewCount())
3953 : {
3954 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3955 : "mismatched overview count, use std method.");
3956 0 : return -1;
3957 : }
3958 : else
3959 : {
3960 4 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3961 : {
3962 0 : GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
3963 : GDALRasterBand *poOvrFirstBand =
3964 0 : poFirstBand->GetOverview(iOverview);
3965 0 : if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
3966 0 : continue;
3967 :
3968 0 : if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3969 0 : poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
3970 : {
3971 0 : CPLDebug("GDAL",
3972 : "GDALDataset::GetBestOverviewLevel() ... "
3973 : "mismatched overview sizes, use std method.");
3974 0 : return -1;
3975 : }
3976 0 : int nBlockXSizeFirst = 0;
3977 0 : int nBlockYSizeFirst = 0;
3978 0 : poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
3979 : &nBlockYSizeFirst);
3980 :
3981 0 : int nBlockXSizeCurrent = 0;
3982 0 : int nBlockYSizeCurrent = 0;
3983 0 : poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
3984 : &nBlockYSizeCurrent);
3985 :
3986 0 : if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3987 0 : nBlockYSizeFirst != nBlockYSizeCurrent)
3988 : {
3989 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3990 : "mismatched block sizes, use std method.");
3991 0 : return -1;
3992 : }
3993 : }
3994 : }
3995 : }
3996 4 : if (poFirstBand == nullptr)
3997 0 : return -1;
3998 :
3999 4 : return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
4000 : nYSize, nBufXSize, nBufYSize,
4001 4 : psExtraArg);
4002 : }
4003 :
4004 : /************************************************************************/
4005 : /* BlockBasedRasterIO() */
4006 : /* */
4007 : /* This convenience function implements a dataset level */
4008 : /* RasterIO() interface based on calling down to fetch blocks, */
4009 : /* much like the GDALRasterBand::IRasterIO(), but it handles */
4010 : /* all bands at once, so that a format driver that handles a */
4011 : /* request for different bands of the same block efficiently */
4012 : /* (i.e. without re-reading interleaved data) will be served efficiently. */
4013 : /* */
4014 : /* This method is intended to be called by an overridden */
4015 : /* IRasterIO() method in the driver specific GDALDataset */
4016 : /* derived class. */
4017 : /* */
4018 : /* Default internal implementation of RasterIO() ... utilizes */
4019 : /* the Block access methods to satisfy the request. This would */
4020 : /* normally only be overridden by formats with overviews. */
4021 : /* */
4022 : /* To keep things relatively simple, this method does not */
4023 : /* currently take advantage of some special cases addressed in */
4024 : /* GDALRasterBand::IRasterIO(), so it is likely best to only */
4025 : /* call it when you know it will help. That is in cases where */
4026 : /* data is at 1:1 to the buffer, and you know the driver is */
4027 : /* implementing interleaved IO efficiently on a block by block */
4028 : /* basis. Overviews will be used when possible. */
4029 : /************************************************************************/
4030 :
4031 63910 : CPLErr GDALDataset::BlockBasedRasterIO(
4032 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
4033 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
4034 : int nBandCount, const int *panBandMap, GSpacing nPixelSpace,
4035 : GSpacing nLineSpace, GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg)
4036 :
4037 : {
4038 63910 : CPLAssert(nullptr != pData);
4039 :
4040 63910 : GByte **papabySrcBlock = nullptr;
4041 63910 : GDALRasterBlock *poBlock = nullptr;
4042 63910 : GDALRasterBlock **papoBlocks = nullptr;
4043 63910 : int nLBlockX = -1;
4044 63910 : int nLBlockY = -1;
4045 : int iBufYOff;
4046 : int iBufXOff;
4047 63910 : int nBlockXSize = 1;
4048 63910 : int nBlockYSize = 1;
4049 63910 : CPLErr eErr = CE_None;
4050 63910 : GDALDataType eDataType = GDT_Byte;
4051 :
4052 63910 : const bool bUseIntegerRequestCoords =
4053 63940 : (!psExtraArg->bFloatingPointWindowValidity ||
4054 30 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
4055 28 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
4056 :
4057 : /* -------------------------------------------------------------------- */
4058 : /* Ensure that all bands share a common block size and data type. */
4059 : /* -------------------------------------------------------------------- */
4060 303005 : for (int iBand = 0; iBand < nBandCount; iBand++)
4061 : {
4062 239093 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4063 :
4064 239096 : if (iBand == 0)
4065 : {
4066 63911 : poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4067 63910 : eDataType = poBand->GetRasterDataType();
4068 : }
4069 : else
4070 : {
4071 175185 : int nThisBlockXSize = 0;
4072 175185 : int nThisBlockYSize = 0;
4073 175185 : poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
4074 175185 : if (nThisBlockXSize != nBlockXSize ||
4075 175183 : nThisBlockYSize != nBlockYSize)
4076 : {
4077 2 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4078 : "mismatched block sizes, use std method.");
4079 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4080 : pData, nBufXSize, nBufYSize, eBufType,
4081 : nBandCount, panBandMap, nPixelSpace,
4082 0 : nLineSpace, nBandSpace, psExtraArg);
4083 : }
4084 :
4085 175183 : if (eDataType != poBand->GetRasterDataType() &&
4086 0 : (nXSize != nBufXSize || nYSize != nBufYSize))
4087 : {
4088 2 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
4089 : "mismatched band data types, use std method.");
4090 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
4091 : pData, nBufXSize, nBufYSize, eBufType,
4092 : nBandCount, panBandMap, nPixelSpace,
4093 0 : nLineSpace, nBandSpace, psExtraArg);
4094 : }
4095 : }
4096 : }
4097 :
4098 : /* ==================================================================== */
4099 : /* In this special case at full resolution we step through in */
4100 : /* blocks, turning the request over to the per-band */
4101 : /* IRasterIO(), but ensuring that all bands of one block are */
4102 : /* called before proceeding to the next. */
4103 : /* ==================================================================== */
4104 :
4105 63912 : if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
4106 : {
4107 : GDALRasterIOExtraArg sDummyExtraArg;
4108 63908 : INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4109 :
4110 63908 : int nChunkYSize = 0;
4111 63908 : int nChunkXSize = 0;
4112 :
4113 210044 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
4114 : {
4115 147151 : const int nChunkYOff = iBufYOff + nYOff;
4116 147151 : nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4117 147151 : if (nChunkYOff + nChunkYSize > nYOff + nYSize)
4118 59100 : nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4119 :
4120 816576 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
4121 : {
4122 670439 : const int nChunkXOff = iBufXOff + nXOff;
4123 670439 : nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4124 670439 : if (nChunkXOff + nChunkXSize > nXOff + nXSize)
4125 70147 : nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4126 :
4127 670439 : GByte *pabyChunkData =
4128 670439 : static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
4129 670439 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4130 :
4131 3265400 : for (int iBand = 0; iBand < nBandCount; iBand++)
4132 : {
4133 2595980 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4134 :
4135 5191960 : eErr = poBand->IRasterIO(
4136 : eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
4137 : nChunkYSize,
4138 2595980 : pabyChunkData +
4139 2595980 : static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4140 : nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
4141 2595980 : nLineSpace, &sDummyExtraArg);
4142 2595980 : if (eErr != CE_None)
4143 1014 : return eErr;
4144 : }
4145 : }
4146 :
4147 164927 : if (psExtraArg->pfnProgress != nullptr &&
4148 18790 : !psExtraArg->pfnProgress(
4149 164927 : 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
4150 : nBufYSize,
4151 : "", psExtraArg->pProgressData))
4152 : {
4153 1 : return CE_Failure;
4154 : }
4155 : }
4156 :
4157 62893 : return CE_None;
4158 : }
4159 :
4160 : /* Below code is not compatible with that case. It would need a complete */
4161 : /* separate code like done in GDALRasterBand::IRasterIO. */
4162 4 : if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4163 : {
4164 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4165 : nBufXSize, nBufYSize, eBufType, nBandCount,
4166 : panBandMap, nPixelSpace, nLineSpace,
4167 0 : nBandSpace, psExtraArg);
4168 : }
4169 :
4170 : /* We could have a smarter implementation, but that will do for now */
4171 4 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4172 0 : (nBufXSize != nXSize || nBufYSize != nYSize))
4173 : {
4174 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4175 : nBufXSize, nBufYSize, eBufType, nBandCount,
4176 : panBandMap, nPixelSpace, nLineSpace,
4177 0 : nBandSpace, psExtraArg);
4178 : }
4179 :
4180 : /* ==================================================================== */
4181 : /* Loop reading required source blocks to satisfy output */
4182 : /* request. This is the most general implementation. */
4183 : /* ==================================================================== */
4184 :
4185 4 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
4186 :
4187 : papabySrcBlock =
4188 4 : static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
4189 : papoBlocks =
4190 4 : static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));
4191 :
4192 : /* -------------------------------------------------------------------- */
4193 : /* Select an overview level if appropriate. */
4194 : /* -------------------------------------------------------------------- */
4195 :
4196 : GDALRasterIOExtraArg sExtraArg;
4197 4 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4198 4 : const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
4199 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
4200 : panBandMap, &sExtraArg);
4201 4 : if (nOverviewLevel >= 0)
4202 : {
4203 2 : GetRasterBand(panBandMap[0])
4204 2 : ->GetOverview(nOverviewLevel)
4205 2 : ->GetBlockSize(&nBlockXSize, &nBlockYSize);
4206 : }
4207 :
4208 4 : double dfXOff = nXOff;
4209 4 : double dfYOff = nYOff;
4210 4 : double dfXSize = nXSize;
4211 4 : double dfYSize = nYSize;
4212 4 : if (sExtraArg.bFloatingPointWindowValidity)
4213 : {
4214 2 : dfXOff = sExtraArg.dfXOff;
4215 2 : dfYOff = sExtraArg.dfYOff;
4216 2 : dfXSize = sExtraArg.dfXSize;
4217 2 : dfYSize = sExtraArg.dfYSize;
4218 : }
4219 :
4220 : /* -------------------------------------------------------------------- */
4221 : /* Compute stepping increment. */
4222 : /* -------------------------------------------------------------------- */
4223 4 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
4224 4 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
4225 :
4226 4 : constexpr double EPS = 1e-10;
4227 : /* -------------------------------------------------------------------- */
4228 : /* Loop over buffer computing source locations. */
4229 : /* -------------------------------------------------------------------- */
4230 36 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
4231 : {
4232 : GPtrDiff_t iSrcOffset;
4233 :
4234 : // Add small epsilon to avoid some numeric precision issues.
4235 32 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4236 32 : const int iSrcY = static_cast<int>(std::min(
4237 32 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
4238 :
4239 32 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
4240 : static_cast<GPtrDiff_t>(nLineSpace);
4241 :
4242 302 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
4243 : {
4244 270 : const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4245 270 : const int iSrcX = static_cast<int>(std::min(
4246 270 : std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1)));
4247 :
4248 : // FIXME: this code likely doesn't work if the dirty block gets
4249 : // flushed to disk before being completely written. In the meantime,
4250 : // bJustInitialize should probably be set to FALSE even if it is not
4251 : // ideal performance wise, and for lossy compression
4252 :
4253 : /* --------------------------------------------------------------------
4254 : */
4255 : /* Ensure we have the appropriate block loaded. */
4256 : /* --------------------------------------------------------------------
4257 : */
4258 270 : if (iSrcX < nLBlockX * nBlockXSize ||
4259 270 : iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
4260 266 : iSrcY < nLBlockY * nBlockYSize ||
4261 266 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
4262 : {
4263 4 : nLBlockX = iSrcX / nBlockXSize;
4264 4 : nLBlockY = iSrcY / nBlockYSize;
4265 :
4266 4 : const bool bJustInitialize =
4267 0 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
4268 0 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
4269 4 : nXOff <= nLBlockX * nBlockXSize &&
4270 0 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4271 : /*bool bMemZeroBuffer = FALSE;
4272 : if( eRWFlag == GF_Write && !bJustInitialize &&
4273 : nXOff <= nLBlockX * nBlockXSize &&
4274 : nYOff <= nLBlockY * nBlockYSize &&
4275 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4276 : (nXOff + nXSize == GetRasterXSize() &&
4277 : (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4278 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4279 : (nYOff + nYSize == GetRasterYSize() &&
4280 : (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4281 : {
4282 : bJustInitialize = TRUE;
4283 : bMemZeroBuffer = TRUE;
4284 : }*/
4285 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4286 : {
4287 8 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4288 8 : if (nOverviewLevel >= 0)
4289 2 : poBand = poBand->GetOverview(nOverviewLevel);
4290 16 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
4291 8 : bJustInitialize);
4292 8 : if (poBlock == nullptr)
4293 : {
4294 0 : eErr = CE_Failure;
4295 0 : goto CleanupAndReturn;
4296 : }
4297 :
4298 8 : if (eRWFlag == GF_Write)
4299 0 : poBlock->MarkDirty();
4300 :
4301 8 : if (papoBlocks[iBand] != nullptr)
4302 0 : papoBlocks[iBand]->DropLock();
4303 :
4304 8 : papoBlocks[iBand] = poBlock;
4305 :
4306 8 : papabySrcBlock[iBand] =
4307 8 : static_cast<GByte *>(poBlock->GetDataRef());
4308 : /*if( bMemZeroBuffer )
4309 : {
4310 : memset(papabySrcBlock[iBand], 0,
4311 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
4312 : * nBlockYSize);
4313 : }*/
4314 : }
4315 : }
4316 :
4317 : /* --------------------------------------------------------------------
4318 : */
4319 : /* Copy over this pixel of data. */
4320 : /* --------------------------------------------------------------------
4321 : */
4322 270 : iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
4323 270 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
4324 270 : (static_cast<GPtrDiff_t>(iSrcY) -
4325 270 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
4326 270 : nBlockXSize) *
4327 270 : nBandDataSize;
4328 :
4329 980 : for (int iBand = 0; iBand < nBandCount; iBand++)
4330 : {
4331 710 : GByte *pabySrcBlock = papabySrcBlock[iBand];
4332 710 : GPtrDiff_t iBandBufOffset =
4333 710 : iBufOffset + static_cast<GPtrDiff_t>(iBand) *
4334 : static_cast<GPtrDiff_t>(nBandSpace);
4335 :
4336 710 : if (eDataType == eBufType)
4337 : {
4338 710 : if (eRWFlag == GF_Read)
4339 710 : memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
4340 710 : pabySrcBlock + iSrcOffset, nBandDataSize);
4341 : else
4342 0 : memcpy(pabySrcBlock + iSrcOffset,
4343 : static_cast<const GByte *>(pData) +
4344 0 : iBandBufOffset,
4345 : nBandDataSize);
4346 : }
4347 : else
4348 : {
4349 : /* type to type conversion ... ouch, this is expensive way
4350 : of handling single words */
4351 :
4352 0 : if (eRWFlag == GF_Read)
4353 0 : GDALCopyWords64(pabySrcBlock + iSrcOffset, eDataType, 0,
4354 : static_cast<GByte *>(pData) +
4355 0 : iBandBufOffset,
4356 : eBufType, 0, 1);
4357 : else
4358 0 : GDALCopyWords64(static_cast<const GByte *>(pData) +
4359 0 : iBandBufOffset,
4360 0 : eBufType, 0, pabySrcBlock + iSrcOffset,
4361 : eDataType, 0, 1);
4362 : }
4363 : }
4364 :
4365 270 : iBufOffset += static_cast<int>(nPixelSpace);
4366 : }
4367 : }
4368 :
4369 : /* -------------------------------------------------------------------- */
4370 : /* CleanupAndReturn. */
4371 : /* -------------------------------------------------------------------- */
4372 4 : CleanupAndReturn:
4373 4 : CPLFree(papabySrcBlock);
4374 4 : if (papoBlocks != nullptr)
4375 : {
4376 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4377 : {
4378 8 : if (papoBlocks[iBand] != nullptr)
4379 8 : papoBlocks[iBand]->DropLock();
4380 : }
4381 4 : CPLFree(papoBlocks);
4382 : }
4383 :
4384 4 : return eErr;
4385 : }
4386 :
4387 : //! @endcond
4388 :
4389 : /************************************************************************/
4390 : /* GDALCopyWholeRasterGetSwathSize() */
4391 : /************************************************************************/
4392 :
4393 3135 : static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
4394 : GDALRasterBand *poDstPrototypeBand,
4395 : int nBandCount,
4396 : int bDstIsCompressed,
4397 : int bInterleave, int *pnSwathCols,
4398 : int *pnSwathLines)
4399 : {
4400 3135 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4401 3135 : int nSrcBlockXSize = 0;
4402 3135 : int nSrcBlockYSize = 0;
4403 3135 : int nBlockXSize = 0;
4404 3135 : int nBlockYSize = 0;
4405 :
4406 3135 : int nXSize = poSrcPrototypeBand->GetXSize();
4407 3135 : int nYSize = poSrcPrototypeBand->GetYSize();
4408 :
4409 3135 : poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
4410 3135 : poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4411 :
4412 3135 : const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4413 3135 : const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4414 :
4415 3135 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4416 3135 : if (bInterleave)
4417 548 : nPixelSize *= nBandCount;
4418 :
4419 : // aim for one row of blocks. Do not settle for less.
4420 3135 : int nSwathCols = nXSize;
4421 3135 : int nSwathLines = nMaxBlockYSize;
4422 :
4423 : const char *pszSrcCompression =
4424 3135 : poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4425 3135 : if (pszSrcCompression == nullptr)
4426 : {
4427 3109 : auto poSrcDS = poSrcPrototypeBand->GetDataset();
4428 3109 : if (poSrcDS)
4429 : pszSrcCompression =
4430 3103 : poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4431 : }
4432 :
4433 : /* -------------------------------------------------------------------- */
4434 : /* What will our swath size be? */
4435 : /* -------------------------------------------------------------------- */
4436 : // When writing interleaved data in a compressed format, we want to be sure
4437 : // that each block will only be written once, so the swath size must not be
4438 : // greater than the block cache.
4439 3135 : const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4440 : int nTargetSwathSize;
4441 3135 : if (pszSwathSize != nullptr)
4442 0 : nTargetSwathSize = static_cast<int>(
4443 0 : std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4444 : else
4445 : {
4446 : // As a default, take one 1/4 of the cache size.
4447 3135 : nTargetSwathSize = static_cast<int>(
4448 3135 : std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4449 :
4450 : // but if the minimum idal swath buf size is less, then go for it to
4451 : // avoid unnecessarily abusing RAM usage.
4452 : // but try to use 10 MB at least.
4453 3135 : GIntBig nIdealSwathBufSize =
4454 3135 : static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4455 3135 : int nMinTargetSwathSize = 10 * 1000 * 1000;
4456 :
4457 3135 : if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
4458 3135 : GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
4459 : {
4460 2 : nMinTargetSwathSize = nTargetSwathSize;
4461 : }
4462 :
4463 3135 : if (nIdealSwathBufSize < nTargetSwathSize &&
4464 3125 : nIdealSwathBufSize < nMinTargetSwathSize)
4465 : {
4466 3122 : nIdealSwathBufSize = nMinTargetSwathSize;
4467 : }
4468 :
4469 3135 : if (pszSrcCompression != nullptr &&
4470 180 : EQUAL(pszSrcCompression, "JPEG2000") &&
4471 0 : (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4472 0 : (nSrcBlockYSize % nBlockYSize) == 0)))
4473 : {
4474 2 : nIdealSwathBufSize =
4475 4 : std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4476 2 : nSrcBlockYSize * nPixelSize);
4477 : }
4478 3135 : if (nTargetSwathSize > nIdealSwathBufSize)
4479 3121 : nTargetSwathSize = static_cast<int>(
4480 3121 : std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4481 : }
4482 :
4483 3135 : if (nTargetSwathSize < 1000000)
4484 8 : nTargetSwathSize = 1000000;
4485 :
4486 : /* But let's check that */
4487 3352 : if (bDstIsCompressed && bInterleave &&
4488 217 : nTargetSwathSize > GDALGetCacheMax64())
4489 : {
4490 0 : CPLError(CE_Warning, CPLE_AppDefined,
4491 : "When translating into a compressed interleave format, "
4492 : "the block cache size (" CPL_FRMT_GIB ") "
4493 : "should be at least the size of the swath (%d) "
4494 : "(GDAL_SWATH_SIZE config. option)",
4495 : GDALGetCacheMax64(), nTargetSwathSize);
4496 : }
4497 :
4498 : #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4499 : #define ROUND_TO(x, y) (((x) / (y)) * (y))
4500 :
4501 : // if both input and output datasets are tiled, that the tile dimensions
4502 : // are "compatible", try to stick to a swath dimension that is a multiple
4503 : // of input and output block dimensions.
4504 3135 : if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4505 37 : IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4506 37 : IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4507 37 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4508 37 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4509 : {
4510 37 : if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
4511 37 : nPixelSize <=
4512 37 : static_cast<GIntBig>(nTargetSwathSize))
4513 : {
4514 37 : nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4515 37 : nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4516 37 : if (nSwathCols == 0)
4517 0 : nSwathCols = nMaxBlockXSize;
4518 37 : if (nSwathCols > nXSize)
4519 35 : nSwathCols = nXSize;
4520 37 : nSwathLines = nMaxBlockYSize;
4521 :
4522 37 : if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4523 37 : static_cast<GIntBig>(nTargetSwathSize))
4524 : {
4525 0 : nSwathCols = nXSize;
4526 0 : nSwathLines = nBlockYSize;
4527 : }
4528 : }
4529 : }
4530 :
4531 3135 : const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4532 3135 : const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4533 3135 : if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
4534 : {
4535 1 : nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4536 1 : if (nSwathLines == 0)
4537 1 : nSwathLines = 1;
4538 :
4539 1 : CPLDebug(
4540 : "GDAL",
4541 : "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4542 : "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4543 : "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4544 1 : nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
4545 : }
4546 : // If we are processing single scans, try to handle several at once.
4547 : // If we are handling swaths already, only grow the swath if a row
4548 : // of blocks is substantially less than our target buffer size.
4549 3134 : else if (nSwathLines == 1 ||
4550 2599 : nMemoryPerCol * nSwathLines <
4551 2599 : static_cast<GIntBig>(nTargetSwathSize) / 10)
4552 : {
4553 3106 : nSwathLines = std::min(
4554 : nYSize,
4555 3106 : std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4556 :
4557 : /* If possible try to align to source and target block height */
4558 3106 : if ((nSwathLines % nMaxBlockYSize) != 0 &&
4559 251 : nSwathLines > nMaxBlockYSize &&
4560 251 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4561 222 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4562 202 : nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4563 : }
4564 :
4565 3135 : if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4566 0 : (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4567 0 : IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
4568 : {
4569 : // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4570 2 : if (nSwathLines < nSrcBlockYSize)
4571 : {
4572 0 : nSwathLines = nSrcBlockYSize;
4573 :
4574 : // Number of pixels that can be read/write simultaneously.
4575 0 : nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4576 0 : nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4577 0 : if (nSwathCols == 0)
4578 0 : nSwathCols = nSrcBlockXSize;
4579 0 : if (nSwathCols > nXSize)
4580 0 : nSwathCols = nXSize;
4581 :
4582 0 : CPLDebug(
4583 : "GDAL",
4584 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4585 : "too high block, "
4586 : "use partial width at one time");
4587 : }
4588 2 : else if ((nSwathLines % nSrcBlockYSize) != 0)
4589 : {
4590 : /* Round on a multiple of nSrcBlockYSize */
4591 0 : nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4592 0 : CPLDebug(
4593 : "GDAL",
4594 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4595 : "round nSwathLines to block height : %d",
4596 : nSwathLines);
4597 : }
4598 : }
4599 3133 : else if (bDstIsCompressed)
4600 : {
4601 407 : if (nSwathLines < nBlockYSize)
4602 : {
4603 145 : nSwathLines = nBlockYSize;
4604 :
4605 : // Number of pixels that can be read/write simultaneously.
4606 145 : nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4607 145 : nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4608 145 : if (nSwathCols == 0)
4609 0 : nSwathCols = nBlockXSize;
4610 145 : if (nSwathCols > nXSize)
4611 145 : nSwathCols = nXSize;
4612 :
4613 145 : CPLDebug(
4614 : "GDAL",
4615 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4616 : "too high block, "
4617 : "use partial width at one time");
4618 : }
4619 262 : else if ((nSwathLines % nBlockYSize) != 0)
4620 : {
4621 : // Round on a multiple of nBlockYSize.
4622 9 : nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4623 9 : CPLDebug(
4624 : "GDAL",
4625 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4626 : "round nSwathLines to block height : %d",
4627 : nSwathLines);
4628 : }
4629 : }
4630 :
4631 3135 : *pnSwathCols = nSwathCols;
4632 3135 : *pnSwathLines = nSwathLines;
4633 3135 : }
4634 :
4635 : /************************************************************************/
4636 : /* GDALDatasetCopyWholeRaster() */
4637 : /************************************************************************/
4638 :
4639 : /**
4640 : * \brief Copy all dataset raster data.
4641 : *
4642 : * This function copies the complete raster contents of one dataset to
4643 : * another similarly configured dataset. The source and destination
4644 : * dataset must have the same number of bands, and the same width
4645 : * and height. The bands do not have to have the same data type.
4646 : *
4647 : * This function is primarily intended to support implementation of
4648 : * driver specific CreateCopy() functions. It implements efficient copying,
4649 : * in particular "chunking" the copy in substantial blocks and, if appropriate,
4650 : * performing the transfer in a pixel interleaved fashion.
4651 : *
4652 : * Currently the only papszOptions value supported are :
4653 : * <ul>
4654 : * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
4655 : * write access pattern (this does not modify the layout of the destination
4656 : * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
4657 : * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
4658 : * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
4659 : * (GDAL >= 2.2)</li>
4660 : * </ul>
4661 : * More options may be supported in the future.
4662 : *
4663 : * @param hSrcDS the source dataset
4664 : * @param hDstDS the destination dataset
4665 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4666 : * @param pfnProgress progress reporting function.
4667 : * @param pProgressData callback data for progress function.
4668 : *
4669 : * @return CE_None on success, or CE_Failure on failure.
4670 : */
4671 :
4672 3108 : CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
4673 : GDALDatasetH hDstDS,
4674 : CSLConstList papszOptions,
4675 : GDALProgressFunc pfnProgress,
4676 : void *pProgressData)
4677 :
4678 : {
4679 3108 : VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4680 3108 : VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4681 :
4682 3108 : GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4683 3108 : GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4684 :
4685 3108 : if (pfnProgress == nullptr)
4686 0 : pfnProgress = GDALDummyProgress;
4687 :
4688 : /* -------------------------------------------------------------------- */
4689 : /* Confirm the datasets match in size and band counts. */
4690 : /* -------------------------------------------------------------------- */
4691 3108 : const int nXSize = poDstDS->GetRasterXSize();
4692 3108 : const int nYSize = poDstDS->GetRasterYSize();
4693 3108 : const int nBandCount = poDstDS->GetRasterCount();
4694 :
4695 3108 : if (poSrcDS->GetRasterXSize() != nXSize ||
4696 6216 : poSrcDS->GetRasterYSize() != nYSize ||
4697 3108 : poSrcDS->GetRasterCount() != nBandCount)
4698 : {
4699 0 : CPLError(CE_Failure, CPLE_AppDefined,
4700 : "Input and output dataset sizes or band counts do not\n"
4701 : "match in GDALDatasetCopyWholeRaster()");
4702 0 : return CE_Failure;
4703 : }
4704 :
4705 : /* -------------------------------------------------------------------- */
4706 : /* Report preliminary (0) progress. */
4707 : /* -------------------------------------------------------------------- */
4708 3108 : if (!pfnProgress(0.0, nullptr, pProgressData))
4709 : {
4710 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4711 : "User terminated CreateCopy()");
4712 1 : return CE_Failure;
4713 : }
4714 :
4715 : /* -------------------------------------------------------------------- */
4716 : /* Get our prototype band, and assume the others are similarly */
4717 : /* configured. */
4718 : /* -------------------------------------------------------------------- */
4719 3107 : if (nBandCount == 0)
4720 0 : return CE_None;
4721 :
4722 3107 : GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4723 3107 : GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4724 3107 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4725 :
4726 : /* -------------------------------------------------------------------- */
4727 : /* Do we want to try and do the operation in a pixel */
4728 : /* interleaved fashion? */
4729 : /* -------------------------------------------------------------------- */
4730 3107 : bool bInterleave = false;
4731 : const char *pszInterleave =
4732 3107 : poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4733 3107 : if (pszInterleave != nullptr &&
4734 2756 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4735 184 : bInterleave = true;
4736 :
4737 3107 : pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4738 3107 : if (pszInterleave != nullptr &&
4739 2652 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4740 495 : bInterleave = true;
4741 :
4742 3107 : pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
4743 3107 : if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
4744 5 : bInterleave = true;
4745 3102 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
4746 13 : bInterleave = false;
4747 : // attributes is specific to the TileDB driver
4748 3089 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
4749 4 : bInterleave = true;
4750 3085 : else if (pszInterleave != nullptr)
4751 : {
4752 0 : CPLError(CE_Warning, CPLE_NotSupported,
4753 : "Unsupported value for option INTERLEAVE");
4754 : }
4755 :
4756 : // If the destination is compressed, we must try to write blocks just once,
4757 : // to save disk space (GTiff case for example), and to avoid data loss
4758 : // (JPEG compression for example).
4759 3107 : bool bDstIsCompressed = false;
4760 : const char *pszDstCompressed =
4761 3107 : CSLFetchNameValue(papszOptions, "COMPRESSED");
4762 3107 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4763 382 : bDstIsCompressed = true;
4764 :
4765 : /* -------------------------------------------------------------------- */
4766 : /* What will our swath size be? */
4767 : /* -------------------------------------------------------------------- */
4768 :
4769 3107 : int nSwathCols = 0;
4770 3107 : int nSwathLines = 0;
4771 3107 : GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
4772 : nBandCount, bDstIsCompressed, bInterleave,
4773 : &nSwathCols, &nSwathLines);
4774 :
4775 3107 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4776 3107 : if (bInterleave)
4777 548 : nPixelSize *= nBandCount;
4778 :
4779 3107 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
4780 3107 : if (pSwathBuf == nullptr)
4781 : {
4782 0 : return CE_Failure;
4783 : }
4784 :
4785 3107 : CPLDebug("GDAL",
4786 : "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4787 : nSwathCols, nSwathLines, static_cast<int>(bInterleave));
4788 :
4789 : // Advise the source raster that we are going to read it completely
4790 : // Note: this might already have been done by GDALCreateCopy() in the
4791 : // likely case this function is indirectly called by it
4792 3107 : poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
4793 3107 : nullptr, nullptr);
4794 :
4795 : /* ==================================================================== */
4796 : /* Band oriented (uninterleaved) case. */
4797 : /* ==================================================================== */
4798 3107 : CPLErr eErr = CE_None;
4799 : const bool bCheckHoles =
4800 3107 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
4801 :
4802 3107 : if (!bInterleave)
4803 : {
4804 : GDALRasterIOExtraArg sExtraArg;
4805 2559 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4806 2559 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4807 :
4808 7677 : const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
4809 2559 : DIV_ROUND_UP(nYSize, nSwathLines) *
4810 2559 : DIV_ROUND_UP(nXSize, nSwathCols);
4811 2559 : GIntBig nBlocksDone = 0;
4812 :
4813 7501 : for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
4814 : {
4815 4942 : int nBand = iBand + 1;
4816 :
4817 10199 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4818 : {
4819 5257 : int nThisLines = nSwathLines;
4820 :
4821 5257 : if (iY + nThisLines > nYSize)
4822 375 : nThisLines = nYSize - iY;
4823 :
4824 10514 : for (int iX = 0; iX < nXSize && eErr == CE_None;
4825 5257 : iX += nSwathCols)
4826 : {
4827 5257 : int nThisCols = nSwathCols;
4828 :
4829 5257 : if (iX + nThisCols > nXSize)
4830 0 : nThisCols = nXSize - iX;
4831 :
4832 5257 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4833 5257 : if (bCheckHoles)
4834 : {
4835 : nStatus = poSrcDS->GetRasterBand(nBand)
4836 3632 : ->GetDataCoverageStatus(
4837 : iX, iY, nThisCols, nThisLines,
4838 : GDAL_DATA_COVERAGE_STATUS_DATA);
4839 : }
4840 5257 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4841 : {
4842 5253 : sExtraArg.pfnProgress = GDALScaledProgress;
4843 10506 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4844 5253 : nBlocksDone / static_cast<double>(nTotalBlocks),
4845 5253 : (nBlocksDone + 0.5) /
4846 5253 : static_cast<double>(nTotalBlocks),
4847 : pfnProgress, pProgressData);
4848 5253 : if (sExtraArg.pProgressData == nullptr)
4849 1603 : sExtraArg.pfnProgress = nullptr;
4850 :
4851 5253 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4852 : nThisLines, pSwathBuf,
4853 : nThisCols, nThisLines, eDT, 1,
4854 : &nBand, 0, 0, 0, &sExtraArg);
4855 :
4856 5253 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4857 :
4858 5253 : if (eErr == CE_None)
4859 5246 : eErr = poDstDS->RasterIO(
4860 : GF_Write, iX, iY, nThisCols, nThisLines,
4861 : pSwathBuf, nThisCols, nThisLines, eDT, 1,
4862 : &nBand, 0, 0, 0, nullptr);
4863 : }
4864 :
4865 5257 : nBlocksDone++;
4866 10472 : if (eErr == CE_None &&
4867 5215 : !pfnProgress(nBlocksDone /
4868 5215 : static_cast<double>(nTotalBlocks),
4869 : nullptr, pProgressData))
4870 : {
4871 2 : eErr = CE_Failure;
4872 2 : CPLError(CE_Failure, CPLE_UserInterrupt,
4873 : "User terminated CreateCopy()");
4874 : }
4875 : }
4876 : }
4877 : }
4878 : }
4879 :
4880 : /* ==================================================================== */
4881 : /* Pixel interleaved case. */
4882 : /* ==================================================================== */
4883 : else /* if( bInterleave ) */
4884 : {
4885 : GDALRasterIOExtraArg sExtraArg;
4886 548 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4887 548 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4888 :
4889 548 : const GIntBig nTotalBlocks =
4890 548 : static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4891 548 : DIV_ROUND_UP(nXSize, nSwathCols);
4892 548 : GIntBig nBlocksDone = 0;
4893 :
4894 1311 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4895 : {
4896 763 : int nThisLines = nSwathLines;
4897 :
4898 763 : if (iY + nThisLines > nYSize)
4899 190 : nThisLines = nYSize - iY;
4900 :
4901 1531 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
4902 : {
4903 768 : int nThisCols = nSwathCols;
4904 :
4905 768 : if (iX + nThisCols > nXSize)
4906 3 : nThisCols = nXSize - iX;
4907 :
4908 768 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4909 768 : if (bCheckHoles)
4910 : {
4911 539 : nStatus = 0;
4912 592 : for (int iBand = 0; iBand < nBandCount; iBand++)
4913 : {
4914 573 : nStatus |= poSrcDS->GetRasterBand(iBand + 1)
4915 573 : ->GetDataCoverageStatus(
4916 : iX, iY, nThisCols, nThisLines,
4917 : GDAL_DATA_COVERAGE_STATUS_DATA);
4918 573 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4919 520 : break;
4920 : }
4921 : }
4922 768 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4923 : {
4924 749 : sExtraArg.pfnProgress = GDALScaledProgress;
4925 1498 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4926 749 : nBlocksDone / static_cast<double>(nTotalBlocks),
4927 749 : (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4928 : pfnProgress, pProgressData);
4929 749 : if (sExtraArg.pProgressData == nullptr)
4930 342 : sExtraArg.pfnProgress = nullptr;
4931 :
4932 749 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4933 : nThisLines, pSwathBuf, nThisCols,
4934 : nThisLines, eDT, nBandCount,
4935 : nullptr, 0, 0, 0, &sExtraArg);
4936 :
4937 749 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4938 :
4939 749 : if (eErr == CE_None)
4940 748 : eErr = poDstDS->RasterIO(
4941 : GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
4942 : nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
4943 : 0, 0, nullptr);
4944 : }
4945 :
4946 768 : nBlocksDone++;
4947 1532 : if (eErr == CE_None &&
4948 764 : !pfnProgress(nBlocksDone /
4949 764 : static_cast<double>(nTotalBlocks),
4950 : nullptr, pProgressData))
4951 : {
4952 1 : eErr = CE_Failure;
4953 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4954 : "User terminated CreateCopy()");
4955 : }
4956 : }
4957 : }
4958 : }
4959 :
4960 : /* -------------------------------------------------------------------- */
4961 : /* Cleanup */
4962 : /* -------------------------------------------------------------------- */
4963 3107 : CPLFree(pSwathBuf);
4964 :
4965 3107 : return eErr;
4966 : }
4967 :
4968 : /************************************************************************/
4969 : /* GDALRasterBandCopyWholeRaster() */
4970 : /************************************************************************/
4971 :
4972 : /**
4973 : * \brief Copy a whole raster band
4974 : *
4975 : * This function copies the complete raster contents of one band to
4976 : * another similarly configured band. The source and destination
4977 : * bands must have the same width and height. The bands do not have
4978 : * to have the same data type.
4979 : *
4980 : * It implements efficient copying, in particular "chunking" the copy in
4981 : * substantial blocks.
4982 : *
4983 : * Currently the only papszOptions value supported are :
4984 : * <ul>
4985 : * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4986 : * achieve best compression.</li>
4987 : * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4988 : * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li>
4989 : * </ul>
4990 : *
4991 : * @param hSrcBand the source band
4992 : * @param hDstBand the destination band
4993 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4994 : * @param pfnProgress progress reporting function.
4995 : * @param pProgressData callback data for progress function.
4996 : *
4997 : * @return CE_None on success, or CE_Failure on failure.
4998 : */
4999 :
5000 28 : CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
5001 : GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
5002 : const char *const *const papszOptions, GDALProgressFunc pfnProgress,
5003 : void *pProgressData)
5004 :
5005 : {
5006 28 : VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5007 28 : VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
5008 :
5009 28 : GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
5010 28 : GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
5011 28 : CPLErr eErr = CE_None;
5012 :
5013 28 : if (pfnProgress == nullptr)
5014 2 : pfnProgress = GDALDummyProgress;
5015 :
5016 : /* -------------------------------------------------------------------- */
5017 : /* Confirm the datasets match in size and band counts. */
5018 : /* -------------------------------------------------------------------- */
5019 28 : int nXSize = poSrcBand->GetXSize();
5020 28 : int nYSize = poSrcBand->GetYSize();
5021 :
5022 28 : if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
5023 : {
5024 0 : CPLError(CE_Failure, CPLE_AppDefined,
5025 : "Input and output band sizes do not\n"
5026 : "match in GDALRasterBandCopyWholeRaster()");
5027 0 : return CE_Failure;
5028 : }
5029 :
5030 : /* -------------------------------------------------------------------- */
5031 : /* Report preliminary (0) progress. */
5032 : /* -------------------------------------------------------------------- */
5033 28 : if (!pfnProgress(0.0, nullptr, pProgressData))
5034 : {
5035 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5036 : "User terminated CreateCopy()");
5037 0 : return CE_Failure;
5038 : }
5039 :
5040 28 : GDALDataType eDT = poDstBand->GetRasterDataType();
5041 :
5042 : // If the destination is compressed, we must try to write blocks just once,
5043 : // to save disk space (GTiff case for example), and to avoid data loss
5044 : // (JPEG compression for example).
5045 28 : bool bDstIsCompressed = false;
5046 : const char *pszDstCompressed =
5047 28 : CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
5048 28 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
5049 25 : bDstIsCompressed = true;
5050 :
5051 : /* -------------------------------------------------------------------- */
5052 : /* What will our swath size be? */
5053 : /* -------------------------------------------------------------------- */
5054 :
5055 28 : int nSwathCols = 0;
5056 28 : int nSwathLines = 0;
5057 28 : GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
5058 : FALSE, &nSwathCols, &nSwathLines);
5059 :
5060 28 : const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
5061 :
5062 28 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
5063 28 : if (pSwathBuf == nullptr)
5064 : {
5065 0 : return CE_Failure;
5066 : }
5067 :
5068 28 : CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
5069 : nSwathCols, nSwathLines);
5070 :
5071 : const bool bCheckHoles =
5072 28 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
5073 :
5074 : // Advise the source raster that we are going to read it completely
5075 28 : poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
5076 :
5077 : /* ==================================================================== */
5078 : /* Band oriented (uninterleaved) case. */
5079 : /* ==================================================================== */
5080 :
5081 70 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
5082 : {
5083 42 : int nThisLines = nSwathLines;
5084 :
5085 42 : if (iY + nThisLines > nYSize)
5086 8 : nThisLines = nYSize - iY;
5087 :
5088 84 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
5089 : {
5090 42 : int nThisCols = nSwathCols;
5091 :
5092 42 : if (iX + nThisCols > nXSize)
5093 0 : nThisCols = nXSize - iX;
5094 :
5095 42 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5096 42 : if (bCheckHoles)
5097 : {
5098 0 : nStatus = poSrcBand->GetDataCoverageStatus(
5099 : iX, iY, nThisCols, nThisLines,
5100 : GDAL_DATA_COVERAGE_STATUS_DATA);
5101 : }
5102 42 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5103 : {
5104 42 : eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5105 : nThisLines, pSwathBuf, nThisCols,
5106 : nThisLines, eDT, 0, 0, nullptr);
5107 :
5108 42 : if (eErr == CE_None)
5109 42 : eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5110 : nThisLines, pSwathBuf, nThisCols,
5111 : nThisLines, eDT, 0, 0, nullptr);
5112 : }
5113 :
5114 84 : if (eErr == CE_None &&
5115 42 : !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
5116 : nullptr, pProgressData))
5117 : {
5118 0 : eErr = CE_Failure;
5119 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5120 : "User terminated CreateCopy()");
5121 : }
5122 : }
5123 : }
5124 :
5125 : /* -------------------------------------------------------------------- */
5126 : /* Cleanup */
5127 : /* -------------------------------------------------------------------- */
5128 28 : CPLFree(pSwathBuf);
5129 :
5130 28 : return eErr;
5131 : }
5132 :
5133 : /************************************************************************/
5134 : /* GDALCopyRasterIOExtraArg () */
5135 : /************************************************************************/
5136 :
5137 525398 : void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5138 : GDALRasterIOExtraArg *psSrcArg)
5139 : {
5140 525398 : INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5141 525398 : if (psSrcArg)
5142 : {
5143 525398 : psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5144 525398 : psDestArg->pfnProgress = psSrcArg->pfnProgress;
5145 525398 : psDestArg->pProgressData = psSrcArg->pProgressData;
5146 525398 : psDestArg->bFloatingPointWindowValidity =
5147 525398 : psSrcArg->bFloatingPointWindowValidity;
5148 525398 : if (psSrcArg->bFloatingPointWindowValidity)
5149 : {
5150 204009 : psDestArg->dfXOff = psSrcArg->dfXOff;
5151 204009 : psDestArg->dfYOff = psSrcArg->dfYOff;
5152 204009 : psDestArg->dfXSize = psSrcArg->dfXSize;
5153 204009 : psDestArg->dfYSize = psSrcArg->dfYSize;
5154 : }
5155 525398 : if (psSrcArg->nVersion >= 2)
5156 : {
5157 525398 : psDestArg->bUseOnlyThisScale = psSrcArg->bUseOnlyThisScale;
5158 : }
5159 : }
5160 525398 : }
5161 :
5162 : /************************************************************************/
5163 : /* HasOnlyNoData() */
5164 : /************************************************************************/
5165 :
5166 24858130 : template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
5167 : {
5168 24858130 : return value == noDataValue;
5169 : }
5170 :
5171 0 : template <> bool IsEqualToNoData<GFloat16>(GFloat16 value, GFloat16 noDataValue)
5172 : {
5173 : using std::isnan;
5174 0 : return isnan(noDataValue) ? isnan(value) : value == noDataValue;
5175 : }
5176 :
5177 560433 : template <> bool IsEqualToNoData<float>(float value, float noDataValue)
5178 : {
5179 560433 : return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
5180 : }
5181 :
5182 13481800 : template <> bool IsEqualToNoData<double>(double value, double noDataValue)
5183 : {
5184 13481800 : return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
5185 : }
5186 :
// Returns true if every sample of a pixel-interleaved region equals
// noDataValue (as decided by IsEqualToNoData()).
//
// pBuffer     first sample of the region.
// noDataValue value to compare each sample with.
// nWidth      region width in pixels.
// nHeight     region height in lines.
// nLineStride distance between the starts of two consecutive lines, in
//             pixels.
// nComponents number of interleaved samples per pixel.
//
// An empty region (zero width, height or component count) trivially contains
// only nodata, so true is returned for it.
template <class T>
static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
                           size_t nHeight, size_t nLineStride,
                           size_t nComponents)
{
    // Nothing to inspect. This must be tested first: the probes below use
    // nWidth - 1 and nHeight - 1, which would wrap around for an empty
    // region and read far outside the buffer.
    if (nWidth == 0 || nHeight == 0 || nComponents == 0)
        return true;

    // Fast test: check the 4 corners and the middle pixel.
    const size_t nLastCol = nWidth - 1;
    const size_t nLastLine = nHeight - 1;
    const size_t anProbeOffsets[] = {
        0,                                                            // top-left
        nLastCol * nComponents,                                       // top-right
        (nLastLine / 2 * nLineStride + nLastCol / 2) * nComponents,   // middle
        nLastLine * nLineStride * nComponents,                        // bottom-left
        (nLastLine * nLineStride + nLastCol) * nComponents};          // bottom-right
    for (size_t iBand = 0; iBand < nComponents; iBand++)
    {
        for (const size_t nOffset : anProbeOffsets)
        {
            if (!IsEqualToNoData(pBuffer[nOffset + iBand], noDataValue))
                return false;
        }
    }

    // Test all pixels.
    const size_t nSamplesPerLine = nWidth * nComponents;
    const size_t nLinePitch = nLineStride * nComponents;
    for (size_t iY = 0; iY < nHeight; iY++)
    {
        const T *pBufferLine = pBuffer + iY * nLinePitch;
        for (size_t iX = 0; iX < nSamplesPerLine; iX++)
        {
            if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
            {
                return false;
            }
        }
    }
    return true;
}
5230 :
5231 : /************************************************************************/
5232 : /* GDALBufferHasOnlyNoData() */
5233 : /************************************************************************/
5234 :
5235 42598 : bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
5236 : size_t nWidth, size_t nHeight, size_t nLineStride,
5237 : size_t nComponents, int nBitsPerSample,
5238 : GDALBufferSampleFormat nSampleFormat)
5239 : {
5240 : // In the case where the nodata is 0, we can compare several bytes at
5241 : // once. Select the largest natural integer type for the architecture.
5242 : #if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5243 : // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5244 : typedef std::uint64_t WordType;
5245 : #else
5246 : typedef std::uint32_t WordType;
5247 : #endif
5248 42598 : if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
5249 : // Do not use this optimized code path for floating point numbers,
5250 : // as it can't detect negative zero.
5251 : nSampleFormat != GSF_FLOATING_POINT)
5252 : {
5253 26715 : const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
5254 26715 : const size_t nSize =
5255 26715 : (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
5256 26715 : size_t i = 0;
5257 : const size_t nInitialIters =
5258 53430 : std::min(sizeof(WordType) -
5259 26715 : static_cast<size_t>(
5260 : reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5261 : sizeof(WordType)),
5262 26715 : nSize);
5263 220399 : for (; i < nInitialIters; i++)
5264 : {
5265 198058 : if (pabyBuffer[i])
5266 4374 : return false;
5267 : }
5268 16516400 : for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
5269 : {
5270 16501200 : if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
5271 7197 : return false;
5272 : }
5273 52533 : for (; i < nSize; i++)
5274 : {
5275 37394 : if (pabyBuffer[i])
5276 5 : return false;
5277 : }
5278 15139 : return true;
5279 : }
5280 :
5281 15883 : if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
5282 : {
5283 22270 : return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
5284 11135 : HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
5285 11135 : static_cast<uint8_t>(dfNoDataValue), nWidth,
5286 11135 : nHeight, nLineStride, nComponents);
5287 : }
5288 4748 : if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
5289 : {
5290 : // Use unsigned implementation by converting the nodatavalue to
5291 : // unsigned
5292 63 : return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
5293 31 : HasOnlyNoDataT(
5294 : static_cast<const uint8_t *>(pBuffer),
5295 31 : static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
5296 32 : nWidth, nHeight, nLineStride, nComponents);
5297 : }
5298 4716 : if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
5299 : {
5300 21 : return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
5301 10 : HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
5302 10 : static_cast<uint16_t>(dfNoDataValue), nWidth,
5303 11 : nHeight, nLineStride, nComponents);
5304 : }
5305 4705 : if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
5306 : {
5307 : // Use unsigned implementation by converting the nodatavalue to
5308 : // unsigned
5309 97 : return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
5310 48 : HasOnlyNoDataT(
5311 : static_cast<const uint16_t *>(pBuffer),
5312 48 : static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
5313 49 : nWidth, nHeight, nLineStride, nComponents);
5314 : }
5315 4656 : if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
5316 : {
5317 73 : return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
5318 36 : HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
5319 : static_cast<uint32_t>(dfNoDataValue), nWidth,
5320 37 : nHeight, nLineStride, nComponents);
5321 : }
5322 4619 : if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
5323 : {
5324 : // Use unsigned implementation by converting the nodatavalue to
5325 : // unsigned
5326 23 : return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
5327 11 : HasOnlyNoDataT(
5328 : static_cast<const uint32_t *>(pBuffer),
5329 11 : static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
5330 12 : nWidth, nHeight, nLineStride, nComponents);
5331 : }
5332 4607 : if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
5333 : {
5334 56 : return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
5335 28 : HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
5336 : static_cast<uint64_t>(dfNoDataValue), nWidth,
5337 28 : nHeight, nLineStride, nComponents);
5338 : }
5339 4579 : if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
5340 : {
5341 : // Use unsigned implementation by converting the nodatavalue to
5342 : // unsigned
5343 0 : return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
5344 0 : HasOnlyNoDataT(
5345 : static_cast<const uint64_t *>(pBuffer),
5346 0 : static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
5347 0 : nWidth, nHeight, nLineStride, nComponents);
5348 : }
5349 4579 : if (nBitsPerSample == 16 && nSampleFormat == GSF_FLOATING_POINT)
5350 : {
5351 0 : return (std::isnan(dfNoDataValue) ||
5352 0 : GDALIsValueInRange<GFloat16>(dfNoDataValue)) &&
5353 0 : HasOnlyNoDataT(static_cast<const GFloat16 *>(pBuffer),
5354 : static_cast<GFloat16>(dfNoDataValue), nWidth,
5355 0 : nHeight, nLineStride, nComponents);
5356 : }
5357 4579 : if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
5358 : {
5359 750 : return (std::isnan(dfNoDataValue) ||
5360 1499 : GDALIsValueInRange<float>(dfNoDataValue)) &&
5361 749 : HasOnlyNoDataT(static_cast<const float *>(pBuffer),
5362 : static_cast<float>(dfNoDataValue), nWidth,
5363 750 : nHeight, nLineStride, nComponents);
5364 : }
5365 3829 : if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
5366 : {
5367 3829 : return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
5368 : dfNoDataValue, nWidth, nHeight, nLineStride,
5369 3829 : nComponents);
5370 : }
5371 0 : return false;
5372 : }
5373 :
5374 : #ifdef HAVE_SSE2
5375 :
5376 : /************************************************************************/
5377 : /* GDALDeinterleave3Byte() */
5378 : /************************************************************************/
5379 :
// Split nIters pixels of a 3-component pixel-interleaved byte buffer
// (pabySrc) into three per-component buffers (pabyDest0/1/2).
//
// This is the variant compiled when HAVE_SSE2 is defined. It forwards to
// GDALDeinterleave3Byte_SSSE3() when available (always in
// USE_NEON_OPTIMIZATIONS builds, otherwise when CPLHaveRuntimeSSSE3()
// returns true), and falls back to the code below otherwise.
// With GCC the function is built with tree vectorization disabled.
#if defined(__GNUC__) && !defined(__clang__)
__attribute__((optimize("no-tree-vectorize")))
#endif
static void
GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
                      GByte *CPL_RESTRICT pabyDest0,
                      GByte *CPL_RESTRICT pabyDest1,
                      GByte *CPL_RESTRICT pabyDest2, size_t nIters)
#ifdef USE_NEON_OPTIMIZATIONS
{
    return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
                                       nIters);
}
#else
{
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
    if (CPLHaveRuntimeSSSE3())
    {
        return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
                                           pabyDest2, nIters);
    }
#endif

    size_t i = 0;
    // The word-based loop is only used when the source and all three
    // destinations are aligned on sizeof(unsigned int).
    if (((reinterpret_cast<uintptr_t>(pabySrc) |
          reinterpret_cast<uintptr_t>(pabyDest0) |
          reinterpret_cast<uintptr_t>(pabyDest1) |
          reinterpret_cast<uintptr_t>(pabyDest2)) %
         sizeof(unsigned int)) == 0)
    {
        // Slightly better than GCC autovectorizer
        // Each iteration handles 4 pixels: 12 source bytes are read as three
        // words and recombined with shifts and masks. The shuffles treat the
        // lowest byte of a word as the first byte in memory, i.e. they
        // assume a little-endian layout.
        for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
        {
            unsigned int word0 =
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
            unsigned int word1 =
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
            unsigned int word2 =
                *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
            // Component 0: source bytes 0, 3, 6 and 9 of the group.
            reinterpret_cast<unsigned int *>(pabyDest0)[j] =
                (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
                ((word2 >> 8) << 24);
            // Component 1: source bytes 1, 4, 7 and 10 of the group.
            reinterpret_cast<unsigned int *>(pabyDest1)[j] =
                ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
                (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
            // Component 2: source bytes 2, 5, 8 and 11, stored one at a time.
            pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
            pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
            pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
            pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
        }
    }
    // Byte-by-byte loop: handles the remaining pixels, or all of them when
    // the buffers were not suitably aligned.
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
    for (; i < nIters; ++i)
    {
        pabyDest0[i] = pabySrc[3 * i + 0];
        pabyDest1[i] = pabySrc[3 * i + 1];
        pabyDest2[i] = pabySrc[3 * i + 2];
    }
}
#endif
5442 :
5443 : /************************************************************************/
5444 : /* GDALDeinterleave4Byte() */
5445 : /************************************************************************/
5446 :
5447 : #if !defined(__GNUC__) || defined(__clang__)
5448 :
5449 : /************************************************************************/
5450 : /* deinterleave() */
5451 : /************************************************************************/
5452 :
// Extract one byte from each of the 16 32-bit lanes held in the four input
// registers and return them packed as 16 bytes.
//
// SHIFT: when true, every lane of the four inputs is first shifted right by
//        8 bits. The inputs are taken by non-const reference and are
//        modified in place, so the shifts accumulate over successive calls.
// MASK:  when true, only the lowest byte of each lane is kept before
//        packing; when false the lanes are packed as they are.
template <bool SHIFT, bool MASK>
inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
                            __m128i &xmm2_ori, __m128i &xmm3_ori)
{
    // Shift each int32 packed word right by one byte (in place)
    if (SHIFT)
    {
        xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
        xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
        xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
        xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
    }
    __m128i xmm0;
    __m128i xmm1;
    __m128i xmm2;
    __m128i xmm3;
    // Set higher 24bit of each int32 packed word to 0
    if (MASK)
    {
        const __m128i xmm_mask = _mm_set1_epi32(0xff);
        xmm0 = _mm_and_si128(xmm0_ori, xmm_mask);
        xmm1 = _mm_and_si128(xmm1_ori, xmm_mask);
        xmm2 = _mm_and_si128(xmm2_ori, xmm_mask);
        xmm3 = _mm_and_si128(xmm3_ori, xmm_mask);
    }
    else
    {
        xmm0 = xmm0_ori;
        xmm1 = xmm1_ori;
        xmm2 = xmm2_ori;
        xmm3 = xmm3_ori;
    }
    // Pack int32 to int16
    xmm0 = _mm_packs_epi32(xmm0, xmm1);
    xmm2 = _mm_packs_epi32(xmm2, xmm3);
    // Pack int16 to uint8
    xmm0 = _mm_packus_epi16(xmm0, xmm2);
    return xmm0;
}
5491 :
// Split nIters pixels of a 4-component pixel-interleaved byte buffer
// (pabySrc) into four per-component buffers (pabyDest0..3).
//
// This variant is compiled when HAVE_SSE2 is defined and the compiler is not
// GCC (or is clang). It forwards to GDALDeinterleave4Byte_SSSE3() when
// available (always in USE_NEON_OPTIMIZATIONS builds, otherwise when
// CPLHaveRuntimeSSSE3() returns true), else uses the SSE2 loop below.
static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
                                  GByte *CPL_RESTRICT pabyDest0,
                                  GByte *CPL_RESTRICT pabyDest1,
                                  GByte *CPL_RESTRICT pabyDest2,
                                  GByte *CPL_RESTRICT pabyDest3, size_t nIters)
#ifdef USE_NEON_OPTIMIZATIONS
{
    return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1, pabyDest2,
                                       pabyDest3, nIters);
}
#else
{
#ifdef HAVE_SSSE3_AT_COMPILE_TIME
    if (CPLHaveRuntimeSSSE3())
    {
        return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
                                           pabyDest2, pabyDest3, nIters);
    }
#endif

    // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
    // do something slightly better.
    size_t i = 0;
    // 16 pixels (64 source bytes) per iteration, using unaligned loads and
    // stores.
    for (; i + 15 < nIters; i += 16)
    {
        __m128i xmm0_ori = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
        __m128i xmm1_ori = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
        __m128i xmm2_ori = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
        __m128i xmm3_ori = _mm_loadu_si128(
            reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));

        // The order of the four calls matters: deinterleave() with
        // SHIFT=true shifts the *_ori registers in place, so each call
        // exposes the next component in the lowest byte of every lane. The
        // last call needs no mask since only one byte is left in each lane.
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pabyDest0 + i),
            deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pabyDest1 + i),
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pabyDest2 + i),
            deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
        _mm_storeu_si128(
            reinterpret_cast<__m128i *>(pabyDest3 + i),
            deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
    }

    // Scalar loop for the remaining (fewer than 16) pixels.
#if defined(__clang__)
#pragma clang loop vectorize(disable)
#endif
    for (; i < nIters; ++i)
    {
        pabyDest0[i] = pabySrc[4 * i + 0];
        pabyDest1[i] = pabySrc[4 * i + 1];
        pabyDest2[i] = pabySrc[4 * i + 2];
        pabyDest3[i] = pabySrc[4 * i + 3];
    }
}
#endif
5552 : #else
// Split nIters pixels of a 4-component pixel-interleaved byte buffer
// (pabySrc) into four per-component buffers (pabyDest0..3).
// Variant used with GCC (non-clang): a plain loop compiled with tree
// vectorization explicitly enabled for this function.
// GCC autovectorizer does an excellent job
__attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
    const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
    GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
    GByte *CPL_RESTRICT pabyDest3, size_t nIters)
{
    // Keep this loop in its simple indexed form: it is what the
    // vectorizer is expected to recognize.
    for (size_t i = 0; i < nIters; ++i)
    {
        pabyDest0[i] = pabySrc[4 * i + 0];
        pabyDest1[i] = pabySrc[4 * i + 1];
        pabyDest2[i] = pabySrc[4 * i + 2];
        pabyDest3[i] = pabySrc[4 * i + 3];
    }
}
5567 : #endif
5568 :
5569 : #else
5570 :
5571 : /************************************************************************/
5572 : /* GDALDeinterleave3Byte() */
5573 : /************************************************************************/
5574 :
5575 : // TODO: Enabling below could help on non-Intel architectures where GCC knows
5576 : // how to auto-vectorize
5577 : // #if defined(__GNUC__)
5578 : //__attribute__((optimize("tree-vectorize")))
5579 : // #endif
5580 : static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5581 : GByte *CPL_RESTRICT pabyDest0,
5582 : GByte *CPL_RESTRICT pabyDest1,
5583 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5584 : {
5585 : for (size_t i = 0; i < nIters; ++i)
5586 : {
5587 : pabyDest0[i] = pabySrc[3 * i + 0];
5588 : pabyDest1[i] = pabySrc[3 * i + 1];
5589 : pabyDest2[i] = pabySrc[3 * i + 2];
5590 : }
5591 : }
5592 :
5593 : /************************************************************************/
5594 : /* GDALDeinterleave4Byte() */
5595 : /************************************************************************/
5596 :
5597 : // TODO: Enabling below could help on non-Intel architectures where gcc knows
5598 : // how to auto-vectorize
5599 : // #if defined(__GNUC__)
5600 : //__attribute__((optimize("tree-vectorize")))
5601 : // #endif
5602 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5603 : GByte *CPL_RESTRICT pabyDest0,
5604 : GByte *CPL_RESTRICT pabyDest1,
5605 : GByte *CPL_RESTRICT pabyDest2,
5606 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5607 : {
5608 : for (size_t i = 0; i < nIters; ++i)
5609 : {
5610 : pabyDest0[i] = pabySrc[4 * i + 0];
5611 : pabyDest1[i] = pabySrc[4 * i + 1];
5612 : pabyDest2[i] = pabySrc[4 * i + 2];
5613 : pabyDest3[i] = pabySrc[4 * i + 3];
5614 : }
5615 : }
5616 :
5617 : #endif
5618 :
5619 : /************************************************************************/
5620 : /* GDALDeinterleave() */
5621 : /************************************************************************/
5622 :
5623 : /*! Copy values from a pixel-interleave buffer to multiple per-component
5624 : buffers.
5625 :
5626 : In pseudo-code
5627 : \verbatim
5628 : for(size_t i = 0; i < nIters; ++i)
5629 : for(int iComp = 0; iComp < nComponents; iComp++ )
5630 : ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
5631 : \endverbatim
5632 :
5633 : The implementation is optimized for a few cases, like de-interleaving
5634 : of 3 or 4-components Byte buffers.
5635 :
5636 : \since GDAL 3.6
5637 : */
5638 214732 : void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
5639 : int nComponents, void **ppDestBuffer,
5640 : GDALDataType eDestDT, size_t nIters)
5641 : {
5642 214732 : if (eSourceDT == eDestDT)
5643 : {
5644 214709 : if (eSourceDT == GDT_Byte || eSourceDT == GDT_Int8)
5645 : {
5646 214391 : if (nComponents == 3)
5647 : {
5648 152797 : const GByte *CPL_RESTRICT pabySrc =
5649 : static_cast<const GByte *>(pSourceBuffer);
5650 152797 : GByte *CPL_RESTRICT pabyDest0 =
5651 : static_cast<GByte *>(ppDestBuffer[0]);
5652 152797 : GByte *CPL_RESTRICT pabyDest1 =
5653 : static_cast<GByte *>(ppDestBuffer[1]);
5654 152797 : GByte *CPL_RESTRICT pabyDest2 =
5655 : static_cast<GByte *>(ppDestBuffer[2]);
5656 152797 : GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5657 : nIters);
5658 152803 : return;
5659 : }
5660 61594 : else if (nComponents == 4)
5661 : {
5662 61587 : const GByte *CPL_RESTRICT pabySrc =
5663 : static_cast<const GByte *>(pSourceBuffer);
5664 61587 : GByte *CPL_RESTRICT pabyDest0 =
5665 : static_cast<GByte *>(ppDestBuffer[0]);
5666 61587 : GByte *CPL_RESTRICT pabyDest1 =
5667 : static_cast<GByte *>(ppDestBuffer[1]);
5668 61587 : GByte *CPL_RESTRICT pabyDest2 =
5669 : static_cast<GByte *>(ppDestBuffer[2]);
5670 61587 : GByte *CPL_RESTRICT pabyDest3 =
5671 : static_cast<GByte *>(ppDestBuffer[3]);
5672 61587 : GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5673 : pabyDest3, nIters);
5674 61587 : return;
5675 7 : }
5676 : }
5677 : #if ((defined(__GNUC__) && !defined(__clang__)) || \
5678 : defined(__INTEL_CLANG_COMPILER)) && \
5679 : defined(HAVE_SSE2) && defined(HAVE_SSSE3_AT_COMPILE_TIME)
5680 636 : else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
5681 318 : CPLHaveRuntimeSSSE3())
5682 : {
5683 318 : if (nComponents == 3)
5684 : {
5685 123 : const GUInt16 *CPL_RESTRICT panSrc =
5686 : static_cast<const GUInt16 *>(pSourceBuffer);
5687 123 : GUInt16 *CPL_RESTRICT panDest0 =
5688 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5689 123 : GUInt16 *CPL_RESTRICT panDest1 =
5690 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5691 123 : GUInt16 *CPL_RESTRICT panDest2 =
5692 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5693 123 : GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
5694 : panDest2, nIters);
5695 123 : return;
5696 : }
5697 : #if !defined(__INTEL_CLANG_COMPILER)
5698 : // ICC autovectorizer doesn't do a good job, at least with icx
5699 : // 2022.1.0.20220316
5700 195 : else if (nComponents == 4)
5701 : {
5702 195 : const GUInt16 *CPL_RESTRICT panSrc =
5703 : static_cast<const GUInt16 *>(pSourceBuffer);
5704 195 : GUInt16 *CPL_RESTRICT panDest0 =
5705 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5706 195 : GUInt16 *CPL_RESTRICT panDest1 =
5707 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5708 195 : GUInt16 *CPL_RESTRICT panDest2 =
5709 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5710 195 : GUInt16 *CPL_RESTRICT panDest3 =
5711 : static_cast<GUInt16 *>(ppDestBuffer[3]);
5712 195 : GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
5713 : panDest2, panDest3, nIters);
5714 195 : return;
5715 : }
5716 : #endif
5717 : }
5718 : #endif
5719 : }
5720 :
5721 30 : const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
5722 29 : const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
5723 108 : for (int iComp = 0; iComp < nComponents; iComp++)
5724 : {
5725 79 : GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
5726 79 : iComp * nSourceDTSize,
5727 : eSourceDT, nComponents * nSourceDTSize,
5728 79 : ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
5729 : }
5730 : }
5731 :
5732 : /************************************************************************/
5733 : /* GDALTranspose2DSingleToSingle() */
5734 : /************************************************************************/
5735 : /**
5736 : * Transpose a 2D array of non-complex values, in a efficient (cache-oblivious) way.
5737 : *
5738 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5739 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5740 : * @param nSrcWidth Width of pSrc array.
5741 : * @param nSrcHeight Height of pSrc array.
5742 : */
5743 :
5744 : template <class DST, class SRC>
5745 145 : void GDALTranspose2DSingleToSingle(const SRC *CPL_RESTRICT pSrc,
5746 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5747 : size_t nSrcHeight)
5748 : {
5749 145 : constexpr size_t blocksize = 32;
5750 315 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5751 : {
5752 170 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5753 390 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5754 : {
5755 : // transpose the block beginning at [i,j]
5756 220 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5757 2509 : for (size_t k = i; k < max_k; ++k)
5758 : {
5759 41017 : for (size_t l = j; l < max_l; ++l)
5760 : {
5761 38728 : GDALCopyWord(pSrc[l + k * nSrcWidth],
5762 38728 : pDst[k + l * nSrcHeight]);
5763 : }
5764 : }
5765 : }
5766 : }
5767 145 : }
5768 :
5769 : /************************************************************************/
5770 : /* GDALTranspose2DComplexToComplex() */
5771 : /************************************************************************/
5772 : /**
5773 : * Transpose a 2D array of complex values into an array of complex values,
5774 : * in a efficient (cache-oblivious) way.
5775 : *
5776 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5777 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5778 : * @param nSrcWidth Width of pSrc array.
5779 : * @param nSrcHeight Height of pSrc array.
5780 : */
5781 : template <class DST, class SRC>
5782 25 : void GDALTranspose2DComplexToComplex(const SRC *CPL_RESTRICT pSrc,
5783 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5784 : size_t nSrcHeight)
5785 : {
5786 25 : constexpr size_t blocksize = 32;
5787 50 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5788 : {
5789 25 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5790 50 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5791 : {
5792 : // transpose the block beginning at [i,j]
5793 25 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5794 75 : for (size_t k = i; k < max_k; ++k)
5795 : {
5796 200 : for (size_t l = j; l < max_l; ++l)
5797 : {
5798 150 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5799 150 : pDst[2 * (k + l * nSrcHeight) + 0]);
5800 150 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 1],
5801 150 : pDst[2 * (k + l * nSrcHeight) + 1]);
5802 : }
5803 : }
5804 : }
5805 : }
5806 25 : }
5807 :
5808 : /************************************************************************/
5809 : /* GDALTranspose2DComplexToSingle() */
5810 : /************************************************************************/
5811 : /**
5812 : * Transpose a 2D array of complex values into an array of non-complex values,
5813 : * in a efficient (cache-oblivious) way.
5814 : *
5815 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5816 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5817 : * @param nSrcWidth Width of pSrc array.
5818 : * @param nSrcHeight Height of pSrc array.
5819 : */
5820 : template <class DST, class SRC>
5821 55 : void GDALTranspose2DComplexToSingle(const SRC *CPL_RESTRICT pSrc,
5822 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5823 : size_t nSrcHeight)
5824 : {
5825 55 : constexpr size_t blocksize = 32;
5826 110 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5827 : {
5828 55 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5829 110 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5830 : {
5831 : // transpose the block beginning at [i,j]
5832 55 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5833 165 : for (size_t k = i; k < max_k; ++k)
5834 : {
5835 440 : for (size_t l = j; l < max_l; ++l)
5836 : {
5837 330 : GDALCopyWord(pSrc[2 * (l + k * nSrcWidth) + 0],
5838 330 : pDst[k + l * nSrcHeight]);
5839 : }
5840 : }
5841 : }
5842 : }
5843 55 : }
5844 :
5845 : /************************************************************************/
5846 : /* GDALTranspose2DSingleToComplex() */
5847 : /************************************************************************/
5848 : /**
5849 : * Transpose a 2D array of non-complex values into an array of complex values,
5850 : * in a efficient (cache-oblivious) way.
5851 : *
5852 : * @param pSrc Source array of height = nSrcHeight and width = nSrcWidth.
5853 : * @param pDst Destination transposed array of height = nSrcWidth and width = nSrcHeight.
5854 : * @param nSrcWidth Width of pSrc array.
5855 : * @param nSrcHeight Height of pSrc array.
5856 : */
5857 : template <class DST, class SRC>
5858 55 : void GDALTranspose2DSingleToComplex(const SRC *CPL_RESTRICT pSrc,
5859 : DST *CPL_RESTRICT pDst, size_t nSrcWidth,
5860 : size_t nSrcHeight)
5861 : {
5862 55 : constexpr size_t blocksize = 32;
5863 110 : for (size_t i = 0; i < nSrcHeight; i += blocksize)
5864 : {
5865 55 : const size_t max_k = std::min(i + blocksize, nSrcHeight);
5866 110 : for (size_t j = 0; j < nSrcWidth; j += blocksize)
5867 : {
5868 : // transpose the block beginning at [i,j]
5869 55 : const size_t max_l = std::min(j + blocksize, nSrcWidth);
5870 165 : for (size_t k = i; k < max_k; ++k)
5871 : {
5872 440 : for (size_t l = j; l < max_l; ++l)
5873 : {
5874 330 : GDALCopyWord(pSrc[l + k * nSrcWidth],
5875 330 : pDst[2 * (k + l * nSrcHeight) + 0]);
5876 330 : pDst[2 * (k + l * nSrcHeight) + 1] = 0;
5877 : }
5878 : }
5879 : }
5880 : }
5881 55 : }
5882 :
5883 : /************************************************************************/
5884 : /* GDALTranspose2D() */
5885 : /************************************************************************/
5886 :
5887 : template <class DST, bool DST_IS_COMPLEX>
5888 280 : static void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, DST *pDst,
5889 : size_t nSrcWidth, size_t nSrcHeight)
5890 : {
5891 : #define CALL_GDALTranspose2D_internal(SRC_TYPE) \
5892 : do \
5893 : { \
5894 : if constexpr (DST_IS_COMPLEX) \
5895 : { \
5896 : GDALTranspose2DSingleToComplex( \
5897 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5898 : nSrcHeight); \
5899 : } \
5900 : else \
5901 : { \
5902 : GDALTranspose2DSingleToSingle(static_cast<const SRC_TYPE *>(pSrc), \
5903 : pDst, nSrcWidth, nSrcHeight); \
5904 : } \
5905 : } while (0)
5906 :
5907 : #define CALL_GDALTranspose2DComplex_internal(SRC_TYPE) \
5908 : do \
5909 : { \
5910 : if constexpr (DST_IS_COMPLEX) \
5911 : { \
5912 : GDALTranspose2DComplexToComplex( \
5913 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5914 : nSrcHeight); \
5915 : } \
5916 : else \
5917 : { \
5918 : GDALTranspose2DComplexToSingle( \
5919 : static_cast<const SRC_TYPE *>(pSrc), pDst, nSrcWidth, \
5920 : nSrcHeight); \
5921 : } \
5922 : } while (0)
5923 :
5924 : // clang-format off
5925 280 : switch (eSrcType)
5926 : {
5927 16 : case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t); break;
5928 15 : case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t); break;
5929 24 : case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t); break;
5930 16 : case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t); break;
5931 24 : case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t); break;
5932 16 : case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t); break;
5933 16 : case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t); break;
5934 16 : case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t); break;
5935 16 : case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16); break;
5936 17 : case GDT_Float32: CALL_GDALTranspose2D_internal(float); break;
5937 24 : case GDT_Float64: CALL_GDALTranspose2D_internal(double); break;
5938 16 : case GDT_CInt16: CALL_GDALTranspose2DComplex_internal(int16_t); break;
5939 16 : case GDT_CInt32: CALL_GDALTranspose2DComplex_internal(int32_t); break;
5940 16 : case GDT_CFloat16: CALL_GDALTranspose2DComplex_internal(GFloat16); break;
5941 16 : case GDT_CFloat32: CALL_GDALTranspose2DComplex_internal(float); break;
5942 16 : case GDT_CFloat64: CALL_GDALTranspose2DComplex_internal(double); break;
5943 0 : case GDT_Unknown:
5944 : case GDT_TypeCount:
5945 0 : break;
5946 : }
5947 : // clang-format on
5948 :
5949 : #undef CALL_GDALTranspose2D_internal
5950 : #undef CALL_GDALTranspose2DComplex_internal
5951 280 : }
5952 :
5953 : /************************************************************************/
5954 : /* GDALInterleave2Byte() */
5955 : /************************************************************************/
5956 :
5957 : #if defined(HAVE_SSE2) && \
5958 : (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
5959 :
5960 : // ICC autovectorizer doesn't do a good job at generating good SSE code,
5961 : // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
5962 : #if defined(__GNUC__)
5963 : __attribute__((noinline))
5964 : #endif
5965 : static void
5966 : GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
5967 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
5968 : {
5969 : size_t i = 0;
5970 : constexpr size_t VALS_PER_ITER = 16;
5971 : for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
5972 : {
5973 : __m128i xmm0 =
5974 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + i));
5975 : __m128i xmm1 = _mm_loadu_si128(
5976 : reinterpret_cast<__m128i const *>(pSrc + i + nIters));
5977 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDst + 2 * i),
5978 : _mm_unpacklo_epi8(xmm0, xmm1));
5979 : _mm_storeu_si128(
5980 : reinterpret_cast<__m128i *>(pDst + 2 * i + VALS_PER_ITER),
5981 : _mm_unpackhi_epi8(xmm0, xmm1));
5982 : }
5983 : #if defined(__clang__)
5984 : #pragma clang loop vectorize(disable)
5985 : #endif
5986 : for (; i < nIters; ++i)
5987 : {
5988 : pDst[2 * i + 0] = pSrc[i + 0 * nIters];
5989 : pDst[2 * i + 1] = pSrc[i + 1 * nIters];
5990 : }
5991 : }
5992 :
5993 : #else
5994 :
5995 : #if defined(__GNUC__) && !defined(__clang__)
5996 : __attribute__((optimize("tree-vectorize")))
5997 : #endif
5998 : #if defined(__GNUC__)
5999 : __attribute__((noinline))
6000 : #endif
6001 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6002 : // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6003 : #pragma clang diagnostic push
6004 : #pragma clang diagnostic ignored "-Wpass-failed"
6005 : #endif
6006 : static void
6007 4 : GDALInterleave2Byte(const uint8_t *CPL_RESTRICT pSrc,
6008 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
6009 : {
6010 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6011 : #pragma clang loop vectorize(enable)
6012 : #endif
6013 44 : for (size_t i = 0; i < nIters; ++i)
6014 : {
6015 40 : pDst[2 * i + 0] = pSrc[i + 0 * nIters];
6016 40 : pDst[2 * i + 1] = pSrc[i + 1 * nIters];
6017 : }
6018 4 : }
6019 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6020 : #pragma clang diagnostic pop
6021 : #endif
6022 :
6023 : #endif
6024 :
6025 : /************************************************************************/
6026 : /* GDALInterleave4Byte() */
6027 : /************************************************************************/
6028 :
6029 : #if defined(HAVE_SSE2) && \
6030 : (!defined(__GNUC__) || defined(__INTEL_CLANG_COMPILER))
6031 :
6032 : // ICC autovectorizer doesn't do a good job at generating good SSE code,
6033 : // at least with icx 2024.0.2.20231213, but it nicely unrolls the below loop.
6034 : #if defined(__GNUC__)
6035 : __attribute__((noinline))
6036 : #endif
6037 : static void
6038 : GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6039 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
6040 : {
6041 : size_t i = 0;
6042 : constexpr size_t VALS_PER_ITER = 16;
6043 : for (i = 0; i + VALS_PER_ITER <= nIters; i += VALS_PER_ITER)
6044 : {
6045 : __m128i xmm0 = _mm_loadu_si128(
6046 : reinterpret_cast<__m128i const *>(pSrc + i + 0 * nIters));
6047 : __m128i xmm1 = _mm_loadu_si128(
6048 : reinterpret_cast<__m128i const *>(pSrc + i + 1 * nIters));
6049 : __m128i xmm2 = _mm_loadu_si128(
6050 : reinterpret_cast<__m128i const *>(pSrc + i + 2 * nIters));
6051 : __m128i xmm3 = _mm_loadu_si128(
6052 : reinterpret_cast<__m128i const *>(pSrc + i + 3 * nIters));
6053 : auto tmp0 = _mm_unpacklo_epi8(
6054 : xmm0,
6055 : xmm1); // (xmm0_0, xmm1_0, xmm0_1, xmm1_1, xmm0_2, xmm1_2, ...)
6056 : auto tmp1 = _mm_unpackhi_epi8(
6057 : xmm0,
6058 : xmm1); // (xmm0_8, xmm1_8, xmm0_9, xmm1_9, xmm0_10, xmm1_10, ...)
6059 : auto tmp2 = _mm_unpacklo_epi8(
6060 : xmm2,
6061 : xmm3); // (xmm2_0, xmm3_0, xmm2_1, xmm3_1, xmm2_2, xmm3_2, ...)
6062 : auto tmp3 = _mm_unpackhi_epi8(
6063 : xmm2,
6064 : xmm3); // (xmm2_8, xmm3_8, xmm2_9, xmm3_9, xmm2_10, xmm3_10, ...)
6065 : auto tmp2_0 = _mm_unpacklo_epi16(
6066 : tmp0,
6067 : tmp2); // (xmm0_0, xmm1_0, xmm2_0, xmm3_0, xmm0_1, xmm1_1, xmm2_1, xmm3_1, ...)
6068 : auto tmp2_1 = _mm_unpackhi_epi16(tmp0, tmp2);
6069 : auto tmp2_2 = _mm_unpacklo_epi16(tmp1, tmp3);
6070 : auto tmp2_3 = _mm_unpackhi_epi16(tmp1, tmp3);
6071 : _mm_storeu_si128(
6072 : reinterpret_cast<__m128i *>(pDst + 4 * i + 0 * VALS_PER_ITER),
6073 : tmp2_0);
6074 : _mm_storeu_si128(
6075 : reinterpret_cast<__m128i *>(pDst + 4 * i + 1 * VALS_PER_ITER),
6076 : tmp2_1);
6077 : _mm_storeu_si128(
6078 : reinterpret_cast<__m128i *>(pDst + 4 * i + 2 * VALS_PER_ITER),
6079 : tmp2_2);
6080 : _mm_storeu_si128(
6081 : reinterpret_cast<__m128i *>(pDst + 4 * i + 3 * VALS_PER_ITER),
6082 : tmp2_3);
6083 : }
6084 : #if defined(__clang__)
6085 : #pragma clang loop vectorize(disable)
6086 : #endif
6087 : for (; i < nIters; ++i)
6088 : {
6089 : pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6090 : pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6091 : pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6092 : pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6093 : }
6094 : }
6095 :
6096 : #else
6097 :
6098 : #if defined(__GNUC__) && !defined(__clang__)
6099 : __attribute__((optimize("tree-vectorize")))
6100 : #endif
6101 : #if defined(__GNUC__)
6102 : __attribute__((noinline))
6103 : #endif
6104 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6105 : // clang++ -O2 -fsanitize=undefined fails to vectorize, ignore that warning
6106 : #pragma clang diagnostic push
6107 : #pragma clang diagnostic ignored "-Wpass-failed"
6108 : #endif
6109 : static void
6110 2 : GDALInterleave4Byte(const uint8_t *CPL_RESTRICT pSrc,
6111 : uint8_t *CPL_RESTRICT pDst, size_t nIters)
6112 : {
6113 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6114 : #pragma clang loop vectorize(enable)
6115 : #endif
6116 36 : for (size_t i = 0; i < nIters; ++i)
6117 : {
6118 34 : pDst[4 * i + 0] = pSrc[i + 0 * nIters];
6119 34 : pDst[4 * i + 1] = pSrc[i + 1 * nIters];
6120 34 : pDst[4 * i + 2] = pSrc[i + 2 * nIters];
6121 34 : pDst[4 * i + 3] = pSrc[i + 3 * nIters];
6122 : }
6123 2 : }
6124 : #if defined(__clang__) && !defined(__INTEL_CLANG_COMPILER)
6125 : #pragma clang diagnostic pop
6126 : #endif
6127 :
6128 : #endif
6129 :
6130 : /************************************************************************/
6131 : /* GDALTranspose2D() */
6132 : /************************************************************************/
6133 :
6134 : /**
6135 : * Transpose a 2D array in a efficient (cache-oblivious) way.
6136 : *
6137 : * @param pSrc Source array of width = nSrcWidth and height = nSrcHeight.
6138 : * @param eSrcType Data type of pSrc.
6139 : * @param pDst Destination transposed array of width = nSrcHeight and height = nSrcWidth.
6140 : * @param eDstType Data type of pDst.
6141 : * @param nSrcWidth Width of pSrc array.
6142 : * @param nSrcHeight Height of pSrc array.
6143 : * @since GDAL 3.11
6144 : */
6145 :
6146 305 : void GDALTranspose2D(const void *pSrc, GDALDataType eSrcType, void *pDst,
6147 : GDALDataType eDstType, size_t nSrcWidth, size_t nSrcHeight)
6148 : {
6149 305 : if (eSrcType == eDstType && (eSrcType == GDT_Byte || eSrcType == GDT_Int8))
6150 : {
6151 25 : if (nSrcHeight == 2)
6152 : {
6153 4 : GDALInterleave2Byte(static_cast<const uint8_t *>(pSrc),
6154 : static_cast<uint8_t *>(pDst), nSrcWidth);
6155 4 : return;
6156 : }
6157 21 : if (nSrcHeight == 4)
6158 : {
6159 2 : GDALInterleave4Byte(static_cast<const uint8_t *>(pSrc),
6160 : static_cast<uint8_t *>(pDst), nSrcWidth);
6161 2 : return;
6162 : }
6163 : #if (defined(HAVE_SSSE3_AT_COMPILE_TIME) && \
6164 : (defined(__x86_64) || defined(_M_X64)))
6165 19 : if (CPLHaveRuntimeSSSE3())
6166 : {
6167 19 : GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6168 : static_cast<uint8_t *>(pDst), nSrcWidth,
6169 : nSrcHeight);
6170 19 : return;
6171 : }
6172 : #elif defined(USE_NEON_OPTIMIZATIONS)
6173 : {
6174 : GDALTranspose2D_Byte_SSSE3(static_cast<const uint8_t *>(pSrc),
6175 : static_cast<uint8_t *>(pDst), nSrcWidth,
6176 : nSrcHeight);
6177 : return;
6178 : }
6179 : #endif
6180 : }
6181 :
6182 : #define CALL_GDALTranspose2D_internal(DST_TYPE, DST_IS_COMPLEX) \
6183 : GDALTranspose2D<DST_TYPE, DST_IS_COMPLEX>( \
6184 : pSrc, eSrcType, static_cast<DST_TYPE *>(pDst), nSrcWidth, nSrcHeight)
6185 :
6186 : // clang-format off
6187 280 : switch (eDstType)
6188 : {
6189 15 : case GDT_Byte: CALL_GDALTranspose2D_internal(uint8_t, false); break;
6190 15 : case GDT_Int8: CALL_GDALTranspose2D_internal(int8_t, false); break;
6191 24 : case GDT_UInt16: CALL_GDALTranspose2D_internal(uint16_t, false); break;
6192 16 : case GDT_Int16: CALL_GDALTranspose2D_internal(int16_t, false); break;
6193 24 : case GDT_UInt32: CALL_GDALTranspose2D_internal(uint32_t, false); break;
6194 16 : case GDT_Int32: CALL_GDALTranspose2D_internal(int32_t, false); break;
6195 16 : case GDT_UInt64: CALL_GDALTranspose2D_internal(uint64_t, false); break;
6196 16 : case GDT_Int64: CALL_GDALTranspose2D_internal(int64_t, false); break;
6197 16 : case GDT_Float16: CALL_GDALTranspose2D_internal(GFloat16, false); break;
6198 17 : case GDT_Float32: CALL_GDALTranspose2D_internal(float, false); break;
6199 25 : case GDT_Float64: CALL_GDALTranspose2D_internal(double, false); break;
6200 16 : case GDT_CInt16: CALL_GDALTranspose2D_internal(int16_t, true); break;
6201 16 : case GDT_CInt32: CALL_GDALTranspose2D_internal(int32_t, true); break;
6202 16 : case GDT_CFloat16: CALL_GDALTranspose2D_internal(GFloat16, true); break;
6203 16 : case GDT_CFloat32: CALL_GDALTranspose2D_internal(float, true); break;
6204 16 : case GDT_CFloat64: CALL_GDALTranspose2D_internal(double, true); break;
6205 0 : case GDT_Unknown:
6206 : case GDT_TypeCount:
6207 0 : break;
6208 : }
6209 : // clang-format on
6210 :
6211 : #undef CALL_GDALTranspose2D_internal
6212 : }
6213 :
6214 : /************************************************************************/
6215 : /* ExtractBitAndConvertTo255() */
6216 : /************************************************************************/
6217 :
6218 : #if defined(__GNUC__) || defined(_MSC_VER)
6219 : // Signedness of char implementation dependent, so be explicit.
6220 : // Assumes 2-complement integer types and sign extension of right shifting
6221 : // GCC guarantees such:
6222 : // https://gcc.gnu.org/onlinedocs/gcc/Integers-implementation.html#Integers-implementation
6223 157290 : static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6224 : {
6225 157290 : return static_cast<GByte>(static_cast<signed char>(byVal << (7 - nBit)) >>
6226 157290 : 7);
6227 : }
6228 : #else
6229 : // Portable way
6230 : static inline GByte ExtractBitAndConvertTo255(GByte byVal, int nBit)
6231 : {
6232 : return (byVal & (1 << nBit)) ? 255 : 0;
6233 : }
6234 : #endif
6235 :
6236 : /************************************************************************/
6237 : /* ExpandEightPackedBitsToByteAt255() */
6238 : /************************************************************************/
6239 :
6240 19457 : static inline void ExpandEightPackedBitsToByteAt255(GByte byVal,
6241 : GByte abyOutput[8])
6242 : {
6243 19457 : abyOutput[0] = ExtractBitAndConvertTo255(byVal, 7);
6244 19457 : abyOutput[1] = ExtractBitAndConvertTo255(byVal, 6);
6245 19457 : abyOutput[2] = ExtractBitAndConvertTo255(byVal, 5);
6246 19457 : abyOutput[3] = ExtractBitAndConvertTo255(byVal, 4);
6247 19457 : abyOutput[4] = ExtractBitAndConvertTo255(byVal, 3);
6248 19457 : abyOutput[5] = ExtractBitAndConvertTo255(byVal, 2);
6249 19457 : abyOutput[6] = ExtractBitAndConvertTo255(byVal, 1);
6250 19457 : abyOutput[7] = ExtractBitAndConvertTo255(byVal, 0);
6251 19457 : }
6252 :
6253 : /************************************************************************/
6254 : /* GDALExpandPackedBitsToByteAt0Or255() */
6255 : /************************************************************************/
6256 :
6257 : /** Expand packed-bits (ordered from most-significant bit to least one)
6258 : into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6259 : at 1 to a byte at 255.
6260 :
6261 : The function does (in a possibly more optimized way) the following:
6262 : \code{.cpp}
6263 : for (size_t i = 0; i < nInputBits; ++i )
6264 : {
6265 : pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 255 : 0;
6266 : }
6267 : \endcode
6268 :
6269 : @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6270 : @param pabyOutput Output array of nInputBits bytes.
6271 : @param nInputBits Number of valid bits in pabyInput.
6272 :
6273 : @since 3.11
6274 : */
6275 :
6276 44445 : void GDALExpandPackedBitsToByteAt0Or255(const GByte *CPL_RESTRICT pabyInput,
6277 : GByte *CPL_RESTRICT pabyOutput,
6278 : size_t nInputBits)
6279 : {
6280 44445 : const size_t nInputWholeBytes = nInputBits / 8;
6281 44445 : size_t iByte = 0;
6282 :
6283 : #ifdef HAVE_SSE2
6284 : // Mask to isolate each bit
6285 44445 : const __m128i bit_mask = _mm_set_epi8(1, 2, 4, 8, 16, 32, 64, -128, 1, 2, 4,
6286 : 8, 16, 32, 64, -128);
6287 44445 : const __m128i zero = _mm_setzero_si128();
6288 44445 : const __m128i all_ones = _mm_set1_epi8(-1);
6289 : #ifdef __SSSE3__
6290 : const __m128i dispatch_two_bytes =
6291 : _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);
6292 : #endif
6293 44445 : constexpr size_t SSE_REG_SIZE = sizeof(bit_mask);
6294 132394 : for (; iByte + SSE_REG_SIZE <= nInputWholeBytes; iByte += SSE_REG_SIZE)
6295 : {
6296 87949 : __m128i reg_ori = _mm_loadu_si128(
6297 87949 : reinterpret_cast<const __m128i *>(pabyInput + iByte));
6298 :
6299 87949 : constexpr int NUM_PROCESSED_BYTES_PER_REG = 2;
6300 791541 : for (size_t k = 0; k < SSE_REG_SIZE / NUM_PROCESSED_BYTES_PER_REG; ++k)
6301 : {
6302 : // Given reg_ori = (A, B, ... 14 other bytes ...),
6303 : // expand to (A, A, A, A, A, A, A, A, B, B, B, B, B, B, B, B)
6304 : #ifdef __SSSE3__
6305 : __m128i reg = _mm_shuffle_epi8(reg_ori, dispatch_two_bytes);
6306 : #else
6307 703592 : __m128i reg = _mm_unpacklo_epi8(reg_ori, reg_ori);
6308 703592 : reg = _mm_unpacklo_epi16(reg, reg);
6309 703592 : reg = _mm_unpacklo_epi32(reg, reg);
6310 : #endif
6311 :
6312 : // Test if bits of interest are set
6313 703592 : reg = _mm_and_si128(reg, bit_mask);
6314 :
6315 : // Now test if those bits are set, by comparing to zero. So the
6316 : // result will be that bytes where bits are set will be at 0, and
6317 : // ones where they are cleared will be at 0xFF. So the inverse of
6318 : // the end result we want!
6319 703592 : reg = _mm_cmpeq_epi8(reg, zero);
6320 :
6321 : // Invert the result
6322 703592 : reg = _mm_andnot_si128(reg, all_ones);
6323 :
6324 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pabyOutput), reg);
6325 :
6326 703592 : pabyOutput += SSE_REG_SIZE;
6327 :
6328 : // Right-shift of 2 bytes
6329 703592 : reg_ori = _mm_bsrli_si128(reg_ori, NUM_PROCESSED_BYTES_PER_REG);
6330 : }
6331 : }
6332 :
6333 : #endif // HAVE_SSE2
6334 :
6335 63902 : for (; iByte < nInputWholeBytes; ++iByte)
6336 : {
6337 19457 : ExpandEightPackedBitsToByteAt255(pabyInput[iByte], pabyOutput);
6338 19457 : pabyOutput += 8;
6339 : }
6340 46079 : for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6341 : {
6342 1634 : *pabyOutput = ExtractBitAndConvertTo255(pabyInput[iByte], 7 - iBit);
6343 1634 : ++pabyOutput;
6344 : }
6345 44445 : }
6346 :
6347 : /************************************************************************/
6348 : /* ExpandEightPackedBitsToByteAt1() */
6349 : /************************************************************************/
6350 :
6351 136113 : static inline void ExpandEightPackedBitsToByteAt1(GByte byVal,
6352 : GByte abyOutput[8])
6353 : {
6354 136113 : abyOutput[0] = (byVal >> 7) & 0x1;
6355 136113 : abyOutput[1] = (byVal >> 6) & 0x1;
6356 136113 : abyOutput[2] = (byVal >> 5) & 0x1;
6357 136113 : abyOutput[3] = (byVal >> 4) & 0x1;
6358 136113 : abyOutput[4] = (byVal >> 3) & 0x1;
6359 136113 : abyOutput[5] = (byVal >> 2) & 0x1;
6360 136113 : abyOutput[6] = (byVal >> 1) & 0x1;
6361 136113 : abyOutput[7] = (byVal >> 0) & 0x1;
6362 136113 : }
6363 :
6364 : /************************************************************************/
6365 : /* GDALExpandPackedBitsToByteAt0Or1() */
6366 : /************************************************************************/
6367 :
6368 : /** Expand packed-bits (ordered from most-significant bit to least one)
6369 : into a byte each, where a bit at 0 is expanded to a byte at 0, and a bit
6370 : at 1 to a byte at 1.
6371 :
6372 : The function does (in a possibly more optimized way) the following:
6373 : \code{.cpp}
6374 : for (size_t i = 0; i < nInputBits; ++i )
6375 : {
6376 : pabyOutput[i] = (pabyInput[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
6377 : }
6378 : \endcode
6379 :
6380 : @param pabyInput Input array of (nInputBits + 7) / 8 bytes.
6381 : @param pabyOutput Output array of nInputBits bytes.
6382 : @param nInputBits Number of valid bits in pabyInput.
6383 :
6384 : @since 3.11
6385 : */
6386 :
6387 7041 : void GDALExpandPackedBitsToByteAt0Or1(const GByte *CPL_RESTRICT pabyInput,
6388 : GByte *CPL_RESTRICT pabyOutput,
6389 : size_t nInputBits)
6390 : {
6391 7041 : const size_t nInputWholeBytes = nInputBits / 8;
6392 7041 : size_t iByte = 0;
6393 143154 : for (; iByte < nInputWholeBytes; ++iByte)
6394 : {
6395 136113 : ExpandEightPackedBitsToByteAt1(pabyInput[iByte], pabyOutput);
6396 136113 : pabyOutput += 8;
6397 : }
6398 18902 : for (int iBit = 0; iBit < static_cast<int>(nInputBits % 8); ++iBit)
6399 : {
6400 11861 : *pabyOutput = (pabyInput[iByte] >> (7 - iBit)) & 0x1;
6401 11861 : ++pabyOutput;
6402 : }
6403 7041 : }
|