Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Contains default implementation of GDALRasterBand::IRasterIO()
5 : * and supporting functions of broader utility.
6 : * Author: Frank Warmerdam, warmerdam@pobox.com
7 : *
8 : ******************************************************************************
9 : * Copyright (c) 1998, Frank Warmerdam
10 : * Copyright (c) 2007-2014, Even Rouault <even dot rouault at spatialys.com>
11 : *
12 : * Permission is hereby granted, free of charge, to any person obtaining a
13 : * copy of this software and associated documentation files (the "Software"),
14 : * to deal in the Software without restriction, including without limitation
15 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 : * and/or sell copies of the Software, and to permit persons to whom the
17 : * Software is furnished to do so, subject to the following conditions:
18 : *
19 : * The above copyright notice and this permission notice shall be included
20 : * in all copies or substantial portions of the Software.
21 : *
22 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 : * DEALINGS IN THE SOFTWARE.
29 : ****************************************************************************/
30 :
31 : #include "cpl_port.h"
32 : #include "gdal.h"
33 : #include "gdal_priv.h"
34 :
35 : #include <cassert>
36 : #include <climits>
37 : #include <cmath>
38 : #include <cstddef>
39 : #include <cstdio>
40 : #include <cstdlib>
41 : #include <cstring>
42 :
43 : #include <algorithm>
44 : #include <limits>
45 : #include <stdexcept>
46 : #include <type_traits>
47 :
48 : #include "cpl_conv.h"
49 : #include "cpl_cpu_features.h"
50 : #include "cpl_error.h"
51 : #include "cpl_progress.h"
52 : #include "cpl_string.h"
53 : #include "cpl_vsi.h"
54 : #include "gdal_priv_templates.hpp"
55 : #include "gdal_vrt.h"
56 : #include "gdalwarper.h"
57 : #include "memdataset.h"
58 : #include "vrtdataset.h"
59 :
60 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
61 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
62 : int nDstPixelStride, GPtrDiff_t nWordCount);
63 :
64 : /************************************************************************/
65 : /* DownsamplingIntegerXFactor() */
66 : /************************************************************************/
67 :
68 : template <bool bSameDataType, int DATA_TYPE_SIZE>
69 413154 : static bool DownsamplingIntegerXFactor(
70 : GDALRasterBand *poBand, int iSrcX, int nSrcXInc, GPtrDiff_t iSrcOffsetCst,
71 : GByte *CPL_RESTRICT pabyDstData, int nPixelSpace, int nBufXSize,
72 : GDALDataType eDataType, GDALDataType eBufType, int &nStartBlockX,
73 : int nBlockXSize, GDALRasterBlock *&poBlock, int nLBlockY)
74 : {
75 413154 : const int nBandDataSize =
76 : bSameDataType ? DATA_TYPE_SIZE : GDALGetDataTypeSizeBytes(eDataType);
77 413154 : int nOuterLoopIters = nBufXSize - 1;
78 413154 : const int nIncSrcOffset = nSrcXInc * nBandDataSize;
79 : const GByte *CPL_RESTRICT pabySrcData;
80 413154 : int nEndBlockX = nBlockXSize + nStartBlockX;
81 :
82 413154 : if (iSrcX < nEndBlockX)
83 : {
84 226125 : CPLAssert(poBlock);
85 226125 : goto no_reload_block;
86 : }
87 187029 : goto reload_block;
88 :
89 : // Don't do the last iteration in the loop, as iSrcX might go beyond
90 : // nRasterXSize - 1
91 932720 : while (--nOuterLoopIters >= 1)
92 : {
93 189034 : iSrcX += nSrcXInc;
94 189034 : pabySrcData += nIncSrcOffset;
95 189034 : pabyDstData += nPixelSpace;
96 :
97 : /* --------------------------------------------------------------------
98 : */
99 : /* Ensure we have the appropriate block loaded. */
100 : /* --------------------------------------------------------------------
101 : */
102 189034 : if (iSrcX >= nEndBlockX)
103 : {
104 189034 : reload_block:
105 : {
106 388653 : const int nLBlockX = iSrcX / nBlockXSize;
107 388653 : nStartBlockX = nLBlockX * nBlockXSize;
108 388653 : nEndBlockX = nStartBlockX + nBlockXSize;
109 :
110 388653 : if (poBlock != nullptr)
111 316698 : poBlock->DropLock();
112 :
113 388653 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
114 388653 : if (poBlock == nullptr)
115 : {
116 1 : return false;
117 : }
118 : }
119 :
120 388652 : no_reload_block:
121 : const GByte *pabySrcBlock =
122 932720 : static_cast<const GByte *>(poBlock->GetDataRef());
123 932720 : GPtrDiff_t iSrcOffset =
124 932720 : (iSrcX - nStartBlockX + iSrcOffsetCst) * nBandDataSize;
125 932720 : pabySrcData = pabySrcBlock + iSrcOffset;
126 : }
127 :
128 : /* --------------------------------------------------------------------
129 : */
130 : /* Copy the maximum run of pixels. */
131 : /* --------------------------------------------------------------------
132 : */
133 :
134 932720 : const int nIters = std::min(
135 932720 : (nEndBlockX - iSrcX + (nSrcXInc - 1)) / nSrcXInc, nOuterLoopIters);
136 : if (bSameDataType)
137 : {
138 932303 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
139 932303 : if (nIters > 1)
140 : {
141 : if (DATA_TYPE_SIZE == 1)
142 : {
143 276277 : pabySrcData += nIncSrcOffset;
144 276277 : pabyDstData += nPixelSpace;
145 276277 : GDALFastCopyByte(pabySrcData, nIncSrcOffset, pabyDstData,
146 276277 : nPixelSpace, nIters - 1);
147 276277 : pabySrcData +=
148 276277 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 2);
149 276277 : pabyDstData +=
150 276277 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 2);
151 : }
152 : else
153 : {
154 4443048 : for (int i = 0; i < nIters - 1; i++)
155 : {
156 4244524 : pabySrcData += nIncSrcOffset;
157 4244524 : pabyDstData += nPixelSpace;
158 4244524 : memcpy(pabyDstData, pabySrcData, nBandDataSize);
159 : }
160 : }
161 474803 : iSrcX += nSrcXInc * (nIters - 1);
162 474803 : nOuterLoopIters -= nIters - 1;
163 : }
164 : }
165 : else
166 : {
167 : // Type to type conversion ...
168 417 : GDALCopyWords(pabySrcData, eDataType, nIncSrcOffset, pabyDstData,
169 417 : eBufType, nPixelSpace, std::max(1, nIters));
170 417 : if (nIters > 1)
171 : {
172 206 : pabySrcData +=
173 206 : static_cast<GPtrDiff_t>(nIncSrcOffset) * (nIters - 1);
174 206 : pabyDstData +=
175 206 : static_cast<GPtrDiff_t>(nPixelSpace) * (nIters - 1);
176 206 : iSrcX += nSrcXInc * (nIters - 1);
177 206 : nOuterLoopIters -= nIters - 1;
178 : }
179 : }
180 : }
181 :
182 : // Deal with last iteration to avoid iSrcX to go beyond nRasterXSize - 1
183 743686 : if (nOuterLoopIters == 0)
184 : {
185 330533 : const int nRasterXSize = poBand->GetXSize();
186 330533 : iSrcX =
187 661066 : static_cast<int>(std::min(static_cast<GInt64>(iSrcX) + nSrcXInc,
188 330533 : static_cast<GInt64>(nRasterXSize - 1)));
189 330533 : pabyDstData += nPixelSpace;
190 330533 : if (iSrcX < nEndBlockX)
191 : {
192 317943 : goto no_reload_block;
193 : }
194 12590 : goto reload_block;
195 : }
196 413153 : return true;
197 : }
198 :
199 : /************************************************************************/
200 : /* IRasterIO() */
201 : /* */
202 : /* Default internal implementation of RasterIO() ... utilizes */
203 : /* the Block access methods to satisfy the request. This would */
204 : /* normally only be overridden by formats with overviews. */
205 : /************************************************************************/
206 :
207 5370250 : CPLErr GDALRasterBand::IRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
208 : int nXSize, int nYSize, void *pData,
209 : int nBufXSize, int nBufYSize,
210 : GDALDataType eBufType, GSpacing nPixelSpace,
211 : GSpacing nLineSpace,
212 : GDALRasterIOExtraArg *psExtraArg)
213 :
214 : {
215 5370250 : if (eRWFlag == GF_Write && eFlushBlockErr != CE_None)
216 : {
217 0 : CPLError(eFlushBlockErr, CPLE_AppDefined,
218 : "An error occurred while writing a dirty block "
219 : "from GDALRasterBand::IRasterIO");
220 0 : CPLErr eErr = eFlushBlockErr;
221 0 : eFlushBlockErr = CE_None;
222 0 : return eErr;
223 : }
224 5370250 : if (nBlockXSize <= 0 || nBlockYSize <= 0)
225 : {
226 0 : CPLError(CE_Failure, CPLE_AppDefined, "Invalid block size");
227 0 : return CE_Failure;
228 : }
229 :
230 5370260 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
231 5370260 : const int nBufDataSize = GDALGetDataTypeSizeBytes(eBufType);
232 5370260 : GByte dummyBlock[2] = {0, 0};
233 5370260 : GByte *pabySrcBlock =
234 : dummyBlock; /* to avoid Coverity warning about nullptr dereference */
235 5370260 : GDALRasterBlock *poBlock = nullptr;
236 5370260 : const bool bUseIntegerRequestCoords =
237 5410570 : (!psExtraArg->bFloatingPointWindowValidity ||
238 40304 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
239 15798 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
240 :
241 : /* ==================================================================== */
242 : /* A common case is the data requested with the destination */
243 : /* is packed, and the block width is the raster width. */
244 : /* ==================================================================== */
245 5290230 : if (nPixelSpace == nBufDataSize && nLineSpace == nPixelSpace * nXSize &&
246 2644570 : nBlockXSize == GetXSize() && nBufXSize == nXSize &&
247 10660500 : nBufYSize == nYSize && bUseIntegerRequestCoords)
248 : {
249 2539540 : CPLErr eErr = CE_None;
250 2539540 : int nLBlockY = -1;
251 :
252 6908740 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
253 : {
254 4370250 : const int iSrcY = iBufYOff + nYOff;
255 :
256 4370250 : if (iSrcY < nLBlockY * nBlockYSize ||
257 4370250 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
258 : {
259 2775810 : nLBlockY = iSrcY / nBlockYSize;
260 2775810 : bool bJustInitialize =
261 83668 : eRWFlag == GF_Write && nXOff == 0 &&
262 2907720 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
263 48245 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize;
264 :
265 : // Is this a partial tile at right and/or bottom edges of
266 : // the raster, and that is going to be completely written?
267 : // If so, do not load it from storage, but zero it so that
268 : // the content outsize of the validity area is initialized.
269 2775810 : bool bMemZeroBuffer = false;
270 83668 : if (eRWFlag == GF_Write && !bJustInitialize && nXOff == 0 &&
271 20819 : nXSize == nBlockXSize && nYOff <= nLBlockY * nBlockYSize &&
272 2859570 : nYOff + nYSize == GetYSize() &&
273 86 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)
274 : {
275 86 : bJustInitialize = true;
276 86 : bMemZeroBuffer = true;
277 : }
278 :
279 2775810 : if (poBlock)
280 236279 : poBlock->DropLock();
281 :
282 2775810 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
283 2775820 : poBlock = GetLockedBlockRef(0, nLBlockY, bJustInitialize);
284 2775840 : if (poBlock == nullptr)
285 : {
286 1064 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
287 : nullptr)
288 : {
289 0 : CPLError(CE_Failure, CPLE_AppDefined,
290 : "GetBlockRef failed at X block offset %d, "
291 : "Y block offset %d%s",
292 : 0, nLBlockY,
293 0 : (nErrorCounter != CPLGetErrorCounter())
294 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
295 : : "");
296 : }
297 1064 : eErr = CE_Failure;
298 1064 : break;
299 : }
300 :
301 2774770 : if (eRWFlag == GF_Write)
302 83668 : poBlock->MarkDirty();
303 :
304 2774770 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
305 2774780 : if (bMemZeroBuffer)
306 : {
307 86 : memset(pabySrcBlock, 0,
308 86 : static_cast<GPtrDiff_t>(nBandDataSize) *
309 86 : nBlockXSize * nBlockYSize);
310 : }
311 : }
312 :
313 4369210 : const auto nSrcByteOffset =
314 4369210 : (static_cast<GPtrDiff_t>(iSrcY - nLBlockY * nBlockYSize) *
315 4369210 : nBlockXSize +
316 4369210 : nXOff) *
317 4369210 : nBandDataSize;
318 :
319 4369210 : if (eDataType == eBufType)
320 : {
321 1594200 : if (eRWFlag == GF_Read)
322 1375040 : memcpy(static_cast<GByte *>(pData) +
323 1375040 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
324 1375040 : pabySrcBlock + nSrcByteOffset,
325 : static_cast<size_t>(nLineSpace));
326 : else
327 219160 : memcpy(pabySrcBlock + nSrcByteOffset,
328 219160 : static_cast<GByte *>(pData) +
329 219160 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
330 : static_cast<size_t>(nLineSpace));
331 : }
332 : else
333 : {
334 : // Type to type conversion.
335 :
336 2775010 : if (eRWFlag == GF_Read)
337 2757880 : GDALCopyWords(
338 2757880 : pabySrcBlock + nSrcByteOffset, eDataType, nBandDataSize,
339 : static_cast<GByte *>(pData) +
340 2757880 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace,
341 : eBufType, static_cast<int>(nPixelSpace), nBufXSize);
342 : else
343 17135 : GDALCopyWords(static_cast<GByte *>(pData) +
344 17135 : static_cast<GPtrDiff_t>(iBufYOff) *
345 : nLineSpace,
346 : eBufType, static_cast<int>(nPixelSpace),
347 17135 : pabySrcBlock + nSrcByteOffset, eDataType,
348 : nBandDataSize, nBufXSize);
349 : }
350 :
351 4398730 : if (psExtraArg->pfnProgress != nullptr &&
352 29523 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
353 : psExtraArg->pProgressData))
354 : {
355 5 : eErr = CE_Failure;
356 5 : break;
357 : }
358 : }
359 :
360 2539560 : if (poBlock)
361 2538500 : poBlock->DropLock();
362 :
363 2539560 : return eErr;
364 : }
365 :
366 : /* ==================================================================== */
367 : /* Do we have overviews that would be appropriate to satisfy */
368 : /* this request? */
369 : /* ==================================================================== */
370 2830720 : if ((nBufXSize < nXSize || nBufYSize < nYSize) && GetOverviewCount() > 0 &&
371 : eRWFlag == GF_Read)
372 : {
373 : GDALRasterIOExtraArg sExtraArg;
374 3330 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
375 :
376 : const int nOverview =
377 3330 : GDALBandGetBestOverviewLevel2(this, nXOff, nYOff, nXSize, nYSize,
378 : nBufXSize, nBufYSize, &sExtraArg);
379 3330 : if (nOverview >= 0)
380 : {
381 3304 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
382 3304 : if (poOverviewBand == nullptr)
383 3304 : return CE_Failure;
384 :
385 3304 : return poOverviewBand->RasterIO(
386 : eRWFlag, nXOff, nYOff, nXSize, nYSize, pData, nBufXSize,
387 3304 : nBufYSize, eBufType, nPixelSpace, nLineSpace, &sExtraArg);
388 : }
389 : }
390 :
391 638801 : if (eRWFlag == GF_Read && nBufXSize < nXSize / 100 &&
392 0 : nBufYSize < nYSize / 100 && nPixelSpace == nBufDataSize &&
393 3466210 : nLineSpace == nPixelSpace * nBufXSize &&
394 0 : CPLTestBool(CPLGetConfigOption("GDAL_NO_COSTLY_OVERVIEW", "NO")))
395 : {
396 0 : memset(pData, 0, static_cast<size_t>(nLineSpace * nBufYSize));
397 0 : return CE_None;
398 : }
399 :
400 : /* ==================================================================== */
401 : /* The second case when we don't need subsample data but likely */
402 : /* need data type conversion. */
403 : /* ==================================================================== */
404 2827410 : if ( // nPixelSpace == nBufDataSize &&
405 2827410 : nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
406 : {
407 : #if DEBUG_VERBOSE
408 : printf("IRasterIO(%d,%d,%d,%d) rw=%d case 2\n", /*ok*/
409 : nXOff, nYOff, nXSize, nYSize, static_cast<int>(eRWFlag));
410 : #endif
411 :
412 : /* --------------------------------------------------------------------
413 : */
414 : /* Loop over buffer computing source locations. */
415 : /* --------------------------------------------------------------------
416 : */
417 : // Calculate starting values out of loop
418 2463920 : const int nLBlockXStart = nXOff / nBlockXSize;
419 2463920 : const int nXSpanEnd = nBufXSize + nXOff;
420 :
421 2463920 : int nYInc = 0;
422 4955080 : for (int iBufYOff = 0, iSrcY = nYOff; iBufYOff < nBufYSize;
423 2491150 : iBufYOff += nYInc, iSrcY += nYInc)
424 : {
425 2491220 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
426 : static_cast<GPtrDiff_t>(nLineSpace);
427 2491220 : int nLBlockY = iSrcY / nBlockYSize;
428 2491220 : int nLBlockX = nLBlockXStart;
429 2491220 : int iSrcX = nXOff;
430 5186000 : while (iSrcX < nXSpanEnd)
431 : {
432 2694840 : int nXSpan = nLBlockX * nBlockXSize;
433 2694840 : if (nXSpan < INT_MAX - nBlockXSize)
434 2694840 : nXSpan += nBlockXSize;
435 : else
436 1 : nXSpan = INT_MAX;
437 2694840 : const int nXRight = nXSpan;
438 2694840 : nXSpan = (nXSpan < nXSpanEnd ? nXSpan : nXSpanEnd) - iSrcX;
439 2694840 : const size_t nXSpanSize =
440 2694840 : nXSpan * static_cast<size_t>(nPixelSpace);
441 :
442 2694840 : bool bJustInitialize =
443 2040120 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
444 37043 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
445 4760480 : nXOff <= nLBlockX * nBlockXSize &&
446 25526 : nXOff + nXSize >= nXRight;
447 :
448 : // Is this a partial tile at right and/or bottom edges of
449 : // the raster, and that is going to be completely written?
450 : // If so, do not load it from storage, but zero it so that
451 : // the content outsize of the validity area is initialized.
452 2694840 : bool bMemZeroBuffer = false;
453 2040120 : if (eRWFlag == GF_Write && !bJustInitialize &&
454 2015840 : nXOff <= nLBlockX * nBlockXSize &&
455 2014220 : nYOff <= nLBlockY * nBlockYSize &&
456 12014 : (nXOff + nXSize >= nXRight ||
457 4737620 : (nXOff + nXSize == GetXSize() && nXRight > GetXSize())) &&
458 11837 : (nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize ||
459 10597 : (nYOff + nYSize == GetYSize() &&
460 1839 : nLBlockY * nBlockYSize > GetYSize() - nBlockYSize)))
461 : {
462 3079 : bJustInitialize = true;
463 3079 : bMemZeroBuffer = true;
464 : }
465 :
466 : /* --------------------------------------------------------------------
467 : */
468 : /* Ensure we have the appropriate block loaded. */
469 : /* --------------------------------------------------------------------
470 : */
471 2694840 : const GUInt32 nErrorCounter = CPLGetErrorCounter();
472 2694840 : poBlock =
473 2694840 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
474 2694840 : if (!poBlock)
475 : {
476 70 : if (strstr(CPLGetLastErrorMsg(), "IReadBlock failed") ==
477 : nullptr)
478 : {
479 0 : CPLError(CE_Failure, CPLE_AppDefined,
480 : "GetBlockRef failed at X block offset %d, "
481 : "Y block offset %d%s",
482 : nLBlockX, nLBlockY,
483 0 : (nErrorCounter != CPLGetErrorCounter())
484 0 : ? CPLSPrintf(": %s", CPLGetLastErrorMsg())
485 : : "");
486 : }
487 70 : return (CE_Failure);
488 : }
489 :
490 2694770 : if (eRWFlag == GF_Write)
491 2040120 : poBlock->MarkDirty();
492 :
493 2694770 : pabySrcBlock = static_cast<GByte *>(poBlock->GetDataRef());
494 2694770 : if (bMemZeroBuffer)
495 : {
496 3079 : memset(pabySrcBlock, 0,
497 3079 : static_cast<GPtrDiff_t>(nBandDataSize) *
498 3079 : nBlockXSize * nBlockYSize);
499 : }
500 : /* --------------------------------------------------------------------
501 : */
502 : /* Copy over this chunk of data. */
503 : /* --------------------------------------------------------------------
504 : */
505 2694770 : GPtrDiff_t iSrcOffset =
506 2694770 : (static_cast<GPtrDiff_t>(iSrcX) -
507 2694770 : static_cast<GPtrDiff_t>(nLBlockX * nBlockXSize) +
508 2694770 : (static_cast<GPtrDiff_t>(iSrcY) -
509 2694770 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
510 2694770 : nBlockXSize) *
511 2694770 : nBandDataSize;
512 : // Fill up as many rows as possible for the loaded block.
513 5389540 : const int kmax = std::min(nBlockYSize - (iSrcY % nBlockYSize),
514 2694770 : nBufYSize - iBufYOff);
515 56709400 : for (int k = 0; k < kmax; k++)
516 : {
517 54014600 : if (eDataType == eBufType && nPixelSpace == nBufDataSize)
518 : {
519 50130800 : if (eRWFlag == GF_Read)
520 45724300 : memcpy(static_cast<GByte *>(pData) + iBufOffset +
521 45724300 : static_cast<GPtrDiff_t>(k) * nLineSpace,
522 45724300 : pabySrcBlock + iSrcOffset, nXSpanSize);
523 : else
524 4406500 : memcpy(pabySrcBlock + iSrcOffset,
525 4406500 : static_cast<GByte *>(pData) + iBufOffset +
526 4406500 : static_cast<GPtrDiff_t>(k) * nLineSpace,
527 : nXSpanSize);
528 : }
529 : else
530 : {
531 : /* type to type conversion */
532 3883750 : if (eRWFlag == GF_Read)
533 3824760 : GDALCopyWords(
534 3824760 : pabySrcBlock + iSrcOffset, eDataType,
535 : nBandDataSize,
536 3824760 : static_cast<GByte *>(pData) + iBufOffset +
537 3824760 : static_cast<GPtrDiff_t>(k) * nLineSpace,
538 : eBufType, static_cast<int>(nPixelSpace),
539 : nXSpan);
540 : else
541 58992 : GDALCopyWords(
542 58992 : static_cast<GByte *>(pData) + iBufOffset +
543 58992 : static_cast<GPtrDiff_t>(k) * nLineSpace,
544 : eBufType, static_cast<int>(nPixelSpace),
545 58992 : pabySrcBlock + iSrcOffset, eDataType,
546 : nBandDataSize, nXSpan);
547 : }
548 :
549 54014600 : iSrcOffset +=
550 54014600 : static_cast<GPtrDiff_t>(nBlockXSize) * nBandDataSize;
551 : }
552 :
553 : iBufOffset =
554 2694770 : CPLUnsanitizedAdd<GPtrDiff_t>(iBufOffset, nXSpanSize);
555 2694770 : nLBlockX++;
556 2694770 : iSrcX += nXSpan;
557 :
558 2694770 : poBlock->DropLock();
559 2694770 : poBlock = nullptr;
560 : }
561 :
562 : /* Compute the increment to go on a block boundary */
563 2491160 : nYInc = nBlockYSize - (iSrcY % nBlockYSize);
564 :
565 2491220 : if (psExtraArg->pfnProgress != nullptr &&
566 66 : !psExtraArg->pfnProgress(
567 2491220 : 1.0 * std::min(nBufYSize, iBufYOff + nYInc) / nBufYSize, "",
568 : psExtraArg->pProgressData))
569 : {
570 0 : return CE_Failure;
571 : }
572 : }
573 :
574 2463850 : return CE_None;
575 : }
576 :
577 : /* ==================================================================== */
578 : /* Loop reading required source blocks to satisfy output */
579 : /* request. This is the most general implementation. */
580 : /* ==================================================================== */
581 :
582 363491 : double dfXOff = nXOff;
583 363491 : double dfYOff = nYOff;
584 363491 : double dfXSize = nXSize;
585 363491 : double dfYSize = nYSize;
586 363491 : if (psExtraArg->bFloatingPointWindowValidity)
587 : {
588 28888 : dfXOff = psExtraArg->dfXOff;
589 28888 : dfYOff = psExtraArg->dfYOff;
590 28888 : dfXSize = psExtraArg->dfXSize;
591 28888 : dfYSize = psExtraArg->dfYSize;
592 : }
593 :
594 : /* -------------------------------------------------------------------- */
595 : /* Compute stepping increment. */
596 : /* -------------------------------------------------------------------- */
597 363491 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
598 363491 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
599 363491 : CPLErr eErr = CE_None;
600 :
601 363491 : if (eRWFlag == GF_Write)
602 : {
603 : /* --------------------------------------------------------------------
604 : */
605 : /* Write case */
606 : /* Loop over raster window computing source locations in the buffer.
607 : */
608 : /* --------------------------------------------------------------------
609 : */
610 166650 : GByte *pabyDstBlock = nullptr;
611 166650 : int nLBlockX = -1;
612 166650 : int nLBlockY = -1;
613 :
614 1259590 : for (int iDstY = nYOff; iDstY < nYOff + nYSize; iDstY++)
615 : {
616 1092940 : const int iBufYOff = static_cast<int>((iDstY - nYOff) / dfSrcYInc);
617 :
618 12063600 : for (int iDstX = nXOff; iDstX < nXOff + nXSize; iDstX++)
619 : {
620 10970600 : const int iBufXOff =
621 10970600 : static_cast<int>((iDstX - nXOff) / dfSrcXInc);
622 10970600 : GPtrDiff_t iBufOffset =
623 10970600 : static_cast<GPtrDiff_t>(iBufYOff) *
624 : static_cast<GPtrDiff_t>(nLineSpace) +
625 10970600 : iBufXOff * static_cast<GPtrDiff_t>(nPixelSpace);
626 :
627 : // FIXME: this code likely doesn't work if the dirty block gets
628 : // flushed to disk before being completely written.
629 : // In the meantime, bJustInitialize should probably be set to
630 : // FALSE even if it is not ideal performance wise, and for
631 : // lossy compression.
632 :
633 : /* --------------------------------------------------------------------
634 : */
635 : /* Ensure we have the appropriate block loaded. */
636 : /* --------------------------------------------------------------------
637 : */
638 10970600 : if (iDstX < nLBlockX * nBlockXSize ||
639 10721300 : iDstX - nBlockXSize >= nLBlockX * nBlockXSize ||
640 10264600 : iDstY < nLBlockY * nBlockYSize ||
641 10264600 : iDstY - nBlockYSize >= nLBlockY * nBlockYSize)
642 : {
643 738642 : nLBlockX = iDstX / nBlockXSize;
644 738642 : nLBlockY = iDstY / nBlockYSize;
645 :
646 738642 : const bool bJustInitialize =
647 1065870 : nYOff <= nLBlockY * nBlockYSize &&
648 327231 : nYOff + nYSize - nBlockYSize >=
649 327231 : nLBlockY * nBlockYSize &&
650 1116140 : nXOff <= nLBlockX * nBlockXSize &&
651 50265 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
652 : /*bool bMemZeroBuffer = FALSE;
653 : if( !bJustInitialize &&
654 : nXOff <= nLBlockX * nBlockXSize &&
655 : nYOff <= nLBlockY * nBlockYSize &&
656 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
657 : (nXOff + nXSize == GetXSize() &&
658 : (nLBlockX+1) * nBlockXSize > GetXSize())) &&
659 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
660 : (nYOff + nYSize == GetYSize() &&
661 : (nLBlockY+1) * nBlockYSize > GetYSize())) )
662 : {
663 : bJustInitialize = TRUE;
664 : bMemZeroBuffer = TRUE;
665 : }*/
666 738642 : if (poBlock != nullptr)
667 571992 : poBlock->DropLock();
668 :
669 738642 : poBlock =
670 738642 : GetLockedBlockRef(nLBlockX, nLBlockY, bJustInitialize);
671 738642 : if (poBlock == nullptr)
672 : {
673 0 : return (CE_Failure);
674 : }
675 :
676 738642 : poBlock->MarkDirty();
677 :
678 738642 : pabyDstBlock = static_cast<GByte *>(poBlock->GetDataRef());
679 : /*if( bMemZeroBuffer )
680 : {
681 : memset(pabyDstBlock, 0,
682 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
683 : * nBlockYSize);
684 : }*/
685 : }
686 :
687 : // To make Coverity happy. Should not happen by design.
688 10970600 : if (pabyDstBlock == nullptr)
689 : {
690 0 : CPLAssert(false);
691 : eErr = CE_Failure;
692 : break;
693 : }
694 :
695 : /* --------------------------------------------------------------------
696 : */
697 : /* Copy over this pixel of data. */
698 : /* --------------------------------------------------------------------
699 : */
700 10970600 : GPtrDiff_t iDstOffset =
701 10970600 : (static_cast<GPtrDiff_t>(iDstX) -
702 10970600 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
703 10970600 : (static_cast<GPtrDiff_t>(iDstY) -
704 10970600 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
705 10970600 : nBlockXSize) *
706 10970600 : nBandDataSize;
707 :
708 10970600 : if (eDataType == eBufType)
709 : {
710 10967500 : memcpy(pabyDstBlock + iDstOffset,
711 10967500 : static_cast<GByte *>(pData) + iBufOffset,
712 : nBandDataSize);
713 : }
714 : else
715 : {
716 : /* type to type conversion ... ouch, this is expensive way
717 : of handling single words */
718 :
719 3096 : GDALCopyWords(static_cast<GByte *>(pData) + iBufOffset,
720 3096 : eBufType, 0, pabyDstBlock + iDstOffset,
721 : eDataType, 0, 1);
722 : }
723 : }
724 :
725 1092940 : if (psExtraArg->pfnProgress != nullptr &&
726 0 : !psExtraArg->pfnProgress(1.0 * (iDstY - nYOff + 1) / nYSize, "",
727 : psExtraArg->pProgressData))
728 : {
729 0 : eErr = CE_Failure;
730 0 : break;
731 : }
732 : }
733 : }
734 : else
735 : {
736 196841 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
737 : {
738 7484 : if ((psExtraArg->eResampleAlg == GRIORA_Cubic ||
739 2452 : psExtraArg->eResampleAlg == GRIORA_CubicSpline ||
740 2450 : psExtraArg->eResampleAlg == GRIORA_Bilinear ||
741 5037 : psExtraArg->eResampleAlg == GRIORA_Lanczos) &&
742 2438 : GetColorTable() != nullptr)
743 : {
744 0 : CPLError(CE_Warning, CPLE_NotSupported,
745 : "Resampling method not supported on paletted band. "
746 : "Falling back to nearest neighbour");
747 : }
748 2519 : else if (psExtraArg->eResampleAlg == GRIORA_Gauss &&
749 3 : GDALDataTypeIsComplex(eDataType))
750 : {
751 0 : CPLError(CE_Warning, CPLE_NotSupported,
752 : "Resampling method not supported on complex data type "
753 : "band. Falling back to nearest neighbour");
754 : }
755 : else
756 : {
757 2516 : return RasterIOResampled(eRWFlag, nXOff, nYOff, nXSize, nYSize,
758 : pData, nBufXSize, nBufYSize, eBufType,
759 2516 : nPixelSpace, nLineSpace, psExtraArg);
760 : }
761 : }
762 :
763 194324 : int nLimitBlockY = 0;
764 194324 : const bool bByteCopy = eDataType == eBufType && nBandDataSize == 1;
765 194324 : int nStartBlockX = -nBlockXSize;
766 194324 : const double EPS = 1e-10;
767 194324 : int nLBlockY = -1;
768 194324 : const double dfSrcXStart = 0.5 * dfSrcXInc + dfXOff + EPS;
769 194324 : const bool bIntegerXFactor =
770 170831 : bUseIntegerRequestCoords &&
771 266279 : static_cast<int>(dfSrcXInc) == dfSrcXInc &&
772 71955 : static_cast<int>(dfSrcXInc) < INT_MAX / nBandDataSize;
773 :
774 : /* --------------------------------------------------------------------
775 : */
776 : /* Read case */
777 : /* Loop over buffer computing source locations. */
778 : /* --------------------------------------------------------------------
779 : */
780 1878270 : for (int iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
781 : {
782 : // Add small epsilon to avoid some numeric precision issues.
783 1683960 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
784 1683960 : const int iSrcY = static_cast<int>(std::min(
785 1683960 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
786 :
787 1683960 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
788 : static_cast<GPtrDiff_t>(nLineSpace);
789 :
790 1683960 : if (iSrcY >= nLimitBlockY)
791 : {
792 234792 : nLBlockY = iSrcY / nBlockYSize;
793 234792 : nLimitBlockY = nLBlockY * nBlockYSize;
794 234792 : if (nLimitBlockY < INT_MAX - nBlockYSize)
795 234792 : nLimitBlockY += nBlockYSize;
796 : else
797 0 : nLimitBlockY = INT_MAX;
798 : // Make sure a new block is loaded.
799 234792 : nStartBlockX = -nBlockXSize;
800 : }
801 1449170 : else if (static_cast<int>(dfSrcXStart) < nStartBlockX)
802 : {
803 : // Make sure a new block is loaded.
804 429498 : nStartBlockX = -nBlockXSize;
805 : }
806 :
807 1683960 : GPtrDiff_t iSrcOffsetCst = (iSrcY - nLBlockY * nBlockYSize) *
808 1683960 : static_cast<GPtrDiff_t>(nBlockXSize);
809 :
810 1683960 : if (bIntegerXFactor)
811 : {
812 413154 : int iSrcX = static_cast<int>(dfSrcXStart);
813 413154 : const int nSrcXInc = static_cast<int>(dfSrcXInc);
814 413154 : GByte *pabyDstData = static_cast<GByte *>(pData) + iBufOffset;
815 413154 : bool bRet = false;
816 413154 : if (bByteCopy)
817 : {
818 302831 : bRet = DownsamplingIntegerXFactor<true, 1>(
819 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
820 : static_cast<int>(nPixelSpace), nBufXSize, GDT_Byte,
821 : GDT_Byte, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
822 : }
823 110323 : else if (eDataType == eBufType)
824 : {
825 110114 : switch (nBandDataSize)
826 : {
827 110090 : case 2:
828 110090 : bRet = DownsamplingIntegerXFactor<true, 2>(
829 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
830 : pabyDstData, static_cast<int>(nPixelSpace),
831 : nBufXSize, eDataType, eDataType, nStartBlockX,
832 : nBlockXSize, poBlock, nLBlockY);
833 110090 : break;
834 16 : case 4:
835 16 : bRet = DownsamplingIntegerXFactor<true, 4>(
836 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
837 : pabyDstData, static_cast<int>(nPixelSpace),
838 : nBufXSize, eDataType, eDataType, nStartBlockX,
839 : nBlockXSize, poBlock, nLBlockY);
840 16 : break;
841 8 : case 8:
842 8 : bRet = DownsamplingIntegerXFactor<true, 8>(
843 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
844 : pabyDstData, static_cast<int>(nPixelSpace),
845 : nBufXSize, eDataType, eDataType, nStartBlockX,
846 : nBlockXSize, poBlock, nLBlockY);
847 8 : break;
848 0 : case 16:
849 0 : bRet = DownsamplingIntegerXFactor<true, 16>(
850 : this, iSrcX, nSrcXInc, iSrcOffsetCst,
851 : pabyDstData, static_cast<int>(nPixelSpace),
852 : nBufXSize, eDataType, eDataType, nStartBlockX,
853 : nBlockXSize, poBlock, nLBlockY);
854 0 : break;
855 0 : default:
856 0 : CPLAssert(false);
857 : break;
858 : }
859 : }
860 : else
861 : {
862 209 : bRet = DownsamplingIntegerXFactor<false, 0>(
863 : this, iSrcX, nSrcXInc, iSrcOffsetCst, pabyDstData,
864 : static_cast<int>(nPixelSpace), nBufXSize, eDataType,
865 : eBufType, nStartBlockX, nBlockXSize, poBlock, nLBlockY);
866 : }
867 413154 : if (!bRet)
868 1 : eErr = CE_Failure;
869 : }
870 : else
871 : {
872 1270800 : double dfSrcX = dfSrcXStart;
873 444966000 : for (int iBufXOff = 0; iBufXOff < nBufXSize;
874 443695000 : iBufXOff++, dfSrcX += dfSrcXInc)
875 : {
876 : // TODO?: try to avoid the clamping for most iterations
877 : const int iSrcX = static_cast<int>(
878 887390000 : std::min(std::max(0.0, dfSrcX),
879 443695000 : static_cast<double>(nRasterXSize - 1)));
880 :
881 : /* --------------------------------------------------------------------
882 : */
883 : /* Ensure we have the appropriate block loaded. */
884 : /* --------------------------------------------------------------------
885 : */
886 443695000 : if (iSrcX >= nBlockXSize + nStartBlockX)
887 : {
888 1704850 : const int nLBlockX = iSrcX / nBlockXSize;
889 1704850 : nStartBlockX = nLBlockX * nBlockXSize;
890 :
891 1704850 : if (poBlock != nullptr)
892 1582480 : poBlock->DropLock();
893 :
894 1704850 : poBlock = GetLockedBlockRef(nLBlockX, nLBlockY, FALSE);
895 1704850 : if (poBlock == nullptr)
896 : {
897 9 : eErr = CE_Failure;
898 9 : break;
899 : }
900 :
901 : pabySrcBlock =
902 1704840 : static_cast<GByte *>(poBlock->GetDataRef());
903 : }
904 443695000 : const GPtrDiff_t nDiffX =
905 443695000 : static_cast<GPtrDiff_t>(iSrcX - nStartBlockX);
906 :
907 : /* --------------------------------------------------------------------
908 : */
909 : /* Copy over this pixel of data. */
910 : /* --------------------------------------------------------------------
911 : */
912 :
913 443695000 : if (bByteCopy)
914 : {
915 390257000 : GPtrDiff_t iSrcOffset = nDiffX + iSrcOffsetCst;
916 390257000 : static_cast<GByte *>(pData)[iBufOffset] =
917 390257000 : pabySrcBlock[iSrcOffset];
918 : }
919 53437500 : else if (eDataType == eBufType)
920 : {
921 48215500 : GPtrDiff_t iSrcOffset =
922 48215500 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
923 48215500 : memcpy(static_cast<GByte *>(pData) + iBufOffset,
924 48215500 : pabySrcBlock + iSrcOffset, nBandDataSize);
925 : }
926 : else
927 : {
928 : // Type to type conversion ...
929 5222010 : GPtrDiff_t iSrcOffset =
930 5222010 : (nDiffX + iSrcOffsetCst) * nBandDataSize;
931 5222010 : GDALCopyWords(pabySrcBlock + iSrcOffset, eDataType, 0,
932 5222010 : static_cast<GByte *>(pData) + iBufOffset,
933 : eBufType, 0, 1);
934 : }
935 :
936 443695000 : iBufOffset += static_cast<int>(nPixelSpace);
937 : }
938 : }
939 1683960 : if (eErr == CE_Failure)
940 11 : break;
941 :
942 1686830 : if (psExtraArg->pfnProgress != nullptr &&
943 2878 : !psExtraArg->pfnProgress(1.0 * (iBufYOff + 1) / nBufYSize, "",
944 : psExtraArg->pProgressData))
945 : {
946 1 : eErr = CE_Failure;
947 1 : break;
948 : }
949 : }
950 : }
951 :
952 360974 : if (poBlock != nullptr)
953 360964 : poBlock->DropLock();
954 :
955 360974 : return eErr;
956 : }
957 :
958 : /************************************************************************/
959 : /* GDALRasterIOTransformer() */
960 : /************************************************************************/
961 :
962 : struct GDALRasterIOTransformerStruct
963 : {
964 : double dfXOff;
965 : double dfYOff;
966 : double dfXRatioDstToSrc;
967 : double dfYRatioDstToSrc;
968 : };
969 :
970 6748 : static int GDALRasterIOTransformer(void *pTransformerArg, int bDstToSrc,
971 : int nPointCount, double *x, double *y,
972 : double * /* z */, int *panSuccess)
973 : {
974 6748 : GDALRasterIOTransformerStruct *psParams =
975 : static_cast<GDALRasterIOTransformerStruct *>(pTransformerArg);
976 6748 : if (bDstToSrc)
977 : {
978 252996 : for (int i = 0; i < nPointCount; i++)
979 : {
980 246836 : x[i] = x[i] * psParams->dfXRatioDstToSrc + psParams->dfXOff;
981 246836 : y[i] = y[i] * psParams->dfYRatioDstToSrc + psParams->dfYOff;
982 246836 : panSuccess[i] = TRUE;
983 : }
984 : }
985 : else
986 : {
987 1176 : for (int i = 0; i < nPointCount; i++)
988 : {
989 588 : x[i] = (x[i] - psParams->dfXOff) / psParams->dfXRatioDstToSrc;
990 588 : y[i] = (y[i] - psParams->dfYOff) / psParams->dfYRatioDstToSrc;
991 588 : panSuccess[i] = TRUE;
992 : }
993 : }
994 6748 : return TRUE;
995 : }
996 :
997 : /************************************************************************/
998 : /* RasterIOResampled() */
999 : /************************************************************************/
1000 :
1001 : //! @cond Doxygen_Suppress
1002 2516 : CPLErr GDALRasterBand::RasterIOResampled(
1003 : GDALRWFlag /* eRWFlag */, int nXOff, int nYOff, int nXSize, int nYSize,
1004 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
1005 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
1006 : {
1007 : // Determine if we use warping resampling or overview resampling
1008 2516 : bool bUseWarp = false;
1009 2516 : if (GDALDataTypeIsComplex(eDataType))
1010 149 : bUseWarp = true;
1011 :
1012 2516 : double dfXOff = nXOff;
1013 2516 : double dfYOff = nYOff;
1014 2516 : double dfXSize = nXSize;
1015 2516 : double dfYSize = nYSize;
1016 2516 : if (psExtraArg->bFloatingPointWindowValidity)
1017 : {
1018 2105 : dfXOff = psExtraArg->dfXOff;
1019 2105 : dfYOff = psExtraArg->dfYOff;
1020 2105 : dfXSize = psExtraArg->dfXSize;
1021 2105 : dfYSize = psExtraArg->dfYSize;
1022 : }
1023 :
1024 2516 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1025 2516 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1026 :
1027 : // Determine the coordinates in the "virtual" output raster to see
1028 : // if there are not integers, in which case we will use them as a shift
1029 : // so that subwindow extracts give the exact same results as entire raster
1030 : // scaling.
1031 2516 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1032 2516 : bool bHasXOffVirtual = false;
1033 2516 : int nDestXOffVirtual = 0;
1034 2516 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1035 : {
1036 2191 : bHasXOffVirtual = true;
1037 2191 : dfXOff = nXOff;
1038 2191 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1039 : }
1040 :
1041 2516 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1042 2516 : bool bHasYOffVirtual = false;
1043 2516 : int nDestYOffVirtual = 0;
1044 2516 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1045 : {
1046 2185 : bHasYOffVirtual = true;
1047 2185 : dfYOff = nYOff;
1048 2185 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1049 : }
1050 :
1051 : // Create a MEM dataset that wraps the output buffer.
1052 : GDALDataset *poMEMDS;
1053 2516 : void *pTempBuffer = nullptr;
1054 2516 : GSpacing nPSMem = nPixelSpace;
1055 2516 : GSpacing nLSMem = nLineSpace;
1056 2516 : void *pDataMem = pData;
1057 2516 : GDALDataType eDTMem = eBufType;
1058 2516 : if (eBufType != eDataType)
1059 : {
1060 28 : nPSMem = GDALGetDataTypeSizeBytes(eDataType);
1061 28 : nLSMem = nPSMem * nBufXSize;
1062 : pTempBuffer =
1063 28 : VSI_MALLOC2_VERBOSE(nBufYSize, static_cast<size_t>(nLSMem));
1064 28 : if (pTempBuffer == nullptr)
1065 0 : return CE_Failure;
1066 28 : pDataMem = pTempBuffer;
1067 28 : eDTMem = eDataType;
1068 : }
1069 :
1070 : poMEMDS =
1071 2516 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1072 : nDestYOffVirtual + nBufYSize, 0, eDTMem, nullptr);
1073 2516 : GByte *pabyData = static_cast<GByte *>(pDataMem) -
1074 2516 : nPSMem * nDestXOffVirtual - nLSMem * nDestYOffVirtual;
1075 2516 : GDALRasterBandH hMEMBand = MEMCreateRasterBandEx(
1076 : poMEMDS, 1, pabyData, eDTMem, nPSMem, nLSMem, false);
1077 2516 : poMEMDS->SetBand(1, GDALRasterBand::FromHandle(hMEMBand));
1078 :
1079 2516 : const char *pszNBITS = GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1080 2516 : if (pszNBITS)
1081 6 : reinterpret_cast<GDALRasterBand *>(hMEMBand)->SetMetadataItem(
1082 6 : "NBITS", pszNBITS, "IMAGE_STRUCTURE");
1083 :
1084 2516 : CPLErr eErr = CE_None;
1085 :
1086 : // Do the resampling.
1087 2516 : if (bUseWarp)
1088 : {
1089 149 : int bHasNoData = FALSE;
1090 149 : double dfNoDataValue = GetNoDataValue(&bHasNoData);
1091 :
1092 149 : VRTDatasetH hVRTDS = nullptr;
1093 149 : GDALRasterBandH hVRTBand = nullptr;
1094 149 : if (GetDataset() == nullptr)
1095 : {
1096 : /* Create VRT dataset that wraps the whole dataset */
1097 0 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1098 0 : VRTAddBand(hVRTDS, eDataType, nullptr);
1099 0 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1100 0 : VRTAddSimpleSource(hVRTBand, this, 0, 0, nRasterXSize, nRasterYSize,
1101 : 0, 0, nRasterXSize, nRasterYSize, nullptr,
1102 : VRT_NODATA_UNSET);
1103 :
1104 : /* Add a mask band if needed */
1105 0 : if (GetMaskFlags() != GMF_ALL_VALID)
1106 : {
1107 0 : GDALDataset::FromHandle(hVRTDS)->CreateMaskBand(0);
1108 : VRTSourcedRasterBand *poVRTMaskBand =
1109 : reinterpret_cast<VRTSourcedRasterBand *>(
1110 : reinterpret_cast<GDALRasterBand *>(hVRTBand)
1111 0 : ->GetMaskBand());
1112 0 : poVRTMaskBand->AddMaskBandSource(this, 0, 0, nRasterXSize,
1113 0 : nRasterYSize, 0, 0,
1114 0 : nRasterXSize, nRasterYSize);
1115 : }
1116 : }
1117 :
1118 149 : GDALWarpOptions *psWarpOptions = GDALCreateWarpOptions();
1119 149 : switch (psExtraArg->eResampleAlg)
1120 : {
1121 0 : case GRIORA_NearestNeighbour:
1122 0 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1123 0 : break;
1124 147 : case GRIORA_Bilinear:
1125 147 : psWarpOptions->eResampleAlg = GRA_Bilinear;
1126 147 : break;
1127 0 : case GRIORA_Cubic:
1128 0 : psWarpOptions->eResampleAlg = GRA_Cubic;
1129 0 : break;
1130 0 : case GRIORA_CubicSpline:
1131 0 : psWarpOptions->eResampleAlg = GRA_CubicSpline;
1132 0 : break;
1133 0 : case GRIORA_Lanczos:
1134 0 : psWarpOptions->eResampleAlg = GRA_Lanczos;
1135 0 : break;
1136 0 : case GRIORA_Average:
1137 0 : psWarpOptions->eResampleAlg = GRA_Average;
1138 0 : break;
1139 2 : case GRIORA_RMS:
1140 2 : psWarpOptions->eResampleAlg = GRA_RMS;
1141 2 : break;
1142 0 : case GRIORA_Mode:
1143 0 : psWarpOptions->eResampleAlg = GRA_Mode;
1144 0 : break;
1145 0 : default:
1146 0 : CPLAssert(false);
1147 : psWarpOptions->eResampleAlg = GRA_NearestNeighbour;
1148 : break;
1149 : }
1150 149 : psWarpOptions->hSrcDS = hVRTDS ? hVRTDS : GetDataset();
1151 149 : psWarpOptions->hDstDS = poMEMDS;
1152 149 : psWarpOptions->nBandCount = 1;
1153 149 : int nSrcBandNumber = hVRTDS ? 1 : nBand;
1154 149 : int nDstBandNumber = 1;
1155 149 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1156 149 : psWarpOptions->panDstBands = &nDstBandNumber;
1157 298 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress
1158 149 : ? psExtraArg->pfnProgress
1159 : : GDALDummyProgress;
1160 149 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1161 149 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1162 149 : if (bHasNoData)
1163 : {
1164 0 : psWarpOptions->papszWarpOptions = CSLSetNameValue(
1165 : psWarpOptions->papszWarpOptions, "INIT_DEST", "NO_DATA");
1166 0 : if (psWarpOptions->padfSrcNoDataReal == nullptr)
1167 : {
1168 0 : psWarpOptions->padfSrcNoDataReal =
1169 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1170 0 : psWarpOptions->padfSrcNoDataReal[0] = dfNoDataValue;
1171 : }
1172 :
1173 0 : if (psWarpOptions->padfDstNoDataReal == nullptr)
1174 : {
1175 0 : psWarpOptions->padfDstNoDataReal =
1176 0 : static_cast<double *>(CPLMalloc(sizeof(double)));
1177 0 : psWarpOptions->padfDstNoDataReal[0] = dfNoDataValue;
1178 : }
1179 : }
1180 :
1181 : GDALRasterIOTransformerStruct sTransformer;
1182 149 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1183 149 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1184 149 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1185 149 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1186 149 : psWarpOptions->pTransformerArg = &sTransformer;
1187 :
1188 : GDALWarpOperationH hWarpOperation =
1189 149 : GDALCreateWarpOperation(psWarpOptions);
1190 149 : eErr = GDALChunkAndWarpImage(hWarpOperation, nDestXOffVirtual,
1191 : nDestYOffVirtual, nBufXSize, nBufYSize);
1192 149 : GDALDestroyWarpOperation(hWarpOperation);
1193 :
1194 149 : psWarpOptions->panSrcBands = nullptr;
1195 149 : psWarpOptions->panDstBands = nullptr;
1196 149 : GDALDestroyWarpOptions(psWarpOptions);
1197 :
1198 149 : if (hVRTDS)
1199 0 : GDALClose(hVRTDS);
1200 : }
1201 : else
1202 : {
1203 2367 : const char *pszResampling =
1204 2514 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1205 230 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1206 164 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1207 157 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1208 103 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1209 39 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1210 15 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1211 3 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1212 : : "UNKNOWN";
1213 :
1214 2367 : int nKernelRadius = 0;
1215 : GDALResampleFunction pfnResampleFunc =
1216 2367 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1217 2367 : CPLAssert(pfnResampleFunc);
1218 : GDALDataType eWrkDataType =
1219 2367 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1220 2367 : int nHasNoData = 0;
1221 2367 : double dfNoDataValue = GetNoDataValue(&nHasNoData);
1222 2367 : const bool bHasNoData = CPL_TO_BOOL(nHasNoData);
1223 2367 : if (!bHasNoData)
1224 2310 : dfNoDataValue = 0.0;
1225 :
1226 2367 : int nDstBlockXSize = nBufXSize;
1227 2367 : int nDstBlockYSize = nBufYSize;
1228 2367 : int nFullResXChunk = 0;
1229 2367 : int nFullResYChunk = 0;
1230 : while (true)
1231 : {
1232 2367 : nFullResXChunk =
1233 2367 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1234 2367 : nFullResYChunk =
1235 2367 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1236 2367 : if (nFullResXChunk > nRasterXSize)
1237 2178 : nFullResXChunk = nRasterXSize;
1238 2367 : if (nFullResYChunk > nRasterYSize)
1239 161 : nFullResYChunk = nRasterYSize;
1240 2367 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1241 2352 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1242 : 1024 * 1024))
1243 : break;
1244 : // When operating on the full width of a raster whose block width is
1245 : // the raster width, prefer doing chunks in height.
1246 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1247 : nDstBlockYSize > 1)
1248 0 : nDstBlockYSize /= 2;
1249 : /* Otherwise cut the maximal dimension */
1250 0 : else if (nDstBlockXSize > 1 &&
1251 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1252 0 : nDstBlockXSize /= 2;
1253 : else
1254 0 : nDstBlockYSize /= 2;
1255 : }
1256 :
1257 2367 : int nOvrXFactor = static_cast<int>(0.5 + dfXRatioDstToSrc);
1258 2367 : int nOvrYFactor = static_cast<int>(0.5 + dfYRatioDstToSrc);
1259 2367 : if (nOvrXFactor == 0)
1260 2016 : nOvrXFactor = 1;
1261 2367 : if (nOvrYFactor == 0)
1262 2015 : nOvrYFactor = 1;
1263 2367 : int nFullResXSizeQueried =
1264 2367 : nFullResXChunk + 2 * nKernelRadius * nOvrXFactor;
1265 2367 : int nFullResYSizeQueried =
1266 2367 : nFullResYChunk + 2 * nKernelRadius * nOvrYFactor;
1267 :
1268 2367 : if (nFullResXSizeQueried > nRasterXSize)
1269 2108 : nFullResXSizeQueried = nRasterXSize;
1270 2367 : if (nFullResYSizeQueried > nRasterYSize)
1271 102 : nFullResYSizeQueried = nRasterYSize;
1272 :
1273 : void *pChunk =
1274 2367 : VSI_MALLOC3_VERBOSE(GDALGetDataTypeSizeBytes(eWrkDataType),
1275 : nFullResXSizeQueried, nFullResYSizeQueried);
1276 2367 : GByte *pabyChunkNoDataMask = nullptr;
1277 :
1278 2367 : GDALRasterBand *poMaskBand = GetMaskBand();
1279 2367 : int l_nMaskFlags = GetMaskFlags();
1280 :
1281 2367 : bool bUseNoDataMask = ((l_nMaskFlags & GMF_ALL_VALID) == 0);
1282 2367 : if (bUseNoDataMask)
1283 : {
1284 113 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1285 : nFullResXSizeQueried, nFullResYSizeQueried));
1286 : }
1287 2367 : if (pChunk == nullptr ||
1288 113 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1289 : {
1290 0 : GDALClose(poMEMDS);
1291 0 : CPLFree(pChunk);
1292 0 : CPLFree(pabyChunkNoDataMask);
1293 0 : VSIFree(pTempBuffer);
1294 0 : return CE_Failure;
1295 : }
1296 :
1297 2367 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1298 2367 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1299 2367 : int nBlocksDone = 0;
1300 :
1301 : int nDstYOff;
1302 4734 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1303 2367 : nDstYOff += nDstBlockYSize)
1304 : {
1305 : int nDstYCount;
1306 2367 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1307 2367 : nDstYCount = nDstBlockYSize;
1308 : else
1309 0 : nDstYCount = nBufYSize - nDstYOff;
1310 :
1311 2367 : int nChunkYOff =
1312 2367 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1313 2367 : int nChunkYOff2 = nYOff + 1 +
1314 2367 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1315 : dfYRatioDstToSrc));
1316 2367 : if (nChunkYOff2 > nRasterYSize)
1317 269 : nChunkYOff2 = nRasterYSize;
1318 2367 : int nYCount = nChunkYOff2 - nChunkYOff;
1319 2367 : CPLAssert(nYCount <= nFullResYChunk);
1320 :
1321 2367 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrYFactor;
1322 2367 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrYFactor;
1323 2367 : if (nChunkYOffQueried < 0)
1324 : {
1325 204 : nChunkYSizeQueried += nChunkYOffQueried;
1326 204 : nChunkYOffQueried = 0;
1327 : }
1328 2367 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1329 304 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1330 2367 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1331 :
1332 2367 : int nDstXOff = 0;
1333 4734 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1334 2367 : nDstXOff += nDstBlockXSize)
1335 : {
1336 2367 : int nDstXCount = 0;
1337 2367 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1338 2367 : nDstXCount = nDstBlockXSize;
1339 : else
1340 0 : nDstXCount = nBufXSize - nDstXOff;
1341 :
1342 2367 : int nChunkXOff =
1343 2367 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1344 2367 : int nChunkXOff2 =
1345 2367 : nXOff + 1 +
1346 2367 : static_cast<int>(
1347 2367 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1348 2367 : if (nChunkXOff2 > nRasterXSize)
1349 2179 : nChunkXOff2 = nRasterXSize;
1350 2367 : int nXCount = nChunkXOff2 - nChunkXOff;
1351 2367 : CPLAssert(nXCount <= nFullResXChunk);
1352 :
1353 2367 : int nChunkXOffQueried =
1354 2367 : nChunkXOff - nKernelRadius * nOvrXFactor;
1355 2367 : int nChunkXSizeQueried =
1356 2367 : nXCount + 2 * nKernelRadius * nOvrXFactor;
1357 2367 : if (nChunkXOffQueried < 0)
1358 : {
1359 2121 : nChunkXSizeQueried += nChunkXOffQueried;
1360 2121 : nChunkXOffQueried = 0;
1361 : }
1362 2367 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1363 2107 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1364 2367 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1365 :
1366 : // Read the source buffers.
1367 2367 : eErr = RasterIO(GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1368 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1369 : nChunkXSizeQueried, nChunkYSizeQueried,
1370 : eWrkDataType, 0, 0, nullptr);
1371 :
1372 2367 : bool bSkipResample = false;
1373 2367 : bool bNoDataMaskFullyOpaque = false;
1374 2367 : if (eErr == CE_None && bUseNoDataMask)
1375 : {
1376 113 : eErr = poMaskBand->RasterIO(
1377 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1378 : nChunkXSizeQueried, nChunkYSizeQueried,
1379 : pabyChunkNoDataMask, nChunkXSizeQueried,
1380 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1381 :
1382 : /* Optimizations if mask if fully opaque or transparent */
1383 113 : int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1384 113 : GByte bVal = pabyChunkNoDataMask[0];
1385 113 : int i = 1;
1386 235275 : for (; i < nPixels; i++)
1387 : {
1388 235232 : if (pabyChunkNoDataMask[i] != bVal)
1389 70 : break;
1390 : }
1391 113 : if (i == nPixels)
1392 : {
1393 43 : if (bVal == 0)
1394 : {
1395 712 : for (int j = 0; j < nDstYCount; j++)
1396 : {
1397 686 : GDALCopyWords(&dfNoDataValue, GDT_Float64, 0,
1398 : static_cast<GByte *>(pDataMem) +
1399 686 : nLSMem * (j + nDstYOff) +
1400 686 : nDstXOff * nPSMem,
1401 : eDTMem, static_cast<int>(nPSMem),
1402 : nDstXCount);
1403 : }
1404 26 : bSkipResample = true;
1405 : }
1406 : else
1407 : {
1408 17 : bNoDataMaskFullyOpaque = true;
1409 : }
1410 : }
1411 : }
1412 :
1413 2367 : if (!bSkipResample && eErr == CE_None)
1414 : {
1415 2339 : const bool bPropagateNoData = false;
1416 2339 : void *pDstBuffer = nullptr;
1417 2339 : GDALDataType eDstBufferDataType = GDT_Unknown;
1418 : GDALRasterBand *poMEMBand =
1419 2339 : GDALRasterBand::FromHandle(hMEMBand);
1420 2339 : eErr = pfnResampleFunc(
1421 : dfXRatioDstToSrc, dfYRatioDstToSrc,
1422 : dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1423 : dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1424 : eWrkDataType, pChunk,
1425 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1426 2339 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1427 : nChunkXSizeQueried,
1428 2339 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1429 : nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1430 2339 : nDstXOff + nDestXOffVirtual + nDstXCount,
1431 : nDstYOff + nDestYOffVirtual,
1432 2339 : nDstYOff + nDestYOffVirtual + nDstYCount, poMEMBand,
1433 : &pDstBuffer, &eDstBufferDataType, pszResampling,
1434 2339 : bHasNoData, dfNoDataValue, GetColorTable(), eDataType,
1435 : bPropagateNoData);
1436 2339 : if (eErr == CE_None)
1437 : {
1438 2339 : eErr = poMEMBand->RasterIO(
1439 : GF_Write, nDstXOff + nDestXOffVirtual,
1440 : nDstYOff + nDestYOffVirtual, nDstXCount, nDstYCount,
1441 : pDstBuffer, nDstXCount, nDstYCount,
1442 : eDstBufferDataType, 0, 0, nullptr);
1443 : }
1444 2339 : CPLFree(pDstBuffer);
1445 : }
1446 :
1447 2367 : nBlocksDone++;
1448 2378 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1449 11 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1450 : "", psExtraArg->pProgressData))
1451 : {
1452 1 : eErr = CE_Failure;
1453 : }
1454 : }
1455 : }
1456 :
1457 2367 : CPLFree(pChunk);
1458 2367 : CPLFree(pabyChunkNoDataMask);
1459 : }
1460 :
1461 2516 : if (eBufType != eDataType)
1462 : {
1463 28 : CPL_IGNORE_RET_VAL(poMEMDS->GetRasterBand(1)->RasterIO(
1464 : GF_Read, nDestXOffVirtual, nDestYOffVirtual, nBufXSize, nBufYSize,
1465 : pData, nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
1466 : nullptr));
1467 : }
1468 2516 : GDALClose(poMEMDS);
1469 2516 : VSIFree(pTempBuffer);
1470 :
1471 2516 : return eErr;
1472 : }
1473 :
1474 : /************************************************************************/
1475 : /* RasterIOResampled() */
1476 : /************************************************************************/
1477 :
1478 280 : CPLErr GDALDataset::RasterIOResampled(GDALRWFlag /* eRWFlag */, int nXOff,
1479 : int nYOff, int nXSize, int nYSize,
1480 : void *pData, int nBufXSize, int nBufYSize,
1481 : GDALDataType eBufType, int nBandCount,
1482 : int *panBandMap, GSpacing nPixelSpace,
1483 : GSpacing nLineSpace, GSpacing nBandSpace,
1484 : GDALRasterIOExtraArg *psExtraArg)
1485 :
1486 : {
1487 : #if 0
1488 : // Determine if we use warping resampling or overview resampling
1489 : bool bUseWarp = false;
1490 : if( GDALDataTypeIsComplex( eDataType ) )
1491 : bUseWarp = true;
1492 : #endif
1493 :
1494 280 : double dfXOff = nXOff;
1495 280 : double dfYOff = nYOff;
1496 280 : double dfXSize = nXSize;
1497 280 : double dfYSize = nYSize;
1498 280 : if (psExtraArg->bFloatingPointWindowValidity)
1499 : {
1500 158 : dfXOff = psExtraArg->dfXOff;
1501 158 : dfYOff = psExtraArg->dfYOff;
1502 158 : dfXSize = psExtraArg->dfXSize;
1503 158 : dfYSize = psExtraArg->dfYSize;
1504 : }
1505 :
1506 280 : const double dfXRatioDstToSrc = dfXSize / nBufXSize;
1507 280 : const double dfYRatioDstToSrc = dfYSize / nBufYSize;
1508 :
1509 : // Determine the coordinates in the "virtual" output raster to see
1510 : // if there are not integers, in which case we will use them as a shift
1511 : // so that subwindow extracts give the exact same results as entire raster
1512 : // scaling.
1513 280 : double dfDestXOff = dfXOff / dfXRatioDstToSrc;
1514 280 : bool bHasXOffVirtual = false;
1515 280 : int nDestXOffVirtual = 0;
1516 280 : if (fabs(dfDestXOff - static_cast<int>(dfDestXOff + 0.5)) < 1e-8)
1517 : {
1518 162 : bHasXOffVirtual = true;
1519 162 : dfXOff = nXOff;
1520 162 : nDestXOffVirtual = static_cast<int>(dfDestXOff + 0.5);
1521 : }
1522 :
1523 280 : double dfDestYOff = dfYOff / dfYRatioDstToSrc;
1524 280 : bool bHasYOffVirtual = false;
1525 280 : int nDestYOffVirtual = 0;
1526 280 : if (fabs(dfDestYOff - static_cast<int>(dfDestYOff + 0.5)) < 1e-8)
1527 : {
1528 124 : bHasYOffVirtual = true;
1529 124 : dfYOff = nYOff;
1530 124 : nDestYOffVirtual = static_cast<int>(dfDestYOff + 0.5);
1531 : }
1532 :
1533 : // Create a MEM dataset that wraps the output buffer.
1534 : GDALDataset *poMEMDS =
1535 280 : MEMDataset::Create("", nDestXOffVirtual + nBufXSize,
1536 : nDestYOffVirtual + nBufYSize, 0, eBufType, nullptr);
1537 : GDALRasterBand **papoDstBands = static_cast<GDALRasterBand **>(
1538 275 : CPLMalloc(nBandCount * sizeof(GDALRasterBand *)));
1539 1190 : for (int i = 0; i < nBandCount; i++)
1540 : {
1541 917 : char szBuffer[32] = {'\0'};
1542 1856 : int nRet = CPLPrintPointer(
1543 : szBuffer,
1544 917 : static_cast<GByte *>(pData) - nPixelSpace * nDestXOffVirtual -
1545 917 : nLineSpace * nDestYOffVirtual + nBandSpace * i,
1546 : sizeof(szBuffer));
1547 939 : szBuffer[nRet] = 0;
1548 :
1549 939 : char szBuffer0[64] = {'\0'};
1550 939 : snprintf(szBuffer0, sizeof(szBuffer0), "DATAPOINTER=%s", szBuffer);
1551 :
1552 939 : char szBuffer1[64] = {'\0'};
1553 939 : snprintf(szBuffer1, sizeof(szBuffer1), "PIXELOFFSET=" CPL_FRMT_GIB,
1554 : static_cast<GIntBig>(nPixelSpace));
1555 :
1556 939 : char szBuffer2[64] = {'\0'};
1557 939 : snprintf(szBuffer2, sizeof(szBuffer2), "LINEOFFSET=" CPL_FRMT_GIB,
1558 : static_cast<GIntBig>(nLineSpace));
1559 :
1560 939 : char *apszOptions[4] = {szBuffer0, szBuffer1, szBuffer2, nullptr};
1561 :
1562 939 : poMEMDS->AddBand(eBufType, apszOptions);
1563 :
1564 944 : GDALRasterBand *poSrcBand = GetRasterBand(panBandMap[i]);
1565 928 : papoDstBands[i] = poMEMDS->GetRasterBand(i + 1);
1566 : const char *pszNBITS =
1567 928 : poSrcBand->GetMetadataItem("NBITS", "IMAGE_STRUCTURE");
1568 916 : if (pszNBITS)
1569 0 : poMEMDS->GetRasterBand(i + 1)->SetMetadataItem("NBITS", pszNBITS,
1570 0 : "IMAGE_STRUCTURE");
1571 : }
1572 :
1573 273 : CPLErr eErr = CE_None;
1574 :
1575 : // TODO(schwehr): Why disabled? Why not just delete?
1576 : // Looks like this code was initially added as disable by copying
1577 : // from RasterIO here:
1578 : // https://trac.osgeo.org/gdal/changeset/29572
1579 : #if 0
1580 : // Do the resampling.
1581 : if( bUseWarp )
1582 : {
1583 : VRTDatasetH hVRTDS = nullptr;
1584 : GDALRasterBandH hVRTBand = nullptr;
1585 : if( GetDataset() == nullptr )
1586 : {
1587 : /* Create VRT dataset that wraps the whole dataset */
1588 : hVRTDS = VRTCreate(nRasterXSize, nRasterYSize);
1589 : VRTAddBand( hVRTDS, eDataType, nullptr );
1590 : hVRTBand = GDALGetRasterBand(hVRTDS, 1);
1591 : VRTAddSimpleSource( (VRTSourcedRasterBandH)hVRTBand,
1592 : (GDALRasterBandH)this,
1593 : 0, 0,
1594 : nRasterXSize, nRasterYSize,
1595 : 0, 0,
1596 : nRasterXSize, nRasterYSize,
1597 : nullptr, VRT_NODATA_UNSET );
1598 :
1599 : /* Add a mask band if needed */
1600 : if( GetMaskFlags() != GMF_ALL_VALID )
1601 : {
1602 : ((GDALDataset*)hVRTDS)->CreateMaskBand(0);
1603 : VRTSourcedRasterBand* poVRTMaskBand =
1604 : (VRTSourcedRasterBand*)(((GDALRasterBand*)hVRTBand)->GetMaskBand());
1605 : poVRTMaskBand->
1606 : AddMaskBandSource( this,
1607 : 0, 0,
1608 : nRasterXSize, nRasterYSize,
1609 : 0, 0,
1610 : nRasterXSize, nRasterYSize);
1611 : }
1612 : }
1613 :
1614 : GDALWarpOptions* psWarpOptions = GDALCreateWarpOptions();
1615 : psWarpOptions->eResampleAlg = (GDALResampleAlg)psExtraArg->eResampleAlg;
1616 : psWarpOptions->hSrcDS = (GDALDatasetH) (hVRTDS ? hVRTDS : GetDataset());
1617 : psWarpOptions->hDstDS = (GDALDatasetH) poMEMDS;
1618 : psWarpOptions->nBandCount = 1;
1619 : int nSrcBandNumber = (hVRTDS ? 1 : nBand);
1620 : int nDstBandNumber = 1;
1621 : psWarpOptions->panSrcBands = &nSrcBandNumber;
1622 : psWarpOptions->panDstBands = &nDstBandNumber;
1623 : psWarpOptions->pfnProgress = psExtraArg->pfnProgress ?
1624 : psExtraArg->pfnProgress : GDALDummyProgress;
1625 : psWarpOptions->pProgressArg = psExtraArg->pProgressData;
1626 : psWarpOptions->pfnTransformer = GDALRasterIOTransformer;
1627 : GDALRasterIOTransformerStruct sTransformer;
1628 : sTransformer.dfXOff = bHasXOffVirtual ? 0 : dfXOff;
1629 : sTransformer.dfYOff = bHasYOffVirtual ? 0 : dfYOff;
1630 : sTransformer.dfXRatioDstToSrc = dfXRatioDstToSrc;
1631 : sTransformer.dfYRatioDstToSrc = dfYRatioDstToSrc;
1632 : psWarpOptions->pTransformerArg = &sTransformer;
1633 :
1634 : GDALWarpOperationH hWarpOperation = GDALCreateWarpOperation(psWarpOptions);
1635 : eErr = GDALChunkAndWarpImage( hWarpOperation,
1636 : nDestXOffVirtual, nDestYOffVirtual,
1637 : nBufXSize, nBufYSize );
1638 : GDALDestroyWarpOperation( hWarpOperation );
1639 :
1640 : psWarpOptions->panSrcBands = nullptr;
1641 : psWarpOptions->panDstBands = nullptr;
1642 : GDALDestroyWarpOptions( psWarpOptions );
1643 :
1644 : if( hVRTDS )
1645 : GDALClose(hVRTDS);
1646 : }
1647 : else
1648 : #endif
1649 : {
1650 273 : const char *pszResampling =
1651 430 : (psExtraArg->eResampleAlg == GRIORA_Bilinear) ? "BILINEAR"
1652 157 : : (psExtraArg->eResampleAlg == GRIORA_Cubic) ? "CUBIC"
1653 0 : : (psExtraArg->eResampleAlg == GRIORA_CubicSpline) ? "CUBICSPLINE"
1654 0 : : (psExtraArg->eResampleAlg == GRIORA_Lanczos) ? "LANCZOS"
1655 0 : : (psExtraArg->eResampleAlg == GRIORA_Average) ? "AVERAGE"
1656 0 : : (psExtraArg->eResampleAlg == GRIORA_RMS) ? "RMS"
1657 0 : : (psExtraArg->eResampleAlg == GRIORA_Mode) ? "MODE"
1658 0 : : (psExtraArg->eResampleAlg == GRIORA_Gauss) ? "GAUSS"
1659 : : "UNKNOWN";
1660 :
1661 273 : GDALRasterBand *poFirstSrcBand = GetRasterBand(panBandMap[0]);
1662 266 : GDALDataType eDataType = poFirstSrcBand->GetRasterDataType();
1663 : int nBlockXSize, nBlockYSize;
1664 270 : poFirstSrcBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
1665 :
1666 : int nKernelRadius;
1667 : GDALResampleFunction pfnResampleFunc =
1668 266 : GDALGetResampleFunction(pszResampling, &nKernelRadius);
1669 255 : CPLAssert(pfnResampleFunc);
1670 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1671 : GDALResampleFunctionMultiBands pfnResampleFuncMultiBands =
1672 : GDALGetResampleFunctionMultiBands(pszResampling, &nKernelRadius);
1673 : #endif
1674 : GDALDataType eWrkDataType =
1675 255 : GDALGetOvrWorkDataType(pszResampling, eDataType);
1676 :
1677 259 : int nDstBlockXSize = nBufXSize;
1678 259 : int nDstBlockYSize = nBufYSize;
1679 : int nFullResXChunk, nFullResYChunk;
1680 : while (true)
1681 : {
1682 259 : nFullResXChunk =
1683 259 : 3 + static_cast<int>(nDstBlockXSize * dfXRatioDstToSrc);
1684 259 : nFullResYChunk =
1685 259 : 3 + static_cast<int>(nDstBlockYSize * dfYRatioDstToSrc);
1686 259 : if (nFullResXChunk > nRasterXSize)
1687 138 : nFullResXChunk = nRasterXSize;
1688 259 : if (nFullResYChunk > nRasterYSize)
1689 33 : nFullResYChunk = nRasterYSize;
1690 259 : if ((nDstBlockXSize == 1 && nDstBlockYSize == 1) ||
1691 257 : (static_cast<GIntBig>(nFullResXChunk) * nFullResYChunk <=
1692 : 1024 * 1024))
1693 : break;
1694 : // When operating on the full width of a raster whose block width is
1695 : // the raster width, prefer doing chunks in height.
1696 0 : if (nFullResXChunk >= nXSize && nXSize == nBlockXSize &&
1697 : nDstBlockYSize > 1)
1698 0 : nDstBlockYSize /= 2;
1699 : /* Otherwise cut the maximal dimension */
1700 0 : else if (nDstBlockXSize > 1 &&
1701 0 : (nFullResXChunk > nFullResYChunk || nDstBlockYSize == 1))
1702 0 : nDstBlockXSize /= 2;
1703 : else
1704 0 : nDstBlockYSize /= 2;
1705 : }
1706 :
1707 523 : int nOvrFactor = std::max(static_cast<int>(0.5 + dfXRatioDstToSrc),
1708 259 : static_cast<int>(0.5 + dfYRatioDstToSrc));
1709 264 : if (nOvrFactor == 0)
1710 93 : nOvrFactor = 1;
1711 264 : int nFullResXSizeQueried =
1712 264 : nFullResXChunk + 2 * nKernelRadius * nOvrFactor;
1713 264 : int nFullResYSizeQueried =
1714 264 : nFullResYChunk + 2 * nKernelRadius * nOvrFactor;
1715 :
1716 264 : if (nFullResXSizeQueried > nRasterXSize)
1717 147 : nFullResXSizeQueried = nRasterXSize;
1718 264 : if (nFullResYSizeQueried > nRasterYSize)
1719 36 : nFullResYSizeQueried = nRasterYSize;
1720 :
1721 264 : void *pChunk = VSI_MALLOC3_VERBOSE(
1722 : cpl::fits_on<int>(GDALGetDataTypeSizeBytes(eWrkDataType) *
1723 : nBandCount),
1724 : nFullResXSizeQueried, nFullResYSizeQueried);
1725 276 : GByte *pabyChunkNoDataMask = nullptr;
1726 :
1727 276 : GDALRasterBand *poMaskBand = poFirstSrcBand->GetMaskBand();
1728 273 : int nMaskFlags = poFirstSrcBand->GetMaskFlags();
1729 :
1730 276 : bool bUseNoDataMask = ((nMaskFlags & GMF_ALL_VALID) == 0);
1731 276 : if (bUseNoDataMask)
1732 : {
1733 55 : pabyChunkNoDataMask = static_cast<GByte *>(VSI_MALLOC2_VERBOSE(
1734 : nFullResXSizeQueried, nFullResYSizeQueried));
1735 : }
1736 276 : if (pChunk == nullptr ||
1737 55 : (bUseNoDataMask && pabyChunkNoDataMask == nullptr))
1738 : {
1739 11 : GDALClose(poMEMDS);
1740 0 : CPLFree(pChunk);
1741 0 : CPLFree(pabyChunkNoDataMask);
1742 0 : CPLFree(papoDstBands);
1743 0 : return CE_Failure;
1744 : }
1745 :
1746 265 : int nTotalBlocks = ((nBufXSize + nDstBlockXSize - 1) / nDstBlockXSize) *
1747 265 : ((nBufYSize + nDstBlockYSize - 1) / nDstBlockYSize);
1748 265 : int nBlocksDone = 0;
1749 :
1750 : int nDstYOff;
1751 560 : for (nDstYOff = 0; nDstYOff < nBufYSize && eErr == CE_None;
1752 295 : nDstYOff += nDstBlockYSize)
1753 : {
1754 : int nDstYCount;
1755 265 : if (nDstYOff + nDstBlockYSize <= nBufYSize)
1756 267 : nDstYCount = nDstBlockYSize;
1757 : else
1758 0 : nDstYCount = nBufYSize - nDstYOff;
1759 :
1760 265 : int nChunkYOff =
1761 265 : nYOff + static_cast<int>(nDstYOff * dfYRatioDstToSrc);
1762 265 : int nChunkYOff2 = nYOff + 1 +
1763 265 : static_cast<int>(ceil((nDstYOff + nDstYCount) *
1764 : dfYRatioDstToSrc));
1765 265 : if (nChunkYOff2 > nRasterYSize)
1766 56 : nChunkYOff2 = nRasterYSize;
1767 265 : int nYCount = nChunkYOff2 - nChunkYOff;
1768 265 : CPLAssert(nYCount <= nFullResYChunk);
1769 :
1770 265 : int nChunkYOffQueried = nChunkYOff - nKernelRadius * nOvrFactor;
1771 265 : int nChunkYSizeQueried = nYCount + 2 * nKernelRadius * nOvrFactor;
1772 265 : if (nChunkYOffQueried < 0)
1773 : {
1774 56 : nChunkYSizeQueried += nChunkYOffQueried;
1775 56 : nChunkYOffQueried = 0;
1776 : }
1777 265 : if (nChunkYSizeQueried + nChunkYOffQueried > nRasterYSize)
1778 66 : nChunkYSizeQueried = nRasterYSize - nChunkYOffQueried;
1779 265 : CPLAssert(nChunkYSizeQueried <= nFullResYSizeQueried);
1780 :
1781 : int nDstXOff;
1782 545 : for (nDstXOff = 0; nDstXOff < nBufXSize && eErr == CE_None;
1783 280 : nDstXOff += nDstBlockXSize)
1784 : {
1785 : int nDstXCount;
1786 250 : if (nDstXOff + nDstBlockXSize <= nBufXSize)
1787 257 : nDstXCount = nDstBlockXSize;
1788 : else
1789 0 : nDstXCount = nBufXSize - nDstXOff;
1790 :
1791 250 : int nChunkXOff =
1792 250 : nXOff + static_cast<int>(nDstXOff * dfXRatioDstToSrc);
1793 250 : int nChunkXOff2 =
1794 250 : nXOff + 1 +
1795 250 : static_cast<int>(
1796 250 : ceil((nDstXOff + nDstXCount) * dfXRatioDstToSrc));
1797 250 : if (nChunkXOff2 > nRasterXSize)
1798 137 : nChunkXOff2 = nRasterXSize;
1799 250 : int nXCount = nChunkXOff2 - nChunkXOff;
1800 250 : CPLAssert(nXCount <= nFullResXChunk);
1801 :
1802 250 : int nChunkXOffQueried = nChunkXOff - nKernelRadius * nOvrFactor;
1803 250 : int nChunkXSizeQueried =
1804 250 : nXCount + 2 * nKernelRadius * nOvrFactor;
1805 250 : if (nChunkXOffQueried < 0)
1806 : {
1807 129 : nChunkXSizeQueried += nChunkXOffQueried;
1808 129 : nChunkXOffQueried = 0;
1809 : }
1810 250 : if (nChunkXSizeQueried + nChunkXOffQueried > nRasterXSize)
1811 138 : nChunkXSizeQueried = nRasterXSize - nChunkXOffQueried;
1812 250 : CPLAssert(nChunkXSizeQueried <= nFullResXSizeQueried);
1813 :
1814 250 : bool bSkipResample = false;
1815 250 : bool bNoDataMaskFullyOpaque = false;
1816 250 : if (eErr == CE_None && bUseNoDataMask)
1817 : {
1818 55 : eErr = poMaskBand->RasterIO(
1819 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1820 : nChunkXSizeQueried, nChunkYSizeQueried,
1821 : pabyChunkNoDataMask, nChunkXSizeQueried,
1822 : nChunkYSizeQueried, GDT_Byte, 0, 0, nullptr);
1823 :
1824 : /* Optimizations if mask if fully opaque or transparent */
1825 55 : const int nPixels = nChunkXSizeQueried * nChunkYSizeQueried;
1826 55 : const GByte bVal = pabyChunkNoDataMask[0];
1827 55 : int i = 1; // Used after for.
1828 123794 : for (; i < nPixels; i++)
1829 : {
1830 123777 : if (pabyChunkNoDataMask[i] != bVal)
1831 38 : break;
1832 : }
1833 55 : if (i == nPixels)
1834 : {
1835 17 : if (bVal == 0)
1836 : {
1837 16 : GByte abyZero[16] = {0};
1838 64 : for (int iBand = 0; iBand < nBandCount; iBand++)
1839 : {
1840 2016 : for (int j = 0; j < nDstYCount; j++)
1841 : {
1842 1968 : GDALCopyWords(
1843 : abyZero, GDT_Byte, 0,
1844 : static_cast<GByte *>(pData) +
1845 1968 : iBand * nBandSpace +
1846 1968 : nLineSpace * (j + nDstYOff) +
1847 1968 : nDstXOff * nPixelSpace,
1848 : eBufType, static_cast<int>(nPixelSpace),
1849 : nDstXCount);
1850 : }
1851 : }
1852 16 : bSkipResample = true;
1853 : }
1854 : else
1855 : {
1856 1 : bNoDataMaskFullyOpaque = true;
1857 : }
1858 : }
1859 : }
1860 :
1861 250 : if (!bSkipResample && eErr == CE_None)
1862 : {
1863 : /* Read the source buffers */
1864 234 : eErr = RasterIO(
1865 : GF_Read, nChunkXOffQueried, nChunkYOffQueried,
1866 : nChunkXSizeQueried, nChunkYSizeQueried, pChunk,
1867 : nChunkXSizeQueried, nChunkYSizeQueried, eWrkDataType,
1868 : nBandCount, panBandMap, 0, 0, 0, nullptr);
1869 : }
1870 :
1871 : #ifdef GDAL_ENABLE_RESAMPLING_MULTIBAND
1872 : if (pfnResampleFuncMultiBands && !bSkipResample &&
1873 : eErr == CE_None)
1874 : {
1875 : eErr = pfnResampleFuncMultiBands(
1876 : dfXRatioDstToSrc, dfYRatioDstToSrc,
1877 : dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1878 : dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1879 : eWrkDataType, (GByte *)pChunk, nBandCount,
1880 : bNoDataMaskFullyOpaque ? nullptr : pabyChunkNoDataMask,
1881 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1882 : nChunkXSizeQueried,
1883 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1884 : nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1885 : nDstXOff + nDestXOffVirtual + nDstXCount,
1886 : nDstYOff + nDestYOffVirtual,
1887 : nDstYOff + nDestYOffVirtual + nDstYCount, papoDstBands,
1888 : pszResampling, FALSE /*bHasNoData*/,
1889 : 0.0 /* dfNoDataValue */, nullptr /* color table*/,
1890 : eDataType);
1891 : }
1892 : else
1893 : #endif
1894 : {
1895 : size_t nChunkBandOffset =
1896 277 : static_cast<size_t>(nChunkXSizeQueried) *
1897 277 : nChunkYSizeQueried *
1898 277 : GDALGetDataTypeSizeBytes(eWrkDataType);
1899 1188 : for (int i = 0;
1900 1188 : i < nBandCount && !bSkipResample && eErr == CE_None;
1901 : i++)
1902 : {
1903 908 : const bool bPropagateNoData = false;
1904 908 : void *pDstBuffer = nullptr;
1905 908 : GDALDataType eDstBufferDataType = GDT_Unknown;
1906 : GDALRasterBand *poMEMBand =
1907 908 : poMEMDS->GetRasterBand(i + 1);
1908 909 : eErr = pfnResampleFunc(
1909 : dfXRatioDstToSrc, dfYRatioDstToSrc,
1910 : dfXOff - nXOff, /* == 0 if bHasXOffVirtual */
1911 : dfYOff - nYOff, /* == 0 if bHasYOffVirtual */
1912 : eWrkDataType,
1913 909 : reinterpret_cast<GByte *>(pChunk) +
1914 909 : i * nChunkBandOffset,
1915 : bNoDataMaskFullyOpaque ? nullptr
1916 : : pabyChunkNoDataMask,
1917 909 : nChunkXOffQueried - (bHasXOffVirtual ? 0 : nXOff),
1918 : nChunkXSizeQueried,
1919 909 : nChunkYOffQueried - (bHasYOffVirtual ? 0 : nYOff),
1920 : nChunkYSizeQueried, nDstXOff + nDestXOffVirtual,
1921 909 : nDstXOff + nDestXOffVirtual + nDstXCount,
1922 : nDstYOff + nDestYOffVirtual,
1923 909 : nDstYOff + nDestYOffVirtual + nDstYCount, poMEMBand,
1924 : &pDstBuffer, &eDstBufferDataType, pszResampling,
1925 : false /*bHasNoData*/, 0.0 /* dfNoDataValue */,
1926 : nullptr /* color table*/, eDataType,
1927 : bPropagateNoData);
1928 910 : if (eErr == CE_None)
1929 : {
1930 910 : eErr = poMEMBand->RasterIO(
1931 : GF_Write, nDstXOff + nDestXOffVirtual,
1932 : nDstYOff + nDestYOffVirtual, nDstXCount,
1933 : nDstYCount, pDstBuffer, nDstXCount, nDstYCount,
1934 : eDstBufferDataType, 0, 0, nullptr);
1935 : }
1936 910 : CPLFree(pDstBuffer);
1937 : }
1938 : }
1939 :
1940 280 : nBlocksDone++;
1941 282 : if (eErr == CE_None && psExtraArg->pfnProgress != nullptr &&
1942 2 : !psExtraArg->pfnProgress(1.0 * nBlocksDone / nTotalBlocks,
1943 : "", psExtraArg->pProgressData))
1944 : {
1945 0 : eErr = CE_Failure;
1946 : }
1947 : }
1948 : }
1949 :
1950 295 : CPLFree(pChunk);
1951 280 : CPLFree(pabyChunkNoDataMask);
1952 : }
1953 :
1954 280 : CPLFree(papoDstBands);
1955 280 : GDALClose(poMEMDS);
1956 :
1957 280 : return eErr;
1958 : }
1959 :
1960 : //! @endcond
1961 :
1962 : /************************************************************************/
1963 : /* GDALSwapWords() */
1964 : /************************************************************************/
1965 :
1966 : /**
1967 : * Byte swap words in-place.
1968 : *
1969 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
1970 : * a memory array. No assumption is made that the words being swapped are
1971 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
1972 : * to determine if the current platform is big endian or little endian. Use
1973 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
1974 : * of a function call.
1975 : *
1976 : * @param pData pointer to start of data buffer.
1977 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
1978 : * @param nWordCount the number of words to be swapped in this call.
1979 : * @param nWordSkip the byte offset from the start of one word to the start of
1980 : * the next. For packed buffers this is the same as nWordSize.
1981 : */
1982 :
1983 390131 : void CPL_STDCALL GDALSwapWords(void *pData, int nWordSize, int nWordCount,
1984 : int nWordSkip)
1985 :
1986 : {
1987 390131 : if (nWordCount > 0)
1988 390131 : VALIDATE_POINTER0(pData, "GDALSwapWords");
1989 :
1990 390131 : GByte *pabyData = static_cast<GByte *>(pData);
1991 :
1992 390131 : switch (nWordSize)
1993 : {
1994 3603 : case 1:
1995 3603 : break;
1996 :
1997 378958 : case 2:
1998 378958 : CPLAssert(nWordSkip >= 2 || nWordCount == 1);
1999 117423000 : for (int i = 0; i < nWordCount; i++)
2000 : {
2001 117044000 : CPL_SWAP16PTR(pabyData);
2002 117044000 : pabyData += nWordSkip;
2003 : }
2004 378958 : break;
2005 :
2006 7087 : case 4:
2007 7087 : CPLAssert(nWordSkip >= 4 || nWordCount == 1);
2008 7087 : if (CPL_IS_ALIGNED(pabyData, 4) && (nWordSkip % 4) == 0)
2009 : {
2010 16177200 : for (int i = 0; i < nWordCount; i++)
2011 : {
2012 16170100 : *reinterpret_cast<GUInt32 *>(pabyData) = CPL_SWAP32(
2013 : *reinterpret_cast<const GUInt32 *>(pabyData));
2014 16170100 : pabyData += nWordSkip;
2015 7084 : }
2016 : }
2017 : else
2018 : {
2019 9 : for (int i = 0; i < nWordCount; i++)
2020 : {
2021 6 : CPL_SWAP32PTR(pabyData);
2022 6 : pabyData += nWordSkip;
2023 : }
2024 : }
2025 7087 : break;
2026 :
2027 483 : case 8:
2028 483 : CPLAssert(nWordSkip >= 8 || nWordCount == 1);
2029 : #ifdef CPL_HAS_GINT64
2030 483 : if (CPL_IS_ALIGNED(pabyData, 8) && (nWordSkip % 8) == 0)
2031 : {
2032 27418 : for (int i = 0; i < nWordCount; i++)
2033 : {
2034 26936 : *reinterpret_cast<GUInt64 *>(pabyData) = CPL_SWAP64(
2035 : *reinterpret_cast<const GUInt64 *>(pabyData));
2036 26936 : pabyData += nWordSkip;
2037 482 : }
2038 : }
2039 : else
2040 : #endif
2041 : {
2042 3 : for (int i = 0; i < nWordCount; i++)
2043 : {
2044 2 : CPL_SWAP64PTR(pabyData);
2045 2 : pabyData += nWordSkip;
2046 : }
2047 : }
2048 483 : break;
2049 :
2050 0 : default:
2051 0 : CPLAssert(false);
2052 : }
2053 : }
2054 :
2055 : /************************************************************************/
2056 : /* GDALSwapWordsEx() */
2057 : /************************************************************************/
2058 :
2059 : /**
2060 : * Byte swap words in-place.
2061 : *
2062 : * This function will byte swap a set of 2, 4 or 8 byte words "in place" in
2063 : * a memory array. No assumption is made that the words being swapped are
2064 : * word aligned in memory. Use the CPL_LSB and CPL_MSB macros from cpl_port.h
2065 : * to determine if the current platform is big endian or little endian. Use
2066 : * The macros like CPL_SWAP32() to byte swap single values without the overhead
2067 : * of a function call.
2068 : *
2069 : * @param pData pointer to start of data buffer.
2070 : * @param nWordSize size of words being swapped in bytes. Normally 2, 4 or 8.
2071 : * @param nWordCount the number of words to be swapped in this call.
2072 : * @param nWordSkip the byte offset from the start of one word to the start of
2073 : * the next. For packed buffers this is the same as nWordSize.
2074 : * @since GDAL 2.1
2075 : */
2076 4240 : void CPL_STDCALL GDALSwapWordsEx(void *pData, int nWordSize, size_t nWordCount,
2077 : int nWordSkip)
2078 : {
2079 4240 : GByte *pabyData = static_cast<GByte *>(pData);
2080 8480 : while (nWordCount)
2081 : {
2082 : // Pick-up a multiple of 8 as max chunk size.
2083 4240 : const int nWordCountSmall =
2084 4240 : (nWordCount > (1 << 30)) ? (1 << 30) : static_cast<int>(nWordCount);
2085 4240 : GDALSwapWords(pabyData, nWordSize, nWordCountSmall, nWordSkip);
2086 4240 : pabyData += static_cast<size_t>(nWordSkip) * nWordCountSmall;
2087 4240 : nWordCount -= nWordCountSmall;
2088 : }
2089 4240 : }
2090 :
2091 : // Place the new GDALCopyWords helpers in an anonymous namespace
2092 : namespace
2093 : {
2094 :
2095 : /************************************************************************/
2096 : /* GDALCopyWordsT() */
2097 : /************************************************************************/
2098 : /**
2099 : * Template function, used to copy data from pSrcData into buffer
2100 : * pDstData, with stride nSrcPixelStride in the source data and
2101 : * stride nDstPixelStride in the destination data. This template can
2102 : * deal with the case where the input data type is real or complex and
2103 : * the output is real.
2104 : *
2105 : * @param pSrcData the source data buffer
2106 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2107 : * of interest.
2108 : * @param pDstData the destination buffer.
2109 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2110 : * interest.
2111 : * @param nWordCount the total number of pixel words to copy
2112 : *
2113 : * @code
2114 : * // Assume an input buffer of type GUInt16 named pBufferIn
2115 : * GByte *pBufferOut = new GByte[numBytesOut];
2116 : * GDALCopyWordsT<GUInt16, GByte>(pSrcData, 2, pDstData, 1, numBytesOut);
2117 : * @endcode
2118 : * @note
2119 : * This is a private function, and should not be exposed outside of
2120 : * rasterio.cpp. External users should call the GDALCopyWords driver function.
2121 : */
2122 :
2123 : template <class Tin, class Tout>
2124 45979756 : static void inline GDALCopyWordsGenericT(const Tin *const CPL_RESTRICT pSrcData,
2125 : int nSrcPixelStride,
2126 : Tout *const CPL_RESTRICT pDstData,
2127 : int nDstPixelStride,
2128 : GPtrDiff_t nWordCount)
2129 : {
2130 45979756 : decltype(nWordCount) nDstOffset = 0;
2131 :
2132 45979756 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2133 45979756 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2134 465950773 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2135 : {
2136 419970998 : const Tin tValue =
2137 419970998 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2138 419970998 : Tout *const pOutPixel =
2139 419970998 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2140 :
2141 419970998 : GDALCopyWord(tValue, *pOutPixel);
2142 :
2143 419970998 : nDstOffset += nDstPixelStride;
2144 : }
2145 45979756 : }
2146 :
2147 : template <class Tin, class Tout>
2148 38163060 : static void inline GDALCopyWordsT(const Tin *const CPL_RESTRICT pSrcData,
2149 : int nSrcPixelStride,
2150 : Tout *const CPL_RESTRICT pDstData,
2151 : int nDstPixelStride, GPtrDiff_t nWordCount)
2152 : {
2153 38163060 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData, nDstPixelStride,
2154 : nWordCount);
2155 38163060 : }
2156 :
2157 : template <class Tin, class Tout>
2158 6371626 : static void inline GDALCopyWordsT_8atatime(
2159 : const Tin *const CPL_RESTRICT pSrcData, int nSrcPixelStride,
2160 : Tout *const CPL_RESTRICT pDstData, int nDstPixelStride,
2161 : GPtrDiff_t nWordCount)
2162 : {
2163 6371626 : decltype(nWordCount) nDstOffset = 0;
2164 :
2165 6371626 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2166 6371626 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2167 6371626 : decltype(nWordCount) n = 0;
2168 6371626 : if (nSrcPixelStride == static_cast<int>(sizeof(Tin)) &&
2169 : nDstPixelStride == static_cast<int>(sizeof(Tout)))
2170 : {
2171 20977890 : for (; n < nWordCount - 7; n += 8)
2172 : {
2173 20793489 : const Tin *pInValues = reinterpret_cast<const Tin *>(
2174 20793489 : pSrcDataPtr + (n * nSrcPixelStride));
2175 20793489 : Tout *const pOutPixels =
2176 20793489 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2177 :
2178 20793489 : GDALCopy8Words(pInValues, pOutPixels);
2179 :
2180 20790849 : nDstOffset += 8 * nDstPixelStride;
2181 : }
2182 : }
2183 13045747 : for (; n < nWordCount; n++)
2184 : {
2185 6674121 : const Tin tValue =
2186 6674121 : *reinterpret_cast<const Tin *>(pSrcDataPtr + (n * nSrcPixelStride));
2187 6674121 : Tout *const pOutPixel =
2188 6674121 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2189 :
2190 6674121 : GDALCopyWord(tValue, *pOutPixel);
2191 :
2192 6676761 : nDstOffset += nDstPixelStride;
2193 : }
2194 6371626 : }
2195 :
2196 : #if defined(__x86_64) || defined(_M_X64)
2197 :
2198 : #include <emmintrin.h>
2199 :
2200 : template <class Tout>
2201 35614 : void GDALCopyWordsByteTo16Bit(const GByte *const CPL_RESTRICT pSrcData,
2202 : int nSrcPixelStride,
2203 : Tout *const CPL_RESTRICT pDstData,
2204 : int nDstPixelStride, GPtrDiff_t nWordCount)
2205 : {
2206 : static_assert(std::is_integral<Tout>::value &&
2207 : sizeof(Tout) == sizeof(uint16_t),
2208 : "Bad Tout");
2209 35614 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2210 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2211 : {
2212 29163 : decltype(nWordCount) n = 0;
2213 29163 : const __m128i xmm_zero = _mm_setzero_si128();
2214 29163 : GByte *CPL_RESTRICT pabyDstDataPtr =
2215 : reinterpret_cast<GByte *>(pDstData);
2216 685905 : for (; n < nWordCount - 15; n += 16)
2217 : {
2218 656742 : __m128i xmm = _mm_loadu_si128(
2219 656742 : reinterpret_cast<const __m128i *>(pSrcData + n));
2220 656742 : __m128i xmm0 = _mm_unpacklo_epi8(xmm, xmm_zero);
2221 656742 : __m128i xmm1 = _mm_unpackhi_epi8(xmm, xmm_zero);
2222 : _mm_storeu_si128(
2223 656742 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2), xmm0);
2224 : _mm_storeu_si128(
2225 656742 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 2 + 16), xmm1);
2226 : }
2227 100081 : for (; n < nWordCount; n++)
2228 : {
2229 70918 : pDstData[n] = pSrcData[n];
2230 29163 : }
2231 : }
2232 : else
2233 : {
2234 6451 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2235 : nDstPixelStride, nWordCount);
2236 : }
2237 35614 : }
2238 :
2239 : template <>
2240 26001 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2241 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2242 : int nDstPixelStride, GPtrDiff_t nWordCount)
2243 : {
2244 26001 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2245 : nDstPixelStride, nWordCount);
2246 26001 : }
2247 :
2248 : template <>
2249 9613 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2250 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2251 : int nDstPixelStride, GPtrDiff_t nWordCount)
2252 : {
2253 9613 : GDALCopyWordsByteTo16Bit(pSrcData, nSrcPixelStride, pDstData,
2254 : nDstPixelStride, nWordCount);
2255 9613 : }
2256 :
2257 : template <class Tout>
2258 10235137 : void GDALCopyWordsByteTo32Bit(const GByte *const CPL_RESTRICT pSrcData,
2259 : int nSrcPixelStride,
2260 : Tout *const CPL_RESTRICT pDstData,
2261 : int nDstPixelStride, GPtrDiff_t nWordCount)
2262 : {
2263 : static_assert(std::is_integral<Tout>::value &&
2264 : sizeof(Tout) == sizeof(uint32_t),
2265 : "Bad Tout");
2266 10235137 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2267 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2268 : {
2269 4807027 : decltype(nWordCount) n = 0;
2270 4807027 : const __m128i xmm_zero = _mm_setzero_si128();
2271 4807027 : GByte *CPL_RESTRICT pabyDstDataPtr =
2272 : reinterpret_cast<GByte *>(pDstData);
2273 54986451 : for (; n < nWordCount - 15; n += 16)
2274 : {
2275 50318224 : __m128i xmm = _mm_loadu_si128(
2276 50318224 : reinterpret_cast<const __m128i *>(pSrcData + n));
2277 50334824 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2278 50301824 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2279 50205624 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2280 50092424 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2281 50185424 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2282 50179424 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2283 : _mm_storeu_si128(
2284 50179424 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4), xmm0);
2285 : _mm_storeu_si128(
2286 50179424 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 16), xmm1);
2287 : _mm_storeu_si128(
2288 50179424 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 32), xmm2);
2289 : _mm_storeu_si128(
2290 50179424 : reinterpret_cast<__m128i *>(pabyDstDataPtr + n * 4 + 48), xmm3);
2291 : }
2292 8929282 : for (; n < nWordCount; n++)
2293 : {
2294 4261085 : pDstData[n] = pSrcData[n];
2295 4668197 : }
2296 : }
2297 : else
2298 : {
2299 5428080 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2300 : nDstPixelStride, nWordCount);
2301 : }
2302 10096237 : }
2303 :
2304 : template <>
2305 437 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2306 : int nSrcPixelStride, GUInt32 *const CPL_RESTRICT pDstData,
2307 : int nDstPixelStride, GPtrDiff_t nWordCount)
2308 : {
2309 437 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2310 : nDstPixelStride, nWordCount);
2311 437 : }
2312 :
2313 : template <>
2314 10234600 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2315 : int nSrcPixelStride, GInt32 *const CPL_RESTRICT pDstData,
2316 : int nDstPixelStride, GPtrDiff_t nWordCount)
2317 : {
2318 10234600 : GDALCopyWordsByteTo32Bit(pSrcData, nSrcPixelStride, pDstData,
2319 : nDstPixelStride, nWordCount);
2320 10234700 : }
2321 :
2322 : template <>
2323 2466810 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2324 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2325 : int nDstPixelStride, GPtrDiff_t nWordCount)
2326 : {
2327 2466810 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2328 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2329 : {
2330 108968 : decltype(nWordCount) n = 0;
2331 108968 : const __m128i xmm_zero = _mm_setzero_si128();
2332 108968 : GByte *CPL_RESTRICT pabyDstDataPtr =
2333 : reinterpret_cast<GByte *>(pDstData);
2334 3244400 : for (; n < nWordCount - 15; n += 16)
2335 : {
2336 3135430 : __m128i xmm = _mm_loadu_si128(
2337 3135430 : reinterpret_cast<const __m128i *>(pSrcData + n));
2338 3135430 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2339 3135430 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2340 3135430 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2341 3135430 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2342 3135430 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2343 3135430 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2344 3135430 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2345 3135430 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2346 3135430 : __m128 xmm2_f = _mm_cvtepi32_ps(xmm2);
2347 3135430 : __m128 xmm3_f = _mm_cvtepi32_ps(xmm3);
2348 3135430 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2349 : xmm0_f);
2350 : _mm_storeu_ps(
2351 3135430 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2352 : _mm_storeu_ps(
2353 3135430 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 32), xmm2_f);
2354 : _mm_storeu_ps(
2355 3135430 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 48), xmm3_f);
2356 : }
2357 469776 : for (; n < nWordCount; n++)
2358 : {
2359 360808 : pDstData[n] = pSrcData[n];
2360 108968 : }
2361 : }
2362 : else
2363 : {
2364 2357840 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2365 : nDstPixelStride, nWordCount);
2366 : }
2367 2466810 : }
2368 :
2369 : template <>
2370 142991 : void GDALCopyWordsT(const GByte *const CPL_RESTRICT pSrcData,
2371 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2372 : int nDstPixelStride, GPtrDiff_t nWordCount)
2373 : {
2374 142991 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2375 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2376 : {
2377 120161 : decltype(nWordCount) n = 0;
2378 120161 : const __m128i xmm_zero = _mm_setzero_si128();
2379 120161 : GByte *CPL_RESTRICT pabyDstDataPtr =
2380 : reinterpret_cast<GByte *>(pDstData);
2381 1392960 : for (; n < nWordCount - 15; n += 16)
2382 : {
2383 1272790 : __m128i xmm = _mm_loadu_si128(
2384 1272790 : reinterpret_cast<const __m128i *>(pSrcData + n));
2385 1272790 : __m128i xmm_low = _mm_unpacklo_epi8(xmm, xmm_zero);
2386 1272790 : __m128i xmm_high = _mm_unpackhi_epi8(xmm, xmm_zero);
2387 1272790 : __m128i xmm0 = _mm_unpacklo_epi16(xmm_low, xmm_zero);
2388 1272790 : __m128i xmm1 = _mm_unpackhi_epi16(xmm_low, xmm_zero);
2389 1272790 : __m128i xmm2 = _mm_unpacklo_epi16(xmm_high, xmm_zero);
2390 1272790 : __m128i xmm3 = _mm_unpackhi_epi16(xmm_high, xmm_zero);
2391 :
2392 1272790 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2393 1272790 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2394 1272790 : __m128d xmm2_low_d = _mm_cvtepi32_pd(xmm2);
2395 1272790 : __m128d xmm3_low_d = _mm_cvtepi32_pd(xmm3);
2396 1272790 : xmm0 = _mm_srli_si128(xmm0, 8);
2397 1272790 : xmm1 = _mm_srli_si128(xmm1, 8);
2398 1272790 : xmm2 = _mm_srli_si128(xmm2, 8);
2399 1272790 : xmm3 = _mm_srli_si128(xmm3, 8);
2400 1272790 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2401 1272790 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2402 1272790 : __m128d xmm2_high_d = _mm_cvtepi32_pd(xmm2);
2403 1272790 : __m128d xmm3_high_d = _mm_cvtepi32_pd(xmm3);
2404 :
2405 1272790 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2406 : xmm0_low_d);
2407 : _mm_storeu_pd(
2408 1272790 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2409 : xmm0_high_d);
2410 : _mm_storeu_pd(
2411 1272790 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2412 : xmm1_low_d);
2413 : _mm_storeu_pd(
2414 1272790 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2415 : xmm1_high_d);
2416 : _mm_storeu_pd(
2417 1272790 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 64),
2418 : xmm2_low_d);
2419 : _mm_storeu_pd(
2420 1272790 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 80),
2421 : xmm2_high_d);
2422 : _mm_storeu_pd(
2423 1272790 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 96),
2424 : xmm3_low_d);
2425 : _mm_storeu_pd(
2426 1272790 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 112),
2427 : xmm3_high_d);
2428 : }
2429 227242 : for (; n < nWordCount; n++)
2430 : {
2431 107081 : pDstData[n] = pSrcData[n];
2432 120161 : }
2433 : }
2434 : else
2435 : {
2436 22830 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2437 : nDstPixelStride, nWordCount);
2438 : }
2439 142991 : }
2440 :
2441 : template <>
2442 6111 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2443 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2444 : int nDstPixelStride, GPtrDiff_t nWordCount)
2445 : {
2446 6111 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2447 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2448 : {
2449 4936 : decltype(nWordCount) n = 0;
2450 : // In SSE2, min_epu16 does not exist, so shift from
2451 : // UInt16 to SInt16 to be able to use min_epi16
2452 4936 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2453 4936 : const __m128i xmm_m255_shifted = _mm_set1_epi16(255 - 32768);
2454 138166 : for (; n < nWordCount - 7; n += 8)
2455 : {
2456 133230 : __m128i xmm = _mm_loadu_si128(
2457 133230 : reinterpret_cast<const __m128i *>(pSrcData + n));
2458 133230 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2459 133230 : xmm = _mm_min_epi16(xmm, xmm_m255_shifted);
2460 133230 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2461 133230 : xmm = _mm_packus_epi16(xmm, xmm);
2462 133230 : GDALCopyXMMToInt64(xmm,
2463 133230 : reinterpret_cast<GPtrDiff_t *>(pDstData + n));
2464 : }
2465 15506 : for (; n < nWordCount; n++)
2466 : {
2467 10570 : pDstData[n] =
2468 10570 : pSrcData[n] >= 255 ? 255 : static_cast<GByte>(pSrcData[n]);
2469 4936 : }
2470 : }
2471 : else
2472 : {
2473 1175 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2474 : nDstPixelStride, nWordCount);
2475 : }
2476 6111 : }
2477 :
2478 : template <>
2479 221 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2480 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2481 : int nDstPixelStride, GPtrDiff_t nWordCount)
2482 : {
2483 221 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2484 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2485 : {
2486 15 : decltype(nWordCount) n = 0;
2487 : // In SSE2, min_epu16 does not exist, so shift from
2488 : // UInt16 to SInt16 to be able to use min_epi16
2489 15 : const __m128i xmm_UINT16_to_INT16 = _mm_set1_epi16(-32768);
2490 15 : const __m128i xmm_32767_shifted = _mm_set1_epi16(32767 - 32768);
2491 31 : for (; n < nWordCount - 7; n += 8)
2492 : {
2493 16 : __m128i xmm = _mm_loadu_si128(
2494 16 : reinterpret_cast<const __m128i *>(pSrcData + n));
2495 16 : xmm = _mm_add_epi16(xmm, xmm_UINT16_to_INT16);
2496 16 : xmm = _mm_min_epi16(xmm, xmm_32767_shifted);
2497 16 : xmm = _mm_sub_epi16(xmm, xmm_UINT16_to_INT16);
2498 16 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDstData + n), xmm);
2499 : }
2500 55 : for (; n < nWordCount; n++)
2501 : {
2502 40 : pDstData[n] =
2503 40 : pSrcData[n] >= 32767 ? 32767 : static_cast<GInt16>(pSrcData[n]);
2504 15 : }
2505 : }
2506 : else
2507 : {
2508 206 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2509 : nDstPixelStride, nWordCount);
2510 : }
2511 221 : }
2512 :
2513 : template <>
2514 349 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2515 : int nSrcPixelStride, float *const CPL_RESTRICT pDstData,
2516 : int nDstPixelStride, GPtrDiff_t nWordCount)
2517 : {
2518 349 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2519 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2520 : {
2521 343 : decltype(nWordCount) n = 0;
2522 343 : const __m128i xmm_zero = _mm_setzero_si128();
2523 343 : GByte *CPL_RESTRICT pabyDstDataPtr =
2524 : reinterpret_cast<GByte *>(pDstData);
2525 1311 : for (; n < nWordCount - 7; n += 8)
2526 : {
2527 968 : __m128i xmm = _mm_loadu_si128(
2528 968 : reinterpret_cast<const __m128i *>(pSrcData + n));
2529 968 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2530 968 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2531 968 : __m128 xmm0_f = _mm_cvtepi32_ps(xmm0);
2532 968 : __m128 xmm1_f = _mm_cvtepi32_ps(xmm1);
2533 968 : _mm_storeu_ps(reinterpret_cast<float *>(pabyDstDataPtr + n * 4),
2534 : xmm0_f);
2535 : _mm_storeu_ps(
2536 968 : reinterpret_cast<float *>(pabyDstDataPtr + n * 4 + 16), xmm1_f);
2537 : }
2538 1105 : for (; n < nWordCount; n++)
2539 : {
2540 762 : pDstData[n] = pSrcData[n];
2541 343 : }
2542 : }
2543 : else
2544 : {
2545 6 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2546 : nDstPixelStride, nWordCount);
2547 : }
2548 349 : }
2549 :
2550 : template <>
2551 279 : void GDALCopyWordsT(const GUInt16 *const CPL_RESTRICT pSrcData,
2552 : int nSrcPixelStride, double *const CPL_RESTRICT pDstData,
2553 : int nDstPixelStride, GPtrDiff_t nWordCount)
2554 : {
2555 279 : if (nSrcPixelStride == static_cast<int>(sizeof(*pSrcData)) &&
2556 : nDstPixelStride == static_cast<int>(sizeof(*pDstData)))
2557 : {
2558 171 : decltype(nWordCount) n = 0;
2559 171 : const __m128i xmm_zero = _mm_setzero_si128();
2560 171 : GByte *CPL_RESTRICT pabyDstDataPtr =
2561 : reinterpret_cast<GByte *>(pDstData);
2562 219 : for (; n < nWordCount - 7; n += 8)
2563 : {
2564 48 : __m128i xmm = _mm_loadu_si128(
2565 48 : reinterpret_cast<const __m128i *>(pSrcData + n));
2566 48 : __m128i xmm0 = _mm_unpacklo_epi16(xmm, xmm_zero);
2567 48 : __m128i xmm1 = _mm_unpackhi_epi16(xmm, xmm_zero);
2568 :
2569 48 : __m128d xmm0_low_d = _mm_cvtepi32_pd(xmm0);
2570 48 : __m128d xmm1_low_d = _mm_cvtepi32_pd(xmm1);
2571 48 : xmm0 = _mm_srli_si128(xmm0, 8);
2572 48 : xmm1 = _mm_srli_si128(xmm1, 8);
2573 48 : __m128d xmm0_high_d = _mm_cvtepi32_pd(xmm0);
2574 48 : __m128d xmm1_high_d = _mm_cvtepi32_pd(xmm1);
2575 :
2576 48 : _mm_storeu_pd(reinterpret_cast<double *>(pabyDstDataPtr + n * 8),
2577 : xmm0_low_d);
2578 : _mm_storeu_pd(
2579 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 16),
2580 : xmm0_high_d);
2581 : _mm_storeu_pd(
2582 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 32),
2583 : xmm1_low_d);
2584 : _mm_storeu_pd(
2585 48 : reinterpret_cast<double *>(pabyDstDataPtr + n * 8 + 48),
2586 : xmm1_high_d);
2587 : }
2588 429 : for (; n < nWordCount; n++)
2589 : {
2590 258 : pDstData[n] = pSrcData[n];
2591 171 : }
2592 : }
2593 : else
2594 : {
2595 108 : GDALCopyWordsGenericT(pSrcData, nSrcPixelStride, pDstData,
2596 : nDstPixelStride, nWordCount);
2597 : }
2598 279 : }
2599 :
2600 : template <>
2601 946 : void GDALCopyWordsT(const double *const CPL_RESTRICT pSrcData,
2602 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2603 : int nDstPixelStride, GPtrDiff_t nWordCount)
2604 : {
2605 946 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2606 : nDstPixelStride, nWordCount);
2607 946 : }
2608 :
2609 : #endif // defined(__x86_64) || defined(_M_X64)
2610 :
2611 : template <>
2612 2266830 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2613 : int nSrcPixelStride, GByte *const CPL_RESTRICT pDstData,
2614 : int nDstPixelStride, GPtrDiff_t nWordCount)
2615 : {
2616 2266830 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2617 : nDstPixelStride, nWordCount);
2618 2266830 : }
2619 :
2620 : template <>
2621 2933510 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2622 : int nSrcPixelStride, GInt16 *const CPL_RESTRICT pDstData,
2623 : int nDstPixelStride, GPtrDiff_t nWordCount)
2624 : {
2625 2933510 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2626 : nDstPixelStride, nWordCount);
2627 2933510 : }
2628 :
2629 : template <>
2630 1170340 : void GDALCopyWordsT(const float *const CPL_RESTRICT pSrcData,
2631 : int nSrcPixelStride, GUInt16 *const CPL_RESTRICT pDstData,
2632 : int nDstPixelStride, GPtrDiff_t nWordCount)
2633 : {
2634 1170340 : GDALCopyWordsT_8atatime(pSrcData, nSrcPixelStride, pDstData,
2635 : nDstPixelStride, nWordCount);
2636 1170340 : }
2637 :
2638 : /************************************************************************/
2639 : /* GDALCopyWordsComplexT() */
2640 : /************************************************************************/
2641 : /**
2642 : * Template function, used to copy data from pSrcData into buffer
2643 : * pDstData, with stride nSrcPixelStride in the source data and
2644 : * stride nDstPixelStride in the destination data. Deals with the
2645 : * complex case, where input is complex and output is complex.
2646 : *
2647 : * @param pSrcData the source data buffer
2648 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2649 : * of interest.
2650 : * @param pDstData the destination buffer.
2651 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2652 : * interest.
2653 : * @param nWordCount the total number of pixel words to copy
2654 : *
2655 : */
2656 : template <class Tin, class Tout>
2657 125244 : inline void GDALCopyWordsComplexT(const Tin *const CPL_RESTRICT pSrcData,
2658 : int nSrcPixelStride,
2659 : Tout *const CPL_RESTRICT pDstData,
2660 : int nDstPixelStride, GPtrDiff_t nWordCount)
2661 : {
2662 125244 : decltype(nWordCount) nDstOffset = 0;
2663 125244 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2664 125244 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2665 :
2666 7339801 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2667 : {
2668 7214552 : const Tin *const pPixelIn =
2669 7214552 : reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2670 7214552 : Tout *const pPixelOut =
2671 7214552 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2672 :
2673 7214552 : GDALCopyWord(pPixelIn[0], pPixelOut[0]);
2674 7214552 : GDALCopyWord(pPixelIn[1], pPixelOut[1]);
2675 :
2676 7214552 : nDstOffset += nDstPixelStride;
2677 : }
2678 125244 : }
2679 :
2680 : /************************************************************************/
2681 : /* GDALCopyWordsComplexOutT() */
2682 : /************************************************************************/
2683 : /**
2684 : * Template function, used to copy data from pSrcData into buffer
2685 : * pDstData, with stride nSrcPixelStride in the source data and
2686 : * stride nDstPixelStride in the destination data. Deals with the
2687 : * case where the value is real coming in, but complex going out.
2688 : *
2689 : * @param pSrcData the source data buffer
2690 : * @param nSrcPixelStride the stride, in the buffer pSrcData for pixels
2691 : * of interest, in bytes.
2692 : * @param pDstData the destination buffer.
2693 : * @param nDstPixelStride the stride in the buffer pDstData for pixels of
2694 : * interest, in bytes.
2695 : * @param nWordCount the total number of pixel words to copy
2696 : *
2697 : */
2698 : template <class Tin, class Tout>
2699 2640 : inline void GDALCopyWordsComplexOutT(const Tin *const CPL_RESTRICT pSrcData,
2700 : int nSrcPixelStride,
2701 : Tout *const CPL_RESTRICT pDstData,
2702 : int nDstPixelStride, GPtrDiff_t nWordCount)
2703 : {
2704 2640 : decltype(nWordCount) nDstOffset = 0;
2705 :
2706 2640 : const Tout tOutZero = static_cast<Tout>(0);
2707 :
2708 2640 : const char *const pSrcDataPtr = reinterpret_cast<const char *>(pSrcData);
2709 2640 : char *const pDstDataPtr = reinterpret_cast<char *>(pDstData);
2710 :
2711 1079135 : for (decltype(nWordCount) n = 0; n < nWordCount; n++)
2712 : {
2713 1076495 : const Tin tValue =
2714 1076495 : *reinterpret_cast<const Tin *>(pSrcDataPtr + n * nSrcPixelStride);
2715 1076495 : Tout *const pPixelOut =
2716 1076495 : reinterpret_cast<Tout *>(pDstDataPtr + nDstOffset);
2717 1076495 : GDALCopyWord(tValue, *pPixelOut);
2718 :
2719 1076495 : pPixelOut[1] = tOutZero;
2720 :
2721 1076495 : nDstOffset += nDstPixelStride;
2722 : }
2723 2640 : }
2724 :
2725 : /************************************************************************/
2726 : /* GDALCopyWordsFromT() */
2727 : /************************************************************************/
2728 : /**
2729 : * Template driver function. Given the input type T, call the appropriate
2730 : * GDALCopyWordsT function template for the desired output type. You should
2731 : * never call this function directly (call GDALCopyWords instead).
2732 : *
2733 : * @param pSrcData source data buffer
2734 : * @param nSrcPixelStride pixel stride in input buffer, in pixel words
2735 : * @param bInComplex input is complex
2736 : * @param pDstData destination data buffer
2737 : * @param eDstType destination data type
2738 : * @param nDstPixelStride pixel stride in output buffer, in pixel words
2739 : * @param nWordCount number of pixel words to be copied
2740 : */
2741 : template <class T>
2742 57550011 : inline void GDALCopyWordsFromT(const T *const CPL_RESTRICT pSrcData,
2743 : int nSrcPixelStride, bool bInComplex,
2744 : void *CPL_RESTRICT pDstData,
2745 : GDALDataType eDstType, int nDstPixelStride,
2746 : GPtrDiff_t nWordCount)
2747 : {
2748 57550011 : switch (eDstType)
2749 : {
2750 6616272 : case GDT_Byte:
2751 6616272 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2752 : static_cast<unsigned char *>(pDstData),
2753 : nDstPixelStride, nWordCount);
2754 6616271 : break;
2755 439 : case GDT_Int8:
2756 439 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2757 : static_cast<signed char *>(pDstData),
2758 : nDstPixelStride, nWordCount);
2759 439 : break;
2760 1210410 : case GDT_UInt16:
2761 1210410 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2762 : static_cast<unsigned short *>(pDstData),
2763 : nDstPixelStride, nWordCount);
2764 1210410 : break;
2765 7041324 : case GDT_Int16:
2766 7041324 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2767 : static_cast<short *>(pDstData), nDstPixelStride,
2768 : nWordCount);
2769 7041324 : break;
2770 4262 : case GDT_UInt32:
2771 4262 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2772 : static_cast<unsigned int *>(pDstData),
2773 : nDstPixelStride, nWordCount);
2774 4262 : break;
2775 23458380 : case GDT_Int32:
2776 23458380 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2777 : static_cast<int *>(pDstData), nDstPixelStride,
2778 : nWordCount);
2779 23458480 : break;
2780 565 : case GDT_UInt64:
2781 565 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2782 : static_cast<std::uint64_t *>(pDstData),
2783 : nDstPixelStride, nWordCount);
2784 565 : break;
2785 4037 : case GDT_Int64:
2786 4037 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2787 : static_cast<std::int64_t *>(pDstData),
2788 : nDstPixelStride, nWordCount);
2789 4037 : break;
2790 3930331 : case GDT_Float32:
2791 3930331 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2792 : static_cast<float *>(pDstData), nDstPixelStride,
2793 : nWordCount);
2794 3930331 : break;
2795 15156079 : case GDT_Float64:
2796 15156079 : GDALCopyWordsT(pSrcData, nSrcPixelStride,
2797 : static_cast<double *>(pDstData), nDstPixelStride,
2798 : nWordCount);
2799 15156079 : break;
2800 122404 : case GDT_CInt16:
2801 122404 : if (bInComplex)
2802 : {
2803 121394 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2804 : static_cast<short *>(pDstData),
2805 : nDstPixelStride, nWordCount);
2806 : }
2807 : else // input is not complex, so we need to promote to a complex
2808 : // buffer
2809 : {
2810 1010 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2811 : static_cast<short *>(pDstData),
2812 : nDstPixelStride, nWordCount);
2813 : }
2814 122404 : break;
2815 783 : case GDT_CInt32:
2816 783 : if (bInComplex)
2817 : {
2818 395 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2819 : static_cast<int *>(pDstData),
2820 : nDstPixelStride, nWordCount);
2821 : }
2822 : else // input is not complex, so we need to promote to a complex
2823 : // buffer
2824 : {
2825 388 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2826 : static_cast<int *>(pDstData),
2827 : nDstPixelStride, nWordCount);
2828 : }
2829 783 : break;
2830 3268 : case GDT_CFloat32:
2831 3268 : if (bInComplex)
2832 : {
2833 2693 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2834 : static_cast<float *>(pDstData),
2835 : nDstPixelStride, nWordCount);
2836 : }
2837 : else // input is not complex, so we need to promote to a complex
2838 : // buffer
2839 : {
2840 575 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2841 : static_cast<float *>(pDstData),
2842 : nDstPixelStride, nWordCount);
2843 : }
2844 3268 : break;
2845 1429 : case GDT_CFloat64:
2846 1429 : if (bInComplex)
2847 : {
2848 762 : GDALCopyWordsComplexT(pSrcData, nSrcPixelStride,
2849 : static_cast<double *>(pDstData),
2850 : nDstPixelStride, nWordCount);
2851 : }
2852 : else // input is not complex, so we need to promote to a complex
2853 : // buffer
2854 : {
2855 667 : GDALCopyWordsComplexOutT(pSrcData, nSrcPixelStride,
2856 : static_cast<double *>(pDstData),
2857 : nDstPixelStride, nWordCount);
2858 : }
2859 1429 : break;
2860 0 : case GDT_Unknown:
2861 : case GDT_TypeCount:
2862 0 : CPLAssert(false);
2863 : }
2864 57550111 : }
2865 :
2866 : } // end anonymous namespace
2867 :
2868 : /************************************************************************/
2869 : /* GDALReplicateWord() */
2870 : /************************************************************************/
2871 :
2872 : template <class T>
2873 228882 : inline void GDALReplicateWordT(void *pDstData, int nDstPixelStride,
2874 : GPtrDiff_t nWordCount)
2875 : {
2876 228882 : const T valSet = *static_cast<const T *>(pDstData);
2877 228882 : if (nDstPixelStride == static_cast<int>(sizeof(T)))
2878 : {
2879 200511 : T *pDstPtr = static_cast<T *>(pDstData) + 1;
2880 9103956 : while (nWordCount >= 4)
2881 : {
2882 8903448 : nWordCount -= 4;
2883 8903448 : pDstPtr[0] = valSet;
2884 8903448 : pDstPtr[1] = valSet;
2885 8903448 : pDstPtr[2] = valSet;
2886 8903448 : pDstPtr[3] = valSet;
2887 8903448 : pDstPtr += 4;
2888 : }
2889 667980 : while (nWordCount > 0)
2890 : {
2891 467469 : --nWordCount;
2892 467469 : *pDstPtr = valSet;
2893 467469 : pDstPtr++;
2894 : }
2895 : }
2896 : else
2897 : {
2898 28371 : GByte *pabyDstPtr = static_cast<GByte *>(pDstData) + nDstPixelStride;
2899 954250 : while (nWordCount > 0)
2900 : {
2901 925879 : --nWordCount;
2902 925879 : *reinterpret_cast<T *>(pabyDstPtr) = valSet;
2903 925879 : pabyDstPtr += nDstPixelStride;
2904 : }
2905 : }
2906 228882 : }
2907 :
2908 590835 : static void GDALReplicateWord(const void *CPL_RESTRICT pSrcData,
2909 : GDALDataType eSrcType,
2910 : void *CPL_RESTRICT pDstData,
2911 : GDALDataType eDstType, int nDstPixelStride,
2912 : GPtrDiff_t nWordCount)
2913 : {
2914 : /* -----------------------------------------------------------------------
2915 : */
2916 : /* Special case when the source data is always the same value */
2917 : /* (for VRTSourcedRasterBand::IRasterIO and
2918 : * VRTDerivedRasterBand::IRasterIO*/
2919 : /* for example) */
2920 : /* -----------------------------------------------------------------------
2921 : */
2922 : // Let the general translation case do the necessary conversions
2923 : // on the first destination element.
2924 590835 : GDALCopyWords(pSrcData, eSrcType, 0, pDstData, eDstType, 0, 1);
2925 :
2926 : // Now copy the first element to the nWordCount - 1 following destination
2927 : // elements.
2928 590835 : nWordCount--;
2929 590835 : GByte *pabyDstWord = reinterpret_cast<GByte *>(pDstData) + nDstPixelStride;
2930 :
2931 590835 : switch (eDstType)
2932 : {
2933 361869 : case GDT_Byte:
2934 : case GDT_Int8:
2935 : {
2936 361869 : if (nDstPixelStride == 1)
2937 : {
2938 326917 : if (nWordCount > 0)
2939 326917 : memset(pabyDstWord,
2940 326917 : *reinterpret_cast<const GByte *>(pDstData),
2941 : nWordCount);
2942 : }
2943 : else
2944 : {
2945 34952 : GByte valSet = *reinterpret_cast<const GByte *>(pDstData);
2946 5558530 : while (nWordCount > 0)
2947 : {
2948 5523580 : --nWordCount;
2949 5523580 : *pabyDstWord = valSet;
2950 5523580 : pabyDstWord += nDstPixelStride;
2951 : }
2952 : }
2953 361869 : break;
2954 : }
2955 :
2956 : #define CASE_DUPLICATE_SIMPLE(enum_type, c_type) \
2957 : case enum_type: \
2958 : { \
2959 : GDALReplicateWordT<c_type>(pDstData, nDstPixelStride, nWordCount); \
2960 : break; \
2961 : }
2962 :
2963 499 : CASE_DUPLICATE_SIMPLE(GDT_UInt16, GUInt16)
2964 170100 : CASE_DUPLICATE_SIMPLE(GDT_Int16, GInt16)
2965 90 : CASE_DUPLICATE_SIMPLE(GDT_UInt32, GUInt32)
2966 174 : CASE_DUPLICATE_SIMPLE(GDT_Int32, GInt32)
2967 15 : CASE_DUPLICATE_SIMPLE(GDT_UInt64, std::uint64_t)
2968 10 : CASE_DUPLICATE_SIMPLE(GDT_Int64, std::int64_t)
2969 52933 : CASE_DUPLICATE_SIMPLE(GDT_Float32, float)
2970 5061 : CASE_DUPLICATE_SIMPLE(GDT_Float64, double)
2971 :
2972 : #define CASE_DUPLICATE_COMPLEX(enum_type, c_type) \
2973 : case enum_type: \
2974 : { \
2975 : c_type valSet1 = reinterpret_cast<const c_type *>(pDstData)[0]; \
2976 : c_type valSet2 = reinterpret_cast<const c_type *>(pDstData)[1]; \
2977 : while (nWordCount > 0) \
2978 : { \
2979 : --nWordCount; \
2980 : reinterpret_cast<c_type *>(pabyDstWord)[0] = valSet1; \
2981 : reinterpret_cast<c_type *>(pabyDstWord)[1] = valSet2; \
2982 : pabyDstWord += nDstPixelStride; \
2983 : } \
2984 : break; \
2985 : }
2986 :
2987 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt16, GInt16)
2988 784 : CASE_DUPLICATE_COMPLEX(GDT_CInt32, GInt32)
2989 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat32, float)
2990 784 : CASE_DUPLICATE_COMPLEX(GDT_CFloat64, double)
2991 :
2992 0 : case GDT_Unknown:
2993 : case GDT_TypeCount:
2994 0 : CPLAssert(false);
2995 : }
2996 590835 : }
2997 :
2998 : /************************************************************************/
2999 : /* GDALUnrolledCopy() */
3000 : /************************************************************************/
3001 :
3002 : template <class T, int srcStride, int dstStride>
3003 2871108 : static inline void GDALUnrolledCopyGeneric(T *CPL_RESTRICT pDest,
3004 : const T *CPL_RESTRICT pSrc,
3005 : GPtrDiff_t nIters)
3006 : {
3007 2871108 : if (nIters >= 16)
3008 : {
3009 129237696 : for (GPtrDiff_t i = nIters / 16; i != 0; i--)
3010 : {
3011 126496244 : pDest[0 * dstStride] = pSrc[0 * srcStride];
3012 126496244 : pDest[1 * dstStride] = pSrc[1 * srcStride];
3013 126496244 : pDest[2 * dstStride] = pSrc[2 * srcStride];
3014 126496244 : pDest[3 * dstStride] = pSrc[3 * srcStride];
3015 126496244 : pDest[4 * dstStride] = pSrc[4 * srcStride];
3016 126496244 : pDest[5 * dstStride] = pSrc[5 * srcStride];
3017 126496244 : pDest[6 * dstStride] = pSrc[6 * srcStride];
3018 126496244 : pDest[7 * dstStride] = pSrc[7 * srcStride];
3019 126496244 : pDest[8 * dstStride] = pSrc[8 * srcStride];
3020 126496244 : pDest[9 * dstStride] = pSrc[9 * srcStride];
3021 126496244 : pDest[10 * dstStride] = pSrc[10 * srcStride];
3022 126496244 : pDest[11 * dstStride] = pSrc[11 * srcStride];
3023 126496244 : pDest[12 * dstStride] = pSrc[12 * srcStride];
3024 126496244 : pDest[13 * dstStride] = pSrc[13 * srcStride];
3025 126496244 : pDest[14 * dstStride] = pSrc[14 * srcStride];
3026 126496244 : pDest[15 * dstStride] = pSrc[15 * srcStride];
3027 126496244 : pDest += 16 * dstStride;
3028 126496244 : pSrc += 16 * srcStride;
3029 : }
3030 2741412 : nIters = nIters % 16;
3031 : }
3032 5164971 : for (GPtrDiff_t i = 0; i < nIters; i++)
3033 : {
3034 2293863 : pDest[i * dstStride] = *pSrc;
3035 2293863 : pSrc += srcStride;
3036 : }
3037 2871108 : }
3038 :
3039 : template <class T, int srcStride, int dstStride>
3040 2865911 : static inline void GDALUnrolledCopy(T *CPL_RESTRICT pDest,
3041 : const T *CPL_RESTRICT pSrc,
3042 : GPtrDiff_t nIters)
3043 : {
3044 2865911 : GDALUnrolledCopyGeneric<T, srcStride, dstStride>(pDest, pSrc, nIters);
3045 2865920 : }
3046 :
3047 : #if (defined(__x86_64) || defined(_M_X64))
3048 :
3049 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3050 :
3051 : #include "rasterio_ssse3.h"
3052 :
3053 : #endif
3054 :
3055 : template <>
3056 306527 : void GDALUnrolledCopy<GByte, 2, 1>(GByte *CPL_RESTRICT pDest,
3057 : const GByte *CPL_RESTRICT pSrc,
3058 : GPtrDiff_t nIters)
3059 : {
3060 306527 : decltype(nIters) i = 0;
3061 306527 : if (nIters > 16)
3062 : {
3063 148367 : const __m128i xmm_mask = _mm_set1_epi16(0xff);
3064 : // If we were sure that there would always be 1 trailing byte, we could
3065 : // check against nIters - 15
3066 2584570 : for (; i < nIters - 16; i += 16)
3067 : {
3068 : __m128i xmm0 =
3069 2436210 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3070 : __m128i xmm1 =
3071 4872410 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3072 : // Set higher 8bit of each int16 packed word to 0
3073 2436210 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3074 2436210 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3075 : // Pack int16 to uint8 and merge back both vector
3076 2436210 : xmm0 = _mm_packus_epi16(xmm0, xmm1);
3077 :
3078 : // Store result
3079 2436210 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3080 :
3081 2436210 : pSrc += 2 * 16;
3082 : }
3083 : }
3084 3918520 : for (; i < nIters; i++)
3085 : {
3086 3612000 : pDest[i] = *pSrc;
3087 3612000 : pSrc += 2;
3088 : }
3089 306527 : }
3090 :
3091 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
3092 :
3093 : template <>
3094 184387 : void GDALUnrolledCopy<GByte, 3, 1>(GByte *CPL_RESTRICT pDest,
3095 : const GByte *CPL_RESTRICT pSrc,
3096 : GPtrDiff_t nIters)
3097 : {
3098 184387 : if (nIters > 16 && CPLHaveRuntimeSSSE3())
3099 : {
3100 179190 : GDALUnrolledCopy_GByte_3_1_SSSE3(pDest, pSrc, nIters);
3101 : }
3102 : else
3103 : {
3104 5197 : GDALUnrolledCopyGeneric<GByte, 3, 1>(pDest, pSrc, nIters);
3105 : }
3106 184387 : }
3107 :
3108 : #endif
3109 :
3110 : template <>
3111 96503 : void GDALUnrolledCopy<GByte, 4, 1>(GByte *CPL_RESTRICT pDest,
3112 : const GByte *CPL_RESTRICT pSrc,
3113 : GPtrDiff_t nIters)
3114 : {
3115 96503 : decltype(nIters) i = 0;
3116 96503 : if (nIters > 16)
3117 : {
3118 91210 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
3119 : // If we were sure that there would always be 3 trailing bytes, we could
3120 : // check against nIters - 15
3121 3754210 : for (; i < nIters - 16; i += 16)
3122 : {
3123 : __m128i xmm0 =
3124 3663000 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 0));
3125 : __m128i xmm1 =
3126 3663000 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 16));
3127 : __m128i xmm2 =
3128 3663000 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 32));
3129 : __m128i xmm3 =
3130 7326000 : _mm_loadu_si128(reinterpret_cast<__m128i const *>(pSrc + 48));
3131 : // Set higher 24bit of each int32 packed word to 0
3132 3663000 : xmm0 = _mm_and_si128(xmm0, xmm_mask);
3133 3663000 : xmm1 = _mm_and_si128(xmm1, xmm_mask);
3134 3663000 : xmm2 = _mm_and_si128(xmm2, xmm_mask);
3135 3663000 : xmm3 = _mm_and_si128(xmm3, xmm_mask);
3136 : // Pack int32 to int16
3137 3663000 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
3138 3663000 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
3139 : // Pack int16 to uint8
3140 3663000 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
3141 :
3142 : // Store result
3143 3663000 : _mm_storeu_si128(reinterpret_cast<__m128i *>(pDest + i), xmm0);
3144 :
3145 3663000 : pSrc += 4 * 16;
3146 : }
3147 : }
3148 1054780 : for (; i < nIters; i++)
3149 : {
3150 958275 : pDest[i] = *pSrc;
3151 958275 : pSrc += 4;
3152 : }
3153 96503 : }
3154 : #endif // defined(__x86_64) || defined(_M_X64)
3155 :
3156 : /************************************************************************/
3157 : /* GDALFastCopy() */
3158 : /************************************************************************/
3159 :
3160 : template <class T>
3161 33109600 : static inline void GDALFastCopy(T *CPL_RESTRICT pDest, int nDestStride,
3162 : const T *CPL_RESTRICT pSrc, int nSrcStride,
3163 : GPtrDiff_t nIters)
3164 : {
3165 33109600 : constexpr int sizeofT = static_cast<int>(sizeof(T));
3166 33109600 : if (nIters == 1)
3167 : {
3168 20744110 : *pDest = *pSrc;
3169 : }
3170 12365387 : else if (nDestStride == sizeofT)
3171 : {
3172 9467942 : if (nSrcStride == sizeofT)
3173 : {
3174 8822625 : memcpy(pDest, pSrc, nIters * sizeof(T));
3175 : }
3176 645317 : else if (nSrcStride == 2 * sizeofT)
3177 : {
3178 309468 : GDALUnrolledCopy<T, 2, 1>(pDest, pSrc, nIters);
3179 : }
3180 335849 : else if (nSrcStride == 3 * sizeofT)
3181 : {
3182 197087 : GDALUnrolledCopy<T, 3, 1>(pDest, pSrc, nIters);
3183 : }
3184 138762 : else if (nSrcStride == 4 * sizeofT)
3185 : {
3186 125371 : GDALUnrolledCopy<T, 4, 1>(pDest, pSrc, nIters);
3187 : }
3188 : else
3189 : {
3190 13005720 : while (nIters-- > 0)
3191 : {
3192 12992330 : *pDest = *pSrc;
3193 12992330 : pSrc += nSrcStride / sizeofT;
3194 12992330 : pDest++;
3195 : }
3196 : }
3197 : }
3198 2897495 : else if (nSrcStride == sizeofT)
3199 : {
3200 2880619 : if (nDestStride == 2 * sizeofT)
3201 : {
3202 128599 : GDALUnrolledCopy<T, 1, 2>(pDest, pSrc, nIters);
3203 : }
3204 2752020 : else if (nDestStride == 3 * sizeofT)
3205 : {
3206 2062171 : GDALUnrolledCopy<T, 1, 3>(pDest, pSrc, nIters);
3207 : }
3208 689853 : else if (nDestStride == 4 * sizeofT)
3209 : {
3210 630632 : GDALUnrolledCopy<T, 1, 4>(pDest, pSrc, nIters);
3211 : }
3212 : else
3213 : {
3214 12625700 : while (nIters-- > 0)
3215 : {
3216 12566480 : *pDest = *pSrc;
3217 12566480 : pSrc++;
3218 12566480 : pDest += nDestStride / sizeofT;
3219 : }
3220 : }
3221 : }
3222 : else
3223 : {
3224 1115759 : while (nIters-- > 0)
3225 : {
3226 1098888 : *pDest = *pSrc;
3227 1098888 : pSrc += nSrcStride / sizeofT;
3228 1098888 : pDest += nDestStride / sizeofT;
3229 : }
3230 : }
3231 33109600 : }
3232 :
3233 : /************************************************************************/
3234 : /* GDALFastCopyByte() */
3235 : /************************************************************************/
3236 :
3237 276277 : static void GDALFastCopyByte(const GByte *CPL_RESTRICT pSrcData,
3238 : int nSrcPixelStride, GByte *CPL_RESTRICT pDstData,
3239 : int nDstPixelStride, GPtrDiff_t nWordCount)
3240 : {
3241 276277 : GDALFastCopy(pDstData, nDstPixelStride, pSrcData, nSrcPixelStride,
3242 : nWordCount);
3243 276277 : }
3244 :
3245 : /************************************************************************/
3246 : /* GDALCopyWords() */
3247 : /************************************************************************/
3248 :
3249 : /**
3250 : * Copy pixel words from buffer to buffer.
3251 : *
3252 : * @see GDALCopyWords64()
3253 : */
3254 101000000 : void CPL_STDCALL GDALCopyWords(const void *CPL_RESTRICT pSrcData,
3255 : GDALDataType eSrcType, int nSrcPixelStride,
3256 : void *CPL_RESTRICT pDstData,
3257 : GDALDataType eDstType, int nDstPixelStride,
3258 : int nWordCount)
3259 : {
3260 101000000 : GDALCopyWords64(pSrcData, eSrcType, nSrcPixelStride, pDstData, eDstType,
3261 : nDstPixelStride, nWordCount);
3262 100999000 : }
3263 :
3264 : /************************************************************************/
3265 : /* GDALCopyWords64() */
3266 : /************************************************************************/
3267 :
3268 : /**
3269 : * Copy pixel words from buffer to buffer.
3270 : *
3271 : * This function is used to copy pixel word values from one memory buffer
3272 : * to another, with support for conversion between data types, and differing
3273 : * step factors. The data type conversion is done using the normal GDAL
3274 : * rules. Values assigned to a lower range integer type are clipped. For
3275 : * instance assigning GDT_Int16 values to a GDT_Byte buffer will cause values
3276 : * less the 0 to be set to 0, and values larger than 255 to be set to 255.
3277 : * Assignment from floating point to integer uses default C type casting
3278 : * semantics. Assignment from non-complex to complex will result in the
3279 : * imaginary part being set to zero on output. Assignment from complex to
3280 : * non-complex will result in the complex portion being lost and the real
3281 : * component being preserved (<i>not magnitude!</i>).
3282 : *
3283 : * No assumptions are made about the source or destination words occurring
3284 : * on word boundaries. It is assumed that all values are in native machine
3285 : * byte order.
3286 : *
3287 : * @param pSrcData Pointer to source data to be converted.
3288 : * @param eSrcType the source data type (see GDALDataType enum)
3289 : * @param nSrcPixelStride Source pixel stride (i.e. distance between 2 words),
3290 : * in bytes
3291 : * @param pDstData Pointer to buffer where destination data should go
3292 : * @param eDstType the destination data type (see GDALDataType enum)
3293 : * @param nDstPixelStride Destination pixel stride (i.e. distance between 2
3294 : * words), in bytes
3295 : * @param nWordCount number of words to be copied
3296 : *
3297 : * @note
3298 : * When adding a new data type to GDAL, you must do the following to
3299 : * support it properly within the GDALCopyWords function:
3300 : * 1. Add the data type to the switch on eSrcType in GDALCopyWords.
3301 : * This should invoke the appropriate GDALCopyWordsFromT wrapper.
3302 : * 2. Add the data type to the switch on eDstType in GDALCopyWordsFromT.
3303 : * This should call the appropriate GDALCopyWordsT template.
3304 : * 3. If appropriate, overload the appropriate CopyWord template in the
3305 : * above namespace. This will ensure that any conversion issues are
3306 : * handled (cases like the float -> int32 case, where the min/max)
3307 : * values are subject to roundoff error.
3308 : */
3309 :
3310 105270000 : void CPL_STDCALL GDALCopyWords64(const void *CPL_RESTRICT pSrcData,
3311 : GDALDataType eSrcType, int nSrcPixelStride,
3312 : void *CPL_RESTRICT pDstData,
3313 : GDALDataType eDstType, int nDstPixelStride,
3314 : GPtrDiff_t nWordCount)
3315 :
3316 : {
3317 : // On platforms where alignment matters, be careful
3318 105270000 : const int nSrcDataTypeSize = GDALGetDataTypeSizeBytes(eSrcType);
3319 105269000 : const int nDstDataTypeSize = GDALGetDataTypeSizeBytes(eDstType);
3320 105269000 : assert(nSrcDataTypeSize != 0);
3321 105269000 : assert(nDstDataTypeSize != 0);
3322 105269000 : if (!(eSrcType == eDstType && nSrcPixelStride == nDstPixelStride) &&
3323 61458800 : ((reinterpret_cast<GPtrDiff_t>(pSrcData) % nSrcDataTypeSize) != 0 ||
3324 61459200 : (reinterpret_cast<GPtrDiff_t>(pDstData) % nDstDataTypeSize) != 0 ||
3325 61458900 : (nSrcPixelStride % nSrcDataTypeSize) != 0 ||
3326 61458900 : (nDstPixelStride % nDstDataTypeSize) != 0))
3327 : {
3328 605 : if (eSrcType == eDstType)
3329 : {
3330 19500 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3331 : {
3332 19000 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3333 : static_cast<const GByte *>(pSrcData) +
3334 19000 : nSrcPixelStride * i,
3335 : nDstDataTypeSize);
3336 : }
3337 : }
3338 : else
3339 : {
3340 : #define ALIGN_PTR(ptr, align) \
3341 : ((ptr) + ((align) - (reinterpret_cast<size_t>(ptr) % (align))) % (align))
3342 : // The largest we need is for CFloat64 (16 bytes), so 32 bytes to
3343 : // be sure to get correctly aligned pointer.
3344 : GByte abySrcBuffer[32];
3345 : GByte abyDstBuffer[32];
3346 105 : GByte *pabySrcBuffer = ALIGN_PTR(abySrcBuffer, nSrcDataTypeSize);
3347 105 : GByte *pabyDstBuffer = ALIGN_PTR(abyDstBuffer, nDstDataTypeSize);
3348 3360 : for (decltype(nWordCount) i = 0; i < nWordCount; i++)
3349 : {
3350 3255 : memcpy(pabySrcBuffer,
3351 : static_cast<const GByte *>(pSrcData) +
3352 3255 : nSrcPixelStride * i,
3353 : nSrcDataTypeSize);
3354 3255 : GDALCopyWords64(pabySrcBuffer, eSrcType, 0, pabyDstBuffer,
3355 : eDstType, 0, 1);
3356 3255 : memcpy(static_cast<GByte *>(pDstData) + nDstPixelStride * i,
3357 : pabyDstBuffer, nDstDataTypeSize);
3358 : }
3359 : }
3360 605 : return;
3361 : }
3362 :
3363 : // Deal with the case where we're replicating a single word into the
3364 : // provided buffer
3365 105269000 : if (nSrcPixelStride == 0 && nWordCount > 1)
3366 : {
3367 590835 : GDALReplicateWord(pSrcData, eSrcType, pDstData, eDstType,
3368 : nDstPixelStride, nWordCount);
3369 590835 : return;
3370 : }
3371 :
3372 104678000 : if (eSrcType == eDstType)
3373 : {
3374 47282000 : if (eSrcType == GDT_Byte || eSrcType == GDT_Int8)
3375 : {
3376 13506700 : GDALFastCopy(static_cast<GByte *>(pDstData), nDstPixelStride,
3377 : static_cast<const GByte *>(pSrcData), nSrcPixelStride,
3378 : nWordCount);
3379 13506200 : return;
3380 : }
3381 :
3382 33775300 : if (nSrcDataTypeSize == 2 && (nSrcPixelStride % 2) == 0 &&
3383 19326500 : (nDstPixelStride % 2) == 0)
3384 : {
3385 19326500 : GDALFastCopy(static_cast<short *>(pDstData), nDstPixelStride,
3386 : static_cast<const short *>(pSrcData), nSrcPixelStride,
3387 : nWordCount);
3388 19325700 : return;
3389 : }
3390 :
3391 14448800 : if (nWordCount == 1)
3392 : {
3393 : #if defined(CSA_BUILD) || defined(__COVERITY__)
3394 : // Avoid false positives...
3395 : memcpy(pDstData, pSrcData, nSrcDataTypeSize);
3396 : #else
3397 14040200 : if (nSrcDataTypeSize == 2)
3398 0 : memcpy(pDstData, pSrcData, 2);
3399 14040200 : else if (nSrcDataTypeSize == 4)
3400 14015600 : memcpy(pDstData, pSrcData, 4);
3401 24641 : else if (nSrcDataTypeSize == 8)
3402 24519 : memcpy(pDstData, pSrcData, 8);
3403 : else /* if( eSrcType == GDT_CFloat64 ) */
3404 122 : memcpy(pDstData, pSrcData, 16);
3405 : #endif
3406 14040200 : return;
3407 : }
3408 :
3409 : // Let memcpy() handle the case where we're copying a packed buffer
3410 : // of pixels.
3411 408616 : if (nSrcPixelStride == nDstPixelStride)
3412 : {
3413 257146 : if (nSrcPixelStride == nSrcDataTypeSize)
3414 : {
3415 254976 : memcpy(pDstData, pSrcData, nWordCount * nSrcDataTypeSize);
3416 254976 : return;
3417 : }
3418 : }
3419 : }
3420 :
3421 : // Handle the more general case -- deals with conversion of data types
3422 : // directly.
3423 57549500 : switch (eSrcType)
3424 : {
3425 12883100 : case GDT_Byte:
3426 12883100 : GDALCopyWordsFromT<unsigned char>(
3427 : static_cast<const unsigned char *>(pSrcData), nSrcPixelStride,
3428 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3429 12883100 : break;
3430 852 : case GDT_Int8:
3431 852 : GDALCopyWordsFromT<signed char>(
3432 : static_cast<const signed char *>(pSrcData), nSrcPixelStride,
3433 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3434 852 : break;
3435 49726 : case GDT_UInt16:
3436 49726 : GDALCopyWordsFromT<unsigned short>(
3437 : static_cast<const unsigned short *>(pSrcData), nSrcPixelStride,
3438 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3439 49726 : break;
3440 4520700 : case GDT_Int16:
3441 4520700 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3442 : nSrcPixelStride, false, pDstData,
3443 : eDstType, nDstPixelStride, nWordCount);
3444 4520700 : break;
3445 5733 : case GDT_UInt32:
3446 5733 : GDALCopyWordsFromT<unsigned int>(
3447 : static_cast<const unsigned int *>(pSrcData), nSrcPixelStride,
3448 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3449 5733 : break;
3450 12254600 : case GDT_Int32:
3451 12254600 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3452 : nSrcPixelStride, false, pDstData, eDstType,
3453 : nDstPixelStride, nWordCount);
3454 12254600 : break;
3455 482 : case GDT_UInt64:
3456 482 : GDALCopyWordsFromT<std::uint64_t>(
3457 : static_cast<const std::uint64_t *>(pSrcData), nSrcPixelStride,
3458 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3459 482 : break;
3460 6908 : case GDT_Int64:
3461 6908 : GDALCopyWordsFromT<std::int64_t>(
3462 : static_cast<const std::int64_t *>(pSrcData), nSrcPixelStride,
3463 : false, pDstData, eDstType, nDstPixelStride, nWordCount);
3464 6908 : break;
3465 6496430 : case GDT_Float32:
3466 6496430 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3467 : nSrcPixelStride, false, pDstData,
3468 : eDstType, nDstPixelStride, nWordCount);
3469 6496440 : break;
3470 20590300 : case GDT_Float64:
3471 20590300 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3472 : nSrcPixelStride, false, pDstData,
3473 : eDstType, nDstPixelStride, nWordCount);
3474 20590300 : break;
3475 566959 : case GDT_CInt16:
3476 566959 : GDALCopyWordsFromT<short>(static_cast<const short *>(pSrcData),
3477 : nSrcPixelStride, true, pDstData, eDstType,
3478 : nDstPixelStride, nWordCount);
3479 566959 : break;
3480 405 : case GDT_CInt32:
3481 405 : GDALCopyWordsFromT<int>(static_cast<const int *>(pSrcData),
3482 : nSrcPixelStride, true, pDstData, eDstType,
3483 : nDstPixelStride, nWordCount);
3484 405 : break;
3485 1317 : case GDT_CFloat32:
3486 1317 : GDALCopyWordsFromT<float>(static_cast<const float *>(pSrcData),
3487 : nSrcPixelStride, true, pDstData, eDstType,
3488 : nDstPixelStride, nWordCount);
3489 1317 : break;
3490 172481 : case GDT_CFloat64:
3491 172481 : GDALCopyWordsFromT<double>(static_cast<const double *>(pSrcData),
3492 : nSrcPixelStride, true, pDstData,
3493 : eDstType, nDstPixelStride, nWordCount);
3494 172481 : break;
3495 0 : case GDT_Unknown:
3496 : case GDT_TypeCount:
3497 0 : CPLAssert(false);
3498 : }
3499 : }
3500 :
3501 : /************************************************************************/
3502 : /* GDALCopyBits() */
3503 : /************************************************************************/
3504 :
3505 : /**
3506 : * Bitwise word copying.
3507 : *
3508 : * A function for moving sets of partial bytes around. Loosely
3509 : * speaking this is a bitwise analog to GDALCopyWords().
3510 : *
3511 : * It copies nStepCount "words" where each word is nBitCount bits long.
3512 : * The nSrcStep and nDstStep are the number of bits from the start of one
3513 : * word to the next (same as nBitCount if they are packed). The nSrcOffset
3514 : * and nDstOffset are the offset into the source and destination buffers
3515 : * to start at, also measured in bits.
3516 : *
3517 : * All bit offsets are assumed to start from the high order bit in a byte
3518 : * (i.e. most significant bit first). Currently this function is not very
3519 : * optimized, but it may be improved for some common cases in the future
3520 : * as needed.
3521 : *
3522 : * @param pabySrcData the source data buffer.
3523 : * @param nSrcOffset the offset (in bits) in pabySrcData to the start of the
3524 : * first word to copy.
3525 : * @param nSrcStep the offset in bits from the start one source word to the
3526 : * start of the next.
3527 : * @param pabyDstData the destination data buffer.
3528 : * @param nDstOffset the offset (in bits) in pabyDstData to the start of the
3529 : * first word to copy over.
3530 : * @param nDstStep the offset in bits from the start one word to the
3531 : * start of the next.
3532 : * @param nBitCount the number of bits in a word to be copied.
3533 : * @param nStepCount the number of words to copy.
3534 : */
3535 :
3536 0 : void GDALCopyBits(const GByte *pabySrcData, int nSrcOffset, int nSrcStep,
3537 : GByte *pabyDstData, int nDstOffset, int nDstStep,
3538 : int nBitCount, int nStepCount)
3539 :
3540 : {
3541 0 : VALIDATE_POINTER0(pabySrcData, "GDALCopyBits");
3542 :
3543 0 : for (int iStep = 0; iStep < nStepCount; iStep++)
3544 : {
3545 0 : for (int iBit = 0; iBit < nBitCount; iBit++)
3546 : {
3547 0 : if (pabySrcData[nSrcOffset >> 3] & (0x80 >> (nSrcOffset & 7)))
3548 0 : pabyDstData[nDstOffset >> 3] |= (0x80 >> (nDstOffset & 7));
3549 : else
3550 0 : pabyDstData[nDstOffset >> 3] &= ~(0x80 >> (nDstOffset & 7));
3551 :
3552 0 : nSrcOffset++;
3553 0 : nDstOffset++;
3554 : }
3555 :
3556 0 : nSrcOffset += (nSrcStep - nBitCount);
3557 0 : nDstOffset += (nDstStep - nBitCount);
3558 : }
3559 : }
3560 :
3561 : /************************************************************************/
3562 : /* GDALGetBestOverviewLevel() */
3563 : /* */
3564 : /* Returns the best overview level to satisfy the query or -1 if none */
3565 : /* Also updates nXOff, nYOff, nXSize, nYSize and psExtraArg when */
3566 : /* returning a valid overview level */
3567 : /************************************************************************/
3568 :
3569 0 : int GDALBandGetBestOverviewLevel(GDALRasterBand *poBand, int &nXOff, int &nYOff,
3570 : int &nXSize, int &nYSize, int nBufXSize,
3571 : int nBufYSize)
3572 : {
3573 0 : return GDALBandGetBestOverviewLevel2(poBand, nXOff, nYOff, nXSize, nYSize,
3574 0 : nBufXSize, nBufYSize, nullptr);
3575 : }
3576 :
3577 323324 : int GDALBandGetBestOverviewLevel2(GDALRasterBand *poBand, int &nXOff,
3578 : int &nYOff, int &nXSize, int &nYSize,
3579 : int nBufXSize, int nBufYSize,
3580 : GDALRasterIOExtraArg *psExtraArg)
3581 : {
3582 : /* -------------------------------------------------------------------- */
3583 : /* Compute the desired downsampling factor. It is */
3584 : /* based on the least reduced axis, and represents the number */
3585 : /* of source pixels to one destination pixel. */
3586 : /* -------------------------------------------------------------------- */
3587 323324 : const double dfDesiredDownsamplingFactor =
3588 323324 : ((nXSize / static_cast<double>(nBufXSize)) <
3589 160986 : (nYSize / static_cast<double>(nBufYSize)) ||
3590 : nBufYSize == 1)
3591 355200 : ? nXSize / static_cast<double>(nBufXSize)
3592 129110 : : nYSize / static_cast<double>(nBufYSize);
3593 :
3594 : /* -------------------------------------------------------------------- */
3595 : /* Find the overview level that largest downsampling factor (most */
3596 : /* downsampled) that is still less than (or only a little more) */
3597 : /* downsampled than the request. */
3598 : /* -------------------------------------------------------------------- */
3599 323324 : const int nOverviewCount = poBand->GetOverviewCount();
3600 323324 : GDALRasterBand *poBestOverview = nullptr;
3601 323324 : double dfBestDownsamplingFactor = 0;
3602 323324 : int nBestOverviewLevel = -1;
3603 :
3604 : const char *pszOversampligThreshold =
3605 323324 : CPLGetConfigOption("GDAL_OVERVIEW_OVERSAMPLING_THRESHOLD", nullptr);
3606 :
3607 : // Cf https://github.com/OSGeo/gdal/pull/9040#issuecomment-1898524693
3608 : // Do not exactly use a oversampling threshold of 1.0 because of numerical
3609 : // instability.
3610 323324 : const auto AdjustThreshold = [](double x)
3611 : {
3612 323324 : constexpr double EPS = 1e-2;
3613 323324 : return x == 1.0 ? x + EPS : x;
3614 : };
3615 646648 : const double dfOversamplingThreshold = AdjustThreshold(
3616 9 : pszOversampligThreshold ? CPLAtof(pszOversampligThreshold)
3617 323315 : : psExtraArg && psExtraArg->eResampleAlg != GRIORA_NearestNeighbour
3618 646630 : ? 1.0
3619 : : 1.2);
3620 342234 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3621 : {
3622 18910 : GDALRasterBand *poOverview = poBand->GetOverview(iOverview);
3623 37820 : if (poOverview == nullptr ||
3624 37818 : poOverview->GetXSize() > poBand->GetXSize() ||
3625 18908 : poOverview->GetYSize() > poBand->GetYSize())
3626 : {
3627 2 : continue;
3628 : }
3629 :
3630 : // Compute downsampling factor of this overview
3631 : const double dfDownsamplingFactor = std::min(
3632 18908 : poBand->GetXSize() / static_cast<double>(poOverview->GetXSize()),
3633 37816 : poBand->GetYSize() / static_cast<double>(poOverview->GetYSize()));
3634 :
3635 : // Is it nearly the requested factor and better (lower) than
3636 : // the current best factor?
3637 26565 : if (dfDownsamplingFactor >=
3638 18908 : dfDesiredDownsamplingFactor * dfOversamplingThreshold ||
3639 : dfDownsamplingFactor <= dfBestDownsamplingFactor)
3640 : {
3641 7657 : continue;
3642 : }
3643 :
3644 : // Ignore AVERAGE_BIT2GRAYSCALE overviews for RasterIO purposes.
3645 11251 : const char *pszResampling = poOverview->GetMetadataItem("RESAMPLING");
3646 :
3647 11251 : if (pszResampling != nullptr &&
3648 65 : STARTS_WITH_CI(pszResampling, "AVERAGE_BIT2"))
3649 16 : continue;
3650 :
3651 : // OK, this is our new best overview.
3652 11235 : poBestOverview = poOverview;
3653 11235 : nBestOverviewLevel = iOverview;
3654 11235 : dfBestDownsamplingFactor = dfDownsamplingFactor;
3655 : }
3656 :
3657 : /* -------------------------------------------------------------------- */
3658 : /* If we didn't find an overview that helps us, just return */
3659 : /* indicating failure and the full resolution image will be used. */
3660 : /* -------------------------------------------------------------------- */
3661 323324 : if (nBestOverviewLevel < 0)
3662 319937 : return -1;
3663 :
3664 : /* -------------------------------------------------------------------- */
3665 : /* Recompute the source window in terms of the selected */
3666 : /* overview. */
3667 : /* -------------------------------------------------------------------- */
3668 : const double dfXFactor =
3669 3387 : poBand->GetXSize() / static_cast<double>(poBestOverview->GetXSize());
3670 : const double dfYFactor =
3671 3387 : poBand->GetYSize() / static_cast<double>(poBestOverview->GetYSize());
3672 3387 : CPLDebug("GDAL", "Selecting overview %d x %d", poBestOverview->GetXSize(),
3673 : poBestOverview->GetYSize());
3674 :
3675 10161 : const int nOXOff = std::min(poBestOverview->GetXSize() - 1,
3676 3387 : static_cast<int>(nXOff / dfXFactor + 0.5));
3677 10161 : const int nOYOff = std::min(poBestOverview->GetYSize() - 1,
3678 3387 : static_cast<int>(nYOff / dfYFactor + 0.5));
3679 3387 : int nOXSize = std::max(1, static_cast<int>(nXSize / dfXFactor + 0.5));
3680 3387 : int nOYSize = std::max(1, static_cast<int>(nYSize / dfYFactor + 0.5));
3681 3387 : if (nOXOff + nOXSize > poBestOverview->GetXSize())
3682 0 : nOXSize = poBestOverview->GetXSize() - nOXOff;
3683 3387 : if (nOYOff + nOYSize > poBestOverview->GetYSize())
3684 2 : nOYSize = poBestOverview->GetYSize() - nOYOff;
3685 :
3686 3387 : if (psExtraArg)
3687 : {
3688 3387 : if (psExtraArg->bFloatingPointWindowValidity)
3689 : {
3690 537 : psExtraArg->dfXOff /= dfXFactor;
3691 537 : psExtraArg->dfXSize /= dfXFactor;
3692 537 : psExtraArg->dfYOff /= dfYFactor;
3693 537 : psExtraArg->dfYSize /= dfYFactor;
3694 : }
3695 2850 : else if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour)
3696 : {
3697 15 : psExtraArg->bFloatingPointWindowValidity = true;
3698 15 : psExtraArg->dfXOff = nXOff / dfXFactor;
3699 15 : psExtraArg->dfXSize = nXSize / dfXFactor;
3700 15 : psExtraArg->dfYOff = nYOff / dfYFactor;
3701 15 : psExtraArg->dfYSize = nYSize / dfYFactor;
3702 : }
3703 : }
3704 :
3705 3387 : nXOff = nOXOff;
3706 3387 : nYOff = nOYOff;
3707 3387 : nXSize = nOXSize;
3708 3387 : nYSize = nOYSize;
3709 :
3710 3387 : return nBestOverviewLevel;
3711 : }
3712 :
3713 : /************************************************************************/
3714 : /* OverviewRasterIO() */
3715 : /* */
3716 : /* Special work function to utilize available overviews to */
3717 : /* more efficiently satisfy downsampled requests. It will */
3718 : /* return CE_Failure if there are no appropriate overviews */
3719 : /* available but it doesn't emit any error messages. */
3720 : /************************************************************************/
3721 :
3722 : //! @cond Doxygen_Suppress
3723 1 : CPLErr GDALRasterBand::OverviewRasterIO(
3724 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3725 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3726 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg)
3727 :
3728 : {
3729 : GDALRasterIOExtraArg sExtraArg;
3730 1 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3731 :
3732 1 : const int nOverview = GDALBandGetBestOverviewLevel2(
3733 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, &sExtraArg);
3734 1 : if (nOverview < 0)
3735 1 : return CE_Failure;
3736 :
3737 : /* -------------------------------------------------------------------- */
3738 : /* Recast the call in terms of the new raster layer. */
3739 : /* -------------------------------------------------------------------- */
3740 0 : GDALRasterBand *poOverviewBand = GetOverview(nOverview);
3741 0 : if (poOverviewBand == nullptr)
3742 0 : return CE_Failure;
3743 :
3744 0 : return poOverviewBand->RasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
3745 : pData, nBufXSize, nBufYSize, eBufType,
3746 0 : nPixelSpace, nLineSpace, &sExtraArg);
3747 : }
3748 :
3749 : /************************************************************************/
3750 : /* TryOverviewRasterIO() */
3751 : /************************************************************************/
3752 :
3753 161929 : CPLErr GDALRasterBand::TryOverviewRasterIO(
3754 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3755 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3756 : GSpacing nPixelSpace, GSpacing nLineSpace, GDALRasterIOExtraArg *psExtraArg,
3757 : int *pbTried)
3758 : {
3759 161929 : int nXOffMod = nXOff;
3760 161929 : int nYOffMod = nYOff;
3761 161929 : int nXSizeMod = nXSize;
3762 161929 : int nYSizeMod = nYSize;
3763 : GDALRasterIOExtraArg sExtraArg;
3764 :
3765 161929 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3766 :
3767 161929 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3768 : this, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize, nBufYSize,
3769 : &sExtraArg);
3770 :
3771 161929 : if (iOvrLevel >= 0)
3772 : {
3773 42 : GDALRasterBand *poOverviewBand = GetOverview(iOvrLevel);
3774 42 : if (poOverviewBand)
3775 : {
3776 42 : *pbTried = TRUE;
3777 42 : return poOverviewBand->RasterIO(
3778 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData,
3779 : nBufXSize, nBufYSize, eBufType, nPixelSpace, nLineSpace,
3780 42 : &sExtraArg);
3781 : }
3782 : }
3783 :
3784 161887 : *pbTried = FALSE;
3785 161887 : return CE_None;
3786 : }
3787 :
3788 : /************************************************************************/
3789 : /* TryOverviewRasterIO() */
3790 : /************************************************************************/
3791 :
3792 158057 : CPLErr GDALDataset::TryOverviewRasterIO(
3793 : GDALRWFlag eRWFlag, int nXOff, int nYOff, int nXSize, int nYSize,
3794 : void *pData, int nBufXSize, int nBufYSize, GDALDataType eBufType,
3795 : int nBandCount, int *panBandMap, GSpacing nPixelSpace, GSpacing nLineSpace,
3796 : GSpacing nBandSpace, GDALRasterIOExtraArg *psExtraArg, int *pbTried)
3797 : {
3798 158057 : int nXOffMod = nXOff;
3799 158057 : int nYOffMod = nYOff;
3800 158057 : int nXSizeMod = nXSize;
3801 158057 : int nYSizeMod = nYSize;
3802 : GDALRasterIOExtraArg sExtraArg;
3803 158057 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
3804 :
3805 316114 : int iOvrLevel = GDALBandGetBestOverviewLevel2(
3806 158057 : papoBands[0], nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, nBufXSize,
3807 : nBufYSize, &sExtraArg);
3808 :
3809 158093 : if (iOvrLevel >= 0 && papoBands[0]->GetOverview(iOvrLevel) != nullptr &&
3810 36 : papoBands[0]->GetOverview(iOvrLevel)->GetDataset() != nullptr)
3811 : {
3812 36 : *pbTried = TRUE;
3813 36 : return papoBands[0]->GetOverview(iOvrLevel)->GetDataset()->RasterIO(
3814 : eRWFlag, nXOffMod, nYOffMod, nXSizeMod, nYSizeMod, pData, nBufXSize,
3815 : nBufYSize, eBufType, nBandCount, panBandMap, nPixelSpace,
3816 36 : nLineSpace, nBandSpace, &sExtraArg);
3817 : }
3818 : else
3819 : {
3820 158021 : *pbTried = FALSE;
3821 158021 : return CE_None;
3822 : }
3823 : }
3824 :
3825 : /************************************************************************/
3826 : /* GetBestOverviewLevel() */
3827 : /* */
3828 : /* Returns the best overview level to satisfy the query or -1 if none */
3829 : /* Also updates nXOff, nYOff, nXSize, nYSize when returning a valid */
3830 : /* overview level */
3831 : /************************************************************************/
3832 :
3833 4 : static int GDALDatasetGetBestOverviewLevel(GDALDataset *poDS, int &nXOff,
3834 : int &nYOff, int &nXSize, int &nYSize,
3835 : int nBufXSize, int nBufYSize,
3836 : int nBandCount, int *panBandMap,
3837 : GDALRasterIOExtraArg *psExtraArg)
3838 : {
3839 4 : int nOverviewCount = 0;
3840 4 : GDALRasterBand *poFirstBand = nullptr;
3841 :
3842 : /* -------------------------------------------------------------------- */
3843 : /* Check that all bands have the same number of overviews and */
3844 : /* that they have all the same size and block dimensions */
3845 : /* -------------------------------------------------------------------- */
3846 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
3847 : {
3848 8 : GDALRasterBand *poBand = poDS->GetRasterBand(panBandMap[iBand]);
3849 8 : if (poBand == nullptr)
3850 0 : return -1;
3851 8 : if (iBand == 0)
3852 : {
3853 4 : poFirstBand = poBand;
3854 4 : nOverviewCount = poBand->GetOverviewCount();
3855 : }
3856 4 : else if (nOverviewCount != poBand->GetOverviewCount())
3857 : {
3858 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3859 : "mismatched overview count, use std method.");
3860 0 : return -1;
3861 : }
3862 : else
3863 : {
3864 4 : for (int iOverview = 0; iOverview < nOverviewCount; iOverview++)
3865 : {
3866 0 : GDALRasterBand *poOvrBand = poBand->GetOverview(iOverview);
3867 : GDALRasterBand *poOvrFirstBand =
3868 0 : poFirstBand->GetOverview(iOverview);
3869 0 : if (poOvrBand == nullptr || poOvrFirstBand == nullptr)
3870 0 : continue;
3871 :
3872 0 : if (poOvrFirstBand->GetXSize() != poOvrBand->GetXSize() ||
3873 0 : poOvrFirstBand->GetYSize() != poOvrBand->GetYSize())
3874 : {
3875 0 : CPLDebug("GDAL",
3876 : "GDALDataset::GetBestOverviewLevel() ... "
3877 : "mismatched overview sizes, use std method.");
3878 0 : return -1;
3879 : }
3880 0 : int nBlockXSizeFirst = 0;
3881 0 : int nBlockYSizeFirst = 0;
3882 0 : poOvrFirstBand->GetBlockSize(&nBlockXSizeFirst,
3883 : &nBlockYSizeFirst);
3884 :
3885 0 : int nBlockXSizeCurrent = 0;
3886 0 : int nBlockYSizeCurrent = 0;
3887 0 : poOvrBand->GetBlockSize(&nBlockXSizeCurrent,
3888 : &nBlockYSizeCurrent);
3889 :
3890 0 : if (nBlockXSizeFirst != nBlockXSizeCurrent ||
3891 0 : nBlockYSizeFirst != nBlockYSizeCurrent)
3892 : {
3893 0 : CPLDebug("GDAL", "GDALDataset::GetBestOverviewLevel() ... "
3894 : "mismatched block sizes, use std method.");
3895 0 : return -1;
3896 : }
3897 : }
3898 : }
3899 : }
3900 4 : if (poFirstBand == nullptr)
3901 0 : return -1;
3902 :
3903 4 : return GDALBandGetBestOverviewLevel2(poFirstBand, nXOff, nYOff, nXSize,
3904 : nYSize, nBufXSize, nBufYSize,
3905 4 : psExtraArg);
3906 : }
3907 :
3908 : /************************************************************************/
3909 : /* BlockBasedRasterIO() */
3910 : /* */
3911 : /* This convenience function implements a dataset level */
3912 : /* RasterIO() interface based on calling down to fetch blocks, */
3913 : /* much like the GDALRasterBand::IRasterIO(), but it handles */
3914 : /* all bands at once, so that a format driver that handles a */
3915 : /* request for different bands of the same block efficiently */
3916 : /* (i.e. without re-reading interleaved data) will efficiently. */
3917 : /* */
3918 : /* This method is intended to be called by an overridden */
3919 : /* IRasterIO() method in the driver specific GDALDataset */
3920 : /* derived class. */
3921 : /* */
3922 : /* Default internal implementation of RasterIO() ... utilizes */
3923 : /* the Block access methods to satisfy the request. This would */
3924 : /* normally only be overridden by formats with overviews. */
3925 : /* */
3926 : /* To keep things relatively simple, this method does not */
3927 : /* currently take advantage of some special cases addressed in */
3928 : /* GDALRasterBand::IRasterIO(), so it is likely best to only */
3929 : /* call it when you know it will help. That is in cases where */
3930 : /* data is at 1:1 to the buffer, and you know the driver is */
3931 : /* implementing interleaved IO efficiently on a block by block */
3932 : /* basis. Overviews will be used when possible. */
3933 : /************************************************************************/
3934 :
3935 63011 : CPLErr GDALDataset::BlockBasedRasterIO(GDALRWFlag eRWFlag, int nXOff, int nYOff,
3936 : int nXSize, int nYSize, void *pData,
3937 : int nBufXSize, int nBufYSize,
3938 : GDALDataType eBufType, int nBandCount,
3939 : int *panBandMap, GSpacing nPixelSpace,
3940 : GSpacing nLineSpace, GSpacing nBandSpace,
3941 : GDALRasterIOExtraArg *psExtraArg)
3942 :
3943 : {
3944 63011 : CPLAssert(nullptr != pData);
3945 :
3946 63011 : GByte **papabySrcBlock = nullptr;
3947 63011 : GDALRasterBlock *poBlock = nullptr;
3948 63011 : GDALRasterBlock **papoBlocks = nullptr;
3949 63011 : int nLBlockX = -1;
3950 63011 : int nLBlockY = -1;
3951 : int iBufYOff;
3952 : int iBufXOff;
3953 63011 : int nBlockXSize = 1;
3954 63011 : int nBlockYSize = 1;
3955 63011 : CPLErr eErr = CE_None;
3956 63011 : GDALDataType eDataType = GDT_Byte;
3957 :
3958 63011 : const bool bUseIntegerRequestCoords =
3959 63038 : (!psExtraArg->bFloatingPointWindowValidity ||
3960 27 : (nXOff == psExtraArg->dfXOff && nYOff == psExtraArg->dfYOff &&
3961 25 : nXSize == psExtraArg->dfXSize && nYSize == psExtraArg->dfYSize));
3962 :
3963 : /* -------------------------------------------------------------------- */
3964 : /* Ensure that all bands share a common block size and data type. */
3965 : /* -------------------------------------------------------------------- */
3966 298651 : for (int iBand = 0; iBand < nBandCount; iBand++)
3967 : {
3968 235640 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
3969 :
3970 235639 : if (iBand == 0)
3971 : {
3972 63009 : poBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
3973 63011 : eDataType = poBand->GetRasterDataType();
3974 : }
3975 : else
3976 : {
3977 172630 : int nThisBlockXSize = 0;
3978 172630 : int nThisBlockYSize = 0;
3979 172630 : poBand->GetBlockSize(&nThisBlockXSize, &nThisBlockYSize);
3980 172630 : if (nThisBlockXSize != nBlockXSize ||
3981 172630 : nThisBlockYSize != nBlockYSize)
3982 : {
3983 0 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
3984 : "mismatched block sizes, use std method.");
3985 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
3986 : pData, nBufXSize, nBufYSize, eBufType,
3987 : nBandCount, panBandMap, nPixelSpace,
3988 0 : nLineSpace, nBandSpace, psExtraArg);
3989 : }
3990 :
3991 172630 : if (eDataType != poBand->GetRasterDataType() &&
3992 0 : (nXSize != nBufXSize || nYSize != nBufYSize))
3993 : {
3994 1 : CPLDebug("GDAL", "GDALDataset::BlockBasedRasterIO() ... "
3995 : "mismatched band data types, use std method.");
3996 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize,
3997 : pData, nBufXSize, nBufYSize, eBufType,
3998 : nBandCount, panBandMap, nPixelSpace,
3999 0 : nLineSpace, nBandSpace, psExtraArg);
4000 : }
4001 : }
4002 : }
4003 :
4004 : /* ==================================================================== */
4005 : /* In this special case at full resolution we step through in */
4006 : /* blocks, turning the request over to the per-band */
4007 : /* IRasterIO(), but ensuring that all bands of one block are */
4008 : /* called before proceeding to the next. */
4009 : /* ==================================================================== */
4010 :
4011 63011 : if (nXSize == nBufXSize && nYSize == nBufYSize && bUseIntegerRequestCoords)
4012 : {
4013 : GDALRasterIOExtraArg sDummyExtraArg;
4014 63007 : INIT_RASTERIO_EXTRA_ARG(sDummyExtraArg);
4015 :
4016 63007 : int nChunkYSize = 0;
4017 63007 : int nChunkXSize = 0;
4018 :
4019 176766 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff += nChunkYSize)
4020 : {
4021 114785 : const int nChunkYOff = iBufYOff + nYOff;
4022 114785 : nChunkYSize = nBlockYSize - (nChunkYOff % nBlockYSize);
4023 114785 : if (nChunkYOff + nChunkYSize > nYOff + nYSize)
4024 58468 : nChunkYSize = (nYOff + nYSize) - nChunkYOff;
4025 :
4026 753467 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff += nChunkXSize)
4027 : {
4028 639707 : const int nChunkXOff = iBufXOff + nXOff;
4029 639707 : nChunkXSize = nBlockXSize - (nChunkXOff % nBlockXSize);
4030 639707 : if (nChunkXOff + nChunkXSize > nXOff + nXSize)
4031 71265 : nChunkXSize = (nXOff + nXSize) - nChunkXOff;
4032 :
4033 639707 : GByte *pabyChunkData =
4034 639707 : static_cast<GByte *>(pData) + iBufXOff * nPixelSpace +
4035 639707 : static_cast<GPtrDiff_t>(iBufYOff) * nLineSpace;
4036 :
4037 3144200 : for (int iBand = 0; iBand < nBandCount; iBand++)
4038 : {
4039 2505520 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4040 :
4041 5011030 : eErr = poBand->GDALRasterBand::IRasterIO(
4042 : eRWFlag, nChunkXOff, nChunkYOff, nChunkXSize,
4043 : nChunkYSize,
4044 2505520 : pabyChunkData +
4045 2505520 : static_cast<GPtrDiff_t>(iBand) * nBandSpace,
4046 : nChunkXSize, nChunkYSize, eBufType, nPixelSpace,
4047 : nLineSpace, &sDummyExtraArg);
4048 2505520 : if (eErr != CE_None)
4049 1024 : return eErr;
4050 : }
4051 : }
4052 :
4053 133008 : if (psExtraArg->pfnProgress != nullptr &&
4054 19248 : !psExtraArg->pfnProgress(
4055 133008 : 1.0 * std::min(nBufYSize, iBufYOff + nChunkYSize) /
4056 : nBufYSize,
4057 : "", psExtraArg->pProgressData))
4058 : {
4059 1 : return CE_Failure;
4060 : }
4061 : }
4062 :
4063 61981 : return CE_None;
4064 : }
4065 :
4066 : /* Below code is not compatible with that case. It would need a complete */
4067 : /* separate code like done in GDALRasterBand::IRasterIO. */
4068 4 : if (eRWFlag == GF_Write && (nBufXSize < nXSize || nBufYSize < nYSize))
4069 : {
4070 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4071 : nBufXSize, nBufYSize, eBufType, nBandCount,
4072 : panBandMap, nPixelSpace, nLineSpace,
4073 0 : nBandSpace, psExtraArg);
4074 : }
4075 :
4076 : /* We could have a smarter implementation, but that will do for now */
4077 4 : if (psExtraArg->eResampleAlg != GRIORA_NearestNeighbour &&
4078 0 : (nBufXSize != nXSize || nBufYSize != nYSize))
4079 : {
4080 0 : return BandBasedRasterIO(eRWFlag, nXOff, nYOff, nXSize, nYSize, pData,
4081 : nBufXSize, nBufYSize, eBufType, nBandCount,
4082 : panBandMap, nPixelSpace, nLineSpace,
4083 0 : nBandSpace, psExtraArg);
4084 : }
4085 :
4086 : /* ==================================================================== */
4087 : /* Loop reading required source blocks to satisfy output */
4088 : /* request. This is the most general implementation. */
4089 : /* ==================================================================== */
4090 :
4091 4 : const int nBandDataSize = GDALGetDataTypeSizeBytes(eDataType);
4092 :
4093 : papabySrcBlock =
4094 4 : static_cast<GByte **>(CPLCalloc(sizeof(GByte *), nBandCount));
4095 : papoBlocks =
4096 4 : static_cast<GDALRasterBlock **>(CPLCalloc(sizeof(void *), nBandCount));
4097 :
4098 : /* -------------------------------------------------------------------- */
4099 : /* Select an overview level if appropriate. */
4100 : /* -------------------------------------------------------------------- */
4101 :
4102 : GDALRasterIOExtraArg sExtraArg;
4103 4 : GDALCopyRasterIOExtraArg(&sExtraArg, psExtraArg);
4104 4 : const int nOverviewLevel = GDALDatasetGetBestOverviewLevel(
4105 : this, nXOff, nYOff, nXSize, nYSize, nBufXSize, nBufYSize, nBandCount,
4106 : panBandMap, &sExtraArg);
4107 4 : if (nOverviewLevel >= 0)
4108 : {
4109 2 : GetRasterBand(panBandMap[0])
4110 2 : ->GetOverview(nOverviewLevel)
4111 2 : ->GetBlockSize(&nBlockXSize, &nBlockYSize);
4112 : }
4113 :
4114 4 : double dfXOff = nXOff;
4115 4 : double dfYOff = nYOff;
4116 4 : double dfXSize = nXSize;
4117 4 : double dfYSize = nYSize;
4118 4 : if (sExtraArg.bFloatingPointWindowValidity)
4119 : {
4120 2 : dfXOff = sExtraArg.dfXOff;
4121 2 : dfYOff = sExtraArg.dfYOff;
4122 2 : dfXSize = sExtraArg.dfXSize;
4123 2 : dfYSize = sExtraArg.dfYSize;
4124 : }
4125 :
4126 : /* -------------------------------------------------------------------- */
4127 : /* Compute stepping increment. */
4128 : /* -------------------------------------------------------------------- */
4129 4 : const double dfSrcXInc = dfXSize / static_cast<double>(nBufXSize);
4130 4 : const double dfSrcYInc = dfYSize / static_cast<double>(nBufYSize);
4131 :
4132 4 : constexpr double EPS = 1e-10;
4133 : /* -------------------------------------------------------------------- */
4134 : /* Loop over buffer computing source locations. */
4135 : /* -------------------------------------------------------------------- */
4136 36 : for (iBufYOff = 0; iBufYOff < nBufYSize; iBufYOff++)
4137 : {
4138 : GPtrDiff_t iSrcOffset;
4139 :
4140 : // Add small epsilon to avoid some numeric precision issues.
4141 32 : const double dfSrcY = (iBufYOff + 0.5) * dfSrcYInc + dfYOff + EPS;
4142 32 : const int iSrcY = static_cast<int>(std::min(
4143 32 : std::max(0.0, dfSrcY), static_cast<double>(nRasterYSize - 1)));
4144 :
4145 32 : GPtrDiff_t iBufOffset = static_cast<GPtrDiff_t>(iBufYOff) *
4146 : static_cast<GPtrDiff_t>(nLineSpace);
4147 :
4148 302 : for (iBufXOff = 0; iBufXOff < nBufXSize; iBufXOff++)
4149 : {
4150 270 : const double dfSrcX = (iBufXOff + 0.5) * dfSrcXInc + dfXOff + EPS;
4151 270 : const int iSrcX = static_cast<int>(std::min(
4152 270 : std::max(0.0, dfSrcX), static_cast<double>(nRasterXSize - 1)));
4153 :
4154 : // FIXME: this code likely doesn't work if the dirty block gets
4155 : // flushed to disk before being completely written. In the meantime,
4156 : // bJustInitialize should probably be set to FALSE even if it is not
4157 : // ideal performance wise, and for lossy compression
4158 :
4159 : /* --------------------------------------------------------------------
4160 : */
4161 : /* Ensure we have the appropriate block loaded. */
4162 : /* --------------------------------------------------------------------
4163 : */
4164 270 : if (iSrcX < nLBlockX * nBlockXSize ||
4165 270 : iSrcX - nBlockXSize >= nLBlockX * nBlockXSize ||
4166 266 : iSrcY < nLBlockY * nBlockYSize ||
4167 266 : iSrcY - nBlockYSize >= nLBlockY * nBlockYSize)
4168 : {
4169 4 : nLBlockX = iSrcX / nBlockXSize;
4170 4 : nLBlockY = iSrcY / nBlockYSize;
4171 :
4172 4 : const bool bJustInitialize =
4173 0 : eRWFlag == GF_Write && nYOff <= nLBlockY * nBlockYSize &&
4174 0 : nYOff + nYSize - nBlockYSize >= nLBlockY * nBlockYSize &&
4175 4 : nXOff <= nLBlockX * nBlockXSize &&
4176 0 : nXOff + nXSize - nBlockXSize >= nLBlockX * nBlockXSize;
4177 : /*bool bMemZeroBuffer = FALSE;
4178 : if( eRWFlag == GF_Write && !bJustInitialize &&
4179 : nXOff <= nLBlockX * nBlockXSize &&
4180 : nYOff <= nLBlockY * nBlockYSize &&
4181 : (nXOff + nXSize >= (nLBlockX+1) * nBlockXSize ||
4182 : (nXOff + nXSize == GetRasterXSize() &&
4183 : (nLBlockX+1) * nBlockXSize > GetRasterXSize())) &&
4184 : (nYOff + nYSize >= (nLBlockY+1) * nBlockYSize ||
4185 : (nYOff + nYSize == GetRasterYSize() &&
4186 : (nLBlockY+1) * nBlockYSize > GetRasterYSize())) )
4187 : {
4188 : bJustInitialize = TRUE;
4189 : bMemZeroBuffer = TRUE;
4190 : }*/
4191 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4192 : {
4193 8 : GDALRasterBand *poBand = GetRasterBand(panBandMap[iBand]);
4194 8 : if (nOverviewLevel >= 0)
4195 2 : poBand = poBand->GetOverview(nOverviewLevel);
4196 8 : poBlock = poBand->GetLockedBlockRef(nLBlockX, nLBlockY,
4197 : bJustInitialize);
4198 8 : if (poBlock == nullptr)
4199 : {
4200 0 : eErr = CE_Failure;
4201 0 : goto CleanupAndReturn;
4202 : }
4203 :
4204 8 : if (eRWFlag == GF_Write)
4205 0 : poBlock->MarkDirty();
4206 :
4207 8 : if (papoBlocks[iBand] != nullptr)
4208 0 : papoBlocks[iBand]->DropLock();
4209 :
4210 8 : papoBlocks[iBand] = poBlock;
4211 :
4212 8 : papabySrcBlock[iBand] =
4213 8 : static_cast<GByte *>(poBlock->GetDataRef());
4214 : /*if( bMemZeroBuffer )
4215 : {
4216 : memset(papabySrcBlock[iBand], 0,
4217 : static_cast<GPtrDiff_t>(nBandDataSize) * nBlockXSize
4218 : * nBlockYSize);
4219 : }*/
4220 : }
4221 : }
4222 :
4223 : /* --------------------------------------------------------------------
4224 : */
4225 : /* Copy over this pixel of data. */
4226 : /* --------------------------------------------------------------------
4227 : */
4228 270 : iSrcOffset = (static_cast<GPtrDiff_t>(iSrcX) -
4229 270 : static_cast<GPtrDiff_t>(nLBlockX) * nBlockXSize +
4230 270 : (static_cast<GPtrDiff_t>(iSrcY) -
4231 270 : static_cast<GPtrDiff_t>(nLBlockY) * nBlockYSize) *
4232 270 : nBlockXSize) *
4233 270 : nBandDataSize;
4234 :
4235 980 : for (int iBand = 0; iBand < nBandCount; iBand++)
4236 : {
4237 710 : GByte *pabySrcBlock = papabySrcBlock[iBand];
4238 710 : GPtrDiff_t iBandBufOffset =
4239 710 : iBufOffset + static_cast<GPtrDiff_t>(iBand) *
4240 : static_cast<GPtrDiff_t>(nBandSpace);
4241 :
4242 710 : if (eDataType == eBufType)
4243 : {
4244 710 : if (eRWFlag == GF_Read)
4245 710 : memcpy(static_cast<GByte *>(pData) + iBandBufOffset,
4246 710 : pabySrcBlock + iSrcOffset, nBandDataSize);
4247 : else
4248 0 : memcpy(pabySrcBlock + iSrcOffset,
4249 : static_cast<const GByte *>(pData) +
4250 0 : iBandBufOffset,
4251 : nBandDataSize);
4252 : }
4253 : else
4254 : {
4255 : /* type to type conversion ... ouch, this is expensive way
4256 : of handling single words */
4257 :
4258 0 : if (eRWFlag == GF_Read)
4259 0 : GDALCopyWords(pabySrcBlock + iSrcOffset, eDataType, 0,
4260 : static_cast<GByte *>(pData) +
4261 0 : iBandBufOffset,
4262 : eBufType, 0, 1);
4263 : else
4264 0 : GDALCopyWords(static_cast<const GByte *>(pData) +
4265 0 : iBandBufOffset,
4266 0 : eBufType, 0, pabySrcBlock + iSrcOffset,
4267 : eDataType, 0, 1);
4268 : }
4269 : }
4270 :
4271 270 : iBufOffset += static_cast<int>(nPixelSpace);
4272 : }
4273 : }
4274 :
4275 : /* -------------------------------------------------------------------- */
4276 : /* CleanupAndReturn. */
4277 : /* -------------------------------------------------------------------- */
4278 4 : CleanupAndReturn:
4279 4 : CPLFree(papabySrcBlock);
4280 4 : if (papoBlocks != nullptr)
4281 : {
4282 12 : for (int iBand = 0; iBand < nBandCount; iBand++)
4283 : {
4284 8 : if (papoBlocks[iBand] != nullptr)
4285 8 : papoBlocks[iBand]->DropLock();
4286 : }
4287 4 : CPLFree(papoBlocks);
4288 : }
4289 :
4290 4 : return eErr;
4291 : }
4292 :
4293 : //! @endcond
4294 :
4295 : /************************************************************************/
4296 : /* GDALCopyWholeRasterGetSwathSize() */
4297 : /************************************************************************/
4298 :
4299 1948 : static void GDALCopyWholeRasterGetSwathSize(GDALRasterBand *poSrcPrototypeBand,
4300 : GDALRasterBand *poDstPrototypeBand,
4301 : int nBandCount,
4302 : int bDstIsCompressed,
4303 : int bInterleave, int *pnSwathCols,
4304 : int *pnSwathLines)
4305 : {
4306 1948 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4307 1948 : int nSrcBlockXSize = 0;
4308 1948 : int nSrcBlockYSize = 0;
4309 1948 : int nBlockXSize = 0;
4310 1948 : int nBlockYSize = 0;
4311 :
4312 1948 : int nXSize = poSrcPrototypeBand->GetXSize();
4313 1948 : int nYSize = poSrcPrototypeBand->GetYSize();
4314 :
4315 1948 : poSrcPrototypeBand->GetBlockSize(&nSrcBlockXSize, &nSrcBlockYSize);
4316 1948 : poDstPrototypeBand->GetBlockSize(&nBlockXSize, &nBlockYSize);
4317 :
4318 1948 : const int nMaxBlockXSize = std::max(nBlockXSize, nSrcBlockXSize);
4319 1948 : const int nMaxBlockYSize = std::max(nBlockYSize, nSrcBlockYSize);
4320 :
4321 1948 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4322 1948 : if (bInterleave)
4323 508 : nPixelSize *= nBandCount;
4324 :
4325 : // aim for one row of blocks. Do not settle for less.
4326 1948 : int nSwathCols = nXSize;
4327 1948 : int nSwathLines = nMaxBlockYSize;
4328 :
4329 : const char *pszSrcCompression =
4330 1948 : poSrcPrototypeBand->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4331 1948 : if (pszSrcCompression == nullptr)
4332 : {
4333 1922 : auto poSrcDS = poSrcPrototypeBand->GetDataset();
4334 1922 : if (poSrcDS)
4335 : pszSrcCompression =
4336 1916 : poSrcDS->GetMetadataItem("COMPRESSION", "IMAGE_STRUCTURE");
4337 : }
4338 :
4339 : /* -------------------------------------------------------------------- */
4340 : /* What will our swath size be? */
4341 : /* -------------------------------------------------------------------- */
4342 : // When writing interleaved data in a compressed format, we want to be sure
4343 : // that each block will only be written once, so the swath size must not be
4344 : // greater than the block cache.
4345 1948 : const char *pszSwathSize = CPLGetConfigOption("GDAL_SWATH_SIZE", nullptr);
4346 : int nTargetSwathSize;
4347 1948 : if (pszSwathSize != nullptr)
4348 0 : nTargetSwathSize = static_cast<int>(
4349 0 : std::min(GIntBig(INT_MAX), CPLAtoGIntBig(pszSwathSize)));
4350 : else
4351 : {
4352 : // As a default, take one 1/4 of the cache size.
4353 1948 : nTargetSwathSize = static_cast<int>(
4354 1948 : std::min(GIntBig(INT_MAX), GDALGetCacheMax64() / 4));
4355 :
4356 : // but if the minimum idal swath buf size is less, then go for it to
4357 : // avoid unnecessarily abusing RAM usage.
4358 : // but try to use 10 MB at least.
4359 1948 : GIntBig nIdealSwathBufSize =
4360 1948 : static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize;
4361 1948 : int nMinTargetSwathSize = 10 * 1000 * 1000;
4362 :
4363 1948 : if ((poSrcPrototypeBand->GetSuggestedBlockAccessPattern() &
4364 1948 : GSBAP_LARGEST_CHUNK_POSSIBLE) != 0)
4365 : {
4366 2 : nMinTargetSwathSize = nTargetSwathSize;
4367 : }
4368 :
4369 1948 : if (nIdealSwathBufSize < nTargetSwathSize &&
4370 1938 : nIdealSwathBufSize < nMinTargetSwathSize)
4371 : {
4372 1935 : nIdealSwathBufSize = nMinTargetSwathSize;
4373 : }
4374 :
4375 1948 : if (pszSrcCompression != nullptr &&
4376 153 : EQUAL(pszSrcCompression, "JPEG2000") &&
4377 0 : (!bDstIsCompressed || ((nSrcBlockXSize % nBlockXSize) == 0 &&
4378 0 : (nSrcBlockYSize % nBlockYSize) == 0)))
4379 : {
4380 2 : nIdealSwathBufSize =
4381 4 : std::max(nIdealSwathBufSize, static_cast<GIntBig>(nSwathCols) *
4382 2 : nSrcBlockYSize * nPixelSize);
4383 : }
4384 1948 : if (nTargetSwathSize > nIdealSwathBufSize)
4385 1934 : nTargetSwathSize = static_cast<int>(
4386 1934 : std::min(GIntBig(INT_MAX), nIdealSwathBufSize));
4387 : }
4388 :
4389 1948 : if (nTargetSwathSize < 1000000)
4390 8 : nTargetSwathSize = 1000000;
4391 :
4392 : /* But let's check that */
4393 2148 : if (bDstIsCompressed && bInterleave &&
4394 200 : nTargetSwathSize > GDALGetCacheMax64())
4395 : {
4396 0 : CPLError(CE_Warning, CPLE_AppDefined,
4397 : "When translating into a compressed interleave format, "
4398 : "the block cache size (" CPL_FRMT_GIB ") "
4399 : "should be at least the size of the swath (%d) "
4400 : "(GDAL_SWATH_SIZE config. option)",
4401 : GDALGetCacheMax64(), nTargetSwathSize);
4402 : }
4403 :
4404 : #define IS_DIVIDER_OF(x, y) ((y) % (x) == 0)
4405 : #define ROUND_TO(x, y) (((x) / (y)) * (y))
4406 :
4407 : // if both input and output datasets are tiled, that the tile dimensions
4408 : // are "compatible", try to stick to a swath dimension that is a multiple
4409 : // of input and output block dimensions.
4410 1948 : if (nBlockXSize != nXSize && nSrcBlockXSize != nXSize &&
4411 32 : IS_DIVIDER_OF(nBlockXSize, nMaxBlockXSize) &&
4412 32 : IS_DIVIDER_OF(nSrcBlockXSize, nMaxBlockXSize) &&
4413 32 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4414 32 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4415 : {
4416 32 : if (static_cast<GIntBig>(nMaxBlockXSize) * nMaxBlockYSize *
4417 32 : nPixelSize <=
4418 32 : static_cast<GIntBig>(nTargetSwathSize))
4419 : {
4420 32 : nSwathCols = nTargetSwathSize / (nMaxBlockYSize * nPixelSize);
4421 32 : nSwathCols = ROUND_TO(nSwathCols, nMaxBlockXSize);
4422 32 : if (nSwathCols == 0)
4423 0 : nSwathCols = nMaxBlockXSize;
4424 32 : if (nSwathCols > nXSize)
4425 30 : nSwathCols = nXSize;
4426 32 : nSwathLines = nMaxBlockYSize;
4427 :
4428 32 : if (static_cast<GIntBig>(nSwathCols) * nSwathLines * nPixelSize >
4429 32 : static_cast<GIntBig>(nTargetSwathSize))
4430 : {
4431 0 : nSwathCols = nXSize;
4432 0 : nSwathLines = nBlockYSize;
4433 : }
4434 : }
4435 : }
4436 :
4437 1948 : const GIntBig nMemoryPerCol = static_cast<GIntBig>(nSwathCols) * nPixelSize;
4438 1948 : const GIntBig nSwathBufSize = nMemoryPerCol * nSwathLines;
4439 1948 : if (nSwathBufSize > static_cast<GIntBig>(nTargetSwathSize))
4440 : {
4441 1 : nSwathLines = static_cast<int>(nTargetSwathSize / nMemoryPerCol);
4442 1 : if (nSwathLines == 0)
4443 1 : nSwathLines = 1;
4444 :
4445 1 : CPLDebug(
4446 : "GDAL",
4447 : "GDALCopyWholeRasterGetSwathSize(): adjusting to %d line swath "
4448 : "since requirement (" CPL_FRMT_GIB " bytes) exceed target swath "
4449 : "size (%d bytes) (GDAL_SWATH_SIZE config. option)",
4450 1 : nSwathLines, nBlockYSize * nMemoryPerCol, nTargetSwathSize);
4451 : }
4452 : // If we are processing single scans, try to handle several at once.
4453 : // If we are handling swaths already, only grow the swath if a row
4454 : // of blocks is substantially less than our target buffer size.
4455 1947 : else if (nSwathLines == 1 ||
4456 1458 : nMemoryPerCol * nSwathLines <
4457 1458 : static_cast<GIntBig>(nTargetSwathSize) / 10)
4458 : {
4459 1920 : nSwathLines = std::min(
4460 : nYSize,
4461 1920 : std::max(1, static_cast<int>(nTargetSwathSize / nMemoryPerCol)));
4462 :
4463 : /* If possible try to align to source and target block height */
4464 1920 : if ((nSwathLines % nMaxBlockYSize) != 0 &&
4465 187 : nSwathLines > nMaxBlockYSize &&
4466 187 : IS_DIVIDER_OF(nBlockYSize, nMaxBlockYSize) &&
4467 159 : IS_DIVIDER_OF(nSrcBlockYSize, nMaxBlockYSize))
4468 139 : nSwathLines = ROUND_TO(nSwathLines, nMaxBlockYSize);
4469 : }
4470 :
4471 1948 : if (pszSrcCompression != nullptr && EQUAL(pszSrcCompression, "JPEG2000") &&
4472 0 : (!bDstIsCompressed || (IS_DIVIDER_OF(nBlockXSize, nSrcBlockXSize) &&
4473 0 : IS_DIVIDER_OF(nBlockYSize, nSrcBlockYSize))))
4474 : {
4475 : // Typical use case: converting from Pleaiades that is 2048x2048 tiled.
4476 2 : if (nSwathLines < nSrcBlockYSize)
4477 : {
4478 0 : nSwathLines = nSrcBlockYSize;
4479 :
4480 : // Number of pixels that can be read/write simultaneously.
4481 0 : nSwathCols = nTargetSwathSize / (nSrcBlockXSize * nPixelSize);
4482 0 : nSwathCols = ROUND_TO(nSwathCols, nSrcBlockXSize);
4483 0 : if (nSwathCols == 0)
4484 0 : nSwathCols = nSrcBlockXSize;
4485 0 : if (nSwathCols > nXSize)
4486 0 : nSwathCols = nXSize;
4487 :
4488 0 : CPLDebug(
4489 : "GDAL",
4490 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4491 : "too high block, "
4492 : "use partial width at one time");
4493 : }
4494 2 : else if ((nSwathLines % nSrcBlockYSize) != 0)
4495 : {
4496 : /* Round on a multiple of nSrcBlockYSize */
4497 0 : nSwathLines = ROUND_TO(nSwathLines, nSrcBlockYSize);
4498 0 : CPLDebug(
4499 : "GDAL",
4500 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4501 : "round nSwathLines to block height : %d",
4502 : nSwathLines);
4503 : }
4504 : }
4505 1946 : else if (bDstIsCompressed)
4506 : {
4507 355 : if (nSwathLines < nBlockYSize)
4508 : {
4509 137 : nSwathLines = nBlockYSize;
4510 :
4511 : // Number of pixels that can be read/write simultaneously.
4512 137 : nSwathCols = nTargetSwathSize / (nSwathLines * nPixelSize);
4513 137 : nSwathCols = ROUND_TO(nSwathCols, nBlockXSize);
4514 137 : if (nSwathCols == 0)
4515 0 : nSwathCols = nBlockXSize;
4516 137 : if (nSwathCols > nXSize)
4517 137 : nSwathCols = nXSize;
4518 :
4519 137 : CPLDebug(
4520 : "GDAL",
4521 : "GDALCopyWholeRasterGetSwathSize(): because of compression and "
4522 : "too high block, "
4523 : "use partial width at one time");
4524 : }
4525 218 : else if ((nSwathLines % nBlockYSize) != 0)
4526 : {
4527 : // Round on a multiple of nBlockYSize.
4528 9 : nSwathLines = ROUND_TO(nSwathLines, nBlockYSize);
4529 9 : CPLDebug(
4530 : "GDAL",
4531 : "GDALCopyWholeRasterGetSwathSize(): because of compression, "
4532 : "round nSwathLines to block height : %d",
4533 : nSwathLines);
4534 : }
4535 : }
4536 :
4537 1948 : *pnSwathCols = nSwathCols;
4538 1948 : *pnSwathLines = nSwathLines;
4539 1948 : }
4540 :
4541 : /************************************************************************/
4542 : /* GDALDatasetCopyWholeRaster() */
4543 : /************************************************************************/
4544 :
4545 : /**
4546 : * \brief Copy all dataset raster data.
4547 : *
4548 : * This function copies the complete raster contents of one dataset to
4549 : * another similarly configured dataset. The source and destination
4550 : * dataset must have the same number of bands, and the same width
4551 : * and height. The bands do not have to have the same data type.
4552 : *
4553 : * This function is primarily intended to support implementation of
4554 : * driver specific CreateCopy() functions. It implements efficient copying,
4555 : * in particular "chunking" the copy in substantial blocks and, if appropriate,
4556 : * performing the transfer in a pixel interleaved fashion.
4557 : *
4558 : * Currently the only papszOptions value supported are :
4559 : * <ul>
4560 : * <li>"INTERLEAVE=PIXEL/BAND" to force pixel (resp. band) interleaved read and
4561 : * write access pattern (this does not modify the layout of the destination
4562 : * data)</li> <li>"COMPRESSED=YES" to force alignment on target dataset block
4563 : * sizes to achieve best compression.</li> <li>"SKIP_HOLES=YES" to skip chunks
4564 : * for which GDALGetDataCoverageStatus() returns GDAL_DATA_COVERAGE_STATUS_EMPTY
4565 : * (GDAL >= 2.2)</li>
4566 : * </ul>
4567 : * More options may be supported in the future.
4568 : *
4569 : * @param hSrcDS the source dataset
4570 : * @param hDstDS the destination dataset
4571 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4572 : * @param pfnProgress progress reporting function.
4573 : * @param pProgressData callback data for progress function.
4574 : *
4575 : * @return CE_None on success, or CE_Failure on failure.
4576 : */
4577 :
4578 1930 : CPLErr CPL_STDCALL GDALDatasetCopyWholeRaster(GDALDatasetH hSrcDS,
4579 : GDALDatasetH hDstDS,
4580 : CSLConstList papszOptions,
4581 : GDALProgressFunc pfnProgress,
4582 : void *pProgressData)
4583 :
4584 : {
4585 1930 : VALIDATE_POINTER1(hSrcDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4586 1930 : VALIDATE_POINTER1(hDstDS, "GDALDatasetCopyWholeRaster", CE_Failure);
4587 :
4588 1930 : GDALDataset *poSrcDS = GDALDataset::FromHandle(hSrcDS);
4589 1930 : GDALDataset *poDstDS = GDALDataset::FromHandle(hDstDS);
4590 :
4591 1930 : if (pfnProgress == nullptr)
4592 0 : pfnProgress = GDALDummyProgress;
4593 :
4594 : /* -------------------------------------------------------------------- */
4595 : /* Confirm the datasets match in size and band counts. */
4596 : /* -------------------------------------------------------------------- */
4597 1930 : const int nXSize = poDstDS->GetRasterXSize();
4598 1930 : const int nYSize = poDstDS->GetRasterYSize();
4599 1930 : const int nBandCount = poDstDS->GetRasterCount();
4600 :
4601 1930 : if (poSrcDS->GetRasterXSize() != nXSize ||
4602 3860 : poSrcDS->GetRasterYSize() != nYSize ||
4603 1930 : poSrcDS->GetRasterCount() != nBandCount)
4604 : {
4605 0 : CPLError(CE_Failure, CPLE_AppDefined,
4606 : "Input and output dataset sizes or band counts do not\n"
4607 : "match in GDALDatasetCopyWholeRaster()");
4608 0 : return CE_Failure;
4609 : }
4610 :
4611 : /* -------------------------------------------------------------------- */
4612 : /* Report preliminary (0) progress. */
4613 : /* -------------------------------------------------------------------- */
4614 1930 : if (!pfnProgress(0.0, nullptr, pProgressData))
4615 : {
4616 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4617 : "User terminated CreateCopy()");
4618 1 : return CE_Failure;
4619 : }
4620 :
4621 : /* -------------------------------------------------------------------- */
4622 : /* Get our prototype band, and assume the others are similarly */
4623 : /* configured. */
4624 : /* -------------------------------------------------------------------- */
4625 1929 : if (nBandCount == 0)
4626 0 : return CE_None;
4627 :
4628 1929 : GDALRasterBand *poSrcPrototypeBand = poSrcDS->GetRasterBand(1);
4629 1929 : GDALRasterBand *poDstPrototypeBand = poDstDS->GetRasterBand(1);
4630 1929 : GDALDataType eDT = poDstPrototypeBand->GetRasterDataType();
4631 :
4632 : /* -------------------------------------------------------------------- */
4633 : /* Do we want to try and do the operation in a pixel */
4634 : /* interleaved fashion? */
4635 : /* -------------------------------------------------------------------- */
4636 1929 : bool bInterleave = false;
4637 : const char *pszInterleave =
4638 1929 : poSrcDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4639 1929 : if (pszInterleave != nullptr &&
4640 997 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4641 145 : bInterleave = true;
4642 :
4643 1929 : pszInterleave = poDstDS->GetMetadataItem("INTERLEAVE", "IMAGE_STRUCTURE");
4644 1929 : if (pszInterleave != nullptr &&
4645 1189 : (EQUAL(pszInterleave, "PIXEL") || EQUAL(pszInterleave, "LINE")))
4646 480 : bInterleave = true;
4647 :
4648 1929 : pszInterleave = CSLFetchNameValue(papszOptions, "INTERLEAVE");
4649 1929 : if (pszInterleave != nullptr && EQUAL(pszInterleave, "PIXEL"))
4650 5 : bInterleave = true;
4651 1924 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "BAND"))
4652 7 : bInterleave = false;
4653 : // attributes is specific to the TileDB driver
4654 1917 : else if (pszInterleave != nullptr && EQUAL(pszInterleave, "ATTRIBUTES"))
4655 4 : bInterleave = true;
4656 1913 : else if (pszInterleave != nullptr)
4657 : {
4658 0 : CPLError(CE_Warning, CPLE_NotSupported,
4659 : "Unsupported value for option INTERLEAVE");
4660 : }
4661 :
4662 : // If the destination is compressed, we must try to write blocks just once,
4663 : // to save disk space (GTiff case for example), and to avoid data loss
4664 : // (JPEG compression for example).
4665 1929 : bool bDstIsCompressed = false;
4666 : const char *pszDstCompressed =
4667 1929 : CSLFetchNameValue(papszOptions, "COMPRESSED");
4668 1929 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4669 338 : bDstIsCompressed = true;
4670 :
4671 : /* -------------------------------------------------------------------- */
4672 : /* What will our swath size be? */
4673 : /* -------------------------------------------------------------------- */
4674 :
4675 1929 : int nSwathCols = 0;
4676 1929 : int nSwathLines = 0;
4677 1929 : GDALCopyWholeRasterGetSwathSize(poSrcPrototypeBand, poDstPrototypeBand,
4678 : nBandCount, bDstIsCompressed, bInterleave,
4679 : &nSwathCols, &nSwathLines);
4680 :
4681 1929 : int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4682 1929 : if (bInterleave)
4683 508 : nPixelSize *= nBandCount;
4684 :
4685 1929 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
4686 1929 : if (pSwathBuf == nullptr)
4687 : {
4688 0 : return CE_Failure;
4689 : }
4690 :
4691 1929 : CPLDebug("GDAL",
4692 : "GDALDatasetCopyWholeRaster(): %d*%d swaths, bInterleave=%d",
4693 : nSwathCols, nSwathLines, static_cast<int>(bInterleave));
4694 :
4695 : // Advise the source raster that we are going to read it completely
4696 : // Note: this might already have been done by GDALCreateCopy() in the
4697 : // likely case this function is indirectly called by it
4698 1929 : poSrcDS->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nBandCount,
4699 1929 : nullptr, nullptr);
4700 :
4701 : /* ==================================================================== */
4702 : /* Band oriented (uninterleaved) case. */
4703 : /* ==================================================================== */
4704 1929 : CPLErr eErr = CE_None;
4705 : const bool bCheckHoles =
4706 1929 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
4707 :
4708 1929 : if (!bInterleave)
4709 : {
4710 : GDALRasterIOExtraArg sExtraArg;
4711 1421 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4712 1421 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4713 :
4714 4263 : const GIntBig nTotalBlocks = static_cast<GIntBig>(nBandCount) *
4715 1421 : DIV_ROUND_UP(nYSize, nSwathLines) *
4716 1421 : DIV_ROUND_UP(nXSize, nSwathCols);
4717 1421 : GIntBig nBlocksDone = 0;
4718 :
4719 3490 : for (int iBand = 0; iBand < nBandCount && eErr == CE_None; iBand++)
4720 : {
4721 2069 : int nBand = iBand + 1;
4722 :
4723 4292 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4724 : {
4725 2223 : int nThisLines = nSwathLines;
4726 :
4727 2223 : if (iY + nThisLines > nYSize)
4728 270 : nThisLines = nYSize - iY;
4729 :
4730 4446 : for (int iX = 0; iX < nXSize && eErr == CE_None;
4731 2223 : iX += nSwathCols)
4732 : {
4733 2223 : int nThisCols = nSwathCols;
4734 :
4735 2223 : if (iX + nThisCols > nXSize)
4736 0 : nThisCols = nXSize - iX;
4737 :
4738 2223 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4739 2223 : if (bCheckHoles)
4740 : {
4741 : nStatus = poSrcDS->GetRasterBand(nBand)
4742 904 : ->GetDataCoverageStatus(
4743 : iX, iY, nThisCols, nThisLines,
4744 : GDAL_DATA_COVERAGE_STATUS_DATA);
4745 : }
4746 2223 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4747 : {
4748 2220 : sExtraArg.pfnProgress = GDALScaledProgress;
4749 4440 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4750 2220 : nBlocksDone / static_cast<double>(nTotalBlocks),
4751 2220 : (nBlocksDone + 0.5) /
4752 2220 : static_cast<double>(nTotalBlocks),
4753 : pfnProgress, pProgressData);
4754 2220 : if (sExtraArg.pProgressData == nullptr)
4755 1304 : sExtraArg.pfnProgress = nullptr;
4756 :
4757 2220 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4758 : nThisLines, pSwathBuf,
4759 : nThisCols, nThisLines, eDT, 1,
4760 : &nBand, 0, 0, 0, &sExtraArg);
4761 :
4762 2220 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4763 :
4764 2220 : if (eErr == CE_None)
4765 2216 : eErr = poDstDS->RasterIO(
4766 : GF_Write, iX, iY, nThisCols, nThisLines,
4767 : pSwathBuf, nThisCols, nThisLines, eDT, 1,
4768 : &nBand, 0, 0, 0, nullptr);
4769 : }
4770 :
4771 2223 : nBlocksDone++;
4772 4407 : if (eErr == CE_None &&
4773 2184 : !pfnProgress(nBlocksDone /
4774 2184 : static_cast<double>(nTotalBlocks),
4775 : nullptr, pProgressData))
4776 : {
4777 3 : eErr = CE_Failure;
4778 3 : CPLError(CE_Failure, CPLE_UserInterrupt,
4779 : "User terminated CreateCopy()");
4780 : }
4781 : }
4782 : }
4783 : }
4784 : }
4785 :
4786 : /* ==================================================================== */
4787 : /* Pixel interleaved case. */
4788 : /* ==================================================================== */
4789 : else /* if( bInterleave ) */
4790 : {
4791 : GDALRasterIOExtraArg sExtraArg;
4792 508 : INIT_RASTERIO_EXTRA_ARG(sExtraArg);
4793 508 : CPL_IGNORE_RET_VAL(sExtraArg.pfnProgress); // to make cppcheck happy
4794 :
4795 508 : const GIntBig nTotalBlocks =
4796 508 : static_cast<GIntBig>(DIV_ROUND_UP(nYSize, nSwathLines)) *
4797 508 : DIV_ROUND_UP(nXSize, nSwathCols);
4798 508 : GIntBig nBlocksDone = 0;
4799 :
4800 1216 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4801 : {
4802 708 : int nThisLines = nSwathLines;
4803 :
4804 708 : if (iY + nThisLines > nYSize)
4805 173 : nThisLines = nYSize - iY;
4806 :
4807 1421 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
4808 : {
4809 713 : int nThisCols = nSwathCols;
4810 :
4811 713 : if (iX + nThisCols > nXSize)
4812 3 : nThisCols = nXSize - iX;
4813 :
4814 713 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
4815 713 : if (bCheckHoles)
4816 : {
4817 437 : for (int iBand = 0; iBand < nBandCount; iBand++)
4818 : {
4819 437 : nStatus |= poSrcDS->GetRasterBand(iBand + 1)
4820 437 : ->GetDataCoverageStatus(
4821 : iX, iY, nThisCols, nThisLines,
4822 : GDAL_DATA_COVERAGE_STATUS_DATA);
4823 437 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4824 437 : break;
4825 : }
4826 : }
4827 713 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
4828 : {
4829 713 : sExtraArg.pfnProgress = GDALScaledProgress;
4830 1426 : sExtraArg.pProgressData = GDALCreateScaledProgress(
4831 713 : nBlocksDone / static_cast<double>(nTotalBlocks),
4832 713 : (nBlocksDone + 0.5) / static_cast<double>(nTotalBlocks),
4833 : pfnProgress, pProgressData);
4834 713 : if (sExtraArg.pProgressData == nullptr)
4835 276 : sExtraArg.pfnProgress = nullptr;
4836 :
4837 713 : eErr = poSrcDS->RasterIO(GF_Read, iX, iY, nThisCols,
4838 : nThisLines, pSwathBuf, nThisCols,
4839 : nThisLines, eDT, nBandCount,
4840 : nullptr, 0, 0, 0, &sExtraArg);
4841 :
4842 713 : GDALDestroyScaledProgress(sExtraArg.pProgressData);
4843 :
4844 713 : if (eErr == CE_None)
4845 713 : eErr = poDstDS->RasterIO(
4846 : GF_Write, iX, iY, nThisCols, nThisLines, pSwathBuf,
4847 : nThisCols, nThisLines, eDT, nBandCount, nullptr, 0,
4848 : 0, 0, nullptr);
4849 : }
4850 :
4851 713 : nBlocksDone++;
4852 1423 : if (eErr == CE_None &&
4853 710 : !pfnProgress(nBlocksDone /
4854 710 : static_cast<double>(nTotalBlocks),
4855 : nullptr, pProgressData))
4856 : {
4857 1 : eErr = CE_Failure;
4858 1 : CPLError(CE_Failure, CPLE_UserInterrupt,
4859 : "User terminated CreateCopy()");
4860 : }
4861 : }
4862 : }
4863 : }
4864 :
4865 : /* -------------------------------------------------------------------- */
4866 : /* Cleanup */
4867 : /* -------------------------------------------------------------------- */
4868 1929 : CPLFree(pSwathBuf);
4869 :
4870 1929 : return eErr;
4871 : }
4872 :
4873 : /************************************************************************/
4874 : /* GDALRasterBandCopyWholeRaster() */
4875 : /************************************************************************/
4876 :
4877 : /**
4878 : * \brief Copy a whole raster band
4879 : *
4880 : * This function copies the complete raster contents of one band to
4881 : * another similarly configured band. The source and destination
4882 : * bands must have the same width and height. The bands do not have
4883 : * to have the same data type.
4884 : *
4885 : * It implements efficient copying, in particular "chunking" the copy in
4886 : * substantial blocks.
4887 : *
4888 : * Currently the only papszOptions value supported are :
4889 : * <ul>
4890 : * <li>"COMPRESSED=YES" to force alignment on target dataset block sizes to
4891 : * achieve best compression.</li>
4892 : * <li>"SKIP_HOLES=YES" to skip chunks for which GDALGetDataCoverageStatus()
4893 : * returns GDAL_DATA_COVERAGE_STATUS_EMPTY (GDAL >= 2.2)</li>
4894 : * </ul>
4895 : *
4896 : * @param hSrcBand the source band
4897 : * @param hDstBand the destination band
4898 : * @param papszOptions transfer hints in "StringList" Name=Value format.
4899 : * @param pfnProgress progress reporting function.
4900 : * @param pProgressData callback data for progress function.
4901 : *
4902 : * @return CE_None on success, or CE_Failure on failure.
4903 : */
4904 :
4905 19 : CPLErr CPL_STDCALL GDALRasterBandCopyWholeRaster(
4906 : GDALRasterBandH hSrcBand, GDALRasterBandH hDstBand,
4907 : const char *const *const papszOptions, GDALProgressFunc pfnProgress,
4908 : void *pProgressData)
4909 :
4910 : {
4911 19 : VALIDATE_POINTER1(hSrcBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
4912 19 : VALIDATE_POINTER1(hDstBand, "GDALRasterBandCopyWholeRaster", CE_Failure);
4913 :
4914 19 : GDALRasterBand *poSrcBand = GDALRasterBand::FromHandle(hSrcBand);
4915 19 : GDALRasterBand *poDstBand = GDALRasterBand::FromHandle(hDstBand);
4916 19 : CPLErr eErr = CE_None;
4917 :
4918 19 : if (pfnProgress == nullptr)
4919 2 : pfnProgress = GDALDummyProgress;
4920 :
4921 : /* -------------------------------------------------------------------- */
4922 : /* Confirm the datasets match in size and band counts. */
4923 : /* -------------------------------------------------------------------- */
4924 19 : int nXSize = poSrcBand->GetXSize();
4925 19 : int nYSize = poSrcBand->GetYSize();
4926 :
4927 19 : if (poDstBand->GetXSize() != nXSize || poDstBand->GetYSize() != nYSize)
4928 : {
4929 0 : CPLError(CE_Failure, CPLE_AppDefined,
4930 : "Input and output band sizes do not\n"
4931 : "match in GDALRasterBandCopyWholeRaster()");
4932 0 : return CE_Failure;
4933 : }
4934 :
4935 : /* -------------------------------------------------------------------- */
4936 : /* Report preliminary (0) progress. */
4937 : /* -------------------------------------------------------------------- */
4938 19 : if (!pfnProgress(0.0, nullptr, pProgressData))
4939 : {
4940 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
4941 : "User terminated CreateCopy()");
4942 0 : return CE_Failure;
4943 : }
4944 :
4945 19 : GDALDataType eDT = poDstBand->GetRasterDataType();
4946 :
4947 : // If the destination is compressed, we must try to write blocks just once,
4948 : // to save disk space (GTiff case for example), and to avoid data loss
4949 : // (JPEG compression for example).
4950 19 : bool bDstIsCompressed = false;
4951 : const char *pszDstCompressed =
4952 19 : CSLFetchNameValue(const_cast<char **>(papszOptions), "COMPRESSED");
4953 19 : if (pszDstCompressed != nullptr && CPLTestBool(pszDstCompressed))
4954 17 : bDstIsCompressed = true;
4955 :
4956 : /* -------------------------------------------------------------------- */
4957 : /* What will our swath size be? */
4958 : /* -------------------------------------------------------------------- */
4959 :
4960 19 : int nSwathCols = 0;
4961 19 : int nSwathLines = 0;
4962 19 : GDALCopyWholeRasterGetSwathSize(poSrcBand, poDstBand, 1, bDstIsCompressed,
4963 : FALSE, &nSwathCols, &nSwathLines);
4964 :
4965 19 : const int nPixelSize = GDALGetDataTypeSizeBytes(eDT);
4966 :
4967 19 : void *pSwathBuf = VSI_MALLOC3_VERBOSE(nSwathCols, nSwathLines, nPixelSize);
4968 19 : if (pSwathBuf == nullptr)
4969 : {
4970 0 : return CE_Failure;
4971 : }
4972 :
4973 19 : CPLDebug("GDAL", "GDALRasterBandCopyWholeRaster(): %d*%d swaths",
4974 : nSwathCols, nSwathLines);
4975 :
4976 : const bool bCheckHoles =
4977 19 : CPLTestBool(CSLFetchNameValueDef(papszOptions, "SKIP_HOLES", "NO"));
4978 :
4979 : // Advise the source raster that we are going to read it completely
4980 19 : poSrcBand->AdviseRead(0, 0, nXSize, nYSize, nXSize, nYSize, eDT, nullptr);
4981 :
4982 : /* ==================================================================== */
4983 : /* Band oriented (uninterleaved) case. */
4984 : /* ==================================================================== */
4985 :
4986 47 : for (int iY = 0; iY < nYSize && eErr == CE_None; iY += nSwathLines)
4987 : {
4988 28 : int nThisLines = nSwathLines;
4989 :
4990 28 : if (iY + nThisLines > nYSize)
4991 3 : nThisLines = nYSize - iY;
4992 :
4993 56 : for (int iX = 0; iX < nXSize && eErr == CE_None; iX += nSwathCols)
4994 : {
4995 28 : int nThisCols = nSwathCols;
4996 :
4997 28 : if (iX + nThisCols > nXSize)
4998 0 : nThisCols = nXSize - iX;
4999 :
5000 28 : int nStatus = GDAL_DATA_COVERAGE_STATUS_DATA;
5001 28 : if (bCheckHoles)
5002 : {
5003 0 : nStatus = poSrcBand->GetDataCoverageStatus(
5004 : iX, iY, nThisCols, nThisLines,
5005 : GDAL_DATA_COVERAGE_STATUS_DATA);
5006 : }
5007 28 : if (nStatus & GDAL_DATA_COVERAGE_STATUS_DATA)
5008 : {
5009 28 : eErr = poSrcBand->RasterIO(GF_Read, iX, iY, nThisCols,
5010 : nThisLines, pSwathBuf, nThisCols,
5011 : nThisLines, eDT, 0, 0, nullptr);
5012 :
5013 28 : if (eErr == CE_None)
5014 28 : eErr = poDstBand->RasterIO(GF_Write, iX, iY, nThisCols,
5015 : nThisLines, pSwathBuf, nThisCols,
5016 : nThisLines, eDT, 0, 0, nullptr);
5017 : }
5018 :
5019 56 : if (eErr == CE_None &&
5020 28 : !pfnProgress((iY + nThisLines) / static_cast<float>(nYSize),
5021 : nullptr, pProgressData))
5022 : {
5023 0 : eErr = CE_Failure;
5024 0 : CPLError(CE_Failure, CPLE_UserInterrupt,
5025 : "User terminated CreateCopy()");
5026 : }
5027 : }
5028 : }
5029 :
5030 : /* -------------------------------------------------------------------- */
5031 : /* Cleanup */
5032 : /* -------------------------------------------------------------------- */
5033 19 : CPLFree(pSwathBuf);
5034 :
5035 19 : return eErr;
5036 : }
5037 :
5038 : /************************************************************************/
5039 : /* GDALCopyRasterIOExtraArg () */
5040 : /************************************************************************/
5041 :
5042 323808 : void GDALCopyRasterIOExtraArg(GDALRasterIOExtraArg *psDestArg,
5043 : GDALRasterIOExtraArg *psSrcArg)
5044 : {
5045 323808 : INIT_RASTERIO_EXTRA_ARG(*psDestArg);
5046 323808 : if (psSrcArg)
5047 : {
5048 323808 : psDestArg->eResampleAlg = psSrcArg->eResampleAlg;
5049 323808 : psDestArg->pfnProgress = psSrcArg->pfnProgress;
5050 323808 : psDestArg->pProgressData = psSrcArg->pProgressData;
5051 323808 : psDestArg->bFloatingPointWindowValidity =
5052 323808 : psSrcArg->bFloatingPointWindowValidity;
5053 323808 : if (psSrcArg->bFloatingPointWindowValidity)
5054 : {
5055 3632 : psDestArg->dfXOff = psSrcArg->dfXOff;
5056 3632 : psDestArg->dfYOff = psSrcArg->dfYOff;
5057 3632 : psDestArg->dfXSize = psSrcArg->dfXSize;
5058 3632 : psDestArg->dfYSize = psSrcArg->dfYSize;
5059 : }
5060 : }
5061 323808 : }
5062 :
5063 : /************************************************************************/
5064 : /* HasOnlyNoData() */
5065 : /************************************************************************/
5066 :
5067 4798012 : template <class T> static inline bool IsEqualToNoData(T value, T noDataValue)
5068 : {
5069 4798012 : return value == noDataValue;
5070 : }
5071 :
5072 560355 : template <> bool IsEqualToNoData<float>(float value, float noDataValue)
5073 : {
5074 560355 : return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
5075 : }
5076 :
5077 501114 : template <> bool IsEqualToNoData<double>(double value, double noDataValue)
5078 : {
5079 501114 : return std::isnan(noDataValue) ? std::isnan(value) : value == noDataValue;
5080 : }
5081 :
5082 : template <class T>
5083 2062 : static bool HasOnlyNoDataT(const T *pBuffer, T noDataValue, size_t nWidth,
5084 : size_t nHeight, size_t nLineStride,
5085 : size_t nComponents)
5086 : {
5087 : // Fast test: check the 4 corners and the middle pixel.
5088 3404 : for (size_t iBand = 0; iBand < nComponents; iBand++)
5089 : {
5090 4863 : if (!(IsEqualToNoData(pBuffer[iBand], noDataValue) &&
5091 2346 : IsEqualToNoData(pBuffer[(nWidth - 1) * nComponents + iBand],
5092 2270 : noDataValue) &&
5093 2270 : IsEqualToNoData(
5094 2270 : pBuffer[((nHeight - 1) / 2 * nLineStride + (nWidth - 1) / 2) *
5095 2270 : nComponents +
5096 : iBand],
5097 1353 : noDataValue) &&
5098 1353 : IsEqualToNoData(
5099 1353 : pBuffer[(nHeight - 1) * nLineStride * nComponents + iBand],
5100 : noDataValue) &&
5101 1345 : IsEqualToNoData(
5102 1345 : pBuffer[((nHeight - 1) * nLineStride + nWidth - 1) *
5103 1345 : nComponents +
5104 : iBand],
5105 : noDataValue)))
5106 : {
5107 1175 : return false;
5108 : }
5109 : }
5110 :
5111 : // Test all pixels.
5112 17778 : for (size_t iY = 0; iY < nHeight; iY++)
5113 : {
5114 16915 : const T *pBufferLine = pBuffer + iY * nLineStride * nComponents;
5115 5866544 : for (size_t iX = 0; iX < nWidth * nComponents; iX++)
5116 : {
5117 5849649 : if (!IsEqualToNoData(pBufferLine[iX], noDataValue))
5118 : {
5119 24 : return false;
5120 : }
5121 : }
5122 : }
5123 863 : return true;
5124 : }
5125 :
5126 : /************************************************************************/
5127 : /* GDALBufferHasOnlyNoData() */
5128 : /************************************************************************/
5129 :
5130 25296 : bool GDALBufferHasOnlyNoData(const void *pBuffer, double dfNoDataValue,
5131 : size_t nWidth, size_t nHeight, size_t nLineStride,
5132 : size_t nComponents, int nBitsPerSample,
5133 : GDALBufferSampleFormat nSampleFormat)
5134 : {
5135 : // In the case where the nodata is 0, we can compare several bytes at
5136 : // once. Select the largest natural integer type for the architecture.
5137 : #if SIZEOF_VOIDP >= 8 || defined(__x86_64__)
5138 : // We test __x86_64__ for x32 arch where SIZEOF_VOIDP == 4
5139 : typedef std::uint64_t WordType;
5140 : #else
5141 : typedef std::uint32_t WordType;
5142 : #endif
5143 25296 : if (dfNoDataValue == 0.0 && nWidth == nLineStride &&
5144 : // Do not use this optimized code path for floating point numbers,
5145 : // as it can't detect negative zero.
5146 : nSampleFormat != GSF_FLOATING_POINT)
5147 : {
5148 23228 : const GByte *pabyBuffer = static_cast<const GByte *>(pBuffer);
5149 23228 : const size_t nSize =
5150 23228 : (nWidth * nHeight * nComponents * nBitsPerSample + 7) / 8;
5151 23228 : size_t i = 0;
5152 : const size_t nInitialIters =
5153 46456 : std::min(sizeof(WordType) -
5154 23228 : static_cast<size_t>(
5155 : reinterpret_cast<std::uintptr_t>(pabyBuffer) %
5156 : sizeof(WordType)),
5157 23228 : nSize);
5158 194466 : for (; i < nInitialIters; i++)
5159 : {
5160 174542 : if (pabyBuffer[i])
5161 3304 : return false;
5162 : }
5163 16038800 : for (; i + sizeof(WordType) - 1 < nSize; i += sizeof(WordType))
5164 : {
5165 16024300 : if (*(reinterpret_cast<const WordType *>(pabyBuffer + i)))
5166 5389 : return false;
5167 : }
5168 51890 : for (; i < nSize; i++)
5169 : {
5170 37360 : if (pabyBuffer[i])
5171 5 : return false;
5172 : }
5173 14530 : return true;
5174 : }
5175 :
5176 2068 : if (nBitsPerSample == 8 && nSampleFormat == GSF_UNSIGNED_INT)
5177 : {
5178 2230 : return GDALIsValueInRange<uint8_t>(dfNoDataValue) &&
5179 1115 : HasOnlyNoDataT(static_cast<const uint8_t *>(pBuffer),
5180 1115 : static_cast<uint8_t>(dfNoDataValue), nWidth,
5181 1115 : nHeight, nLineStride, nComponents);
5182 : }
5183 953 : if (nBitsPerSample == 8 && nSampleFormat == GSF_SIGNED_INT)
5184 : {
5185 : // Use unsigned implementation by converting the nodatavalue to
5186 : // unsigned
5187 63 : return GDALIsValueInRange<int8_t>(dfNoDataValue) &&
5188 31 : HasOnlyNoDataT(
5189 : static_cast<const uint8_t *>(pBuffer),
5190 31 : static_cast<uint8_t>(static_cast<int8_t>(dfNoDataValue)),
5191 32 : nWidth, nHeight, nLineStride, nComponents);
5192 : }
5193 921 : if (nBitsPerSample == 16 && nSampleFormat == GSF_UNSIGNED_INT)
5194 : {
5195 21 : return GDALIsValueInRange<uint16_t>(dfNoDataValue) &&
5196 10 : HasOnlyNoDataT(static_cast<const uint16_t *>(pBuffer),
5197 10 : static_cast<uint16_t>(dfNoDataValue), nWidth,
5198 11 : nHeight, nLineStride, nComponents);
5199 : }
5200 910 : if (nBitsPerSample == 16 && nSampleFormat == GSF_SIGNED_INT)
5201 : {
5202 : // Use unsigned implementation by converting the nodatavalue to
5203 : // unsigned
5204 79 : return GDALIsValueInRange<int16_t>(dfNoDataValue) &&
5205 39 : HasOnlyNoDataT(
5206 : static_cast<const uint16_t *>(pBuffer),
5207 39 : static_cast<uint16_t>(static_cast<int16_t>(dfNoDataValue)),
5208 40 : nWidth, nHeight, nLineStride, nComponents);
5209 : }
5210 870 : if (nBitsPerSample == 32 && nSampleFormat == GSF_UNSIGNED_INT)
5211 : {
5212 73 : return GDALIsValueInRange<uint32_t>(dfNoDataValue) &&
5213 36 : HasOnlyNoDataT(static_cast<const uint32_t *>(pBuffer),
5214 : static_cast<uint32_t>(dfNoDataValue), nWidth,
5215 37 : nHeight, nLineStride, nComponents);
5216 : }
5217 833 : if (nBitsPerSample == 32 && nSampleFormat == GSF_SIGNED_INT)
5218 : {
5219 : // Use unsigned implementation by converting the nodatavalue to
5220 : // unsigned
5221 19 : return GDALIsValueInRange<int32_t>(dfNoDataValue) &&
5222 9 : HasOnlyNoDataT(
5223 : static_cast<const uint32_t *>(pBuffer),
5224 9 : static_cast<uint32_t>(static_cast<int32_t>(dfNoDataValue)),
5225 10 : nWidth, nHeight, nLineStride, nComponents);
5226 : }
5227 823 : if (nBitsPerSample == 64 && nSampleFormat == GSF_UNSIGNED_INT)
5228 : {
5229 56 : return GDALIsValueInRange<uint64_t>(dfNoDataValue) &&
5230 28 : HasOnlyNoDataT(static_cast<const uint64_t *>(pBuffer),
5231 : static_cast<uint64_t>(dfNoDataValue), nWidth,
5232 28 : nHeight, nLineStride, nComponents);
5233 : }
5234 795 : if (nBitsPerSample == 64 && nSampleFormat == GSF_SIGNED_INT)
5235 : {
5236 : // Use unsigned implementation by converting the nodatavalue to
5237 : // unsigned
5238 0 : return GDALIsValueInRange<int64_t>(dfNoDataValue) &&
5239 0 : HasOnlyNoDataT(
5240 : static_cast<const uint64_t *>(pBuffer),
5241 0 : static_cast<uint64_t>(static_cast<int64_t>(dfNoDataValue)),
5242 0 : nWidth, nHeight, nLineStride, nComponents);
5243 : }
5244 795 : if (nBitsPerSample == 32 && nSampleFormat == GSF_FLOATING_POINT)
5245 : {
5246 1323 : return (std::isnan(dfNoDataValue) ||
5247 1352 : GDALIsValueInRange<float>(dfNoDataValue)) &&
5248 675 : HasOnlyNoDataT(static_cast<const float *>(pBuffer),
5249 : static_cast<float>(dfNoDataValue), nWidth,
5250 676 : nHeight, nLineStride, nComponents);
5251 : }
5252 119 : if (nBitsPerSample == 64 && nSampleFormat == GSF_FLOATING_POINT)
5253 : {
5254 119 : return HasOnlyNoDataT(static_cast<const double *>(pBuffer),
5255 : dfNoDataValue, nWidth, nHeight, nLineStride,
5256 119 : nComponents);
5257 : }
5258 0 : return false;
5259 : }
5260 :
5261 : #if defined(__x86_64) || defined(_M_X64)
5262 :
5263 : #include <emmintrin.h>
5264 :
5265 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5266 : #include "rasterio_ssse3.h"
5267 : #endif
5268 :
5269 : /************************************************************************/
5270 : /* GDALDeinterleave3Byte() */
5271 : /************************************************************************/
5272 :
5273 : #if defined(__GNUC__) && !defined(__clang__)
5274 : __attribute__((optimize("no-tree-vectorize")))
5275 : #endif
5276 : static void
5277 58262 : GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5278 : GByte *CPL_RESTRICT pabyDest0,
5279 : GByte *CPL_RESTRICT pabyDest1,
5280 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5281 : {
5282 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5283 58262 : if (CPLHaveRuntimeSSSE3())
5284 : {
5285 58261 : return GDALDeinterleave3Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5286 58261 : pabyDest2, nIters);
5287 : }
5288 : #endif
5289 :
5290 2 : size_t i = 0;
5291 2 : if (((reinterpret_cast<uintptr_t>(pabySrc) |
5292 2 : reinterpret_cast<uintptr_t>(pabyDest0) |
5293 2 : reinterpret_cast<uintptr_t>(pabyDest1) |
5294 2 : reinterpret_cast<uintptr_t>(pabyDest2)) %
5295 : sizeof(unsigned int)) == 0)
5296 : {
5297 : // Slightly better than GCC autovectorizer
5298 17 : for (size_t j = 0; i + 3 < nIters; i += 4, ++j)
5299 : {
5300 15 : unsigned int word0 =
5301 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i);
5302 15 : unsigned int word1 =
5303 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 4);
5304 15 : unsigned int word2 =
5305 15 : *reinterpret_cast<const unsigned int *>(pabySrc + 3 * i + 8);
5306 15 : reinterpret_cast<unsigned int *>(pabyDest0)[j] =
5307 15 : (word0 & 0xff) | ((word0 >> 24) << 8) | (word1 & 0x00ff0000) |
5308 15 : ((word2 >> 8) << 24);
5309 15 : reinterpret_cast<unsigned int *>(pabyDest1)[j] =
5310 15 : ((word0 >> 8) & 0xff) | ((word1 & 0xff) << 8) |
5311 15 : (((word1 >> 24)) << 16) | ((word2 >> 16) << 24);
5312 15 : pabyDest2[j * 4] = static_cast<GByte>(word0 >> 16);
5313 15 : pabyDest2[j * 4 + 1] = static_cast<GByte>(word1 >> 8);
5314 15 : pabyDest2[j * 4 + 2] = static_cast<GByte>(word2);
5315 15 : pabyDest2[j * 4 + 3] = static_cast<GByte>(word2 >> 24);
5316 : }
5317 : }
5318 : #if defined(__clang__)
5319 : #pragma clang loop vectorize(disable)
5320 : #endif
5321 3 : for (; i < nIters; ++i)
5322 : {
5323 1 : pabyDest0[i] = pabySrc[3 * i + 0];
5324 1 : pabyDest1[i] = pabySrc[3 * i + 1];
5325 1 : pabyDest2[i] = pabySrc[3 * i + 2];
5326 : }
5327 : }
5328 :
5329 : /************************************************************************/
5330 : /* GDALDeinterleave4Byte() */
5331 : /************************************************************************/
5332 :
5333 : #if !defined(__GNUC__) || defined(__clang__)
5334 :
5335 : /************************************************************************/
5336 : /* deinterleave() */
5337 : /************************************************************************/
5338 :
5339 : template <bool SHIFT, bool MASK>
5340 : inline __m128i deinterleave(__m128i &xmm0_ori, __m128i &xmm1_ori,
5341 : __m128i &xmm2_ori, __m128i &xmm3_ori)
5342 : {
5343 : // Set higher 24bit of each int32 packed word to 0
5344 : if (SHIFT)
5345 : {
5346 : xmm0_ori = _mm_srli_epi32(xmm0_ori, 8);
5347 : xmm1_ori = _mm_srli_epi32(xmm1_ori, 8);
5348 : xmm2_ori = _mm_srli_epi32(xmm2_ori, 8);
5349 : xmm3_ori = _mm_srli_epi32(xmm3_ori, 8);
5350 : }
5351 : __m128i xmm0;
5352 : __m128i xmm1;
5353 : __m128i xmm2;
5354 : __m128i xmm3;
5355 : if (MASK)
5356 : {
5357 : const __m128i xmm_mask = _mm_set1_epi32(0xff);
5358 : xmm0 = _mm_and_si128(xmm0_ori, xmm_mask);
5359 : xmm1 = _mm_and_si128(xmm1_ori, xmm_mask);
5360 : xmm2 = _mm_and_si128(xmm2_ori, xmm_mask);
5361 : xmm3 = _mm_and_si128(xmm3_ori, xmm_mask);
5362 : }
5363 : else
5364 : {
5365 : xmm0 = xmm0_ori;
5366 : xmm1 = xmm1_ori;
5367 : xmm2 = xmm2_ori;
5368 : xmm3 = xmm3_ori;
5369 : }
5370 : // Pack int32 to int16
5371 : xmm0 = _mm_packs_epi32(xmm0, xmm1);
5372 : xmm2 = _mm_packs_epi32(xmm2, xmm3);
5373 : // Pack int16 to uint8
5374 : xmm0 = _mm_packus_epi16(xmm0, xmm2);
5375 : return xmm0;
5376 : }
5377 :
5378 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5379 : GByte *CPL_RESTRICT pabyDest0,
5380 : GByte *CPL_RESTRICT pabyDest1,
5381 : GByte *CPL_RESTRICT pabyDest2,
5382 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5383 : {
5384 : #ifdef HAVE_SSSE3_AT_COMPILE_TIME
5385 : if (CPLHaveRuntimeSSSE3())
5386 : {
5387 : return GDALDeinterleave4Byte_SSSE3(pabySrc, pabyDest0, pabyDest1,
5388 : pabyDest2, pabyDest3, nIters);
5389 : }
5390 : #endif
5391 :
5392 : // Not the optimal SSE2-only code, as gcc auto-vectorizer manages to
5393 : // do something slightly better.
5394 : size_t i = 0;
5395 : for (; i + 15 < nIters; i += 16)
5396 : {
5397 : __m128i xmm0_ori = _mm_loadu_si128(
5398 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 0));
5399 : __m128i xmm1_ori = _mm_loadu_si128(
5400 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 16));
5401 : __m128i xmm2_ori = _mm_loadu_si128(
5402 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 32));
5403 : __m128i xmm3_ori = _mm_loadu_si128(
5404 : reinterpret_cast<__m128i const *>(pabySrc + 4 * i + 48));
5405 :
5406 : _mm_storeu_si128(
5407 : reinterpret_cast<__m128i *>(pabyDest0 + i),
5408 : deinterleave<false, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5409 : _mm_storeu_si128(
5410 : reinterpret_cast<__m128i *>(pabyDest1 + i),
5411 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5412 : _mm_storeu_si128(
5413 : reinterpret_cast<__m128i *>(pabyDest2 + i),
5414 : deinterleave<true, true>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5415 : _mm_storeu_si128(
5416 : reinterpret_cast<__m128i *>(pabyDest3 + i),
5417 : deinterleave<true, false>(xmm0_ori, xmm1_ori, xmm2_ori, xmm3_ori));
5418 : }
5419 :
5420 : #if defined(__clang__)
5421 : #pragma clang loop vectorize(disable)
5422 : #endif
5423 : for (; i < nIters; ++i)
5424 : {
5425 : pabyDest0[i] = pabySrc[4 * i + 0];
5426 : pabyDest1[i] = pabySrc[4 * i + 1];
5427 : pabyDest2[i] = pabySrc[4 * i + 2];
5428 : pabyDest3[i] = pabySrc[4 * i + 3];
5429 : }
5430 : }
5431 : #else
5432 : // GCC autovectorizer does an excellent job
5433 50228 : __attribute__((optimize("tree-vectorize"))) static void GDALDeinterleave4Byte(
5434 : const GByte *CPL_RESTRICT pabySrc, GByte *CPL_RESTRICT pabyDest0,
5435 : GByte *CPL_RESTRICT pabyDest1, GByte *CPL_RESTRICT pabyDest2,
5436 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5437 : {
5438 522918000 : for (size_t i = 0; i < nIters; ++i)
5439 : {
5440 522867000 : pabyDest0[i] = pabySrc[4 * i + 0];
5441 522867000 : pabyDest1[i] = pabySrc[4 * i + 1];
5442 522867000 : pabyDest2[i] = pabySrc[4 * i + 2];
5443 522867000 : pabyDest3[i] = pabySrc[4 * i + 3];
5444 : }
5445 50228 : }
5446 : #endif
5447 :
5448 : #else
5449 :
5450 : /************************************************************************/
5451 : /* GDALDeinterleave3Byte() */
5452 : /************************************************************************/
5453 :
5454 : // TODO: Enabling below could help on non-Intel architectures where GCC knows
5455 : // how to auto-vectorize
5456 : // #if defined(__GNUC__)
5457 : //__attribute__((optimize("tree-vectorize")))
5458 : // #endif
5459 : static void GDALDeinterleave3Byte(const GByte *CPL_RESTRICT pabySrc,
5460 : GByte *CPL_RESTRICT pabyDest0,
5461 : GByte *CPL_RESTRICT pabyDest1,
5462 : GByte *CPL_RESTRICT pabyDest2, size_t nIters)
5463 : {
5464 : for (size_t i = 0; i < nIters; ++i)
5465 : {
5466 : pabyDest0[i] = pabySrc[3 * i + 0];
5467 : pabyDest1[i] = pabySrc[3 * i + 1];
5468 : pabyDest2[i] = pabySrc[3 * i + 2];
5469 : }
5470 : }
5471 :
5472 : /************************************************************************/
5473 : /* GDALDeinterleave4Byte() */
5474 : /************************************************************************/
5475 :
5476 : // TODO: Enabling below could help on non-Intel architectures where gcc knows
5477 : // how to auto-vectorize
5478 : // #if defined(__GNUC__)
5479 : //__attribute__((optimize("tree-vectorize")))
5480 : // #endif
5481 : static void GDALDeinterleave4Byte(const GByte *CPL_RESTRICT pabySrc,
5482 : GByte *CPL_RESTRICT pabyDest0,
5483 : GByte *CPL_RESTRICT pabyDest1,
5484 : GByte *CPL_RESTRICT pabyDest2,
5485 : GByte *CPL_RESTRICT pabyDest3, size_t nIters)
5486 : {
5487 : for (size_t i = 0; i < nIters; ++i)
5488 : {
5489 : pabyDest0[i] = pabySrc[4 * i + 0];
5490 : pabyDest1[i] = pabySrc[4 * i + 1];
5491 : pabyDest2[i] = pabySrc[4 * i + 2];
5492 : pabyDest3[i] = pabySrc[4 * i + 3];
5493 : }
5494 : }
5495 :
5496 : #endif
5497 :
5498 : /************************************************************************/
5499 : /* GDALDeinterleave() */
5500 : /************************************************************************/
5501 :
5502 : /*! Copy values from a pixel-interleave buffer to multiple per-component
5503 : buffers.
5504 :
5505 : In pseudo-code
5506 : \verbatim
5507 : for(size_t i = 0; i < nIters; ++i)
5508 : for(int iComp = 0; iComp < nComponents; iComp++ )
5509 : ppDestBuffer[iComp][i] = pSourceBuffer[nComponents * i + iComp]
5510 : \endverbatim
5511 :
5512 : The implementation is optimized for a few cases, like de-interleaving
5513 : of 3 or 4-components Byte buffers.
5514 :
5515 : \since GDAL 3.6
5516 : */
5517 109244 : void GDALDeinterleave(const void *pSourceBuffer, GDALDataType eSourceDT,
5518 : int nComponents, void **ppDestBuffer,
5519 : GDALDataType eDestDT, size_t nIters)
5520 : {
5521 109244 : if (eSourceDT == eDestDT)
5522 : {
5523 109222 : if (eSourceDT == GDT_Byte)
5524 : {
5525 108491 : if (nComponents == 3)
5526 : {
5527 58263 : const GByte *CPL_RESTRICT pabySrc =
5528 : static_cast<const GByte *>(pSourceBuffer);
5529 58263 : GByte *CPL_RESTRICT pabyDest0 =
5530 : static_cast<GByte *>(ppDestBuffer[0]);
5531 58263 : GByte *CPL_RESTRICT pabyDest1 =
5532 : static_cast<GByte *>(ppDestBuffer[1]);
5533 58263 : GByte *CPL_RESTRICT pabyDest2 =
5534 : static_cast<GByte *>(ppDestBuffer[2]);
5535 58263 : GDALDeinterleave3Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5536 : nIters);
5537 58263 : return;
5538 : }
5539 50228 : else if (nComponents == 4)
5540 : {
5541 50228 : const GByte *CPL_RESTRICT pabySrc =
5542 : static_cast<const GByte *>(pSourceBuffer);
5543 50228 : GByte *CPL_RESTRICT pabyDest0 =
5544 : static_cast<GByte *>(ppDestBuffer[0]);
5545 50228 : GByte *CPL_RESTRICT pabyDest1 =
5546 : static_cast<GByte *>(ppDestBuffer[1]);
5547 50228 : GByte *CPL_RESTRICT pabyDest2 =
5548 : static_cast<GByte *>(ppDestBuffer[2]);
5549 50228 : GByte *CPL_RESTRICT pabyDest3 =
5550 : static_cast<GByte *>(ppDestBuffer[3]);
5551 50228 : GDALDeinterleave4Byte(pabySrc, pabyDest0, pabyDest1, pabyDest2,
5552 : pabyDest3, nIters);
5553 50229 : return;
5554 : }
5555 : }
5556 : #if ((defined(__GNUC__) && !defined(__clang__)) || \
5557 : defined(__INTEL_CLANG_COMPILER)) && \
5558 : (defined(__x86_64) || defined(_M_X64)) && \
5559 : defined(HAVE_SSSE3_AT_COMPILE_TIME)
5560 1462 : else if ((eSourceDT == GDT_Int16 || eSourceDT == GDT_UInt16) &&
5561 731 : CPLHaveRuntimeSSSE3())
5562 : {
5563 731 : if (nComponents == 3)
5564 : {
5565 237 : const GUInt16 *CPL_RESTRICT panSrc =
5566 : static_cast<const GUInt16 *>(pSourceBuffer);
5567 237 : GUInt16 *CPL_RESTRICT panDest0 =
5568 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5569 237 : GUInt16 *CPL_RESTRICT panDest1 =
5570 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5571 237 : GUInt16 *CPL_RESTRICT panDest2 =
5572 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5573 237 : GDALDeinterleave3UInt16_SSSE3(panSrc, panDest0, panDest1,
5574 : panDest2, nIters);
5575 237 : return;
5576 : }
5577 : #if !defined(__INTEL_CLANG_COMPILER)
5578 : // ICC autovectorizer doesn't do a good job, at least with icx
5579 : // 2022.1.0.20220316
5580 494 : else if (nComponents == 4)
5581 : {
5582 494 : const GUInt16 *CPL_RESTRICT panSrc =
5583 : static_cast<const GUInt16 *>(pSourceBuffer);
5584 494 : GUInt16 *CPL_RESTRICT panDest0 =
5585 : static_cast<GUInt16 *>(ppDestBuffer[0]);
5586 494 : GUInt16 *CPL_RESTRICT panDest1 =
5587 : static_cast<GUInt16 *>(ppDestBuffer[1]);
5588 494 : GUInt16 *CPL_RESTRICT panDest2 =
5589 : static_cast<GUInt16 *>(ppDestBuffer[2]);
5590 494 : GUInt16 *CPL_RESTRICT panDest3 =
5591 : static_cast<GUInt16 *>(ppDestBuffer[3]);
5592 494 : GDALDeinterleave4UInt16_SSSE3(panSrc, panDest0, panDest1,
5593 : panDest2, panDest3, nIters);
5594 494 : return;
5595 : }
5596 : #endif
5597 : }
5598 : #endif
5599 : }
5600 :
5601 22 : const int nSourceDTSize = GDALGetDataTypeSizeBytes(eSourceDT);
5602 22 : const int nDestDTSize = GDALGetDataTypeSizeBytes(eDestDT);
5603 87 : for (int iComp = 0; iComp < nComponents; iComp++)
5604 : {
5605 65 : GDALCopyWords64(static_cast<const GByte *>(pSourceBuffer) +
5606 65 : iComp * nSourceDTSize,
5607 : eSourceDT, nComponents * nSourceDTSize,
5608 65 : ppDestBuffer[iComp], eDestDT, nDestDTSize, nIters);
5609 : }
5610 : }
|