LCOV - code coverage report
Current view: top level - ogr/ogrsf_frmts/generic - ograrrowarrayhelper.h (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 95 111 85.6 %
Date: 2024-11-21 22:18:42 Functions: 12 15 80.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  OpenGIS Simple Features Reference Implementation
       4             :  * Purpose:  Helper to fill ArrowArray
       5             :  * Author:   Even Rouault <even dot rouault at spatialys.com>
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2022, Even Rouault <even dot rouault at spatialys.com>
       9             :  *
      10             :  * SPDX-License-Identifier: MIT
      11             :  ****************************************************************************/
      12             : 
      13             : #pragma once
      14             : 
      15             : //! @cond Doxygen_Suppress
      16             : 
      17             : #include <algorithm>
      18             : #include <limits>
      19             : 
      20             : #include "cpl_time.h"
      21             : 
      22             : #include "ogrsf_frmts.h"
      23             : #include "ogr_recordbatch.h"
      24             : 
      25             : class CPL_DLL OGRArrowArrayHelper
      26             : {
      27             :     OGRArrowArrayHelper(const OGRArrowArrayHelper &) = delete;
      28             :     OGRArrowArrayHelper &operator=(const OGRArrowArrayHelper &) = delete;
      29             : 
      30             :   public:
      31             :     bool m_bIncludeFID = false;
      32             :     int m_nMaxBatchSize = 0;
      33             :     int m_nChildren = 0;
      34             :     const int m_nFieldCount = 0;
      35             :     const int m_nGeomFieldCount = 0;
      36             :     std::vector<int> m_mapOGRFieldToArrowField{};
      37             :     std::vector<int> m_mapOGRGeomFieldToArrowField{};
      38             :     std::vector<bool> m_abNullableFields{};
      39             :     std::vector<uint32_t> m_anArrowFieldMaxAlloc{};
      40             :     std::vector<int> m_anTZFlags{};
      41             :     int64_t *m_panFIDValues = nullptr;
      42             :     struct ArrowArray *m_out_array = nullptr;
      43             : 
      44             :     static uint32_t GetMemLimit();
      45             : 
      46             :     static int
      47             :     GetMaxFeaturesInBatch(const CPLStringList &aosArrowArrayStreamOptions);
      48             : 
      49             :     OGRArrowArrayHelper(GDALDataset *poDS, OGRFeatureDefn *poFeatureDefn,
      50             :                         const CPLStringList &aosArrowArrayStreamOptions,
      51             :                         struct ArrowArray *out_array);
      52             : 
      53        1667 :     bool SetNull(int iArrowField, int iFeat)
      54             :     {
      55        1667 :         auto psArray = m_out_array->children[iArrowField];
      56        1667 :         ++psArray->null_count;
      57        1667 :         uint8_t *pabyNull =
      58        1667 :             static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[0]));
      59        1667 :         if (psArray->buffers[0] == nullptr)
      60             :         {
      61             :             pabyNull = static_cast<uint8_t *>(
      62         735 :                 VSI_MALLOC_ALIGNED_AUTO_VERBOSE((m_nMaxBatchSize + 7) / 8));
      63         735 :             if (pabyNull == nullptr)
      64             :             {
      65           0 :                 return false;
      66             :             }
      67         735 :             memset(pabyNull, 0xFF, (m_nMaxBatchSize + 7) / 8);
      68         735 :             psArray->buffers[0] = pabyNull;
      69             :         }
      70        1667 :         pabyNull[iFeat / 8] &= static_cast<uint8_t>(~(1 << (iFeat % 8)));
      71             : 
      72        1667 :         if (psArray->n_buffers == 3)
      73             :         {
      74        1427 :             auto panOffsets =
      75        1427 :                 static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
      76        1427 :             panOffsets[iFeat + 1] = panOffsets[iFeat];
      77             :         }
      78        1667 :         return true;
      79             :     }
      80             : 
      81         151 :     inline static void SetBoolOn(struct ArrowArray *psArray, int iFeat)
      82             :     {
      83             :         static_cast<uint8_t *>(
      84         151 :             const_cast<void *>(psArray->buffers[1]))[iFeat / 8] |=
      85         151 :             static_cast<uint8_t>(1 << (iFeat % 8));
      86         151 :     }
      87             : 
      88           0 :     inline static void SetInt8(struct ArrowArray *psArray, int iFeat,
      89             :                                int8_t nVal)
      90             :     {
      91           0 :         static_cast<int8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
      92             :             nVal;
      93           0 :     }
      94             : 
      95           0 :     inline static void SetUInt8(struct ArrowArray *psArray, int iFeat,
      96             :                                 uint8_t nVal)
      97             :     {
      98           0 :         static_cast<uint8_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
      99             :             nVal;
     100           0 :     }
     101             : 
     102         132 :     inline static void SetInt16(struct ArrowArray *psArray, int iFeat,
     103             :                                 int16_t nVal)
     104             :     {
     105         132 :         static_cast<int16_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
     106             :             nVal;
     107         132 :     }
     108             : 
     109             :     inline static void SetUInt16(struct ArrowArray *psArray, int iFeat,
     110             :                                  uint16_t nVal)
     111             :     {
     112             :         static_cast<uint16_t *>(
     113             :             const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
     114             :     }
     115             : 
     116        1660 :     inline static void SetInt32(struct ArrowArray *psArray, int iFeat,
     117             :                                 int32_t nVal)
     118             :     {
     119        1660 :         static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
     120             :             nVal;
     121        1660 :     }
     122             : 
     123             :     inline static void SetUInt32(struct ArrowArray *psArray, int iFeat,
     124             :                                  uint32_t nVal)
     125             :     {
     126             :         static_cast<uint32_t *>(
     127             :             const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
     128             :     }
     129             : 
     130         181 :     inline static void SetInt64(struct ArrowArray *psArray, int iFeat,
     131             :                                 int64_t nVal)
     132             :     {
     133         181 :         static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
     134             :             nVal;
     135         181 :     }
     136             : 
     137             :     inline static void SetUInt64(struct ArrowArray *psArray, int iFeat,
     138             :                                  uint64_t nVal)
     139             :     {
     140             :         static_cast<uint64_t *>(
     141             :             const_cast<void *>(psArray->buffers[1]))[iFeat] = nVal;
     142             :     }
     143             : 
     144         111 :     inline static void SetFloat(struct ArrowArray *psArray, int iFeat,
     145             :                                 float fVal)
     146             :     {
     147         111 :         static_cast<float *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
     148             :             fVal;
     149         111 :     }
     150             : 
     151         221 :     inline static void SetDouble(struct ArrowArray *psArray, int iFeat,
     152             :                                  double dfVal)
     153             :     {
     154         221 :         static_cast<double *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
     155             :             dfVal;
     156         221 :     }
     157             : 
     158          91 :     static void SetDate(struct ArrowArray *psArray, int iFeat,
     159             :                         struct tm &brokenDown, const OGRField &ogrField)
     160             :     {
     161          91 :         brokenDown.tm_year = ogrField.Date.Year - 1900;
     162          91 :         brokenDown.tm_mon = ogrField.Date.Month - 1;
     163          91 :         brokenDown.tm_mday = ogrField.Date.Day;
     164          91 :         brokenDown.tm_hour = 0;
     165          91 :         brokenDown.tm_min = 0;
     166          91 :         brokenDown.tm_sec = 0;
     167          91 :         static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
     168          91 :             static_cast<int>(CPLYMDHMSToUnixTime(&brokenDown) / 86400);
     169          91 :     }
     170             : 
     171         115 :     static void SetDateTime(struct ArrowArray *psArray, int iFeat,
     172             :                             struct tm &brokenDown, int nFieldTZFlag,
     173             :                             const OGRField &ogrField)
     174             :     {
     175         115 :         brokenDown.tm_year = ogrField.Date.Year - 1900;
     176         115 :         brokenDown.tm_mon = ogrField.Date.Month - 1;
     177         115 :         brokenDown.tm_mday = ogrField.Date.Day;
     178         115 :         brokenDown.tm_hour = ogrField.Date.Hour;
     179         115 :         brokenDown.tm_min = ogrField.Date.Minute;
     180         115 :         brokenDown.tm_sec = static_cast<int>(ogrField.Date.Second);
     181             :         auto nVal =
     182         115 :             CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
     183         115 :             (static_cast<int>(ogrField.Date.Second * 1000 + 0.5) % 1000);
     184         115 :         if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
     185          95 :             ogrField.Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
     186             :         {
     187             :             // Convert for ogrField.Date.TZFlag to UTC
     188          95 :             const int TZOffset = (ogrField.Date.TZFlag - OGR_TZFLAG_UTC) * 15;
     189          95 :             const int TZOffsetMS = TZOffset * 60 * 1000;
     190          95 :             nVal -= TZOffsetMS;
     191             :         }
     192         115 :         static_cast<int64_t *>(const_cast<void *>(psArray->buffers[1]))[iFeat] =
     193             :             nVal;
     194         115 :     }
     195             : 
     196     1265050 :     GByte *GetPtrForStringOrBinary(int iArrowField, int iFeat, size_t nLen)
     197             :     {
     198     1265050 :         auto psArray = m_out_array->children[iArrowField];
     199     1265050 :         auto panOffsets =
     200     1265050 :             static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
     201     1265050 :         const uint32_t nCurLength = static_cast<uint32_t>(panOffsets[iFeat]);
     202     1265050 :         if (nLen > m_anArrowFieldMaxAlloc[iArrowField] - nCurLength)
     203             :         {
     204         404 :             if (nLen >
     205         404 :                 static_cast<uint32_t>(std::numeric_limits<int32_t>::max()) -
     206             :                     nCurLength)
     207             :             {
     208           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
     209             :                          "Too large string or binary content");
     210           0 :                 return nullptr;
     211             :             }
     212         404 :             uint32_t nNewSize = nCurLength + static_cast<uint32_t>(nLen);
     213         404 :             if ((m_anArrowFieldMaxAlloc[iArrowField] >> 31) == 0)
     214             :             {
     215             :                 const uint32_t nDoubleSize =
     216         404 :                     2U * m_anArrowFieldMaxAlloc[iArrowField];
     217         404 :                 if (nNewSize < nDoubleSize)
     218           0 :                     nNewSize = nDoubleSize;
     219             :             }
     220         404 :             void *newBuffer = VSI_MALLOC_ALIGNED_AUTO_VERBOSE(nNewSize);
     221         404 :             if (newBuffer == nullptr)
     222           0 :                 return nullptr;
     223         404 :             m_anArrowFieldMaxAlloc[iArrowField] = nNewSize;
     224         404 :             memcpy(newBuffer, psArray->buffers[2], nCurLength);
     225         404 :             VSIFreeAligned(const_cast<void *>(psArray->buffers[2]));
     226         403 :             psArray->buffers[2] = newBuffer;
     227             :         }
     228     1265050 :         GByte *paby =
     229     1265050 :             static_cast<GByte *>(const_cast<void *>(psArray->buffers[2])) +
     230     1265050 :             nCurLength;
     231     1265050 :         panOffsets[iFeat + 1] = panOffsets[iFeat] + static_cast<int32_t>(nLen);
     232     1265050 :         return paby;
     233             :     }
     234             : 
     235           0 :     static void SetEmptyStringOrBinary(struct ArrowArray *psArray, int iFeat)
     236             :     {
     237           0 :         auto panOffsets =
     238           0 :             static_cast<int32_t *>(const_cast<void *>(psArray->buffers[1]));
     239           0 :         panOffsets[iFeat + 1] = panOffsets[iFeat];
     240           0 :     }
     241             : 
     242         389 :     void Shrink(int nFeatures)
     243             :     {
     244         389 :         if (nFeatures < m_nMaxBatchSize)
     245             :         {
     246         318 :             m_out_array->length = nFeatures;
     247        2587 :             for (int i = 0; i < m_nChildren; i++)
     248             :             {
     249        2269 :                 m_out_array->children[i]->length = nFeatures;
     250             :             }
     251             :         }
     252         389 :     }
     253             : 
     254          17 :     void ClearArray()
     255             :     {
     256          17 :         if (m_out_array->release)
     257          16 :             m_out_array->release(m_out_array);
     258          17 :         memset(m_out_array, 0, sizeof(*m_out_array));
     259          17 :     }
     260             : 
     261             :     static bool FillDict(struct ArrowArray *psChild,
     262             :                          const OGRCodedFieldDomain *poCodedDomain);
     263             : };
     264             : 
     265             : //! @endcond

Generated by: LCOV version 1.14