LCOV - code coverage report
Current view: top level - port - cpl_csv.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 406 536 75.7 %
Date: 2025-09-10 17:48:50 Functions: 27 35 77.1 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  CPL - Common Portability Library
       4             :  * Purpose:  CSV (comma separated value) file access.
       5             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 1999, Frank Warmerdam
       9             :  * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * SPDX-License-Identifier: MIT
      12             :  ****************************************************************************/
      13             : 
      14             : #include "cpl_port.h"
      15             : #include "cpl_csv.h"
      16             : 
      17             : #include <cstddef>
      18             : #include <cstdlib>
      19             : #include <cstring>
      20             : #include <fcntl.h>
      21             : 
      22             : #include "cpl_conv.h"
      23             : #include "cpl_error.h"
      24             : #include "cpl_multiproc.h"
      25             : #include "gdal_csv.h"
      26             : 
      27             : #include <algorithm>
      28             : 
      29             : /* ==================================================================== */
      30             : /*      The CSVTable is a persistent set of info about an open CSV      */
      31             : /*      table.  Once CSVIngest() has run, it also holds an in-memory    */
      32             : /*      copy of the file and, when line keys are ascending, a line      */
      33             : /*      index (see the cache fields below).                             */
      34             : /* ==================================================================== */
      35             : typedef struct ctb
      36             : {
      37             :     VSILFILE *fp;
      38             :     struct ctb *psNext;
      39             :     char *pszFilename;
      40             :     char **papszFieldNames;
      41             :     int *panFieldNamesLength;
      42             :     char **papszRecFields;
      43             :     int nFields;
      44             :     int iLastLine;
      45             :     bool bNonUniqueKey;
      46             : 
      47             :     /* Cache for whole file */
      48             :     int nLineCount;
      49             :     char **papszLines;
      50             :     int *panLineIndex;
      51             :     char *pszRawData;
      52             : } CSVTable;
      53             : 
      54             : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
      55             :                                 const char *pszFilename);
      56             : 
      57             : /************************************************************************/
      58             : /*                            CSVFreeTLS()                              */
      59             : /************************************************************************/
      60           2 : static void CSVFreeTLS(void *pData)
      61             : {
      62           2 :     CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
      63           2 :     CPLFree(pData);
      64           2 : }
      65             : 
      66             : /* It would likely be better to share this list between threads, but
      67             :    that will require some rework. */
      68             : 
      69             : /************************************************************************/
      70             : /*                             CSVAccess()                              */
      71             : /*                                                                      */
      72             : /*      This function will fetch a handle to the requested table.       */
      73             : /*      If not found in the ``open table list'' the table will be       */
      74             : /*      opened and added to the list.  Eventually this function may     */
      75             : /*      become public with an abstracted return type so that            */
      76             : /*      applications can set options about the table.  For now this     */
      77             : /*      isn't done.                                                     */
      78             : /************************************************************************/
      79             : 
      80      130337 : static CSVTable *CSVAccess(const char *pszFilename)
      81             : 
      82             : {
      83             :     /* -------------------------------------------------------------------- */
      84             :     /*      Fetch the table, and allocate the thread-local pointer to it    */
      85             :     /*      if there isn't already one.                                     */
      86             :     /* -------------------------------------------------------------------- */
      87      130337 :     int bMemoryError = FALSE;
      88             :     CSVTable **ppsCSVTableList =
      89      130337 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
      90      130337 :     if (bMemoryError)
      91           0 :         return nullptr;
      92      130337 :     if (ppsCSVTableList == nullptr)
      93             :     {
      94             :         ppsCSVTableList =
      95           5 :             static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
      96           5 :         if (ppsCSVTableList == nullptr)
      97           0 :             return nullptr;
      98           5 :         CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
      99             :     }
     100             : 
     101             :     /* -------------------------------------------------------------------- */
     102             :     /*      Is the table already in the list.                               */
     103             :     /* -------------------------------------------------------------------- */
     104     1011440 :     for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
     105      881107 :          psTable = psTable->psNext)
     106             :     {
     107     1011410 :         if (EQUAL(psTable->pszFilename, pszFilename))
     108             :         {
     109             :             /*
     110             :              * Eventually we should consider promoting to the front of
     111             :              * the list to accelerate frequently accessed tables.
     112             :              */
     113      130307 :             return psTable;
     114             :         }
     115             :     }
     116             : 
     117             :     /* -------------------------------------------------------------------- */
     118             :     /*      If not, try to open it.                                         */
     119             :     /* -------------------------------------------------------------------- */
     120          30 :     VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
     121          30 :     if (fp == nullptr)
     122           0 :         return nullptr;
     123             : 
     124             :     /* -------------------------------------------------------------------- */
     125             :     /*      Create an information structure about this table, and add to    */
     126             :     /*      the front of the list.                                          */
     127             :     /* -------------------------------------------------------------------- */
     128             :     CSVTable *const psTable =
     129          30 :         static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
     130          30 :     if (psTable == nullptr)
     131             :     {
     132           0 :         VSIFCloseL(fp);
     133           0 :         return nullptr;
     134             :     }
     135             : 
     136          30 :     psTable->fp = fp;
     137          30 :     psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
     138          30 :     if (psTable->pszFilename == nullptr)
     139             :     {
     140           0 :         VSIFree(psTable);
     141           0 :         VSIFCloseL(fp);
     142           0 :         return nullptr;
     143             :     }
     144          30 :     psTable->bNonUniqueKey = false;  // As far as we know now.
     145          30 :     psTable->psNext = *ppsCSVTableList;
     146             : 
     147          30 :     *ppsCSVTableList = psTable;
     148             : 
     149             :     /* -------------------------------------------------------------------- */
     150             :     /*      Read the table header record containing the field names.        */
     151             :     /* -------------------------------------------------------------------- */
     152          30 :     psTable->papszFieldNames = CSVReadParseLineL(fp);
     153          30 :     psTable->nFields = CSLCount(psTable->papszFieldNames);
     154          30 :     psTable->panFieldNamesLength =
     155          30 :         static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
     156          30 :     for (int i = 0;
     157         185 :          i < psTable->nFields &&
     158             :          /* null-pointer check to avoid a false positive from CLang S.A. */
     159         155 :          psTable->papszFieldNames != nullptr;
     160             :          i++)
     161             :     {
     162         155 :         psTable->panFieldNamesLength[i] =
     163         155 :             static_cast<int>(strlen(psTable->papszFieldNames[i]));
     164             :     }
     165             : 
     166          30 :     return psTable;
     167             : }
     168             : 
     169             : /************************************************************************/
     170             : /*                            CSVDeaccess()                             */
     171             : /************************************************************************/
     172             : 
     173        1133 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
     174             :                                 const char *pszFilename)
     175             : 
     176             : {
     177        1133 :     if (ppsCSVTableList == nullptr)
     178        1124 :         return;
     179             : 
     180             :     /* -------------------------------------------------------------------- */
     181             :     /*      A NULL means deaccess all tables.                               */
     182             :     /* -------------------------------------------------------------------- */
     183           9 :     if (pszFilename == nullptr)
     184             :     {
     185           9 :         while (*ppsCSVTableList != nullptr)
     186           5 :             CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
     187           5 :                                 (*ppsCSVTableList)->pszFilename);
     188             : 
     189           4 :         return;
     190             :     }
     191             : 
     192             :     /* -------------------------------------------------------------------- */
     193             :     /*      Find this table.                                                */
     194             :     /* -------------------------------------------------------------------- */
     195           5 :     CSVTable *psLast = nullptr;
     196           5 :     CSVTable *psTable = *ppsCSVTableList;
     197           5 :     for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
     198           0 :          psTable = psTable->psNext)
     199             :     {
     200           0 :         psLast = psTable;
     201             :     }
     202             : 
     203           5 :     if (psTable == nullptr)
     204             :     {
     205           0 :         if (bCanUseTLS)
     206           0 :             CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
     207           0 :         return;
     208             :     }
     209             : 
     210             :     /* -------------------------------------------------------------------- */
     211             :     /*      Remove the link from the list.                                  */
     212             :     /* -------------------------------------------------------------------- */
     213           5 :     if (psLast != nullptr)
     214           0 :         psLast->psNext = psTable->psNext;
     215             :     else
     216           5 :         *ppsCSVTableList = psTable->psNext;
     217             : 
     218             :     /* -------------------------------------------------------------------- */
     219             :     /*      Free the table.                                                 */
     220             :     /* -------------------------------------------------------------------- */
     221           5 :     if (psTable->fp != nullptr)
     222           0 :         VSIFCloseL(psTable->fp);
     223             : 
     224           5 :     CSLDestroy(psTable->papszFieldNames);
     225           5 :     CPLFree(psTable->panFieldNamesLength);
     226           5 :     CSLDestroy(psTable->papszRecFields);
     227           5 :     CPLFree(psTable->pszFilename);
     228           5 :     CPLFree(psTable->panLineIndex);
     229           5 :     CPLFree(psTable->pszRawData);
     230           5 :     CPLFree(psTable->papszLines);
     231             : 
     232           5 :     CPLFree(psTable);
     233             : 
     234           5 :     if (bCanUseTLS)
     235           5 :         CPLReadLine(nullptr);
     236             : }
     237             : 
     238        1126 : void CSVDeaccess(const char *pszFilename)
     239             : {
     240             :     /* -------------------------------------------------------------------- */
     241             :     /*      Fetch the thread-local table list.  Unlike CSVAccess(), no      */
     242             :     /*      list is allocated here if there isn't already one.              */
     243             :     /* -------------------------------------------------------------------- */
     244        1126 :     int bMemoryError = FALSE;
     245             :     CSVTable **ppsCSVTableList =
     246        1126 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
     247             : 
     248        1126 :     CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
     249        1126 : }
     250             : 
     251             : /************************************************************************/
     252             : /*                            CSVSplitLine()                            */
     253             : /*                                                                      */
     254             : /*      Tokenize a CSV line into fields in the form of a string         */
     255             : /*      list.  This is used instead of the CPLTokenizeString()          */
     256             : /*      because it provides correct CSV escaping and quoting            */
     257             : /*      semantics.                                                      */
     258             : /************************************************************************/
     259             : 
     260      117449 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
     261             :                            bool bKeepLeadingAndClosingQuotes,
     262             :                            bool bMergeDelimiter)
     263             : 
     264             : {
     265      234898 :     CPLStringList aosRetList;
     266      117449 :     if (pszString == nullptr)
     267           0 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     268             : 
     269      117449 :     char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
     270      117449 :     int nTokenMax = 10;
     271      117449 :     const size_t nDelimiterLength = strlen(pszDelimiter);
     272             : 
     273      117449 :     const char *pszIter = pszString;
     274      676277 :     while (*pszIter != '\0')
     275             :     {
     276      558828 :         bool bInString = false;
     277             : 
     278      558828 :         int nTokenLen = 0;
     279             : 
     280             :         // Try to find the next delimiter, marking end of token.
     281     4710510 :         do
     282             :         {
     283             :             // If this is a delimiter, skip it and end the current token.
     284     5269340 :             if (!bInString &&
     285     2752100 :                 strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
     286             :             {
     287      441704 :                 pszIter += nDelimiterLength;
     288      441704 :                 if (bMergeDelimiter)
     289             :                 {
     290           9 :                     while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
     291             :                            0)
     292           5 :                         pszIter += nDelimiterLength;
     293             :                 }
     294      441704 :                 break;
     295             :             }
     296             : 
     297     4827630 :             if (*pszIter == '"')
     298             :             {
     299      396853 :                 if (!bInString && nTokenLen > 0)
     300             :                 {
     301             :                     // do not treat in a special way double quotes that appear
     302             :                     // in the middle of a field (similarly to OpenOffice)
     303             :                     // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
     304             :                 }
     305      396766 :                 else if (!bInString || pszIter[1] != '"')
     306             :                 {
     307      396086 :                     bInString = !bInString;
     308      396086 :                     if (!bKeepLeadingAndClosingQuotes)
     309      396052 :                         continue;
     310             :                 }
     311             :                 else  // Doubled quotes in string resolve to one quote.
     312             :                 {
     313         680 :                     pszIter++;
     314             :                 }
     315             :             }
     316             : 
     317     4431580 :             if (nTokenLen >= nTokenMax - 2)
     318             :             {
     319      125166 :                 nTokenMax = nTokenMax * 2 + 10;
     320      125166 :                 pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
     321             :             }
     322             : 
     323     4431580 :             pszToken[nTokenLen] = *pszIter;
     324     4431580 :             nTokenLen++;
     325     4827630 :         } while (*(++pszIter) != '\0');
     326             : 
     327      558828 :         pszToken[nTokenLen] = '\0';
     328      558828 :         aosRetList.AddString(pszToken);
     329             : 
     330             :         // If the last token is an empty token, then we have to catch
     331             :         // it now, otherwise we won't reenter the loop and it will be lost.
     332      558828 :         if (*pszIter == '\0' &&
     333      117409 :             pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
     334      117409 :             strncmp(pszIter - nDelimiterLength, pszDelimiter,
     335             :                     nDelimiterLength) == 0)
     336             :         {
     337         285 :             aosRetList.AddString("");
     338             :         }
     339             :     }
     340             : 
     341      117449 :     CPLFree(pszToken);
     342             : 
     343      117449 :     if (aosRetList.Count() == 0)
     344          40 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     345             :     else
     346      117409 :         return aosRetList.StealList();
     347             : }
     348             : 
     349             : /************************************************************************/
     350             : /*                          CSVFindNextLine()                           */
     351             : /*                                                                      */
     352             : /*      Find the start of the next line, while at the same time zero    */
     353             : /*      terminating this line.  Take into account that there may be     */
     354             : /*      newline indicators within quoted strings, and that quotes       */
     355             : /*      can be escaped with a backslash.                                */
     356             : /************************************************************************/
     357             : 
     358        6329 : static char *CSVFindNextLine(char *pszThisLine)
     359             : 
     360             : {
     361        6329 :     int i = 0;  // i is used after the for loop.
     362             : 
     363      278980 :     for (int nQuoteCount = 0; pszThisLine[i] != '\0'; i++)
     364             :     {
     365      278980 :         if (pszThisLine[i] == '\"' && (i == 0 || pszThisLine[i - 1] != '\\'))
     366       35430 :             nQuoteCount++;
     367             : 
     368      278980 :         if ((pszThisLine[i] == 10 || pszThisLine[i] == 13) &&
     369        6329 :             (nQuoteCount % 2) == 0)
     370        6329 :             break;
     371             :     }
     372             : 
     373       16573 :     while (pszThisLine[i] == 10 || pszThisLine[i] == 13)
     374       10244 :         pszThisLine[i++] = '\0';
     375             : 
     376        6329 :     if (pszThisLine[i] == '\0')
     377          30 :         return nullptr;
     378             : 
     379        6299 :     return pszThisLine + i;
     380             : }
     381             : 
     382             : /************************************************************************/
     383             : /*                             CSVIngest()                              */
     384             : /*                                                                      */
     385             : /*      Load entire file into memory and setup index if possible.       */
     386             : /************************************************************************/
     387             : 
     388             : // TODO(schwehr): Clean up all the casting in CSVIngest.
     389       60654 : static void CSVIngest(CSVTable *psTable)
     390             : 
     391             : {
     392       60654 :     if (psTable->pszRawData != nullptr)
     393       60624 :         return;
     394             : 
     395             :     /* -------------------------------------------------------------------- */
     396             :     /*      Ingest whole file.                                              */
     397             :     /* -------------------------------------------------------------------- */
     398          30 :     if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
     399             :     {
     400           0 :         CPLError(CE_Failure, CPLE_FileIO,
     401             :                  "Failed using seek end and tell to get file length: %s",
     402             :                  psTable->pszFilename);
     403           0 :         return;
     404             :     }
     405          30 :     const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
     406          30 :     if (static_cast<long>(nFileLen) == -1)
     407             :     {
     408           0 :         CPLError(CE_Failure, CPLE_FileIO,
     409             :                  "Failed using seek end and tell to get file length: %s",
     410             :                  psTable->pszFilename);
     411           0 :         return;
     412             :     }
     413          30 :     VSIRewindL(psTable->fp);
     414             : 
     415          30 :     psTable->pszRawData = static_cast<char *>(
     416          30 :         VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
     417          30 :     if (psTable->pszRawData == nullptr)
     418           0 :         return;
     419          30 :     if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
     420          30 :                   psTable->fp) != static_cast<size_t>(nFileLen))
     421             :     {
     422           0 :         CPLFree(psTable->pszRawData);
     423           0 :         psTable->pszRawData = nullptr;
     424             : 
     425           0 :         CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
     426             :                  psTable->pszFilename);
     427           0 :         return;
     428             :     }
     429             : 
     430          30 :     psTable->pszRawData[nFileLen] = '\0';
     431             : 
     432             :     /* -------------------------------------------------------------------- */
     433             :     /*      Get count of newlines so we can allocate line array.            */
     434             :     /* -------------------------------------------------------------------- */
     435          30 :     int nMaxLineCount = 0;
     436      282925 :     for (int i = 0; i < static_cast<int>(nFileLen); i++)
     437             :     {
     438      282895 :         if (psTable->pszRawData[i] == 10)
     439        6329 :             nMaxLineCount++;
     440             :     }
     441             : 
     442          30 :     psTable->papszLines =
     443          30 :         static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
     444          30 :     if (psTable->papszLines == nullptr)
     445           0 :         return;
     446             : 
     447             :     /* -------------------------------------------------------------------- */
     448             :     /*      Build a list of record pointers into the raw data buffer        */
     449             :     /*      based on line terminators.  Zero terminate the line             */
     450             :     /*      strings.                                                        */
     451             :     /* -------------------------------------------------------------------- */
     452             :     /* skip header line */
     453          30 :     char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
     454             : 
     455          30 :     int iLine = 0;
     456        6329 :     while (pszThisLine != nullptr && iLine < nMaxLineCount)
     457             :     {
     458        6299 :         if (pszThisLine[0] != '#')
     459        6288 :             psTable->papszLines[iLine++] = pszThisLine;
     460        6299 :         pszThisLine = CSVFindNextLine(pszThisLine);
     461             :     }
     462             : 
     463          30 :     psTable->nLineCount = iLine;
     464             : 
     465             :     /* -------------------------------------------------------------------- */
     466             :     /*      Allocate and populate index array.  Ensure they are in          */
     467             :     /*      ascending order so that binary searches can be done on the      */
     468             :     /*      array.                                                          */
     469             :     /* -------------------------------------------------------------------- */
     470          30 :     psTable->panLineIndex = static_cast<int *>(
     471          30 :         VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
     472          30 :     if (psTable->panLineIndex == nullptr)
     473           0 :         return;
     474             : 
     475        6232 :     for (int i = 0; i < psTable->nLineCount; i++)
     476             :     {
     477        6204 :         psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
     478             : 
     479        6204 :         if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
     480             :         {
     481           2 :             CPLFree(psTable->panLineIndex);
     482           2 :             psTable->panLineIndex = nullptr;
     483           2 :             break;
     484             :         }
     485             :     }
     486             : 
     487          30 :     psTable->iLastLine = -1;
     488             : 
     489             :     /* -------------------------------------------------------------------- */
     490             :     /*      We should never need the file handle again, so close it.        */
     491             :     /* -------------------------------------------------------------------- */
     492          30 :     VSIFCloseL(psTable->fp);
     493          30 :     psTable->fp = nullptr;
     494             : }
     495             : 
     496       60654 : static void CSVIngest(const char *pszFilename)
     497             : 
     498             : {
     499       60654 :     CSVTable *psTable = CSVAccess(pszFilename);
     500       60654 :     if (psTable == nullptr)
     501             :     {
     502           0 :         CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
     503             :                  pszFilename);
     504           0 :         return;
     505             :     }
     506       60654 :     CSVIngest(psTable);
     507             : }
     508             : 
     509             : /************************************************************************/
     510             : /*                        CSVDetectSeperator()                          */
     511             : /************************************************************************/
     512             : 
     513             : /** Detect which field separator is used.
     514             :  *
     515             :  * Currently, it can detect comma, semicolon, space, tabulation or pipe.
     516             :  * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
     517             :  * most occurrences will be selected (and a warning emitted).
     518             :  * If no separator found, comma will be considered as the separator.
     519             :  *
     520             :  * @return ',', ';', ' ', tabulation character or '|'.
     521             :  */
     522         634 : char CSVDetectSeperator(const char *pszLine)
     523             : {
     524         634 :     bool bInString = false;
     525         634 :     int nCountComma = 0;
     526         634 :     int nCountSemicolon = 0;
     527         634 :     int nCountTab = 0;
     528         634 :     int nCountPipe = 0;
     529         634 :     int nCountSpace = 0;
     530             : 
     531       27498 :     for (; *pszLine != '\0'; pszLine++)
     532             :     {
     533       26864 :         if (!bInString && *pszLine == ',')
     534             :         {
     535        2205 :             nCountComma++;
     536             :         }
     537       24659 :         else if (!bInString && *pszLine == ';')
     538             :         {
     539          10 :             nCountSemicolon++;
     540             :         }
     541       24649 :         else if (!bInString && *pszLine == '\t')
     542             :         {
     543          29 :             nCountTab++;
     544             :         }
     545       24620 :         else if (!bInString && *pszLine == '|')
     546             :         {
     547           9 :             nCountPipe++;
     548             :         }
     549       24611 :         else if (!bInString && *pszLine == ' ')
     550             :         {
     551         290 :             nCountSpace++;
     552             :         }
     553       24321 :         else if (*pszLine == '"')
     554             :         {
     555         521 :             if (!bInString || pszLine[1] != '"')
     556             :             {
     557         521 :                 bInString = !bInString;
     558         521 :                 continue;
     559             :             }
     560             :             else /* doubled quotes in string resolve to one quote */
     561             :             {
     562           0 :                 pszLine++;
     563             :             }
     564             :         }
     565             :     }
     566             : 
     567             :     const int nMaxCountExceptSpace =
     568             :         std::max(std::max(nCountComma, nCountSemicolon),
     569         634 :                  std::max(nCountTab, nCountPipe));
     570         634 :     char chDelimiter = ',';
     571         634 :     if (nMaxCountExceptSpace == 0)
     572             :     {
     573          38 :         if (nCountSpace > 0)
     574           9 :             chDelimiter = ' ';
     575             :     }
     576             :     else
     577             :     {
     578         596 :         bool bWarn = false;
     579         596 :         if (nCountComma == nMaxCountExceptSpace)
     580             :         {
     581         580 :             chDelimiter = ',';
     582         580 :             bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
     583             :         }
     584          16 :         else if (nCountSemicolon == nMaxCountExceptSpace)
     585             :         {
     586           5 :             chDelimiter = ';';
     587           5 :             bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
     588             :         }
     589          11 :         else if (nCountTab == nMaxCountExceptSpace)
     590             :         {
     591           6 :             chDelimiter = '\t';
     592           6 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
     593             :         }
     594             :         else /* if( nCountPipe == nMaxCountExceptSpace ) */
     595             :         {
     596           5 :             chDelimiter = '|';
     597           5 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
     598             :         }
     599         596 :         if (bWarn)
     600             :         {
     601           6 :             CPLError(CE_Warning, CPLE_AppDefined,
     602             :                      "Selecting '%c' as CSV field separator, but "
     603             :                      "other candidate separator(s) have been found.",
     604             :                      chDelimiter);
     605             :         }
     606             :     }
     607             : 
     608         634 :     return chDelimiter;
     609             : }
     610             : 
     611             : /************************************************************************/
     612             : /*                      CSVReadParseLineGeneric()                       */
     613             : /*                                                                      */
     614             : /*      Read one line, and return split into fields.  The return        */
     615             : /*      result is a stringlist, in the sense of the CSL functions.      */
     616             : /************************************************************************/
     617             : 
     618             : static char **
     619       58522 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
     620             :                         size_t nMaxLineSize, const char *pszDelimiter,
     621             :                         bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
     622             :                         bool bMergeDelimiter, bool bSkipBOM)
     623             : {
     624       58522 :     const char *pszLine = pfnReadLine(fp, nMaxLineSize);
     625       58522 :     if (pszLine == nullptr)
     626        1447 :         return nullptr;
     627             : 
     628       57075 :     if (bSkipBOM)
     629             :     {
     630             :         // Skip BOM.
     631       56697 :         const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
     632       56697 :         if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
     633           4 :             pszLine += 3;
     634             :     }
     635             : 
     636             :     // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
     637       57075 :     if (!bHonourStrings)
     638             :     {
     639           2 :         return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
     640             :     }
     641             : 
     642             :     // If there are no quotes, then this is the simple case.
     643             :     // Parse, and return tokens.
     644       57073 :     if (strchr(pszLine, '\"') == nullptr)
     645       49229 :         return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
     646       49229 :                             bMergeDelimiter);
     647             : 
     648        7844 :     const size_t nDelimiterLength = strlen(pszDelimiter);
     649        7844 :     bool bInString = false;           // keep in that scope !
     650       15688 :     std::string osWorkLine(pszLine);  // keep in that scope !
     651        7844 :     size_t i = 0;                     // keep in that scope !
     652             : 
     653             :     try
     654             :     {
     655             :         while (true)
     656             :         {
     657      808176 :             for (; i < osWorkLine.size(); ++i)
     658             :             {
     659      799576 :                 if (osWorkLine[i] == '\"')
     660             :                 {
     661       59919 :                     if (!bInString)
     662             :                     {
     663             :                         // Only consider " as the start of a quoted string
     664             :                         // if it is the first character of the line, or
     665             :                         // if it is immediately after the field delimiter.
     666       53044 :                         if (i == 0 ||
     667       23381 :                             (i >= nDelimiterLength &&
     668       23381 :                              osWorkLine.compare(i - nDelimiterLength,
     669             :                                                 nDelimiterLength, pszDelimiter,
     670             :                                                 nDelimiterLength) == 0))
     671             :                         {
     672       29576 :                             bInString = true;
     673             :                         }
     674             :                     }
     675       57495 :                     else if (i + 1 < osWorkLine.size() &&
     676       27239 :                              osWorkLine[i + 1] == '"')
     677             :                     {
     678             :                         // Escaped double quote in a quoted string
     679         681 :                         ++i;
     680             :                     }
     681             :                     else
     682             :                     {
     683       29575 :                         bInString = false;
     684             :                     }
     685             :                 }
     686             :             }
     687             : 
     688        8600 :             if (!bInString)
     689             :             {
     690        7843 :                 return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
     691             :                                     bKeepLeadingAndClosingQuotes,
     692        7843 :                                     bMergeDelimiter);
     693             :             }
     694             : 
     695         757 :             const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
     696         757 :             if (pszNewLine == nullptr)
     697           1 :                 break;
     698             : 
     699         756 :             osWorkLine.append("\n");
     700         756 :             osWorkLine.append(pszNewLine);
     701         756 :         }
     702             :     }
     703           0 :     catch (const std::exception &e)
     704             :     {
     705           0 :         CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
     706             :     }
     707             : 
     708           1 :     if (bInString)
     709             :     {
     710           1 :         CPLError(CE_Failure, CPLE_AppDefined,
     711             :                  "CSV file has unbalanced number of double-quotes. Corrupted "
     712             :                  "data will likely be returned");
     713             :     }
     714             : 
     715           1 :     return nullptr;
     716             : }
     717             : 
     718             : /************************************************************************/
     719             : /*                          CSVReadParseLine()                          */
     720             : /*                                                                      */
     721             : /*      Read one line, and return split into fields.  The return        */
     722             : /*      result is a stringlist, in the sense of the CSL functions.      */
     723             : /*                                                                      */
     724             : /*      Deprecated.  Replaced by CSVReadParseLineL().                   */
     725             : /************************************************************************/
     726             : 
     727           0 : char **CSVReadParseLine(FILE *fp)
     728             : {
     729           0 :     return CSVReadParseLine2(fp, ',');
     730             : }
     731             : 
     732           0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
     733             : {
     734           0 :     return CPLReadLine(static_cast<FILE *>(fp));
     735             : }
     736             : 
     737           0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
     738             : {
     739           0 :     CPLAssert(fp != nullptr);
     740           0 :     if (fp == nullptr)
     741           0 :         return nullptr;
     742             : 
     743           0 :     char szDelimiter[2] = {chDelimiter, 0};
     744           0 :     return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
     745             :                                    0,  // nMaxLineSize,
     746             :                                    szDelimiter,
     747             :                                    true,   // bHonourStrings
     748             :                                    false,  // bKeepLeadingAndClosingQuotes
     749             :                                    false,  // bMergeDelimiter
     750           0 :                                    true /* bSkipBOM */);
     751             : }
     752             : 
     753             : /************************************************************************/
     754             : /*                          CSVReadParseLineL()                         */
     755             : /*                                                                      */
     756             : /*      Read one line, and return split into fields.  The return        */
     757             : /*      result is a stringlist, in the sense of the CSL functions.      */
     758             : /*                                                                      */
     759             : /*      Replaces CSVReadParseLine().  These functions use the VSI       */
     760             : /*      layer to allow reading from other file containers.              */
     761             : /************************************************************************/
     762             : 
     763        3910 : char **CSVReadParseLineL(VSILFILE *fp)
     764             : {
     765        3910 :     return CSVReadParseLine2L(fp, ',');
     766             : }
     767             : 
     768        3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
     769             : 
     770             : {
     771        3910 :     CPLAssert(fp != nullptr);
     772        3910 :     if (fp == nullptr)
     773           0 :         return nullptr;
     774             : 
     775        3910 :     char szDelimiter[2] = {chDelimiter, 0};
     776        3910 :     return CSVReadParseLine3L(fp,
     777             :                               0,  // nMaxLineSize
     778             :                               szDelimiter,
     779             :                               true,   // bHonourStrings
     780             :                               false,  // bKeepLeadingAndClosingQuotes
     781             :                               false,  // bMergeDelimiter
     782        3910 :                               true /* bSkipBOM */);
     783             : }
     784             : 
     785             : /************************************************************************/
     786             : /*                      ReadLineLargeFile()                             */
     787             : /************************************************************************/
     788             : 
     789       59279 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
     790             : {
     791       59279 :     int nBufLength = 0;
     792       59279 :     return CPLReadLine3L(static_cast<VSILFILE *>(fp),
     793             :                          nMaxLineSize == 0 ? -1
     794             :                                            : static_cast<int>(nMaxLineSize),
     795      118558 :                          &nBufLength, nullptr);
     796             : }
     797             : 
     798             : /************************************************************************/
     799             : /*                      CSVReadParseLine3L()                            */
     800             : /*                                                                      */
     801             : /*      Read one line, and return split into fields.  The return        */
     802             : /*      result is a stringlist, in the sense of the CSL functions.      */
     803             : /************************************************************************/
     804             : 
     805             : /** Read one line, and return split into fields.
     806             :  * The return result is a stringlist, in the sense of the CSL functions.
     807             :  *
     808             :  * @param fp File handle. Must not be NULL
     809             :  * @param nMaxLineSize Maximum line size, or 0 for unlimited.
     810             :  * @param pszDelimiter Field delimiter sequence (can be multiple bytes)
     811             :  * @param bHonourStrings Should be true, unless double quotes should not be
     812             :  *                       considered when separating fields.
     813             :  * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
     814             :  *                                     quote characters should be kept.
     815             :  * @param bMergeDelimiter Whether consecutive delimiters should be considered
     816             :  *                        as a single one. Should generally be set to false.
     817             :  * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
     818             :  */
     819       58522 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
     820             :                           const char *pszDelimiter, bool bHonourStrings,
     821             :                           bool bKeepLeadingAndClosingQuotes,
     822             :                           bool bMergeDelimiter, bool bSkipBOM)
     823             : 
     824             : {
     825       58522 :     return CSVReadParseLineGeneric(
     826             :         fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
     827       58522 :         bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
     828             : }
     829             : 
     830             : /************************************************************************/
     831             : /*                             CSVCompare()                             */
     832             : /*                                                                      */
     833             : /*      Compare a field to a search value using a particular            */
     834             : /*      criteria.                                                       */
     835             : /************************************************************************/
     836             : 
     837         610 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
     838             :                        CSVCompareCriteria eCriteria)
     839             : 
     840             : {
     841         610 :     if (eCriteria == CC_ExactString)
     842             :     {
     843           0 :         return (strcmp(pszFieldValue, pszTarget) == 0);
     844             :     }
     845         610 :     else if (eCriteria == CC_ApproxString)
     846             :     {
     847         270 :         return EQUAL(pszFieldValue, pszTarget);
     848             :     }
     849         340 :     else if (eCriteria == CC_Integer)
     850             :     {
     851         640 :         return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
     852         640 :                 atoi(pszFieldValue) == atoi(pszTarget));
     853             :     }
     854             : 
     855           0 :     return false;
     856             : }
     857             : 
     858             : /************************************************************************/
     859             : /*                            CSVScanLines()                            */
     860             : /*                                                                      */
     861             : /*      Scan the file for lines where the key field equals              */
     862             : /*      the indicated value with the suggested comparison criteria.     */
     863             : /*      Return the first matching line split into fields.               */
     864             : /*                                                                      */
     865             : /*      Deprecated.  Replaced by CSVScanLinesL().                       */
     866             : /************************************************************************/
     867             : 
     868           0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
     869             :                     CSVCompareCriteria eCriteria)
     870             : 
     871             : {
     872           0 :     CPLAssert(pszValue != nullptr);
     873           0 :     CPLAssert(iKeyField >= 0);
     874           0 :     CPLAssert(fp != nullptr);
     875             : 
     876           0 :     bool bSelected = false;
     877           0 :     const int nTestValue = atoi(pszValue);
     878           0 :     char **papszFields = nullptr;
     879             : 
     880           0 :     while (!bSelected)
     881             :     {
     882           0 :         papszFields = CSVReadParseLine(fp);
     883           0 :         if (papszFields == nullptr)
     884           0 :             return nullptr;
     885             : 
     886           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     887             :         {
     888             :             /* not selected */
     889             :         }
     890           0 :         else if (eCriteria == CC_Integer &&
     891           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     892             :         {
     893           0 :             bSelected = true;
     894             :         }
     895             :         else
     896             :         {
     897           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     898             :         }
     899             : 
     900           0 :         if (!bSelected)
     901             :         {
     902           0 :             CSLDestroy(papszFields);
     903           0 :             papszFields = nullptr;
     904             :         }
     905             :     }
     906             : 
     907           0 :     return papszFields;
     908             : }
     909             : 
     910             : /************************************************************************/
     911             : /*                            CSVScanLinesL()                           */
     912             : /*                                                                      */
     913             : /*      Scan the file for lines where the key field equals              */
     914             : /*      the indicated value with the suggested comparison criteria.     */
     915             : /*      Return the first matching line split into fields.               */
     916             : /************************************************************************/
     917             : 
     918           0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
     919             :                      CSVCompareCriteria eCriteria)
     920             : 
     921             : {
     922           0 :     CPLAssert(pszValue != nullptr);
     923           0 :     CPLAssert(iKeyField >= 0);
     924           0 :     CPLAssert(fp != nullptr);
     925             : 
     926           0 :     bool bSelected = false;
     927           0 :     const int nTestValue = atoi(pszValue);
     928           0 :     char **papszFields = nullptr;
     929             : 
     930           0 :     while (!bSelected)
     931             :     {
     932           0 :         papszFields = CSVReadParseLineL(fp);
     933           0 :         if (papszFields == nullptr)
     934           0 :             return nullptr;
     935             : 
     936           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     937             :         {
     938             :             /* not selected */
     939             :         }
     940           0 :         else if (eCriteria == CC_Integer &&
     941           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     942             :         {
     943           0 :             bSelected = true;
     944             :         }
     945             :         else
     946             :         {
     947           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     948             :         }
     949             : 
     950           0 :         if (!bSelected)
     951             :         {
     952           0 :             CSLDestroy(papszFields);
     953           0 :             papszFields = nullptr;
     954             :         }
     955             :     }
     956             : 
     957           0 :     return papszFields;
     958             : }
     959             : 
     960             : /************************************************************************/
     961             : /*                        CSVScanLinesIndexed()                         */
     962             : /*                                                                      */
     963             : /*      Binary search the table's line index for the given integer      */
     964             : /*      key value.  If the key is not unique, the first matching line   */
     965             : /*      is used.  Return it split into fields, or NULL if not found.    */
     966             : /************************************************************************/
     967             : 
     968          23 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
     969             : 
     970             : {
     971          23 :     CPLAssert(psTable->panLineIndex != nullptr);
     972             : 
     973             :     /* -------------------------------------------------------------------- */
     974             :     /*      Find target record with binary search.                          */
     975             :     /* -------------------------------------------------------------------- */
     976          23 :     int iTop = psTable->nLineCount - 1;
     977          23 :     int iBottom = 0;
     978          23 :     int iResult = -1;
     979             : 
     980         167 :     while (iTop >= iBottom)
     981             :     {
     982         167 :         const int iMiddle = (iTop + iBottom) / 2;
     983         167 :         if (psTable->panLineIndex[iMiddle] > nKeyValue)
     984          98 :             iTop = iMiddle - 1;
     985          69 :         else if (psTable->panLineIndex[iMiddle] < nKeyValue)
     986          46 :             iBottom = iMiddle + 1;
     987             :         else
     988             :         {
     989          23 :             iResult = iMiddle;
     990             :             // if a key is not unique, select the first instance of it.
     991          23 :             while (iResult > 0 &&
     992          23 :                    psTable->panLineIndex[iResult - 1] == nKeyValue)
     993             :             {
     994           0 :                 psTable->bNonUniqueKey = true;
     995           0 :                 iResult--;
     996             :             }
     997          23 :             break;
     998             :         }
     999             :     }
    1000             : 
    1001          23 :     if (iResult == -1)
    1002           0 :         return nullptr;
    1003             : 
    1004             :     /* -------------------------------------------------------------------- */
    1005             :     /*      Parse target line, and update iLastLine indicator.              */
    1006             :     /* -------------------------------------------------------------------- */
    1007          23 :     psTable->iLastLine = iResult;
    1008             : 
    1009          23 :     return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
    1010             : }
    1011             : 
    1012             : /************************************************************************/
    1013             : /*                        CSVScanLinesIngested()                        */
    1014             : /*                                                                      */
    1015             : /*      Scan the table's in-memory lines for one whose key field equals */
    1016             : /*      the indicated value with the suggested comparison criteria.     */
    1017             : /*      Return the first matching line split into fields.               */
    1018             : /************************************************************************/
    1019             : 
    1020          30 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
    1021             :                                    const char *pszValue,
    1022             :                                    CSVCompareCriteria eCriteria)
    1023             : 
    1024             : {
    1025          30 :     CPLAssert(pszValue != nullptr);
    1026          30 :     CPLAssert(iKeyField >= 0);
    1027             : 
    1028          30 :     const int nTestValue = atoi(pszValue);
    1029             : 
    1030             :     /* -------------------------------------------------------------------- */
    1031             :     /*      Short cut for indexed files.                                    */
    1032             :     /* -------------------------------------------------------------------- */
    1033          30 :     if (iKeyField == 0 && eCriteria == CC_Integer &&
    1034          23 :         psTable->panLineIndex != nullptr)
    1035          23 :         return CSVScanLinesIndexed(psTable, nTestValue);
    1036             : 
    1037             :     /* -------------------------------------------------------------------- */
    1038             :     /*      Scan from in-core lines.                                        */
    1039             :     /* -------------------------------------------------------------------- */
    1040           7 :     char **papszFields = nullptr;
    1041           7 :     bool bSelected = false;
    1042             : 
    1043         484 :     while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
    1044             :     {
    1045         477 :         psTable->iLastLine++;
    1046         477 :         papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
    1047             :                                    false, false);
    1048             : 
    1049         477 :         if (CSLCount(papszFields) < iKeyField + 1)
    1050             :         {
    1051             :             /* not selected */
    1052             :         }
    1053         477 :         else if (eCriteria == CC_Integer &&
    1054         242 :                  atoi(papszFields[iKeyField]) == nTestValue)
    1055             :         {
    1056           2 :             bSelected = true;
    1057             :         }
    1058             :         else
    1059             :         {
    1060         475 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
    1061             :         }
    1062             : 
    1063         477 :         if (!bSelected)
    1064             :         {
    1065         470 :             CSLDestroy(papszFields);
    1066         470 :             papszFields = nullptr;
    1067             :         }
    1068             :     }
    1069             : 
    1070           7 :     return papszFields;
    1071             : }
    1072             : 
    1073             : /************************************************************************/
    1074             : /*                            CSVRewind()                               */
    1075             : /*                                                                      */
    1076             : /*      Rewind a CSV file based on a passed in filename.                */
    1077             : /*      This is aimed at being used with CSVGetNextLine().              */
    1078             : /************************************************************************/
    1079             : // Does nothing beyond the assertion when CSVAccess() yields no table.
    1080        1851 : void CSVRewind(const char *pszFilename)
    1081             : 
    1082             : {
    1083             :     /* -------------------------------------------------------------------- */
    1084             :     /*      Get access to the table.                                        */
    1085             :     /* -------------------------------------------------------------------- */
    1086        1851 :     CPLAssert(pszFilename != nullptr);
    1087             :     // Reset the cursor: -1 means "before papszLines[0]" (readers pre-increment).
    1088        1851 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1089        1851 :     if (psTable != nullptr)
    1090        1851 :         psTable->iLastLine = -1;
    1091        1851 : }
    1092             : 
    1093             : /************************************************************************/
    1094             : /*                           CSVGetNextLine()                           */
    1095             : /*                                                                      */
    1096             : /*      Fetch the next line of a CSV file based on a passed in          */
    1097             : /*      filename.  Returns NULL at end of file, or if file is not       */
    1098             : /*      really established.                                             */
    1099             : /*      This ingests the whole file into memory if not already done.    */
    1100             : /*      When reaching end of file, CSVRewind() may be used to read      */
    1101             : /*      again from the beginning.                                       */
    1102             : /************************************************************************/
    1103             : // The returned list is psTable->papszRecFields: it is destroyed on the next call, so callers must not free or keep it.
    1104       60510 : char **CSVGetNextLine(const char *pszFilename)
    1105             : 
    1106             : {
    1107             : 
    1108             :     /* -------------------------------------------------------------------- */
    1109             :     /*      Get access to the table.                                        */
    1110             :     /* -------------------------------------------------------------------- */
    1111       60510 :     CPLAssert(pszFilename != nullptr);
    1112             : 
    1113       60510 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1114       60510 :     if (psTable == nullptr)
    1115           0 :         return nullptr;
    1116             : 
    1117       60510 :     CSVIngest(psTable->pszFilename);
    1118             : 
    1119             :     /* -------------------------------------------------------------------- */
    1120             :     /*      If we use CSVGetNextLine() we can pretty much assume we have    */
    1121             :     /*      a non-unique key.                                               */
    1122             :     /* -------------------------------------------------------------------- */
    1123       60510 :     psTable->bNonUniqueKey = true;
    1124             : 
    1125             :     /* -------------------------------------------------------------------- */
    1126             :     /*      Do we have a next line available?  This only works for          */
    1127             :     /*      ingested tables I believe.                                      */
    1128             :     /* -------------------------------------------------------------------- */
    1129       60510 :     if (psTable->iLastLine + 1 >= psTable->nLineCount)
    1130         633 :         return nullptr;
    1131             :     // Advance the cursor and replace the table's cached record with the newly split line.
    1132       59877 :     psTable->iLastLine++;
    1133       59877 :     CSLDestroy(psTable->papszRecFields);
    1134      119754 :     psTable->papszRecFields = CSVSplitLine(
    1135       59877 :         psTable->papszLines[psTable->iLastLine], ",", false, false);
    1136             : 
    1137       59877 :     return psTable->papszRecFields;
    1138             : }
    1139             : 
    1140             : /************************************************************************/
    1141             : /*                            CSVScanFile()                             */
    1142             : /*                                                                      */
    1143             : /*      Scan a whole file using criteria similar to above, but also     */
    1144             : /*      taking care of file opening and closing.                        */
    1145             : /************************************************************************/
    1146             : // Works on an already accessed table; the result is cached in, and owned by, psTable->papszRecFields.
    1147         144 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
    1148             :                           const char *pszValue, CSVCompareCriteria eCriteria)
    1149             : {
    1150         144 :     CSVIngest(psTable->pszFilename);
    1151             : 
    1152             :     /* -------------------------------------------------------------------- */
    1153             :     /*      Does the current record match the criteria?  If so, just        */
    1154             :     /*      return it again.                                                */
    1155             :     /* -------------------------------------------------------------------- */
    1156         144 :     if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
    1157         402 :         CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
    1158         114 :         !psTable->bNonUniqueKey)
    1159             :     {
    1160         114 :         return psTable->papszRecFields;
    1161             :     }
    1162             : 
    1163             :     /* -------------------------------------------------------------------- */
    1164             :     /*      Scan the file from the beginning, replacing the ``current       */
    1165             :     /*      record'' in our structure with the one that is found.           */
    1166             :     /* -------------------------------------------------------------------- */
    1167          30 :     psTable->iLastLine = -1;
    1168          30 :     CSLDestroy(psTable->papszRecFields);
    1169             :     // A non-null pszRawData selects the in-memory scan; otherwise rescan through the file handle.
    1170          30 :     if (psTable->pszRawData != nullptr)
    1171          30 :         psTable->papszRecFields =
    1172          30 :             CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
    1173             :     else
    1174             :     {
    1175           0 :         VSIRewindL(psTable->fp);
    1176           0 :         CPLReadLineL(psTable->fp); /* throw away the header line */
    1177             : 
    1178           0 :         psTable->papszRecFields =
    1179           0 :             CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
    1180             :     }
    1181             : 
    1182          30 :     return psTable->papszRecFields;
    1183             : }
    1184             : // Filename-based entry point: resolves the table with CSVAccess() and delegates to the CSVTable-based overload.
    1185           4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
    1186             :                    CSVCompareCriteria eCriteria)
    1187             : 
    1188             : {
    1189             :     /* -------------------------------------------------------------------- */
    1190             :     /*      Get access to the table.                                        */
    1191             :     /* -------------------------------------------------------------------- */
    1192           4 :     CPLAssert(pszFilename != nullptr);
    1193             :     // A negative field index cannot match anything: return before touching the table.
    1194           4 :     if (iKeyField < 0)
    1195           0 :         return nullptr;
    1196             : 
    1197           4 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1198           4 :     if (psTable == nullptr)
    1199           0 :         return nullptr;
    1200             : 
    1201           4 :     return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
    1202             : }
    1203             : 
    1204             : /************************************************************************/
    1205             : /*                           CSVGetFieldId()                            */
    1206             : /*                                                                      */
    1207             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1208             : /*      and find the field with the indicated name.  Returns -1 if      */
    1209             : /*      it fails to find the field name.  Comparison is case            */
    1210             : /*      insensitive, but otherwise exact.  After this function has      */
    1211             : /*      been called the file pointer will be positioned just after      */
    1212             : /*      the first record.                                               */
    1213             : /*                                                                      */
    1214             : /*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
    1215             : /************************************************************************/
    1216             : // Returns the zero-based index of the first matching field, or -1 if none matches or no record could be read.
    1217           0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
    1218             : 
    1219             : {
    1220           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1221             : 
    1222           0 :     VSIRewind(fp);
    1223             : 
    1224           0 :     char **papszFields = CSVReadParseLine(fp);
    1225           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1226             :     {
    1227           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1228             :         {
    1229           0 :             CSLDestroy(papszFields);
    1230           0 :             return i;
    1231             :         }
    1232             :     }
    1233             : 
    1234           0 :     CSLDestroy(papszFields);
    1235             : 
    1236           0 :     return -1;
    1237             : }
    1238             : 
    1239             : /************************************************************************/
    1240             : /*                           CSVGetFieldIdL()                           */
    1241             : /*                                                                      */
    1242             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1243             : /*      and find the field with the indicated name.  Returns -1 if      */
    1244             : /*      it fails to find the field name.  Comparison is case            */
    1245             : /*      insensitive, but otherwise exact.  After this function has      */
    1246             : /*      been called the file pointer will be positioned just after      */
    1247             : /*      the first record.                                               */
    1248             : /************************************************************************/
    1249             : // Returns the zero-based index of the first matching field, or -1 if none matches or no record could be read.
    1250           0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
    1251             : 
    1252             : {
    1253           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1254             : 
    1255           0 :     VSIRewindL(fp);
    1256             : 
    1257           0 :     char **papszFields = CSVReadParseLineL(fp);
    1258           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1259             :     {
    1260           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1261             :         {
    1262           0 :             CSLDestroy(papszFields);
    1263           0 :             return i;
    1264             :         }
    1265             :     }
    1266             : 
    1267           0 :     CSLDestroy(papszFields);
    1268             : 
    1269           0 :     return -1;
    1270             : }
    1271             : 
    1272             : /************************************************************************/
    1273             : /*                         CSVGetFileFieldId()                          */
    1274             : /*                                                                      */
    1275             : /*      Same as CSVGetFieldId(), except that we get the file based      */
    1276             : /*      on filename, rather than having an existing handle.             */
    1277             : /************************************************************************/
    1278             : // Table-based overload: searches the field names cached in psTable (papszFieldNames / panFieldNamesLength); no file is read here.
    1279        7458 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
    1280             : 
    1281             : {
    1282             :     /* -------------------------------------------------------------------- */
    1283             :     /*      Find the requested field (length check first, then EQUALN).     */
    1284             :     /* -------------------------------------------------------------------- */
    1285        7458 :     const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
    1286       18760 :     for (int i = 0; psTable->papszFieldNames != nullptr &&
    1287       18760 :                     psTable->papszFieldNames[i] != nullptr;
    1288             :          i++)
    1289             :     {
    1290       18760 :         if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
    1291       10473 :             EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
    1292             :         {
    1293        7458 :             return i;
    1294             :         }
    1295             :     }
    1296             : 
    1297           0 :     return -1;
    1298             : }
    1299             : // Filename-based entry point: returns -1 if the table cannot be accessed or the field name is not found.
    1300        7178 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
    1301             : 
    1302             : {
    1303             :     /* -------------------------------------------------------------------- */
    1304             :     /*      Get access to the table.                                        */
    1305             :     /* -------------------------------------------------------------------- */
    1306        7178 :     CPLAssert(pszFilename != nullptr);
    1307             : 
    1308        7178 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1309        7178 :     if (psTable == nullptr)
    1310           0 :         return -1;
    1311        7178 :     return CSVGetFileFieldId(psTable, pszFieldName);
    1312             : }
    1313             : 
    1314             : /************************************************************************/
    1315             : /*                         CSVScanFileByName()                          */
    1316             : /*                                                                      */
    1317             : /*      Same as CSVScanFile(), but using a field name instead of a      */
    1318             : /*      field number.                                                   */
    1319             : /************************************************************************/
    1320             : // Returns nullptr when pszKeyFieldName cannot be resolved to a field index (CSVGetFileFieldId() == -1).
    1321           4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
    1322             :                          const char *pszValue, CSVCompareCriteria eCriteria)
    1323             : 
    1324             : {
    1325           4 :     const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
    1326           4 :     if (iKeyField == -1)
    1327           0 :         return nullptr;
    1328             : 
    1329           4 :     return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
    1330             : }
    1331             : 
    1332             : /************************************************************************/
    1333             : /*                            CSVGetField()                             */
    1334             : /*                                                                      */
    1335             : /*      The all-in-one function to fetch a particular field value       */
    1336             : /*      from a CSV file.  Note this function will return an empty       */
    1337             : /*      string, rather than NULL if it fails to find the desired        */
    1338             : /*      value for some reason.  The caller can't establish that the     */
    1339             : /*      fetch failed.                                                   */
    1340             : /************************************************************************/
    1341             : // Every return value is either the literal "" or a string inside the table's current record, so the caller must not free it.
    1342         140 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
    1343             :                         const char *pszKeyFieldValue,
    1344             :                         CSVCompareCriteria eCriteria,
    1345             :                         const char *pszTargetField)
    1346             : 
    1347             : {
    1348             :     /* -------------------------------------------------------------------- */
    1349             :     /*      Find the table.                                                 */
    1350             :     /* -------------------------------------------------------------------- */
    1351         140 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1352         140 :     if (psTable == nullptr)
    1353           0 :         return "";
    1354             : 
    1355         140 :     const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
    1356         140 :     if (iKeyField == -1)
    1357           0 :         return "";
    1358             : 
    1359             :     /* -------------------------------------------------------------------- */
    1360             :     /*      Find the correct record.                                        */
    1361             :     /* -------------------------------------------------------------------- */
    1362             :     char **papszRecord =
    1363         140 :         CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
    1364         140 :     if (papszRecord == nullptr)
    1365           0 :         return "";
    1366             : 
    1367             :     /* -------------------------------------------------------------------- */
    1368             :     /*      Figure out which field we want out of this.                     */
    1369             :     /* -------------------------------------------------------------------- */
    1370         140 :     const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
    1371         140 :     if (iTargetField < 0)
    1372           0 :         return "";
    1373             :     // Walk the record instead of indexing it directly: a record with fewer than iTargetField + 1 fields yields "".
    1374         388 :     for (int i = 0; papszRecord[i] != nullptr; ++i)
    1375             :     {
    1376         388 :         if (i == iTargetField)
    1377         140 :             return papszRecord[iTargetField];
    1378             :     }
    1379           0 :     return "";
    1380             : }
    1381             : 
    1382             : /************************************************************************/
    1383             : /*                       GDALDefaultCSVFilename()                       */
    1384             : /************************************************************************/
    1385             : // State kept in the CTLS_CSVDEFAULTFILENAME TLS slot: the fallback path buffer and a "GDAL_DATA finder location already tried" flag.
    1386             : typedef struct
    1387             : {
    1388             :     char szPath[512];
    1389             :     bool bCSVFinderInitialized;
    1390             : } DefaultCSVFileNameTLS;
    1391             : // Lookup order: already accessed tables, CPLFindFile("gdal", ...), one retry after pushing GDAL_DATA, then pszBasename itself.
    1392        2498 : const char *GDALDefaultCSVFilename(const char *pszBasename)
    1393             : 
    1394             : {
    1395             :     /* -------------------------------------------------------------------- */
    1396             :     /*      Do we already have this file accessed?  If so, just return      */
    1397             :     /*      the existing path without any further probing.                  */
    1398             :     /* -------------------------------------------------------------------- */
    1399        2498 :     int bMemoryError = FALSE;
    1400             :     CSVTable **ppsCSVTableList =
    1401        2498 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
    1402        2498 :     if (ppsCSVTableList != nullptr)
    1403             :     {
    1404        2492 :         const size_t nBasenameLen = strlen(pszBasename);
    1405             : 
    1406       23272 :         for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
    1407       20780 :              psTable = psTable->psNext)
    1408             :         {
    1409       22718 :             const size_t nFullLen = strlen(psTable->pszFilename);
    1410             :             // Accept only names that end in pszBasename directly preceded by a '/' or '\\' separator.
    1411       22718 :             if (nFullLen > nBasenameLen &&
    1412       22718 :                 strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
    1413        1938 :                        pszBasename) == 0 &&
    1414        1938 :                 strchr("/\\",
    1415        1938 :                        psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
    1416             :                     nullptr)
    1417             :             {
    1418        1938 :                 return psTable->pszFilename;
    1419             :             }
    1420             :         }
    1421             :     }
    1422             : 
    1423             :     /* -------------------------------------------------------------------- */
    1424             :     /*      Otherwise we need to look harder for it.                        */
    1425             :     /* -------------------------------------------------------------------- */
    1426             :     DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1427         560 :         CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
    1428         560 :     if (pTLSData == nullptr && !bMemoryError)
    1429             :     {
    1430             :         pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1431           5 :             VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
    1432           5 :         if (pTLSData)
    1433           5 :             CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
    1434             :     }
    1435         560 :     if (pTLSData == nullptr)
    1436           0 :         return "/not_existing_dir/not_existing_path";
    1437             :     // A CPLFindFile() hit is returned as is; pTLSData->szPath is only used for the final fallback below.
    1438         560 :     const char *pszResult = CPLFindFile("gdal", pszBasename);
    1439             : 
    1440         560 :     if (pszResult != nullptr)
    1441          43 :         return pszResult;
    1442             : 
    1443         517 :     if (!pTLSData->bCSVFinderInitialized)
    1444             :     {
    1445           2 :         pTLSData->bCSVFinderInitialized = true;
    1446             : 
    1447           2 :         if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
    1448           2 :             CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
    1449             : 
    1450           2 :         pszResult = CPLFindFile("gdal", pszBasename);
    1451             : 
    1452           2 :         if (pszResult != nullptr)
    1453           0 :             return pszResult;
    1454             :     }
    1455             : 
    1456             :     // For systems like sandboxes that do not allow other checks.
    1457         517 :     CPLDebug("CPL_CSV",
    1458             :              "Failed to find file in GDALDefaultCSVFilename.  "
    1459             :              "Returning original basename: %s",
    1460             :              pszBasename);
    1461         517 :     CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
    1462         517 :     return pTLSData->szPath;
    1463             : }
    1464             : 
    1465             : /************************************************************************/
    1466             : /*                            CSVFilename()                             */
    1467             : /*                                                                      */
    1468             : /*      Return the full path to a particular CSV file, using the        */
    1469             : /*      hook set by SetCSVFilenameHook() if any, else the default.      */
    1470             : /************************************************************************/
    1471             : 
    1472             : CPL_C_START
    1473             : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
    1474             : CPL_C_END
    1475             : // With no hook installed (nullptr) the lookup is delegated to GDALDefaultCSVFilename().
    1476        2498 : const char *CSVFilename(const char *pszBasename)
    1477             : 
    1478             : {
    1479        2498 :     if (pfnCSVFilenameHook == nullptr)
    1480        2498 :         return GDALDefaultCSVFilename(pszBasename);
    1481             : 
    1482           0 :     return pfnCSVFilenameHook(pszBasename);
    1483             : }
    1484             : 
    1485             : /************************************************************************/
    1486             : /*                         SetCSVFilenameHook()                         */
    1487             : /*                                                                      */
    1488             : /*      Applications can use this to set a function that will           */
    1489             : /*      massage CSV filenames.                                          */
    1490             : /************************************************************************/
    1491             : 
    1492             : /**
    1493             :  * Override CSV file search method.
    1494             :  *
    1495             :  * @param pfnNewHook The pointer to a function which will return the
    1496             :  * full path for a given filename, or nullptr to restore the default lookup.
    1497             :  *
    1498             : 
    1499             : This function allows an application to override how the GTIFGetDefn()
    1500             : and related functions find the CSV (Comma Separated Value) values
    1501             : required. The pfnNewHook argument should be a pointer to a function that
    1502             : will take in a CSV filename and return a full path to the file. The
    1503             : returned string should be to an internal static buffer so that the
    1504             : caller doesn't have to free the result.
    1505             : 
    1506             : Example:
    1507             : 
    1508             : The listgeo utility uses the following override function if the user
    1509             : specified a CSV file directory with the -t commandline switch (argument
    1510             : put into CSVDirName).
    1511             : 
    1512             : \code{.cpp}
    1513             : 
    1514             :     ...
    1515             :     SetCSVFilenameHook( CSVFileOverride );
    1516             :     ...
    1517             : 
    1518             : static const char *CSVFileOverride( const char * pszInput )
    1519             : 
    1520             : {
    1521             :     static char szPath[1024] = {};
    1522             : 
    1523             :     snprintf( szPath, sizeof(szPath), "%s/%s", CSVDirName, pszInput );
    1524             : 
    1525             :     return szPath;
    1526             : }
    1527             : \endcode
    1528             : 
    1529             : */
    1530             : 
    1531             : CPL_C_START
    1532           0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
    1533             : 
    1534             : {
    1535           0 :     pfnCSVFilenameHook = pfnNewHook;
    1536           0 : }
    1537             : 
    1538             : CPL_C_END

Generated by: LCOV version 1.14