LCOV - code coverage report
Current view: top level - port - cpl_csv.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 406 536 75.7 %
Date: 2025-02-20 10:14:44 Functions: 27 35 77.1 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  CPL - Common Portability Library
       4             :  * Purpose:  CSV (comma separated value) file access.
       5             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 1999, Frank Warmerdam
       9             :  * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * SPDX-License-Identifier: MIT
      12             :  ****************************************************************************/
      13             : 
      14             : #include "cpl_port.h"
      15             : #include "cpl_csv.h"
      16             : 
      17             : #include <cstddef>
      18             : #include <cstdlib>
      19             : #include <cstring>
      20             : #if HAVE_FCNTL_H
      21             : #include <fcntl.h>
      22             : #endif
      23             : 
      24             : #include "cpl_conv.h"
      25             : #include "cpl_error.h"
      26             : #include "cpl_multiproc.h"
      27             : #include "gdal_csv.h"
      28             : 
      29             : #include <algorithm>
      30             : 
      31             : /* ==================================================================== */
      32             : /*      The CSVTable is a persistent set of info about an open CSV      */
      33             : /*      table.  After CSVIngest() it also caches the whole file in      */
      34             : /*      memory, plus an integer index of the records when their         */
      35             : /*      leading values are in ascending order.                          */
      36             : /* ==================================================================== */
      37             : typedef struct ctb
      38             : {
      39             :     VSILFILE *fp;
      40             :     struct ctb *psNext;
      41             :     char *pszFilename;
      42             :     char **papszFieldNames;
      43             :     int *panFieldNamesLength;
      44             :     char **papszRecFields;
      45             :     int nFields;
      46             :     int iLastLine;
      47             :     bool bNonUniqueKey;
      48             : 
      49             :     /* Cache for whole file */
      50             :     int nLineCount;
      51             :     char **papszLines;
      52             :     int *panLineIndex;
      53             :     char *pszRawData;
      54             : } CSVTable;
      55             : 
      56             : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
      57             :                                 const char *pszFilename);
      58             : 
      59             : /************************************************************************/
      60             : /*                            CSVFreeTLS()                              */
      61             : /************************************************************************/
      62           2 : static void CSVFreeTLS(void *pData)
      63             : {
      64           2 :     CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
      65           2 :     CPLFree(pData);
      66           2 : }
      67             : 
      68             : /* It would likely be better to share this list between threads, but
      69             :    that will require some rework. */
      70             : 
      71             : /************************************************************************/
      72             : /*                             CSVAccess()                              */
      73             : /*                                                                      */
      74             : /*      This function will fetch a handle to the requested table.       */
      75             : /*      If not found in the ``open table list'' the table will be       */
      76             : /*      opened and added to the list.  Eventually this function may     */
      77             : /*      become public with an abstracted return type so that            */
      78             : /*      applications can set options about the table.  For now this     */
      79             : /*      isn't done.                                                     */
      80             : /************************************************************************/
      81             : 
      82      130205 : static CSVTable *CSVAccess(const char *pszFilename)
      83             : 
      84             : {
      85             :     /* -------------------------------------------------------------------- */
      86             :     /*      Fetch the table, and allocate the thread-local pointer to it    */
      87             :     /*      if there isn't already one.                                     */
      88             :     /* -------------------------------------------------------------------- */
      89      130205 :     int bMemoryError = FALSE;
      90             :     CSVTable **ppsCSVTableList =
      91      130205 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
      92      130205 :     if (bMemoryError)
      93           0 :         return nullptr;
      94      130205 :     if (ppsCSVTableList == nullptr)
      95             :     {
      96             :         ppsCSVTableList =
      97           5 :             static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
      98           5 :         if (ppsCSVTableList == nullptr)
      99           0 :             return nullptr;
     100           5 :         CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
     101             :     }
     102             : 
     103             :     /* -------------------------------------------------------------------- */
     104             :     /*      Is the table already in the list.                               */
     105             :     /* -------------------------------------------------------------------- */
     106     1009480 :     for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
     107      879271 :          psTable = psTable->psNext)
     108             :     {
     109     1009450 :         if (EQUAL(psTable->pszFilename, pszFilename))
     110             :         {
     111             :             /*
     112             :              * Eventually we should consider promoting to the front of
     113             :              * the list to accelerate frequently accessed tables.
     114             :              */
     115      130175 :             return psTable;
     116             :         }
     117             :     }
     118             : 
     119             :     /* -------------------------------------------------------------------- */
     120             :     /*      If not, try to open it.                                         */
     121             :     /* -------------------------------------------------------------------- */
     122          30 :     VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
     123          30 :     if (fp == nullptr)
     124           0 :         return nullptr;
     125             : 
     126             :     /* -------------------------------------------------------------------- */
     127             :     /*      Create an information structure about this table, and add to    */
     128             :     /*      the front of the list.                                          */
     129             :     /* -------------------------------------------------------------------- */
     130             :     CSVTable *const psTable =
     131          30 :         static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
     132          30 :     if (psTable == nullptr)
     133             :     {
     134           0 :         VSIFCloseL(fp);
     135           0 :         return nullptr;
     136             :     }
     137             : 
     138          30 :     psTable->fp = fp;
     139          30 :     psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
     140          30 :     if (psTable->pszFilename == nullptr)
     141             :     {
     142           0 :         VSIFree(psTable);
     143           0 :         VSIFCloseL(fp);
     144           0 :         return nullptr;
     145             :     }
     146          30 :     psTable->bNonUniqueKey = false;  // As far as we know now.
     147          30 :     psTable->psNext = *ppsCSVTableList;
     148             : 
     149          30 :     *ppsCSVTableList = psTable;
     150             : 
     151             :     /* -------------------------------------------------------------------- */
     152             :     /*      Read the table header record containing the field names.        */
     153             :     /* -------------------------------------------------------------------- */
     154          30 :     psTable->papszFieldNames = CSVReadParseLineL(fp);
     155          30 :     psTable->nFields = CSLCount(psTable->papszFieldNames);
     156          30 :     psTable->panFieldNamesLength =
     157          30 :         static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
     158          30 :     for (int i = 0;
     159         185 :          i < psTable->nFields &&
     160             :          /* null-pointer check to avoid a false positive from CLang S.A. */
     161         155 :          psTable->papszFieldNames != nullptr;
     162             :          i++)
     163             :     {
     164         155 :         psTable->panFieldNamesLength[i] =
     165         155 :             static_cast<int>(strlen(psTable->papszFieldNames[i]));
     166             :     }
     167             : 
     168          30 :     return psTable;
     169             : }
     170             : 
     171             : /************************************************************************/
     172             : /*                            CSVDeaccess()                             */
     173             : /************************************************************************/
     174             : 
     175         955 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
     176             :                                 const char *pszFilename)
     177             : 
     178             : {
     179         955 :     if (ppsCSVTableList == nullptr)
     180         946 :         return;
     181             : 
     182             :     /* -------------------------------------------------------------------- */
     183             :     /*      A NULL means deaccess all tables.                               */
     184             :     /* -------------------------------------------------------------------- */
     185           9 :     if (pszFilename == nullptr)
     186             :     {
     187           9 :         while (*ppsCSVTableList != nullptr)
     188           5 :             CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
     189           5 :                                 (*ppsCSVTableList)->pszFilename);
     190             : 
     191           4 :         return;
     192             :     }
     193             : 
     194             :     /* -------------------------------------------------------------------- */
     195             :     /*      Find this table.                                                */
     196             :     /* -------------------------------------------------------------------- */
     197           5 :     CSVTable *psLast = nullptr;
     198           5 :     CSVTable *psTable = *ppsCSVTableList;
     199           5 :     for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
     200           0 :          psTable = psTable->psNext)
     201             :     {
     202           0 :         psLast = psTable;
     203             :     }
     204             : 
     205           5 :     if (psTable == nullptr)
     206             :     {
     207           0 :         if (bCanUseTLS)
     208           0 :             CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
     209           0 :         return;
     210             :     }
     211             : 
     212             :     /* -------------------------------------------------------------------- */
     213             :     /*      Remove the link from the list.                                  */
     214             :     /* -------------------------------------------------------------------- */
     215           5 :     if (psLast != nullptr)
     216           0 :         psLast->psNext = psTable->psNext;
     217             :     else
     218           5 :         *ppsCSVTableList = psTable->psNext;
     219             : 
     220             :     /* -------------------------------------------------------------------- */
     221             :     /*      Free the table.                                                 */
     222             :     /* -------------------------------------------------------------------- */
     223           5 :     if (psTable->fp != nullptr)
     224           0 :         VSIFCloseL(psTable->fp);
     225             : 
     226           5 :     CSLDestroy(psTable->papszFieldNames);
     227           5 :     CPLFree(psTable->panFieldNamesLength);
     228           5 :     CSLDestroy(psTable->papszRecFields);
     229           5 :     CPLFree(psTable->pszFilename);
     230           5 :     CPLFree(psTable->panLineIndex);
     231           5 :     CPLFree(psTable->pszRawData);
     232           5 :     CPLFree(psTable->papszLines);
     233             : 
     234           5 :     CPLFree(psTable);
     235             : 
     236           5 :     if (bCanUseTLS)
     237           5 :         CPLReadLine(nullptr);
     238             : }
     239             : 
     240         948 : void CSVDeaccess(const char *pszFilename)
     241             : {
     242             :     /* -------------------------------------------------------------------- */
     243             :     /*      Fetch the thread-local table list.  Unlike CSVAccess(), do      */
     244             :     /*      not allocate it: a NULL list is handled by the callee.          */
     245             :     /* -------------------------------------------------------------------- */
     246         948 :     int bMemoryError = FALSE;
     247             :     CSVTable **ppsCSVTableList =
     248         948 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
     249             : 
     250         948 :     CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
     251         948 : }
     252             : 
     253             : /************************************************************************/
     254             : /*                            CSVSplitLine()                            */
     255             : /*                                                                      */
     256             : /*      Tokenize a CSV line into fields in the form of a string         */
     257             : /*      list.  This is used instead of the CPLTokenizeString()          */
     258             : /*      because it provides correct CSV escaping and quoting            */
     259             : /*      semantics.                                                      */
     260             : /************************************************************************/
     261             : 
     262      116329 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
     263             :                            bool bKeepLeadingAndClosingQuotes,
     264             :                            bool bMergeDelimiter)
     265             : 
     266             : {
     267      232658 :     CPLStringList aosRetList;
     268      116329 :     if (pszString == nullptr)
     269           0 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     270             : 
     271      116329 :     char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
     272      116329 :     int nTokenMax = 10;
     273      116329 :     const size_t nDelimiterLength = strlen(pszDelimiter);
     274             : 
     275      116329 :     const char *pszIter = pszString;
     276      669294 :     while (*pszIter != '\0')
     277             :     {
     278      552965 :         bool bInString = false;
     279             : 
     280      552965 :         int nTokenLen = 0;
     281             : 
     282             :         // Try to find the next delimiter, marking end of token.
     283     4647150 :         do
     284             :         {
     285             :             // If we are at a delimiter, skip past it and end this token.
     286     5200120 :             if (!bInString &&
     287     2711680 :                 strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
     288             :             {
     289      436957 :                 pszIter += nDelimiterLength;
     290      436957 :                 if (bMergeDelimiter)
     291             :                 {
     292           9 :                     while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
     293             :                            0)
     294           5 :                         pszIter += nDelimiterLength;
     295             :                 }
     296      436957 :                 break;
     297             :             }
     298             : 
     299     4763160 :             if (*pszIter == '"')
     300             :             {
     301      395871 :                 if (!bInString && nTokenLen > 0)
     302             :                 {
     303             :                     // do not treat in a special way double quotes that appear
     304             :                     // in the middle of a field (similarly to OpenOffice)
     305             :                     // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
     306             :                 }
     307      395784 :                 else if (!bInString || pszIter[1] != '"')
     308             :                 {
     309      395098 :                     bInString = !bInString;
     310      395098 :                     if (!bKeepLeadingAndClosingQuotes)
     311      395064 :                         continue;
     312             :                 }
     313             :                 else  // Doubled quotes in string resolve to one quote.
     314             :                 {
     315         686 :                     pszIter++;
     316             :                 }
     317             :             }
     318             : 
     319     4368100 :             if (nTokenLen >= nTokenMax - 2)
     320             :             {
     321      122941 :                 nTokenMax = nTokenMax * 2 + 10;
     322      122941 :                 pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
     323             :             }
     324             : 
     325     4368100 :             pszToken[nTokenLen] = *pszIter;
     326     4368100 :             nTokenLen++;
     327     4763160 :         } while (*(++pszIter) != '\0');
     328             : 
     329      552965 :         pszToken[nTokenLen] = '\0';
     330      552965 :         aosRetList.AddString(pszToken);
     331             : 
     332             :         // If the last token is an empty token, then we have to catch
     333             :         // it now, otherwise we won't reenter the loop and it will be lost.
     334      552965 :         if (*pszIter == '\0' &&
     335      116289 :             pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
     336      116289 :             strncmp(pszIter - nDelimiterLength, pszDelimiter,
     337             :                     nDelimiterLength) == 0)
     338             :         {
     339         281 :             aosRetList.AddString("");
     340             :         }
     341             :     }
     342             : 
     343      116329 :     CPLFree(pszToken);
     344             : 
     345      116329 :     if (aosRetList.Count() == 0)
     346          40 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     347             :     else
     348      116289 :         return aosRetList.StealList();
     349             : }
     350             : 
     351             : /************************************************************************/
     352             : /*                          CSVFindNextLine()                           */
     353             : /*                                                                      */
     354             : /*      Find the start of the next line, while at the same time zero    */
     355             : /*      terminating this line.  Take into account that there may be     */
     356             : /*      newline indicators within quoted strings, and that quotes       */
     357             : /*      can be escaped with a backslash.                                */
     358             : /************************************************************************/
     359             : 
     360        6329 : static char *CSVFindNextLine(char *pszThisLine)
     361             : 
     362             : {
     363        6329 :     int i = 0;  // i is used after the for loop.
     364             : 
     365      275213 :     for (int nQuoteCount = 0; pszThisLine[i] != '\0'; i++)
     366             :     {
     367      275213 :         if (pszThisLine[i] == '\"' && (i == 0 || pszThisLine[i - 1] != '\\'))
     368       35430 :             nQuoteCount++;
     369             : 
     370      275213 :         if ((pszThisLine[i] == 10 || pszThisLine[i] == 13) &&
     371        6329 :             (nQuoteCount % 2) == 0)
     372        6329 :             break;
     373             :     }
     374             : 
     375       16573 :     while (pszThisLine[i] == 10 || pszThisLine[i] == 13)
     376       10244 :         pszThisLine[i++] = '\0';
     377             : 
     378        6329 :     if (pszThisLine[i] == '\0')
     379          30 :         return nullptr;
     380             : 
     381        6299 :     return pszThisLine + i;
     382             : }
     383             : 
     384             : /************************************************************************/
     385             : /*                             CSVIngest()                              */
     386             : /*                                                                      */
     387             : /*      Load entire file into memory and setup index if possible.       */
     388             : /************************************************************************/
     389             : 
     390             : // TODO(schwehr): Clean up all the casting in CSVIngest.
     391       60606 : static void CSVIngest(CSVTable *psTable)
     392             : 
     393             : {
     394       60606 :     if (psTable->pszRawData != nullptr)
     395       60576 :         return;
     396             : 
     397             :     /* -------------------------------------------------------------------- */
     398             :     /*      Ingest whole file.                                              */
     399             :     /* -------------------------------------------------------------------- */
     400          30 :     if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
     401             :     {
     402           0 :         CPLError(CE_Failure, CPLE_FileIO,
     403             :                  "Failed using seek end and tell to get file length: %s",
     404             :                  psTable->pszFilename);
     405           0 :         return;
     406             :     }
     407          30 :     const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
     408          30 :     if (static_cast<long>(nFileLen) == -1)
     409             :     {
     410           0 :         CPLError(CE_Failure, CPLE_FileIO,
     411             :                  "Failed using seek end and tell to get file length: %s",
     412             :                  psTable->pszFilename);
     413           0 :         return;
     414             :     }
     415          30 :     VSIRewindL(psTable->fp);
     416             : 
     417          30 :     psTable->pszRawData = static_cast<char *>(
     418          30 :         VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
     419          30 :     if (psTable->pszRawData == nullptr)
     420           0 :         return;
     421          30 :     if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
     422          30 :                   psTable->fp) != static_cast<size_t>(nFileLen))
     423             :     {
     424           0 :         CPLFree(psTable->pszRawData);
     425           0 :         psTable->pszRawData = nullptr;
     426             : 
     427           0 :         CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
     428             :                  psTable->pszFilename);
     429           0 :         return;
     430             :     }
     431             : 
     432          30 :     psTable->pszRawData[nFileLen] = '\0';
     433             : 
     434             :     /* -------------------------------------------------------------------- */
     435             :     /*      Get count of newlines so we can allocate line array.            */
     436             :     /* -------------------------------------------------------------------- */
     437          30 :     int nMaxLineCount = 0;
     438      279158 :     for (int i = 0; i < static_cast<int>(nFileLen); i++)
     439             :     {
     440      279128 :         if (psTable->pszRawData[i] == 10)
     441        6329 :             nMaxLineCount++;
     442             :     }
     443             : 
     444          30 :     psTable->papszLines =
     445          30 :         static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
     446          30 :     if (psTable->papszLines == nullptr)
     447           0 :         return;
     448             : 
     449             :     /* -------------------------------------------------------------------- */
     450             :     /*      Build a list of record pointers into the raw data buffer        */
     451             :     /*      based on line terminators.  Zero terminate the line             */
     452             :     /*      strings.                                                        */
     453             :     /* -------------------------------------------------------------------- */
     454             :     /* skip header line */
     455          30 :     char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
     456             : 
     457          30 :     int iLine = 0;
     458        6329 :     while (pszThisLine != nullptr && iLine < nMaxLineCount)
     459             :     {
     460        6299 :         if (pszThisLine[0] != '#')
     461        6288 :             psTable->papszLines[iLine++] = pszThisLine;
     462        6299 :         pszThisLine = CSVFindNextLine(pszThisLine);
     463             :     }
     464             : 
     465          30 :     psTable->nLineCount = iLine;
     466             : 
     467             :     /* -------------------------------------------------------------------- */
     468             :     /*      Allocate and populate index array.  Ensure they are in          */
     469             :     /*      ascending order so that binary searches can be done on the      */
     470             :     /*      array.                                                          */
     471             :     /* -------------------------------------------------------------------- */
     472          30 :     psTable->panLineIndex = static_cast<int *>(
     473          30 :         VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
     474          30 :     if (psTable->panLineIndex == nullptr)
     475           0 :         return;
     476             : 
     477        6232 :     for (int i = 0; i < psTable->nLineCount; i++)
     478             :     {
     479        6204 :         psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
     480             : 
     481        6204 :         if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
     482             :         {
     483           2 :             CPLFree(psTable->panLineIndex);
     484           2 :             psTable->panLineIndex = nullptr;
     485           2 :             break;
     486             :         }
     487             :     }
     488             : 
     489          30 :     psTable->iLastLine = -1;
     490             : 
     491             :     /* -------------------------------------------------------------------- */
     492             :     /*      We should never need the file handle again, so close it.        */
     493             :     /* -------------------------------------------------------------------- */
     494          30 :     VSIFCloseL(psTable->fp);
     495          30 :     psTable->fp = nullptr;
     496             : }
     497             : 
     498       60606 : static void CSVIngest(const char *pszFilename)
     499             : 
     500             : {
     501       60606 :     CSVTable *psTable = CSVAccess(pszFilename);
     502       60606 :     if (psTable == nullptr)
     503             :     {
     504           0 :         CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
     505             :                  pszFilename);
     506           0 :         return;
     507             :     }
     508       60606 :     CSVIngest(psTable);
     509             : }
     510             : 
     511             : /************************************************************************/
     512             : /*                        CSVDetectSeperator()                          */
     513             : /************************************************************************/
     514             : 
     515             : /** Detect which field separator is used.
     516             :  *
     517             :  * Currently, it can detect comma, semicolon, space, tabulation or pipe.
     518             :  * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
     519             :  * most occurrences will be selected (and a warning emitted).
                     :  * When several candidates share the highest count, comma is preferred
                     :  * over semicolon, semicolon over tabulation, and tabulation over pipe.
                     :  * Space is only selected when none of the other candidates is present.
                     :  * Candidate characters located inside double-quoted strings are not
                     :  * counted.
     520             :  * If no separator found, comma will be considered as the separator.
     521             :  *
                     :  * @param pszLine Nul-terminated line of text to analyze.
                     :  *
     522             :  * @return ',', ';', ' ', tabulation character or '|'.
     523             :  */
     524         597 : char CSVDetectSeperator(const char *pszLine)
     525             : {
     526         597 :     bool bInString = false;
     527         597 :     int nCountComma = 0;
     528         597 :     int nCountSemicolon = 0;
     529         597 :     int nCountTab = 0;
     530         597 :     int nCountPipe = 0;
     531         597 :     int nCountSpace = 0;
     532             : /* Tally each candidate character met outside of a quoted string. */
     533       26802 :     for (; *pszLine != '\0'; pszLine++)
     534             :     {
     535       26205 :         if (!bInString && *pszLine == ',')
     536             :         {
     537        2133 :             nCountComma++;
     538             :         }
     539       24072 :         else if (!bInString && *pszLine == ';')
     540             :         {
     541          10 :             nCountSemicolon++;
     542             :         }
     543       24062 :         else if (!bInString && *pszLine == '\t')
     544             :         {
     545          29 :             nCountTab++;
     546             :         }
     547       24033 :         else if (!bInString && *pszLine == '|')
     548             :         {
     549           9 :             nCountPipe++;
     550             :         }
     551       24024 :         else if (!bInString && *pszLine == ' ')
     552             :         {
     553         290 :             nCountSpace++;
     554             :         }
     555       23734 :         else if (*pszLine == '"')
     556             :         {
     557         519 :             if (!bInString || pszLine[1] != '"')
     558             :             {
     559         519 :                 bInString = !bInString;
     560         519 :                 continue;
     561             :             }
     562             :             else /* doubled quotes in string resolve to one quote */
     563             :             {
     564           0 :                 pszLine++;
     565             :             }
     566             :         }
     567             :     }
     568             : /* Space is left out of the maximum: it is only used as a fallback. */
     569             :     const int nMaxCountExceptSpace =
     570             :         std::max(std::max(nCountComma, nCountSemicolon),
     571         597 :                  std::max(nCountTab, nCountPipe));
     572         597 :     char chDelimiter = ',';
     573         597 :     if (nMaxCountExceptSpace == 0)
     574             :     {
     575          35 :         if (nCountSpace > 0)
     576           9 :             chDelimiter = ' ';
     577             :     }
     578             :     else
     579             :     {
     580         562 :         bool bWarn = false;  // set when another non-space candidate occurs
     581         562 :         if (nCountComma == nMaxCountExceptSpace)
     582             :         {
     583         546 :             chDelimiter = ',';
     584         546 :             bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
     585             :         }
     586          16 :         else if (nCountSemicolon == nMaxCountExceptSpace)
     587             :         {
     588           5 :             chDelimiter = ';';
     589           5 :             bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
     590             :         }
     591          11 :         else if (nCountTab == nMaxCountExceptSpace)
     592             :         {
     593           6 :             chDelimiter = '\t';
     594           6 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
     595             :         }
     596             :         else /* if( nCountPipe == nMaxCountExceptSpace ) */
     597             :         {
     598           5 :             chDelimiter = '|';
     599           5 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
     600             :         }
     601         562 :         if (bWarn)
     602             :         {
     603           6 :             CPLError(CE_Warning, CPLE_AppDefined,
     604             :                      "Selecting '%c' as CSV field separator, but "
     605             :                      "other candidate separator(s) have been found.",
     606             :                      chDelimiter);
     607             :         }
     608             :     }
     609             : 
     610         597 :     return chDelimiter;
     611             : }
     612             : 
     613             : /************************************************************************/
     614             : /*                      CSVReadParseLineGeneric()                       */
     615             : /*                                                                      */
     616             : /*      Read one line, and return split into fields.  The return        */
     617             : /*      result is a stringlist, in the sense of the CSL functions.      */
     618             : /************************************************************************/
     619             : /* Implementation shared by CSVReadParseLine2() and CSVReadParseLine3L().
                     :  *
                     :  * Reads one line through pfnReadLine(fp, nMaxLineSize) and splits it
                     :  * into fields.  When bHonourStrings is set and a double-quoted string
                     :  * is still open at the end of the line, further lines are read and
                     :  * appended (joined with a newline character) until the quotes are
                     :  * balanced, so that a quoted field may span several physical lines.
                     :  *
                     :  * Returns the list of fields, or NULL when no line could be read,
                     :  * when the input ends inside a quoted string (an error is emitted),
                     :  * or when a std::exception is caught while assembling the line.
                     :  */
     620             : static char **
     621       57364 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
     622             :                         size_t nMaxLineSize, const char *pszDelimiter,
     623             :                         bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
     624             :                         bool bMergeDelimiter, bool bSkipBOM)
     625             : {
     626       57364 :     const char *pszLine = pfnReadLine(fp, nMaxLineSize);
     627       57364 :     if (pszLine == nullptr)
     628        1365 :         return nullptr;
     629             : 
     630       55999 :     if (bSkipBOM)
     631             :     {
     632             :         // Skip BOM.
     633       55632 :         const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
     634       55632 :         if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
     635           4 :             pszLine += 3;
     636             :     }
     637             : 
     638             :     // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
     639       55999 :     if (!bHonourStrings)
     640             :     {
     641           2 :         return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
     642             :     }
     643             : 
     644             :     // If there are no quotes, then this is the simple case.
     645             :     // Parse, and return tokens.
     646       55997 :     if (strchr(pszLine, '\"') == nullptr)
     647       48355 :         return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
     648       48355 :                             bMergeDelimiter);
     649             : /* Quotes are present: scan the line to find out whether a quoted
                     :  * string is left open at its end. */
     650        7642 :     const size_t nDelimiterLength = strlen(pszDelimiter);
     651        7642 :     bool bInString = false;           // keep in that scope !
     652       15284 :     std::string osWorkLine(pszLine);  // keep in that scope !
     653        7642 :     size_t i = 0;                     // keep in that scope !
     654             : /* i and osWorkLine persist across iterations of the outer loop so that
                     :  * scanning resumes where it stopped once a line has been appended;
                     :  * bInString is also read after the try block. */
     655             :     try
     656             :     {
     657             :         while (true)
     658             :         {
     659      792644 :             for (; i < osWorkLine.size(); ++i)
     660             :             {
     661      784246 :                 if (osWorkLine[i] == '\"')
     662             :                 {
     663       59057 :                     if (!bInString)
     664             :                     {
     665             :                         // Only consider " as the start of a quoted string
     666             :                         // if it is the first character of the line, or
     667             :                         // if it is immediately after the field delimiter.
     668       52238 :                         if (i == 0 ||
     669       23009 :                             (i >= nDelimiterLength &&
     670       23009 :                              osWorkLine.compare(i - nDelimiterLength,
     671             :                                                 nDelimiterLength, pszDelimiter,
     672             :                                                 nDelimiterLength) == 0))
     673             :                         {
     674       29142 :                             bInString = true;
     675             :                         }
     676             :                     }
     677       56781 :                     else if (i + 1 < osWorkLine.size() &&
     678       26953 :                              osWorkLine[i + 1] == '"')
     679             :                     {
     680             :                         // Escaped double quote in a quoted string
     681         687 :                         ++i;
     682             :                     }
     683             :                     else
     684             :                     {
     685       29141 :                         bInString = false;
     686             :                     }
     687             :                 }
     688             :             }
     689             : 
     690        8398 :             if (!bInString)
     691             :             {
     692        7641 :                 return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
     693             :                                     bKeepLeadingAndClosingQuotes,
     694        7641 :                                     bMergeDelimiter);
     695             :             }
     696             : /* Still inside a quoted string: the field continues on the next line. */
     697         757 :             const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
     698         757 :             if (pszNewLine == nullptr)
     699           1 :                 break;
     700             : 
     701         756 :             osWorkLine.append("\n");
     702         756 :             osWorkLine.append(pszNewLine);
     703         756 :         }
     704             :     }
     705           0 :     catch (const std::exception &e)
     706             :     {
     707           0 :         CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
     708             :     }
     709             : 
     710           1 :     if (bInString)
     711             :     {
     712           1 :         CPLError(CE_Failure, CPLE_AppDefined,
     713             :                  "CSV file has unbalanced number of double-quotes. Corrupted "
     714             :                  "data will likely be returned");
     715             :     }
     716             : 
     717           1 :     return nullptr;
     718             : }
     719             : 
     720             : /************************************************************************/
     721             : /*                          CSVReadParseLine()                          */
     722             : /*                                                                      */
     723             : /*      Read one line, and return split into fields.  The return        */
     724             : /*      result is a stringlist, in the sense of the CSL functions.      */
     725             : /*                                                                      */
     726             : /*      Deprecated.  Replaced by CSVReadParseLineL().                   */
     727             : /************************************************************************/
     728             : /* Convenience wrapper: calls CSVReadParseLine2() with a comma as the
                     :  * delimiter and returns its result unchanged. */
     729           0 : char **CSVReadParseLine(FILE *fp)
     730             : {
     731           0 :     return CSVReadParseLine2(fp, ',');
     732             : }
     733             : /* Line reader callback for FILE streams, in the form expected by
                     :  * CSVReadParseLineGeneric(): fp is cast back to FILE * and the read
                     :  * is delegated to CPLReadLine().  The maximum line size argument is
                     :  * ignored. */
     734           0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
     735             : {
     736           0 :     return CPLReadLine(static_cast<FILE *>(fp));
     737             : }
     738             : /* Read one line from a FILE stream and split it on the single
                     :  * character chDelimiter.  Delegates to CSVReadParseLineGeneric() with
                     :  * quoted strings honoured, enclosing quotes not kept, consecutive
                     :  * delimiters not merged and a leading UTF-8 BOM skipped.
                     :  * Returns NULL when fp is NULL. */
     739           0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
     740             : {
     741           0 :     CPLAssert(fp != nullptr);
     742           0 :     if (fp == nullptr)
     743           0 :         return nullptr;
     744             : /* The generic reader takes the delimiter as a nul-terminated string. */
     745           0 :     char szDelimiter[2] = {chDelimiter, 0};
     746           0 :     return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
     747             :                                    0,  // nMaxLineSize,
     748             :                                    szDelimiter,
     749             :                                    true,   // bHonourStrings
     750             :                                    false,  // bKeepLeadingAndClosingQuotes
     751             :                                    false,  // bMergeDelimiter
     752           0 :                                    true /* bSkipBOM */);
     753             : }
     754             : 
     755             : /************************************************************************/
     756             : /*                          CSVReadParseLineL()                         */
     757             : /*                                                                      */
     758             : /*      Read one line, and return split into fields.  The return        */
     759             : /*      result is a stringlist, in the sense of the CSL functions.      */
     760             : /*                                                                      */
     761             : /*      Replaces CSVReadParseLine().  These functions use the VSI       */
     762             : /*      layer to allow reading from other file containers.              */
     763             : /************************************************************************/
     764             : /* Convenience wrapper: calls CSVReadParseLine2L() with a comma as the
                     :  * delimiter and returns its result unchanged. */
     765        3910 : char **CSVReadParseLineL(VSILFILE *fp)
     766             : {
     767        3910 :     return CSVReadParseLine2L(fp, ',');
     768             : }
     769             : 
     770        3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
     771             : /* Single-character-delimiter front end to CSVReadParseLine3L(): no
                     :  * maximum line size (0), quoted strings honoured, enclosing quotes
                     :  * not kept, consecutive delimiters not merged, leading UTF-8 BOM
                     :  * skipped.  Returns NULL when fp is NULL. */
     772             : {
     773        3910 :     CPLAssert(fp != nullptr);
     774        3910 :     if (fp == nullptr)
     775           0 :         return nullptr;
     776             : /* CSVReadParseLine3L() takes the delimiter as a nul-terminated string. */
     777        3910 :     char szDelimiter[2] = {chDelimiter, 0};
     778        3910 :     return CSVReadParseLine3L(fp,
     779             :                               0,  // nMaxLineSize
     780             :                               szDelimiter,
     781             :                               true,   // bHonourStrings
     782             :                               false,  // bKeepLeadingAndClosingQuotes
     783             :                               false,  // bMergeDelimiter
     784        3910 :                               true /* bSkipBOM */);
     785             : }
     786             : 
     787             : /************************************************************************/
     788             : /*                      ReadLineLargeFile()                             */
     789             : /************************************************************************/
     790             : /* Line reader callback for VSILFILE handles, in the form expected by
                     :  * CSVReadParseLineGeneric(): fp is cast back to VSILFILE * and the
                     :  * read is delegated to CPLReadLine3L().  A nMaxLineSize of 0 is
                     :  * passed on as -1; the length stored in nBufLength is not used.
                     :  * NOTE(review): nMaxLineSize is narrowed to int by the static_cast,
                     :  * so values above INT_MAX would not be preserved - confirm that
                     :  * callers never pass such values. */
     791       58121 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
     792             : {
     793       58121 :     int nBufLength = 0;
     794       58121 :     return CPLReadLine3L(static_cast<VSILFILE *>(fp),
     795             :                          nMaxLineSize == 0 ? -1
     796             :                                            : static_cast<int>(nMaxLineSize),
     797      116242 :                          &nBufLength, nullptr);
     798             : }
     799             : 
     800             : /************************************************************************/
     801             : /*                      CSVReadParseLine3L()                            */
     802             : /*                                                                      */
     803             : /*      Read one line, and return split into fields.  The return        */
     804             : /*      result is a stringlist, in the sense of the CSL functions.      */
     805             : /************************************************************************/
     806             : 
     807             : /** Read one line, and return split into fields.
     808             :  * The return result is a stringlist, in the sense of the CSL functions.
     809             :  *
                     :  * When bHonourStrings is true and a double-quoted string is still open
                     :  * at the end of a line, the following lines are read as well and
                     :  * joined with a newline character, so that a quoted field may span
                     :  * several physical lines.
                     :  *
     810             :  * @param fp File handle. Must not be NULL
     811             :  * @param nMaxLineSize Maximum line size, or 0 for unlimited.
     812             :  * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
     813             :  * @param bHonourStrings Should be true, unless double quotes should not be
     814             :  *                       considered when separating fields.
     815             :  * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
     816             :  *                                     quote characters should be kept.
     817             :  * @param bMergeDelimiter Whether consecutive delimiters should be considered
     818             :  *                        as a single one. Should generally be set to false.
     819             :  * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
                     :  *
                     :  * @return the list of fields, or NULL if no line could be read or if
                     :  *         the input ends inside a quoted string.
     820             :  */
     821       57364 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
     822             :                           const char *pszDelimiter, bool bHonourStrings,
     823             :                           bool bKeepLeadingAndClosingQuotes,
     824             :                           bool bMergeDelimiter, bool bSkipBOM)
     825             : 
     826             : {
     827       57364 :     return CSVReadParseLineGeneric(
     828             :         fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
     829       57364 :         bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
     830             : }
     831             : 
     832             : /************************************************************************/
     833             : /*                             CSVCompare()                             */
     834             : /*                                                                      */
     835             : /*      Compare a field to a search value using a particular            */
     836             : /*      criteria.                                                       */
     837             : /************************************************************************/
     838             : /* Returns true when pszFieldValue matches pszTarget under eCriteria:
                     :  *  - CC_ExactString: the strings are equal according to strcmp();
                     :  *  - CC_ApproxString: the strings are equal according to EQUAL()
                     :  *    (presumably a case-insensitive comparison - TODO confirm);
                     :  *  - CC_Integer: the field is classified as CPL_VALUE_INTEGER by
                     :  *    CPLGetValueType() and atoi() gives the same value for both.
                     :  * Any other criteria yields false. */
     839         610 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
     840             :                        CSVCompareCriteria eCriteria)
     841             : 
     842             : {
     843         610 :     if (eCriteria == CC_ExactString)
     844             :     {
     845           0 :         return (strcmp(pszFieldValue, pszTarget) == 0);
     846             :     }
     847         610 :     else if (eCriteria == CC_ApproxString)
     848             :     {
     849         270 :         return EQUAL(pszFieldValue, pszTarget);
     850             :     }
     851         340 :     else if (eCriteria == CC_Integer)
     852             :     {
     853         640 :         return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
     854         640 :                 atoi(pszFieldValue) == atoi(pszTarget));
     855             :     }
     856             : 
     857           0 :     return false;
     858             : }
     859             : 
     860             : /************************************************************************/
     861             : /*                            CSVScanLines()                            */
     862             : /*                                                                      */
     863             : /*      Read the file scanline for lines where the key field equals     */
     864             : /*      the indicated value with the suggested comparison criteria.     */
     865             : /*      Return the first matching line split into fields.               */
     866             : /*                                                                      */
     867             : /*      Deprecated.  Replaced by CSVScanLinesL().                       */
     868             : /************************************************************************/
     869             : /* Reads lines with CSVReadParseLine() until one is selected, and
                     :  * returns its fields; returns NULL as soon as CSVReadParseLine()
                     :  * returns NULL.  A line with fewer than iKeyField + 1 fields is never
                     :  * selected.  With CC_Integer, a line is selected when atoi() of its
                     :  * key field equals atoi(pszValue); in every other case the decision
                     :  * is left to CSVCompare().  Rejected lines are released with
                     :  * CSLDestroy(). */
     870           0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
     871             :                     CSVCompareCriteria eCriteria)
     872             : 
     873             : {
     874           0 :     CPLAssert(pszValue != nullptr);
     875           0 :     CPLAssert(iKeyField >= 0);
     876           0 :     CPLAssert(fp != nullptr);
     877             : 
     878           0 :     bool bSelected = false;
     879           0 :     const int nTestValue = atoi(pszValue);
     880           0 :     char **papszFields = nullptr;
     881             : 
     882           0 :     while (!bSelected)
     883             :     {
     884           0 :         papszFields = CSVReadParseLine(fp);
     885           0 :         if (papszFields == nullptr)
     886           0 :             return nullptr;
     887             : 
     888           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     889             :         {
     890             :             /* not selected */
     891             :         }
     892           0 :         else if (eCriteria == CC_Integer &&
     893           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     894             :         {
     895           0 :             bSelected = true;
     896             :         }
     897             :         else
     898             :         {
     899           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     900             :         }
     901             : 
     902           0 :         if (!bSelected)
     903             :         {
     904           0 :             CSLDestroy(papszFields);
     905           0 :             papszFields = nullptr;
     906             :         }
     907             :     }
     908             : 
     909           0 :     return papszFields;
     910             : }
     911             : 
     912             : /************************************************************************/
     913             : /*                            CSVScanLinesL()                           */
     914             : /*                                                                      */
     915             : /*      Read the file scanline for lines where the key field equals     */
     916             : /*      the indicated value with the suggested comparison criteria.     */
     917             : /*      Return the first matching line split into fields.               */
     918             : /************************************************************************/
     919             : /* Reads lines with CSVReadParseLineL() until one is selected, and
                     :  * returns its fields; returns NULL as soon as CSVReadParseLineL()
                     :  * returns NULL.  A line with fewer than iKeyField + 1 fields is never
                     :  * selected.  With CC_Integer, a line is selected when atoi() of its
                     :  * key field equals atoi(pszValue); in every other case the decision
                     :  * is left to CSVCompare().  Rejected lines are released with
                     :  * CSLDestroy(). */
     920           0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
     921             :                      CSVCompareCriteria eCriteria)
     922             : 
     923             : {
     924           0 :     CPLAssert(pszValue != nullptr);
     925           0 :     CPLAssert(iKeyField >= 0);
     926           0 :     CPLAssert(fp != nullptr);
     927             : 
     928           0 :     bool bSelected = false;
     929           0 :     const int nTestValue = atoi(pszValue);
     930           0 :     char **papszFields = nullptr;
     931             : 
     932           0 :     while (!bSelected)
     933             :     {
     934           0 :         papszFields = CSVReadParseLineL(fp);
     935           0 :         if (papszFields == nullptr)
     936           0 :             return nullptr;
     937             : 
     938           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     939             :         {
     940             :             /* not selected */
     941             :         }
     942           0 :         else if (eCriteria == CC_Integer &&
     943           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     944             :         {
     945           0 :             bSelected = true;
     946             :         }
     947             :         else
     948             :         {
     949           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     950             :         }
     951             : 
     952           0 :         if (!bSelected)
     953             :         {
     954           0 :             CSLDestroy(papszFields);
     955           0 :             papszFields = nullptr;
     956             :         }
     957             :     }
     958             : 
     959           0 :     return papszFields;
     960             : }
     961             : 
     962             : /************************************************************************/
     963             : /*                        CSVScanLinesIndexed()                         */
     964             : /*                                                                      */
     965             : /*      Read the file scanline for lines where the key field equals     */
     966             : /*      the indicated value with the suggested comparison criteria.     */
     967             : /*      Return the first matching line split into fields.               */
     968             : /************************************************************************/
     969             : /* Looks nKeyValue up in psTable->panLineIndex (which must not be NULL)
                     :  * with a binary search over its nLineCount entries.  The search
                     :  * assumes the index is sorted in ascending order - TODO confirm where
                     :  * the index is built.  On a hit, the position is moved back to the
                     :  * first entry holding the same key (setting psTable->bNonUniqueKey
                     :  * when such a duplicate exists), stored in psTable->iLastLine, and
                     :  * the corresponding entry of psTable->papszLines is returned split on
                     :  * commas.  Returns NULL when the key is not found. */
     970          23 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
     971             : 
     972             : {
     973          23 :     CPLAssert(psTable->panLineIndex != nullptr);
     974             : 
     975             :     /* -------------------------------------------------------------------- */
     976             :     /*      Find target record with binary search.                          */
     977             :     /* -------------------------------------------------------------------- */
     978          23 :     int iTop = psTable->nLineCount - 1;
     979          23 :     int iBottom = 0;
     980          23 :     int iResult = -1;
     981             : 
     982         167 :     while (iTop >= iBottom)
     983             :     {
     984         167 :         const int iMiddle = (iTop + iBottom) / 2;
     985         167 :         if (psTable->panLineIndex[iMiddle] > nKeyValue)
     986          98 :             iTop = iMiddle - 1;
     987          69 :         else if (psTable->panLineIndex[iMiddle] < nKeyValue)
     988          46 :             iBottom = iMiddle + 1;
     989             :         else
     990             :         {
     991          23 :             iResult = iMiddle;
     992             :             // if a key is not unique, select the first instance of it.
     993          23 :             while (iResult > 0 &&
     994          23 :                    psTable->panLineIndex[iResult - 1] == nKeyValue)
     995             :             {
     996           0 :                 psTable->bNonUniqueKey = true;
     997           0 :                 iResult--;
     998             :             }
     999          23 :             break;
    1000             :         }
    1001             :     }
    1002             : 
    1003          23 :     if (iResult == -1)
    1004           0 :         return nullptr;
    1005             : 
    1006             :     /* -------------------------------------------------------------------- */
    1007             :     /*      Parse target line, and update iLastLine indicator.              */
    1008             :     /* -------------------------------------------------------------------- */
    1009          23 :     psTable->iLastLine = iResult;
    1010             : 
    1011          23 :     return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
    1012             : }
    1013             : 
    1014             : /************************************************************************/
    1015             : /*                        CSVScanLinesIngested()                        */
    1016             : /*                                                                      */
    1017             : /*      Read the file scanline for lines where the key field equals     */
    1018             : /*      the indicated value with the suggested comparison criteria.     */
    1019             : /*      Return the first matching line split into fields.               */
    1020             : /************************************************************************/
    1021             : /* Searches the lines held in memory by psTable.  When iKeyField is 0,
                     :  * eCriteria is CC_Integer and psTable->panLineIndex is available, the
                     :  * lookup is delegated to CSVScanLinesIndexed().  Otherwise lines are
                     :  * examined one by one starting at psTable->iLastLine + 1, and
                     :  * psTable->iLastLine is advanced for each line examined.  Returns the
                     :  * fields of the first selected line, or NULL once nLineCount is
                     :  * reached without a match.  Selection rules: a line with fewer than
                     :  * iKeyField + 1 fields is never selected; with CC_Integer a line is
                     :  * selected when atoi() of its key field equals atoi(pszValue);
                     :  * every other case is decided by CSVCompare(). */
    1022          30 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
    1023             :                                    const char *pszValue,
    1024             :                                    CSVCompareCriteria eCriteria)
    1025             : 
    1026             : {
    1027          30 :     CPLAssert(pszValue != nullptr);
    1028          30 :     CPLAssert(iKeyField >= 0);
    1029             : 
    1030          30 :     const int nTestValue = atoi(pszValue);
    1031             : 
    1032             :     /* -------------------------------------------------------------------- */
    1033             :     /*      Short cut for indexed files.                                    */
    1034             :     /* -------------------------------------------------------------------- */
    1035          30 :     if (iKeyField == 0 && eCriteria == CC_Integer &&
    1036          23 :         psTable->panLineIndex != nullptr)
    1037          23 :         return CSVScanLinesIndexed(psTable, nTestValue);
    1038             : 
    1039             :     /* -------------------------------------------------------------------- */
    1040             :     /*      Scan from in-core lines.                                        */
    1041             :     /* -------------------------------------------------------------------- */
    1042           7 :     char **papszFields = nullptr;
    1043           7 :     bool bSelected = false;
    1044             : 
    1045         484 :     while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
    1046             :     {
    1047         477 :         psTable->iLastLine++;
    1048         477 :         papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
    1049             :                                    false, false);
    1050             : 
    1051         477 :         if (CSLCount(papszFields) < iKeyField + 1)
    1052             :         {
    1053             :             /* not selected */
    1054             :         }
    1055         477 :         else if (eCriteria == CC_Integer &&
    1056         242 :                  atoi(papszFields[iKeyField]) == nTestValue)
    1057             :         {
    1058           2 :             bSelected = true;
    1059             :         }
    1060             :         else
    1061             :         {
    1062         475 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
    1063             :         }
    1064             : 
    1065         477 :         if (!bSelected)
    1066             :         {
    1067         470 :             CSLDestroy(papszFields);
    1068         470 :             papszFields = nullptr;
    1069             :         }
    1070             :     }
    1071             : 
    1072           7 :     return papszFields;
    1073             : }
    1074             : 
    1075             : /************************************************************************/
    1076             : /*                            CSVRewind()                               */
    1077             : /*                                                                      */
    1078             : /*      Rewind a CSV file based on a passed in filename.                */
    1079             : /*      This is aimed at being used with CSVGetNextLine().              */
    1080             : /************************************************************************/
    1081             : // Does nothing when CSVAccess() provides no table for pszFilename.
    1082        1843 : void CSVRewind(const char *pszFilename)
    1083             : 
    1084             : {
    1085             :     /* -------------------------------------------------------------------- */
    1086             :     /*      Get access to the table.                                        */
    1087             :     /* -------------------------------------------------------------------- */
    1088        1843 :     CPLAssert(pszFilename != nullptr);
    1089             :     // iLastLine == -1 restarts iteration: CSVGetNextLine() increments it before indexing papszLines.
    1090        1843 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1091        1843 :     if (psTable != nullptr)
    1092        1843 :         psTable->iLastLine = -1;
    1093        1843 : }
    1094             : 
    1095             : /************************************************************************/
    1096             : /*                           CSVGetNextLine()                           */
    1097             : /*                                                                      */
    1098             : /*      Fetch the next line of a CSV file based on a passed in          */
    1099             : /*      filename.  Returns NULL at end of file, or if file is not       */
    1100             : /*      really established.                                             */
    1101             : /*      This ingests the whole file into memory if not already done.    */
    1102             : /*      When reaching end of file, CSVRewind() may be used to read      */
    1103             : /*      again from the beginning.                                       */
    1104             : /************************************************************************/
    1105             : // The returned list is psTable->papszRecFields; it is destroyed by the next call.
    1106       60462 : char **CSVGetNextLine(const char *pszFilename)
    1107             : 
    1108             : {
    1109             : 
    1110             :     /* -------------------------------------------------------------------- */
    1111             :     /*      Get access to the table.                                        */
    1112             :     /* -------------------------------------------------------------------- */
    1113       60462 :     CPLAssert(pszFilename != nullptr);
    1114             : 
    1115       60462 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1116       60462 :     if (psTable == nullptr)
    1117           0 :         return nullptr;
    1118             :     // Load the lines first: papszLines and nLineCount are read below.
    1119       60462 :     CSVIngest(psTable->pszFilename);
    1120             : 
    1121             :     /* -------------------------------------------------------------------- */
    1122             :     /*      If we use CSVGetNextLine() we can pretty much assume we have    */
    1123             :     /*      a non-unique key.                                               */
    1124             :     /* -------------------------------------------------------------------- */
    1125       60462 :     psTable->bNonUniqueKey = true;
    1126             :     // CSVScanFile() only reuses its cached record while this flag is false.
    1127             :     /* -------------------------------------------------------------------- */
    1128             :     /*      Do we have a next line available?  This only works for          */
    1129             :     /*      ingested tables I believe.                                      */
    1130             :     /* -------------------------------------------------------------------- */
    1131       60462 :     if (psTable->iLastLine + 1 >= psTable->nLineCount)
    1132         629 :         return nullptr;
    1133             :     // Advance the cursor, then replace the cached record with the split of that line.
    1134       59833 :     psTable->iLastLine++;
    1135       59833 :     CSLDestroy(psTable->papszRecFields);
    1136      119666 :     psTable->papszRecFields = CSVSplitLine(
    1137       59833 :         psTable->papszLines[psTable->iLastLine], ",", false, false);
    1138             : 
    1139       59833 :     return psTable->papszRecFields;
    1140             : }
    1141             : 
    1142             : /************************************************************************/
    1143             : /*                            CSVScanFile()                             */
    1144             : /*                                                                      */
    1145             : /*      Scan a whole file using criteria similar to above, but also     */
    1146             : /*      taking care of file opening and closing.                        */
    1147             : /************************************************************************/
    1148             : // Table overload: returns psTable->papszRecFields, set to whatever the scan produced.
    1149         144 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
    1150             :                           const char *pszValue, CSVCompareCriteria eCriteria)
    1151             : {
    1152         144 :     CSVIngest(psTable->pszFilename);
    1153             : 
    1154             :     /* -------------------------------------------------------------------- */
    1155             :     /*      Does the current record match the criteria?  If so, just        */
    1156             :     /*      return it again.                                                */
    1157             :     /* -------------------------------------------------------------------- */
    1158         144 :     if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
    1159         402 :         CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
    1160         114 :         !psTable->bNonUniqueKey)
    1161             :     {
    1162         114 :         return psTable->papszRecFields;
    1163             :     }
    1164             :     // The shortcut above is never taken once CSVGetNextLine() has set bNonUniqueKey.
    1165             :     /* -------------------------------------------------------------------- */
    1166             :     /*      Scan the file from the beginning, replacing the ``current       */
    1167             :     /*      record'' in our structure with the one that is found.           */
    1168             :     /* -------------------------------------------------------------------- */
    1169          30 :     psTable->iLastLine = -1;
    1170          30 :     CSLDestroy(psTable->papszRecFields);
    1171             :     // papszRecFields is stale from here until one of the branches below reassigns it.
    1172          30 :     if (psTable->pszRawData != nullptr)
    1173          30 :         psTable->papszRecFields =
    1174          30 :             CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
    1175             :     else
    1176             :     {
    1177           0 :         VSIRewindL(psTable->fp);
    1178           0 :         CPLReadLineL(psTable->fp); /* throw away the header line */
    1179             :         // No raw data in memory: scan the file handle from just after the header.
    1180           0 :         psTable->papszRecFields =
    1181           0 :             CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
    1182             :     }
    1183             : 
    1184          30 :     return psTable->papszRecFields;
    1185             : }
    1186             : 
    1187           4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
    1188             :                    CSVCompareCriteria eCriteria)
    1189             : // Filename overload: resolves the table, then defers to the CSVTable overload.
    1190             : {
    1191             :     /* -------------------------------------------------------------------- */
    1192             :     /*      Get access to the table.                                        */
    1193             :     /* -------------------------------------------------------------------- */
    1194           4 :     CPLAssert(pszFilename != nullptr);
    1195             :     // A negative field index is rejected before any table lookup.
    1196           4 :     if (iKeyField < 0)
    1197           0 :         return nullptr;
    1198             : 
    1199           4 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1200           4 :     if (psTable == nullptr)
    1201           0 :         return nullptr;
    1202             :     // The result is the record cached on the table (psTable->papszRecFields), not a copy.
    1203           4 :     return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
    1204             : }
    1205             : 
    1206             : /************************************************************************/
    1207             : /*                           CSVGetFieldId()                            */
    1208             : /*                                                                      */
    1209             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1210             : /*      and find the field with the indicated name.  Returns -1 if      */
    1211             : /*      it fails to find the field name.  Comparison is case            */
    1212             : /*      insensitive, but otherwise exact.  After this function has      */
    1213             : /*      been called the file pointer will be positioned just after      */
    1214             : /*      the first record.                                               */
    1215             : /*                                                                      */
    1216             : /*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
    1217             : /************************************************************************/
    1218             : 
    1219           0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
    1220             : // Returns the zero-based position of pszFieldName in the first record, or -1.
    1221             : {
    1222           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1223             : 
    1224           0 :     VSIRewind(fp);
    1225             :     // The parsed header list is owned locally and destroyed on both exit paths.
    1226           0 :     char **papszFields = CSVReadParseLine(fp);
    1227           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1228             :     {
    1229           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1230             :         {
    1231           0 :             CSLDestroy(papszFields);
    1232           0 :             return i;
    1233             :         }
    1234             :     }
    1235             :     // No field matched (the loop is also skipped when papszFields is null).
    1236           0 :     CSLDestroy(papszFields);
    1237             : 
    1238           0 :     return -1;
    1239             : }
    1240             : 
    1241             : /************************************************************************/
    1242             : /*                           CSVGetFieldIdL()                           */
    1243             : /*                                                                      */
    1244             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1245             : /*      and find the field with the indicated name.  Returns -1 if      */
    1246             : /*      it fails to find the field name.  Comparison is case            */
    1247             : /*      insensitive, but otherwise exact.  After this function has      */
    1248             : /*      been called the file pointer will be positioned just after      */
    1249             : /*      the first record.                                               */
    1250             : /************************************************************************/
    1251             : 
    1252           0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
    1253             : // Same search as CSVGetFieldId(), but on a VSILFILE handle.
    1254             : {
    1255           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1256             : 
    1257           0 :     VSIRewindL(fp);
    1258             :     // The parsed header list is owned locally and destroyed on both exit paths.
    1259           0 :     char **papszFields = CSVReadParseLineL(fp);
    1260           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1261             :     {
    1262           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1263             :         {
    1264           0 :             CSLDestroy(papszFields);
    1265           0 :             return i;
    1266             :         }
    1267             :     }
    1268             :     // No field matched (the loop is also skipped when papszFields is null).
    1269           0 :     CSLDestroy(papszFields);
    1270             : 
    1271           0 :     return -1;
    1272             : }
    1273             : 
    1274             : /************************************************************************/
    1275             : /*                         CSVGetFileFieldId()                          */
    1276             : /*                                                                      */
    1277             : /*      Same as CSVGetFieldId(), except that we get the file based      */
    1278             : /*      on filename, rather than having an existing handle.             */
    1279             : /************************************************************************/
    1280             : // Table overload: searches the field names cached on psTable; no file is read here.
    1281        7430 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
    1282             : 
    1283             : {
    1284             :     /* -------------------------------------------------------------------- */
    1285             :     /*      Find the requested field.                                       */
    1286             :     /* -------------------------------------------------------------------- */
    1287        7430 :     const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
    1288       18696 :     for (int i = 0; psTable->papszFieldNames != nullptr &&
    1289       18696 :                     psTable->papszFieldNames[i] != nullptr;
    1290             :          i++)
    1291             :     {  // The cached length is tested first, so EQUALN only runs on same-length names.
    1292       18696 :         if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
    1293       10433 :             EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
    1294             :         {
    1295        7430 :             return i;
    1296             :         }
    1297             :     }
    1298             :     // Not found, or the table has no field-name list.
    1299           0 :     return -1;
    1300             : }
    1301             : 
    1302        7150 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
    1303             : // Filename overload: returns -1 when no table is available or the field is not found.
    1304             : {
    1305             :     /* -------------------------------------------------------------------- */
    1306             :     /*      Get access to the table.                                        */
    1307             :     /* -------------------------------------------------------------------- */
    1308        7150 :     CPLAssert(pszFilename != nullptr);
    1309             : 
    1310        7150 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1311        7150 :     if (psTable == nullptr)
    1312           0 :         return -1;
    1313        7150 :     return CSVGetFileFieldId(psTable, pszFieldName);
    1314             : }
    1315             : 
    1316             : /************************************************************************/
    1317             : /*                         CSVScanFileByName()                          */
    1318             : /*                                                                      */
    1319             : /*      Same as CSVScanFile(), but using a field name instead of a      */
    1320             : /*      field number.                                                   */
    1321             : /************************************************************************/
    1322             : // Returns nullptr when the field name cannot be resolved to an index.
    1323           4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
    1324             :                          const char *pszValue, CSVCompareCriteria eCriteria)
    1325             : 
    1326             : {
    1327           4 :     const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
    1328           4 :     if (iKeyField == -1)
    1329           0 :         return nullptr;
    1330             :     // Field index resolved: the rest is the index-based CSVScanFile() overload.
    1331           4 :     return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
    1332             : }
    1333             : 
    1334             : /************************************************************************/
    1335             : /*                            CSVGetField()                             */
    1336             : /*                                                                      */
    1337             : /*      The all-in-one function to fetch a particular field value       */
    1338             : /*      from a CSV file.  Note this function will return an empty       */
    1339             : /*      string, rather than NULL if it fails to find the desired        */
    1340             : /*      value for some reason.  The caller can't establish that the     */
    1341             : /*      fetch failed.                                                   */
    1342             : /************************************************************************/
    1343             : // On success the result points into the record cached on the table, not a copy.
    1344         140 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
    1345             :                         const char *pszKeyFieldValue,
    1346             :                         CSVCompareCriteria eCriteria,
    1347             :                         const char *pszTargetField)
    1348             : 
    1349             : {
    1350             :     /* -------------------------------------------------------------------- */
    1351             :     /*      Find the table.                                                 */
    1352             :     /* -------------------------------------------------------------------- */
    1353         140 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1354         140 :     if (psTable == nullptr)
    1355           0 :         return "";
    1356             :     // Resolve the key column by name; an unknown name yields the empty string.
    1357         140 :     const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
    1358         140 :     if (iKeyField == -1)
    1359           0 :         return "";
    1360             : 
    1361             :     /* -------------------------------------------------------------------- */
    1362             :     /*      Find the correct record.                                        */
    1363             :     /* -------------------------------------------------------------------- */
    1364             :     char **papszRecord =
    1365         140 :         CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
    1366         140 :     if (papszRecord == nullptr)
    1367           0 :         return "";
    1368             : 
    1369             :     /* -------------------------------------------------------------------- */
    1370             :     /*      Figure out which field we want out of this.                     */
    1371             :     /* -------------------------------------------------------------------- */
    1372         140 :     const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
    1373         140 :     if (iTargetField < 0)
    1374           0 :         return "";
    1375             :     // Walk to the null terminator so a record shorter than iTargetField yields "" safely.
    1376         388 :     for (int i = 0; papszRecord[i] != nullptr; ++i)
    1377             :     {
    1378         388 :         if (i == iTargetField)
    1379         140 :             return papszRecord[iTargetField];
    1380             :     }
    1381           0 :     return "";
    1382             : }
    1383             : 
    1384             : /************************************************************************/
    1385             : /*                       GDALDefaultCSVFilename()                       */
    1386             : /************************************************************************/
    1387             : 
    1388             : typedef struct
    1389             : {
    1390             :     char szPath[512];  // Receives a bounded copy of the basename in the fallback path of GDALDefaultCSVFilename().
    1391             :     bool bCSVFinderInitialized;  // Set on the first failed lookup, before GDAL_DATA is pushed and the lookup retried.
    1392             : } DefaultCSVFileNameTLS;
    1393             : 
    1394        2488 : const char *GDALDefaultCSVFilename(const char *pszBasename)
    1395             : // Returns a table filename, a CPLFindFile() result, the TLS szPath buffer, or a literal.
    1396             : {
    1397             :     /* -------------------------------------------------------------------- */
    1398             :     /*      Do we already have this file accessed?  If so, just return      */
    1399             :     /*      the existing path without any further probing.                  */
    1400             :     /* -------------------------------------------------------------------- */
    1401        2488 :     int bMemoryError = FALSE;
    1402             :     CSVTable **ppsCSVTableList =
    1403        2488 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
    1404        2488 :     if (ppsCSVTableList != nullptr)
    1405             :     {
    1406        2482 :         const size_t nBasenameLen = strlen(pszBasename);
    1407             :         // Walk the linked list of tables stored under the CTLS_CSVTABLEPTR slot.
    1408       23118 :         for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
    1409       20636 :              psTable = psTable->psNext)
    1410             :         {
    1411       22566 :             const size_t nFullLen = strlen(psTable->pszFilename);
    1412             :             // Match when pszFilename ends with pszBasename right after a slash or backslash.
    1413       22566 :             if (nFullLen > nBasenameLen &&
    1414       22566 :                 strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
    1415        1930 :                        pszBasename) == 0 &&
    1416        1930 :                 strchr("/\\",
    1417        1930 :                        psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
    1418             :                     nullptr)
    1419             :             {
    1420        1930 :                 return psTable->pszFilename;
    1421             :             }
    1422             :         }
    1423             :     }
    1424             : 
    1425             :     /* -------------------------------------------------------------------- */
    1426             :     /*      Otherwise we need to look harder for it.                        */
    1427             :     /* -------------------------------------------------------------------- */
    1428             :     DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1429         558 :         CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
    1430         558 :     if (pTLSData == nullptr && !bMemoryError)
    1431             :     {
    1432             :         pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1433           5 :             VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
    1434           5 :         if (pTLSData)
    1435           5 :             CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
    1436             :     }
    1437         558 :     if (pTLSData == nullptr)
    1438           0 :         return "/not_existing_dir/not_existing_path";
    1439             :     // From here on pTLSData is valid; the placeholder above covers the no-storage case.
    1440         558 :     const char *pszResult = CPLFindFile("gdal", pszBasename);
    1441             : 
    1442         558 :     if (pszResult != nullptr)
    1443          43 :         return pszResult;
    1444             :     // First miss with this TLS data: push GDAL_DATA (if set) as a finder location, retry once.
    1445         515 :     if (!pTLSData->bCSVFinderInitialized)
    1446             :     {
    1447           2 :         pTLSData->bCSVFinderInitialized = true;
    1448             : 
    1449           2 :         if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
    1450           2 :             CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
    1451             : 
    1452           2 :         pszResult = CPLFindFile("gdal", pszBasename);
    1453             : 
    1454           2 :         if (pszResult != nullptr)
    1455           0 :             return pszResult;
    1456             :     }
    1457             :     // Last resort: hand back a bounded copy of the basename held in szPath.
    1458             :     // For systems like sandboxes that do not allow other checks.
    1459         515 :     CPLDebug("CPL_CSV",
    1460             :              "Failed to find file in GDALDefaultCSVFilename.  "
    1461             :              "Returning original basename: %s",
    1462             :              pszBasename);
    1463         515 :     CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
    1464         515 :     return pTLSData->szPath;
    1465             : }
    1466             : 
    1467             : /************************************************************************/
    1468             : /*                            CSVFilename()                             */
    1469             : /*                                                                      */
    1470             : /*      Return the full path to a particular CSV file.  Applications    */
    1471             : /*      can override the lookup with SetCSVFilenameHook().              */
    1472             : /************************************************************************/
    1473             : // Optional application hook; while it is null GDALDefaultCSVFilename() is used.
    1474             : CPL_C_START
    1475             : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
    1476             : CPL_C_END
    1477             : 
    1478        2488 : const char *CSVFilename(const char *pszBasename)
    1479             : 
    1480             : {
    1481        2488 :     if (pfnCSVFilenameHook == nullptr)
    1482        2488 :         return GDALDefaultCSVFilename(pszBasename);
    1483             :     // A hook is installed: its return value is passed through unchanged.
    1484           0 :     return pfnCSVFilenameHook(pszBasename);
    1485             : }
    1486             : 
    1487             : /************************************************************************/
    1488             : /*                         SetCSVFilenameHook()                         */
    1489             : /*                                                                      */
    1490             : /*      Applications can use this to set a function that will           */
    1491             : /*      massage CSV filenames.                                          */
    1492             : /************************************************************************/
    1493             : 
    1494             : /**
    1495             :  * Override CSV file search method.
    1496             :  *
    1497             :  * @param pfnNewHook The pointer to a function which will return the
    1498             :  * full path for a given filename.
    1499             :  * Passing NULL makes CSVFilename() use GDALDefaultCSVFilename() again.
    1500             : 
    1501             : This function allows an application to override how the GTIFGetDefn()
    1502             : and related functions find the CSV (Comma Separated Value) values
    1503             : required. The pfnNewHook argument should be a pointer to a function that
    1504             : will take in a CSV filename and return a full path to the file. The
    1505             : returned string should be to an internal static buffer so that the
    1506             : caller doesn't have to free the result.
    1507             : 
    1508             : Example:
    1509             : 
    1510             : The listgeo utility uses the following override function if the user
    1511             : specified a CSV file directory with the -t commandline switch (argument
    1512             : put into CSVDirName).
    1513             : 
    1514             : \code{.cpp}
    1515             : 
    1516             :     ...
    1517             :     SetCSVFilenameHook( CSVFileOverride );
    1518             :     ...
    1519             : 
    1520             : static const char *CSVFileOverride( const char * pszInput )
    1521             : 
    1522             : {
    1523             :     static char szPath[1024] = {};
    1524             : 
    1525             :     snprintf( szPath, sizeof(szPath), "%s/%s", CSVDirName, pszInput );
    1526             : 
    1527             :     return szPath;
    1528             : }
    1529             : \endcode
    1530             : 
    1531             : */
    1532             : 
    1533             : CPL_C_START
    1534           0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
    1535             : // Stores the pointer in the file-level pfnCSVFilenameHook read by CSVFilename().
    1536             : {
    1537           0 :     pfnCSVFilenameHook = pfnNewHook;
    1538           0 : }
    1539             : 
    1540             : CPL_C_END

Generated by: LCOV version 1.14