LCOV - code coverage report
Current view: top level - port - cpl_csv.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 402 534 75.3 %
Date: 2025-01-18 12:42:00 Functions: 27 35 77.1 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  CPL - Common Portability Library
       4             :  * Purpose:  CSV (comma separated value) file access.
       5             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 1999, Frank Warmerdam
       9             :  * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * SPDX-License-Identifier: MIT
      12             :  ****************************************************************************/
      13             : 
      14             : #include "cpl_port.h"
      15             : #include "cpl_csv.h"
      16             : 
      17             : #include <cstddef>
      18             : #include <cstdlib>
      19             : #include <cstring>
      20             : #if HAVE_FCNTL_H
      21             : #include <fcntl.h>
      22             : #endif
      23             : 
      24             : #include "cpl_conv.h"
      25             : #include "cpl_error.h"
      26             : #include "cpl_multiproc.h"
      27             : #include "gdal_csv.h"
      28             : 
      29             : #include <algorithm>
      30             : 
      31             : /* ==================================================================== */
      32             : /*      The CSVTable is a persistent set of info about an open CSV      */
      33             : /*      table.  While it doesn't currently maintain a record index,     */
      34             : /*      or in-memory copy of the table, it could be changed to do so    */
      35             : /*      in the future.                                                  */
      36             : /* ==================================================================== */
      37             : typedef struct ctb
      38             : {
      39             :     VSILFILE *fp;  // Handle opened by CSVAccess(); CSVIngest() closes it and sets it to nullptr.
      40             :     struct ctb *psNext;  // Next table in the singly linked list of open tables.
      41             :     char *pszFilename;  // Private copy of the filename; key used for list lookups.
      42             :     char **papszFieldNames;  // Header record as returned by CSVReadParseLineL().
      43             :     int *panFieldNamesLength;  // strlen() of each entry of papszFieldNames.
      44             :     char **papszRecFields;  // String list destroyed on deaccess; not populated in the code visible here.
      45             :     int nFields;  // CSLCount() of papszFieldNames.
      46             :     int iLastLine;  // Reset to -1 by CSVIngest(); presumably a cursor into papszLines - confirm.
      47             :     bool bNonUniqueKey;  // Starts false in CSVAccess() ("as far as we know now").
      48             : 
      49             :     /* Cache for whole file */
      50             :     int nLineCount;  // Number of entries stored in papszLines.
      51             :     char **papszLines;  // Pointers into pszRawData, one per kept data line (header and '#' lines excluded).
      52             :     int *panLineIndex;  // atoi() of each kept line; left nullptr when those values are not ascending.
      53             :     char *pszRawData;  // Whole file contents, NUL-terminated; line breaks get overwritten with NULs.
      54             : } CSVTable;
      55             : 
      56             : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
      57             :                                 const char *pszFilename);
      58             : 
      59             : /************************************************************************/
      60             : /*                            CSVFreeTLS()                              */
      61             : /************************************************************************/
      62           2 : static void CSVFreeTLS(void *pData)  // Free callback that CSVAccess() registers with CPLSetTLSWithFreeFunc(); pData is the list-head slot.
      63             : {
      64           2 :     CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);  // nullptr filename: release every table; false: skip the CPLDebug()/CPLReadLine() calls.
      65           2 :     CPLFree(pData);  // Then release the list-head slot itself.
      66           2 : }
      67             : 
      68             : /* It would likely be better to share this list between threads, but
      69             :    that will require some rework. */
      70             : 
      71             : /************************************************************************/
      72             : /*                             CSVAccess()                              */
      73             : /*                                                                      */
      74             : /*      This function will fetch a handle to the requested table.       */
      75             : /*      If not found in the ``open table list'' the table will be       */
      76             : /*      opened and added to the list.  Eventually this function may     */
      77             : /*      become public with an abstracted return type so that            */
      78             : /*      applications can set options about the table.  For now this     */
      79             : /*      isn't done.                                                     */
      80             : /************************************************************************/
      81             : 
      82      130205 : static CSVTable *CSVAccess(const char *pszFilename)
      83             : /* Returns the table for pszFilename from this thread's list, opening the file and reading its header record on first use; returns nullptr on failure. */
      84             : {
      85             :     /* -------------------------------------------------------------------- */
      86             :     /*      Fetch this thread's table list head, allocating it (and         */
      87             :     /*      registering it in TLS) if there isn't one yet.                  */
      88             :     /* -------------------------------------------------------------------- */
      89      130205 :     int bMemoryError = FALSE;
      90             :     CSVTable **ppsCSVTableList =
      91      130205 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
      92      130205 :     if (bMemoryError)
      93           0 :         return nullptr;
      94      130205 :     if (ppsCSVTableList == nullptr)
      95             :     {
      96             :         ppsCSVTableList =
      97           5 :             static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));  // Zeroed slot, i.e. an empty list.
      98           5 :         if (ppsCSVTableList == nullptr)
      99           0 :             return nullptr;
     100           5 :         CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);  // CSVFreeTLS() later frees the tables and this slot.
     101             :     }
     102             : 
     103             :     /* -------------------------------------------------------------------- */
     104             :     /*      Is the table already in the list.                               */
     105             :     /* -------------------------------------------------------------------- */
     106     1009480 :     for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
     107      879271 :          psTable = psTable->psNext)
     108             :     {
     109     1009450 :         if (EQUAL(psTable->pszFilename, pszFilename))
     110             :         {
     111             :             /*
     112             :              * Eventually we should consider promoting to the front of
     113             :              * the list to accelerate frequently accessed tables.
     114             :              */
     115      130175 :             return psTable;
     116             :         }
     117             :     }
     118             : 
     119             :     /* -------------------------------------------------------------------- */
     120             :     /*      If not, try to open it.                                         */
     121             :     /* -------------------------------------------------------------------- */
     122          30 :     VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
     123          30 :     if (fp == nullptr)
     124           0 :         return nullptr;
     125             : 
     126             :     /* -------------------------------------------------------------------- */
     127             :     /*      Create an information structure about this table, and add to    */
     128             :     /*      the front of the list.                                          */
     129             :     /* -------------------------------------------------------------------- */
     130             :     CSVTable *const psTable =
     131          30 :         static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));  // Zero-filled, so the cache fields start out null/0.
     132          30 :     if (psTable == nullptr)
     133             :     {
     134           0 :         VSIFCloseL(fp);
     135           0 :         return nullptr;
     136             :     }
     137             : 
     138          30 :     psTable->fp = fp;
     139          30 :     psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
     140          30 :     if (psTable->pszFilename == nullptr)
     141             :     {
     142           0 :         VSIFree(psTable);
     143           0 :         VSIFCloseL(fp);
     144           0 :         return nullptr;
     145             :     }
     146          30 :     psTable->bNonUniqueKey = false;  // As far as we know now.
     147          30 :     psTable->psNext = *ppsCSVTableList;
     148             : 
     149          30 :     *ppsCSVTableList = psTable;  // The table is linked in before its header is read.
     150             : 
     151             :     /* -------------------------------------------------------------------- */
     152             :     /*      Read the table header record containing the field names.        */
     153             :     /* -------------------------------------------------------------------- */
     154          30 :     psTable->papszFieldNames = CSVReadParseLineL(fp);
     155          30 :     psTable->nFields = CSLCount(psTable->papszFieldNames);
     156          30 :     psTable->panFieldNamesLength =
     157          30 :         static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));  // One cached length per field name.
     158          30 :     for (int i = 0;
     159         185 :          i < psTable->nFields &&
     160             :          /* null-pointer check to avoid a false positive from CLang S.A. */
     161         155 :          psTable->papszFieldNames != nullptr;
     162             :          i++)
     163             :     {
     164         155 :         psTable->panFieldNamesLength[i] =
     165         155 :             static_cast<int>(strlen(psTable->papszFieldNames[i]));
     166             :     }
     167             : 
     168          30 :     return psTable;  // Still owned by the list; released through CSVDeaccessInternal().
     169             : }
     170             : 
     171             : /************************************************************************/
     172             : /*                            CSVDeaccess()                             */
     173             : /************************************************************************/
     174             : 
     175         953 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
     176             :                                 const char *pszFilename)
     177             : /* Unlinks and frees the table named pszFilename (every table when it is nullptr) from *ppsCSVTableList; bCanUseTLS gates the CPLDebug() and CPLReadLine() calls. */
     178             : {
     179         953 :     if (ppsCSVTableList == nullptr)
     180         944 :         return;  // No list at all: nothing to release.
     181             : 
     182             :     /* -------------------------------------------------------------------- */
     183             :     /*      A NULL means deaccess all tables.                               */
     184             :     /* -------------------------------------------------------------------- */
     185           9 :     if (pszFilename == nullptr)
     186             :     {
     187           9 :         while (*ppsCSVTableList != nullptr)  // Each call below is expected to unlink the current head, draining the list.
     188           5 :             CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
     189           5 :                                 (*ppsCSVTableList)->pszFilename);
     190             : 
     191           4 :         return;
     192             :     }
     193             : 
     194             :     /* -------------------------------------------------------------------- */
     195             :     /*      Find this table.                                                */
     196             :     /* -------------------------------------------------------------------- */
     197           5 :     CSVTable *psLast = nullptr;  // Predecessor of psTable; stays nullptr when the match is the head.
     198           5 :     CSVTable *psTable = *ppsCSVTableList;
     199           5 :     for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
     200           0 :          psTable = psTable->psNext)
     201             :     {
     202           0 :         psLast = psTable;
     203             :     }
     204             : 
     205           5 :     if (psTable == nullptr)
     206             :     {
     207           0 :         if (bCanUseTLS)
     208           0 :             CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);  // NOTE(review): message says "CPLDeaccess" although the function is CSVDeaccess.
     209           0 :         return;
     210             :     }
     211             : 
     212             :     /* -------------------------------------------------------------------- */
     213             :     /*      Remove the link from the list.                                  */
     214             :     /* -------------------------------------------------------------------- */
     215           5 :     if (psLast != nullptr)
     216           0 :         psLast->psNext = psTable->psNext;
     217             :     else
     218           5 :         *ppsCSVTableList = psTable->psNext;
     219             : 
     220             :     /* -------------------------------------------------------------------- */
     221             :     /*      Free the table.                                                 */
     222             :     /* -------------------------------------------------------------------- */
     223           5 :     if (psTable->fp != nullptr)  // Still open only when CSVIngest() has not run to completion for this table.
     224           0 :         VSIFCloseL(psTable->fp);
     225             : 
     226           5 :     CSLDestroy(psTable->papszFieldNames);
     227           5 :     CPLFree(psTable->panFieldNamesLength);
     228           5 :     CSLDestroy(psTable->papszRecFields);
     229           5 :     CPLFree(psTable->pszFilename);
     230           5 :     CPLFree(psTable->panLineIndex);
     231           5 :     CPLFree(psTable->pszRawData);
     232           5 :     CPLFree(psTable->papszLines);  // Array only: its entries point into pszRawData, freed just above.
     233             : 
     234           5 :     CPLFree(psTable);
     235             : 
     236           5 :     if (bCanUseTLS)
     237           5 :         CPLReadLine(nullptr);  // Presumably releases CPLReadLine()'s per-thread buffer - confirm against cpl_conv.
     238             : }
     239             : 
     240         946 : void CSVDeaccess(const char *pszFilename)  // Releases the table for pszFilename (every table when nullptr) from the calling thread's list.
     241             : {
     242             :     /* -------------------------------------------------------------------- */
     243             :     /*      Fetch this thread's table list, if any.  Nothing is allocated   */
     244             :     /*      here; CSVDeaccessInternal() returns at once on a null list.     */
     245             :     /* -------------------------------------------------------------------- */
     246         946 :     int bMemoryError = FALSE;  // NOTE(review): never examined after the call below.
     247             :     CSVTable **ppsCSVTableList =
     248         946 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
     249             : 
     250         946 :     CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
     251         946 : }
     252             : 
     253             : /************************************************************************/
     254             : /*                            CSVSplitLine()                            */
     255             : /*                                                                      */
     256             : /*      Tokenize a CSV line into fields in the form of a string         */
     257             : /*      list.  This is used instead of the CPLTokenizeString()          */
     258             : /*      because it provides correct CSV escaping and quoting            */
     259             : /*      semantics.                                                      */
     260             : /************************************************************************/
     261             : 
     262      116303 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
     263             :                            bool bKeepLeadingAndClosingQuotes,
     264             :                            bool bMergeDelimiter)
     265             : /* Splits pszString on pszDelimiter with CSV quoting rules and returns a null-terminated string list (a single null entry for null or empty input). bKeepLeadingAndClosingQuotes keeps the enclosing quote characters in the tokens; bMergeDelimiter collapses runs of delimiters into one. */
     266             : {
     267      232606 :     CPLStringList aosRetList;
     268      116303 :     if (pszString == nullptr)
     269           0 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     270             : 
     271      116303 :     char *pszToken = static_cast<char *>(CPLCalloc(10, 1));  // Scratch buffer reused for every token; grown on demand below.
     272      116303 :     int nTokenMax = 10;
     273      116303 :     const size_t nDelimiterLength = strlen(pszDelimiter);  // NOTE(review): with an empty delimiter strncmp(..., 0) always matches and the scan would not advance.
     274             : 
     275      116303 :     const char *pszIter = pszString;
     276      669210 :     while (*pszIter != '\0')
     277             :     {
     278      552907 :         bool bInString = false;
     279             : 
     280      552907 :         int nTokenLen = 0;
     281             : 
     282             :         // Try to find the next delimiter, marking end of token.
     283     4646140 :         do
     284             :         {
     285             :             // At a delimiter outside quotes: skip it and end the current token.
     286     5199050 :             if (!bInString &&
     287     2711060 :                 strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
     288             :             {
     289      436925 :                 pszIter += nDelimiterLength;
     290      436925 :                 if (bMergeDelimiter)
     291             :                 {
     292           9 :                     while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
     293             :                            0)
     294           5 :                         pszIter += nDelimiterLength;
     295             :                 }
     296      436925 :                 break;
     297             :             }
     298             : 
     299     4762120 :             if (*pszIter == '"')
     300             :             {
     301      395873 :                 if (!bInString && nTokenLen > 0)
     302             :                 {
     303             :                     // do not treat in a special way double quotes that appear
     304             :                     // in the middle of a field (similarly to OpenOffice)
     305             :                     // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
     306             :                 }
     307      395786 :                 else if (!bInString || pszIter[1] != '"')
     308             :                 {
     309      395100 :                     bInString = !bInString;  // Opening or closing quote of a quoted field.
     310      395100 :                     if (!bKeepLeadingAndClosingQuotes)
     311      395066 :                         continue;  // Drops the quote: jumps to the loop condition, which advances pszIter.
     312             :                 }
     313             :                 else  // Doubled quotes in string resolve to one quote.
     314             :                 {
     315         686 :                     pszIter++;
     316             :                 }
     317             :             }
     318             : 
     319     4367060 :             if (nTokenLen >= nTokenMax - 2)  // Keep room for this character plus the terminating NUL.
     320             :             {
     321      122920 :                 nTokenMax = nTokenMax * 2 + 10;
     322      122920 :                 pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
     323             :             }
     324             : 
     325     4367060 :             pszToken[nTokenLen] = *pszIter;
     326     4367060 :             nTokenLen++;
     327     4762120 :         } while (*(++pszIter) != '\0');
     328             : 
     329      552907 :         pszToken[nTokenLen] = '\0';
     330      552907 :         aosRetList.AddString(pszToken);
     331             : 
     332             :         // If the last token is an empty token, then we have to catch
     333             :         // it now, otherwise we won't reenter the loop and it will be lost.
     334      552907 :         if (*pszIter == '\0' &&
     335      116263 :             pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
     336      116263 :             strncmp(pszIter - nDelimiterLength, pszDelimiter,
     337             :                     nDelimiterLength) == 0)
     338             :         {
     339         281 :             aosRetList.AddString("");
     340             :         }
     341             :     }
     342             : 
     343      116303 :     CPLFree(pszToken);
     344             : 
     345      116303 :     if (aosRetList.Count() == 0)
     346          40 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));  // Empty input still yields a valid, empty list.
     347             :     else
     348      116263 :         return aosRetList.StealList();
     349             : }
     350             : 
     351             : /************************************************************************/
     352             : /*                          CSVFindNextLine()                           */
     353             : /*                                                                      */
     354             : /*      Find the start of the next line, while at the same time zero    */
     355             : /*      terminating this line.  Take into account that there may be     */
     356             : /*      newline indicators within quoted strings, and that quotes       */
     357             : /*      can be escaped with a backslash.                                */
     358             : /************************************************************************/
     359             : 
     360        6329 : static char *CSVFindNextLine(char *pszThisLine)
     361             : /* NUL-terminates, in place, the line that starts at pszThisLine and returns the start of the following line, or nullptr when no text follows it. */
     362             : {
     363        6329 :     int i = 0;  // i is used after the for loop.
     364             : 
     365      275213 :     for (int nQuoteCount = 0; pszThisLine[i] != '\0'; i++)
     366             :     {
     367      275213 :         if (pszThisLine[i] == '\"' && (i == 0 || pszThisLine[i - 1] != '\\'))  // Count quotes that are not preceded by a backslash.
     368       35430 :             nQuoteCount++;
     369             : 
     370      275213 :         if ((pszThisLine[i] == 10 || pszThisLine[i] == 13) &&  // 10 is LF, 13 is CR.
     371        6329 :             (nQuoteCount % 2) == 0)  // Even count: not inside a quoted field, so the line really ends here.
     372        6329 :             break;
     373             :     }
     374             : 
     375       16573 :     while (pszThisLine[i] == 10 || pszThisLine[i] == 13)  // Wipes every consecutive CR/LF, so CRLF pairs and blank lines are consumed.
     376       10244 :         pszThisLine[i++] = '\0';
     377             : 
     378        6329 :     if (pszThisLine[i] == '\0')  // Reached the end of the buffer.
     379          30 :         return nullptr;
     380             : 
     381        6299 :     return pszThisLine + i;
     382             : }
     383             : 
     384             : /************************************************************************/
     385             : /*                             CSVIngest()                              */
     386             : /*                                                                      */
     387             : /*      Load entire file into memory and setup index if possible.       */
     388             : /************************************************************************/
     389             : 
     390             : // TODO(schwehr): Clean up all the casting in CSVIngest.
     391       60606 : static void CSVIngest(CSVTable *psTable)
     392             : /* Reads the whole file into psTable->pszRawData, splits it into lines (papszLines) and, when the leading integers are ascending, builds panLineIndex. Does nothing if pszRawData is already set. */
     393             : {
     394       60606 :     if (psTable->pszRawData != nullptr)
     395       60576 :         return;  // Already ingested (or a previous attempt stopped part-way, see the notes below).
     396             : 
     397             :     /* -------------------------------------------------------------------- */
     398             :     /*      Ingest whole file.                                              */
     399             :     /* -------------------------------------------------------------------- */
     400          30 :     if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
     401             :     {
     402           0 :         CPLError(CE_Failure, CPLE_FileIO,
     403             :                  "Failed using seek end and tell to get file length: %s",
     404             :                  psTable->pszFilename);
     405           0 :         return;
     406             :     }
     407          30 :     const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
     408          30 :     if (static_cast<long>(nFileLen) == -1)  // NOTE(review): the offset is narrowed to long before being compared with -1.
     409             :     {
     410           0 :         CPLError(CE_Failure, CPLE_FileIO,
     411             :                  "Failed using seek end and tell to get file length: %s",
     412             :                  psTable->pszFilename);
     413           0 :         return;
     414             :     }
     415          30 :     VSIRewindL(psTable->fp);
     416             : 
     417          30 :     psTable->pszRawData = static_cast<char *>(
     418          30 :         VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));  // +1 for the terminating NUL written below.
     419          30 :     if (psTable->pszRawData == nullptr)
     420           0 :         return;
     421          30 :     if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
     422          30 :                   psTable->fp) != static_cast<size_t>(nFileLen))
     423             :     {
     424           0 :         CPLFree(psTable->pszRawData);
     425           0 :         psTable->pszRawData = nullptr;
     426             : 
     427           0 :         CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
     428             :                  psTable->pszFilename);
     429           0 :         return;
     430             :     }
     431             : 
     432          30 :     psTable->pszRawData[nFileLen] = '\0';
     433             : 
     434             :     /* -------------------------------------------------------------------- */
     435             :     /*      Get count of newlines so we can allocate line array.            */
     436             :     /* -------------------------------------------------------------------- */
     437          30 :     int nMaxLineCount = 0;
     438      279158 :     for (int i = 0; i < static_cast<int>(nFileLen); i++)  // NOTE(review): the file length is narrowed to int here.
     439             :     {
     440      279128 :         if (psTable->pszRawData[i] == 10)  // Only LF is counted, whereas CSVFindNextLine() also splits on CR.
     441        6329 :             nMaxLineCount++;
     442             :     }
     443             : 
     444          30 :     psTable->papszLines =
     445          30 :         static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
     446          30 :     if (psTable->papszLines == nullptr)
     447           0 :         return;  // NOTE(review): pszRawData stays set, so later calls return early and do not retry.
     448             : 
     449             :     /* -------------------------------------------------------------------- */
     450             :     /*      Build a list of record pointers into the raw data buffer        */
     451             :     /*      based on line terminators.  Zero terminate the line             */
     452             :     /*      strings.                                                        */
     453             :     /* -------------------------------------------------------------------- */
     454             :     /* skip header line */
     455          30 :     char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
     456             : 
     457          30 :     int iLine = 0;
     458        6329 :     while (pszThisLine != nullptr && iLine < nMaxLineCount)  // The second test keeps writes inside papszLines.
     459             :     {
     460        6299 :         if (pszThisLine[0] != '#')  // Lines starting with '#' are not stored.
     461        6288 :             psTable->papszLines[iLine++] = pszThisLine;
     462        6299 :         pszThisLine = CSVFindNextLine(pszThisLine);
     463             :     }
     464             : 
     465          30 :     psTable->nLineCount = iLine;
     466             : 
     467             :     /* -------------------------------------------------------------------- */
     468             :     /*      Allocate and populate index array.  Ensure they are in          */
     469             :     /*      ascending order so that binary searches can be done on the      */
     470             :     /*      array.                                                          */
     471             :     /* -------------------------------------------------------------------- */
     472          30 :     psTable->panLineIndex = static_cast<int *>(
     473          30 :         VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
     474          30 :     if (psTable->panLineIndex == nullptr)
     475           0 :         return;  // NOTE(review): returns with fp still open and iLastLine not reset.
     476             : 
     477        6232 :     for (int i = 0; i < psTable->nLineCount; i++)
     478             :     {
     479        6204 :         psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);  // Integer parsed from the start of the line.
     480             : 
     481        6204 :         if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
     482             :         {
     483           2 :             CPLFree(psTable->panLineIndex);  // Not ascending: drop the index altogether.
     484           2 :             psTable->panLineIndex = nullptr;
     485           2 :             break;
     486             :         }
     487             :     }
     488             : 
     489          30 :     psTable->iLastLine = -1;
     490             : 
     491             :     /* -------------------------------------------------------------------- */
     492             :     /*      We should never need the file handle again, so close it.        */
     493             :     /* -------------------------------------------------------------------- */
     494          30 :     VSIFCloseL(psTable->fp);
     495          30 :     psTable->fp = nullptr;
     496             : }
     497             : 
     498       60606 : static void CSVIngest(const char *pszFilename)
     499             : /* Filename overload: looks the table up (opening it if needed) with CSVAccess() and ingests it; emits a CPLError when the table cannot be obtained. */
     500             : {
     501       60606 :     CSVTable *psTable = CSVAccess(pszFilename);
     502       60606 :     if (psTable == nullptr)
     503             :     {
     504           0 :         CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
     505             :                  pszFilename);
     506           0 :         return;
     507             :     }
     508       60606 :     CSVIngest(psTable);  // Returns immediately when the table's raw data is already cached.
     509             : }
     510             : 
     511             : /************************************************************************/
     512             : /*                        CSVDetectSeperator()                          */
     513             : /************************************************************************/
     514             : 
     515             : /** Detect which field separator is used.
     516             :  *
     517             :  * Currently, it can detect comma, semicolon, space, tabulation or pipe.
     518             :  * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
     519             :  * most occurrences will be selected (and a warning emitted).
     520             :  * If no separator found, comma will be considered as the separator.
     521             :  *
     522             :  * @return ',', ';', ' ', tabulation character or '|'.
     523             :  */
     524         594 : char CSVDetectSeperator(const char *pszLine)
     525             : {
     526         594 :     bool bInString = false;
     527         594 :     int nCountComma = 0;
     528         594 :     int nCountSemicolon = 0;
     529         594 :     int nCountTab = 0;
     530         594 :     int nCountPipe = 0;
     531         594 :     int nCountSpace = 0;
     532             : 
     533       26771 :     for (; *pszLine != '\0'; pszLine++)
     534             :     {
     535       26177 :         if (!bInString && *pszLine == ',')
     536             :         {
     537        2130 :             nCountComma++;
     538             :         }
     539       24047 :         else if (!bInString && *pszLine == ';')
     540             :         {
     541          10 :             nCountSemicolon++;
     542             :         }
     543       24037 :         else if (!bInString && *pszLine == '\t')
     544             :         {
     545          29 :             nCountTab++;
     546             :         }
     547       24008 :         else if (!bInString && *pszLine == '|')
     548             :         {
     549           9 :             nCountPipe++;
     550             :         }
     551       23999 :         else if (!bInString && *pszLine == ' ')
     552             :         {
     553         290 :             nCountSpace++;
     554             :         }
     555       23709 :         else if (*pszLine == '"')
     556             :         {
     557         519 :             if (!bInString || pszLine[1] != '"')
     558             :             {
     559         519 :                 bInString = !bInString;
     560         519 :                 continue;
     561             :             }
     562             :             else /* doubled quotes in string resolve to one quote */
     563             :             {
     564           0 :                 pszLine++;
     565             :             }
     566             :         }
     567             :     }
     568             : 
     569             :     const int nMaxCountExceptSpace =
     570             :         std::max(std::max(nCountComma, nCountSemicolon),
     571         594 :                  std::max(nCountTab, nCountPipe));
     572         594 :     char chDelimiter = ',';
     573         594 :     if (nMaxCountExceptSpace == 0)
     574             :     {
     575          35 :         if (nCountSpace > 0)
     576           9 :             chDelimiter = ' ';
     577             :     }
     578             :     else
     579             :     {
     580         559 :         bool bWarn = false;
     581         559 :         if (nCountComma == nMaxCountExceptSpace)
     582             :         {
     583         543 :             chDelimiter = ',';
     584         543 :             bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
     585             :         }
     586          16 :         else if (nCountSemicolon == nMaxCountExceptSpace)
     587             :         {
     588           5 :             chDelimiter = ';';
     589           5 :             bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
     590             :         }
     591          11 :         else if (nCountTab == nMaxCountExceptSpace)
     592             :         {
     593           6 :             chDelimiter = '\t';
     594           6 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
     595             :         }
     596             :         else /* if( nCountPipe == nMaxCountExceptSpace ) */
     597             :         {
     598           5 :             chDelimiter = '|';
     599           5 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
     600             :         }
     601         559 :         if (bWarn)
     602             :         {
     603           6 :             CPLError(CE_Warning, CPLE_AppDefined,
     604             :                      "Selecting '%c' as CSV field separator, but "
     605             :                      "other candidate separator(s) have been found.",
     606             :                      chDelimiter);
     607             :         }
     608             :     }
     609             : 
     610         594 :     return chDelimiter;
     611             : }
     612             : 
     613             : /************************************************************************/
     614             : /*                    CSVReadParseLineGeneric()                         */
     615             : /*                                                                      */
     616             : /*      Read one line, and return split into fields.  The return        */
     617             : /*      result is a stringlist, in the sense of the CSL functions.      */
     618             : /************************************************************************/
     619             : 
     620             : static char **
     621       57338 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
     622             :                         size_t nMaxLineSize, const char *pszDelimiter,
     623             :                         bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
     624             :                         bool bMergeDelimiter, bool bSkipBOM)
     625             : {
     626       57338 :     const char *pszLine = pfnReadLine(fp, nMaxLineSize);
     627       57338 :     if (pszLine == nullptr)
     628        1366 :         return nullptr;
     629             : 
     630       55972 :     if (bSkipBOM)
     631             :     {
     632             :         // Skip BOM.
     633       55605 :         const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
     634       55605 :         if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
     635           4 :             pszLine += 3;
     636             :     }
     637             : 
     638             :     // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
     639       55972 :     if (!bHonourStrings)
     640             :     {
     641           2 :         return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
     642             :     }
     643             : 
     644             :     // If there are no quotes, then this is the simple case.
     645             :     // Parse, and return tokens.
     646       55970 :     if (strchr(pszLine, '\"') == nullptr)
     647       48336 :         return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
     648       48336 :                             bMergeDelimiter);
     649             : 
     650        7634 :     const size_t nDelimiterLength = strlen(pszDelimiter);
     651        7634 :     bool bInString = false;           // keep in that scope !
     652       15268 :     std::string osWorkLine(pszLine);  // keep in that scope !
     653        7634 :     size_t i = 0;                     // keep in that scope !
     654             : 
     655             :     try
     656             :     {
     657             :         while (true)
     658             :         {
     659      792187 :             for (; i < osWorkLine.size(); ++i)
     660             :             {
     661      783798 :                 if (osWorkLine[i] == '\"')
     662             :                 {
     663       59057 :                     if (!bInString)
     664             :                     {
     665             :                         // Only consider " as the start of a quoted string
     666             :                         // if it is the first character of the line, or
     667             :                         // if it is immediately after the field delimiter.
     668       52233 :                         if (i == 0 ||
     669       23004 :                             (i >= nDelimiterLength &&
     670       23004 :                              osWorkLine.compare(i - nDelimiterLength,
     671             :                                                 nDelimiterLength, pszDelimiter,
     672             :                                                 nDelimiterLength) == 0))
     673             :                         {
     674       29142 :                             bInString = true;
     675             :                         }
     676             :                     }
     677       56793 :                     else if (i + 1 < osWorkLine.size() &&
     678       26965 :                              osWorkLine[i + 1] == '"')
     679             :                     {
     680             :                         // Escaped double quote in a quoted string
     681         686 :                         ++i;
     682             :                     }
     683             :                     else
     684             :                     {
     685       29142 :                         bInString = false;
     686             :                     }
     687             :                 }
     688             :             }
     689             : 
     690        8389 :             if (!bInString)
     691             :             {
     692        7634 :                 return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
     693             :                                     bKeepLeadingAndClosingQuotes,
     694        7634 :                                     bMergeDelimiter);
     695             :             }
     696             : 
     697         755 :             const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
     698         755 :             if (pszNewLine == nullptr)
     699           0 :                 break;
     700             : 
     701         755 :             osWorkLine.append("\n");
     702         755 :             osWorkLine.append(pszNewLine);
     703         755 :         }
     704             :     }
     705           0 :     catch (const std::exception &e)
     706             :     {
     707           0 :         CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
     708             :     }
     709           0 :     return nullptr;
     710             : }
     711             : 
     712             : /************************************************************************/
     713             : /*                          CSVReadParseLine()                          */
     714             : /*                                                                      */
     715             : /*      Read one line, and return split into fields.  The return        */
     716             : /*      result is a stringlist, in the sense of the CSL functions.      */
     717             : /*                                                                      */
     718             : /*      Deprecated.  Replaced by CSVReadParseLineL().                   */
     719             : /************************************************************************/
     720             : 
     721           0 : char **CSVReadParseLine(FILE *fp)
     722             : {
     723           0 :     return CSVReadParseLine2(fp, ',');
     724             : }
     725             : 
     726           0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
     727             : {
     728           0 :     return CPLReadLine(static_cast<FILE *>(fp));
     729             : }
     730             : 
     731           0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
     732             : {
     733           0 :     CPLAssert(fp != nullptr);
     734           0 :     if (fp == nullptr)
     735           0 :         return nullptr;
     736             : 
     737           0 :     char szDelimiter[2] = {chDelimiter, 0};
     738           0 :     return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
     739             :                                    0,  // nMaxLineSize,
     740             :                                    szDelimiter,
     741             :                                    true,   // bHonourStrings
     742             :                                    false,  // bKeepLeadingAndClosingQuotes
     743             :                                    false,  // bMergeDelimiter
     744           0 :                                    true /* bSkipBOM */);
     745             : }
     746             : 
     747             : /************************************************************************/
     748             : /*                          CSVReadParseLineL()                         */
     749             : /*                                                                      */
     750             : /*      Read one line, and return split into fields.  The return        */
     751             : /*      result is a stringlist, in the sense of the CSL functions.      */
     752             : /*                                                                      */
     753             : /*      Replaces CSVReadParseLine().  These functions use the VSI       */
     754             : /*      layer to allow reading from other file containers.              */
     755             : /************************************************************************/
     756             : 
     757        3910 : char **CSVReadParseLineL(VSILFILE *fp)
     758             : {
     759        3910 :     return CSVReadParseLine2L(fp, ',');
     760             : }
     761             : 
     762        3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
     763             : 
     764             : {
     765        3910 :     CPLAssert(fp != nullptr);
     766        3910 :     if (fp == nullptr)
     767           0 :         return nullptr;
     768             : 
     769        3910 :     char szDelimiter[2] = {chDelimiter, 0};
     770        3910 :     return CSVReadParseLine3L(fp,
     771             :                               0,  // nMaxLineSize
     772             :                               szDelimiter,
     773             :                               true,   // bHonourStrings
     774             :                               false,  // bKeepLeadingAndClosingQuotes
     775             :                               false,  // bMergeDelimiter
     776        3910 :                               true /* bSkipBOM */);
     777             : }
     778             : 
     779             : /************************************************************************/
     780             : /*                      ReadLineLargeFile()                             */
     781             : /************************************************************************/
     782             : 
     783       58093 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
     784             : {
     785       58093 :     int nBufLength = 0;
     786       58093 :     return CPLReadLine3L(static_cast<VSILFILE *>(fp),
     787             :                          nMaxLineSize == 0 ? -1
     788             :                                            : static_cast<int>(nMaxLineSize),
     789      116186 :                          &nBufLength, nullptr);
     790             : }
     791             : 
     792             : /************************************************************************/
     793             : /*                      CSVReadParseLine3L()                            */
     794             : /*                                                                      */
     795             : /*      Read one line, and return split into fields.  The return        */
     796             : /*      result is a stringlist, in the sense of the CSL functions.      */
     797             : /************************************************************************/
     798             : 
     799             : /** Read one line, and return split into fields.
     800             :  * The return result is a stringlist, in the sense of the CSL functions.
     801             :  *
     802             :  * @param fp File handle. Must not be NULL
     803             :  * @param nMaxLineSize Maximum line size, or 0 for unlimited.
     804             :  * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
     805             :  * @param bHonourStrings Should be true, unless double quotes should not be
     806             :  *                       considered when separating fields.
     807             :  * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
     808             :  *                                     quote characters should be kept.
     809             :  * @param bMergeDelimiter Whether consecutive delimiters should be considered
     810             :  *                        as a single one. Should generally be set to false.
     811             :  * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
     812             :  */
     813       57338 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
     814             :                           const char *pszDelimiter, bool bHonourStrings,
     815             :                           bool bKeepLeadingAndClosingQuotes,
     816             :                           bool bMergeDelimiter, bool bSkipBOM)
     817             : 
     818             : {
     819       57338 :     return CSVReadParseLineGeneric(
     820             :         fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
     821       57338 :         bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
     822             : }
     823             : 
     824             : /************************************************************************/
     825             : /*                             CSVCompare()                             */
     826             : /*                                                                      */
     827             : /*      Compare a field to a search value using a particular            */
     828             : /*      criteria.                                                       */
     829             : /************************************************************************/
     830             : 
     831         610 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
     832             :                        CSVCompareCriteria eCriteria)
     833             : 
     834             : {
     835         610 :     if (eCriteria == CC_ExactString)
     836             :     {
     837           0 :         return (strcmp(pszFieldValue, pszTarget) == 0);
     838             :     }
     839         610 :     else if (eCriteria == CC_ApproxString)
     840             :     {
     841         270 :         return EQUAL(pszFieldValue, pszTarget);
     842             :     }
     843         340 :     else if (eCriteria == CC_Integer)
     844             :     {
     845         640 :         return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
     846         640 :                 atoi(pszFieldValue) == atoi(pszTarget));
     847             :     }
     848             : 
     849           0 :     return false;
     850             : }
     851             : 
     852             : /************************************************************************/
     853             : /*                            CSVScanLines()                            */
     854             : /*                                                                      */
     855             : /*      Read the file scanline for lines where the key field equals     */
     856             : /*      the indicated value with the suggested comparison criteria.     */
     857             : /*      Return the first matching line split into fields.               */
     858             : /*                                                                      */
     859             : /*      Deprecated.  Replaced by CSVScanLinesL().                       */
     860             : /************************************************************************/
     861             : 
     862           0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
     863             :                     CSVCompareCriteria eCriteria)
     864             : 
     865             : {
     866           0 :     CPLAssert(pszValue != nullptr);
     867           0 :     CPLAssert(iKeyField >= 0);
     868           0 :     CPLAssert(fp != nullptr);
     869             : 
     870           0 :     bool bSelected = false;
     871           0 :     const int nTestValue = atoi(pszValue);
     872           0 :     char **papszFields = nullptr;
     873             : 
     874           0 :     while (!bSelected)
     875             :     {
     876           0 :         papszFields = CSVReadParseLine(fp);
     877           0 :         if (papszFields == nullptr)
     878           0 :             return nullptr;
     879             : 
     880           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     881             :         {
     882             :             /* not selected */
     883             :         }
     884           0 :         else if (eCriteria == CC_Integer &&
     885           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     886             :         {
     887           0 :             bSelected = true;
     888             :         }
     889             :         else
     890             :         {
     891           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     892             :         }
     893             : 
     894           0 :         if (!bSelected)
     895             :         {
     896           0 :             CSLDestroy(papszFields);
     897           0 :             papszFields = nullptr;
     898             :         }
     899             :     }
     900             : 
     901           0 :     return papszFields;
     902             : }
     903             : 
     904             : /************************************************************************/
     905             : /*                            CSVScanLinesL()                           */
     906             : /*                                                                      */
     907             : /*      Read the file scanline for lines where the key field equals     */
     908             : /*      the indicated value with the suggested comparison criteria.     */
     909             : /*      Return the first matching line split into fields.               */
     910             : /************************************************************************/
     911             : 
     912           0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
     913             :                      CSVCompareCriteria eCriteria)
     914             : 
     915             : {
     916           0 :     CPLAssert(pszValue != nullptr);
     917           0 :     CPLAssert(iKeyField >= 0);
     918           0 :     CPLAssert(fp != nullptr);
     919             : 
     920           0 :     bool bSelected = false;
     921           0 :     const int nTestValue = atoi(pszValue);
     922           0 :     char **papszFields = nullptr;
     923             : 
     924           0 :     while (!bSelected)
     925             :     {
     926           0 :         papszFields = CSVReadParseLineL(fp);
     927           0 :         if (papszFields == nullptr)
     928           0 :             return nullptr;
     929             : 
     930           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     931             :         {
     932             :             /* not selected */
     933             :         }
     934           0 :         else if (eCriteria == CC_Integer &&
     935           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     936             :         {
     937           0 :             bSelected = true;
     938             :         }
     939             :         else
     940             :         {
     941           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     942             :         }
     943             : 
     944           0 :         if (!bSelected)
     945             :         {
     946           0 :             CSLDestroy(papszFields);
     947           0 :             papszFields = nullptr;
     948             :         }
     949             :     }
     950             : 
     951           0 :     return papszFields;
     952             : }
     953             : 
     954             : /************************************************************************/
     955             : /*                        CSVScanLinesIndexed()                         */
     956             : /*                                                                      */
     957             : /*      Binary search the in-memory line index for the given integer    */
     958             : /*      key value.  Return the first matching line split into fields,   */
     959             : /*      or NULL if the key is not found.                                */
     960             : /************************************************************************/
     961             : 
     962          23 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
     963             : 
     964             : {
     965          23 :     CPLAssert(psTable->panLineIndex != nullptr);
     966             : 
     967             :     /* -------------------------------------------------------------------- */
     968             :     /*      Find target record with binary search.                          */
     969             :     /* -------------------------------------------------------------------- */
     970          23 :     int iTop = psTable->nLineCount - 1;
     971          23 :     int iBottom = 0;
     972          23 :     int iResult = -1;
     973             : 
     974         167 :     while (iTop >= iBottom)
     975             :     {
     976         167 :         const int iMiddle = (iTop + iBottom) / 2;
     977         167 :         if (psTable->panLineIndex[iMiddle] > nKeyValue)
     978          98 :             iTop = iMiddle - 1;
     979          69 :         else if (psTable->panLineIndex[iMiddle] < nKeyValue)
     980          46 :             iBottom = iMiddle + 1;
     981             :         else
     982             :         {
     983          23 :             iResult = iMiddle;
     984             :             // if a key is not unique, select the first instance of it.
     985          23 :             while (iResult > 0 &&
     986          23 :                    psTable->panLineIndex[iResult - 1] == nKeyValue)
     987             :             {
     988           0 :                 psTable->bNonUniqueKey = true;
     989           0 :                 iResult--;
     990             :             }
     991          23 :             break;
     992             :         }
     993             :     }
     994             : 
     995          23 :     if (iResult == -1)
     996           0 :         return nullptr;
     997             : 
     998             :     /* -------------------------------------------------------------------- */
     999             :     /*      Parse target line, and update iLastLine indicator.              */
    1000             :     /* -------------------------------------------------------------------- */
    1001          23 :     psTable->iLastLine = iResult;
    1002             : 
    1003          23 :     return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
    1004             : }
    1005             : 
    1006             : /************************************************************************/
    1007             : /*                        CSVScanLinesIngested()                        */
    1008             : /*                                                                      */
    1009             : /*      Read the file scanline for lines where the key field equals     */
    1010             : /*      the indicated value with the suggested comparison criteria.     */
    1011             : /*      Return the first matching line split into fields.               */
    1012             : /************************************************************************/
    1013             : 
    1014          30 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
    1015             :                                    const char *pszValue,
    1016             :                                    CSVCompareCriteria eCriteria)
    1017             : 
    1018             : {
    1019          30 :     CPLAssert(pszValue != nullptr);
    1020          30 :     CPLAssert(iKeyField >= 0);
    1021             : 
    1022          30 :     const int nTestValue = atoi(pszValue);
    1023             : 
    1024             :     /* -------------------------------------------------------------------- */
    1025             :     /*      Short cut for indexed files.                                    */
    1026             :     /* -------------------------------------------------------------------- */
    1027          30 :     if (iKeyField == 0 && eCriteria == CC_Integer &&
    1028          23 :         psTable->panLineIndex != nullptr)
    1029          23 :         return CSVScanLinesIndexed(psTable, nTestValue);
    1030             : 
    1031             :     /* -------------------------------------------------------------------- */
    1032             :     /*      Scan from in-core lines.                                        */
    1033             :     /* -------------------------------------------------------------------- */
    1034           7 :     char **papszFields = nullptr;
    1035           7 :     bool bSelected = false;
    1036             : 
    1037         484 :     while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
    1038             :     {
    1039         477 :         psTable->iLastLine++;
    1040         477 :         papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
    1041             :                                    false, false);
    1042             : 
    1043         477 :         if (CSLCount(papszFields) < iKeyField + 1)
    1044             :         {
    1045             :             /* not selected */
    1046             :         }
    1047         477 :         else if (eCriteria == CC_Integer &&
    1048         242 :                  atoi(papszFields[iKeyField]) == nTestValue)
    1049             :         {
    1050           2 :             bSelected = true;
    1051             :         }
    1052             :         else
    1053             :         {
    1054         475 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
    1055             :         }
    1056             : 
    1057         477 :         if (!bSelected)
    1058             :         {
    1059         470 :             CSLDestroy(papszFields);
    1060         470 :             papszFields = nullptr;
    1061             :         }
    1062             :     }
    1063             : 
    1064           7 :     return papszFields;
    1065             : }
    1066             : 
    1067             : /************************************************************************/
    1068             : /*                            CSVRewind()                               */
    1069             : /*                                                                      */
    1070             : /*      Rewind a CSV file based on a passed in filename.                */
    1071             : /*      This is aimed at being used with CSVGetNextLine().              */
    1072             : /************************************************************************/
    1073             : 
    1074        1843 : void CSVRewind(const char *pszFilename)
    1075             : // Reset the line cursor of the table for pszFilename; does nothing when CSVAccess() yields no table.
    1076             : {
    1077             :     /* -------------------------------------------------------------------- */
    1078             :     /*      Get access to the table.                                        */
    1079             :     /* -------------------------------------------------------------------- */
    1080        1843 :     CPLAssert(pszFilename != nullptr);
    1081             : 
    1082        1843 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1083        1843 :     if (psTable != nullptr)
    1084        1843 :         psTable->iLastLine = -1;  // CSVGetNextLine() pre-increments, so its next read uses papszLines[0]
    1085        1843 : }
    1086             : 
    1087             : /************************************************************************/
    1088             : /*                           CSVGetNextLine()                           */
    1089             : /*                                                                      */
    1090             : /*      Fetch the next line of a CSV file based on a passed in          */
    1091             : /*      filename.  Returns NULL at end of file, or if file is not       */
    1092             : /*      really established.                                             */
    1093             : /*      This ingests the whole file into memory if not already done.    */
    1094             : /*      When reaching end of file, CSVRewind() may be used to read      */
    1095             : /*      again from the beginning.                                       */
    1096             : /************************************************************************/
    1097             : 
    1098       60462 : char **CSVGetNextLine(const char *pszFilename)
    1099             : // Return the next in-core line of the table for pszFilename as a field list, or nullptr.
    1100             : {
    1101             :     // The list is kept in psTable->papszRecFields and destroyed by the next call, so callers should not free it.
    1102             :     /* -------------------------------------------------------------------- */
    1103             :     /*      Get access to the table.                                        */
    1104             :     /* -------------------------------------------------------------------- */
    1105       60462 :     CPLAssert(pszFilename != nullptr);
    1106             : 
    1107       60462 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1108       60462 :     if (psTable == nullptr)
    1109           0 :         return nullptr;
    1110             : 
    1111       60462 :     CSVIngest(psTable->pszFilename);
    1112             : 
    1113             :     /* -------------------------------------------------------------------- */
    1114             :     /*      If we use CSVGetNextLine() we can pretty much assume we have    */
    1115             :     /*      a non-unique key.                                               */
    1116             :     /* -------------------------------------------------------------------- */
    1117       60462 :     psTable->bNonUniqueKey = true;  // also disables the cached-record shortcut in CSVScanFile()
    1118             : 
    1119             :     /* -------------------------------------------------------------------- */
    1120             :     /*      Do we have a next line available?  This only works for          */
    1121             :     /*      ingested tables I believe.                                      */
    1122             :     /* -------------------------------------------------------------------- */
    1123       60462 :     if (psTable->iLastLine + 1 >= psTable->nLineCount)
    1124         629 :         return nullptr;  // no line left after the cursor
    1125             : 
    1126       59833 :     psTable->iLastLine++;
    1127       59833 :     CSLDestroy(psTable->papszRecFields);  // release the record handed out previously
    1128      119666 :     psTable->papszRecFields = CSVSplitLine(
    1129       59833 :         psTable->papszLines[psTable->iLastLine], ",", false, false);
    1130             : 
    1131       59833 :     return psTable->papszRecFields;
    1132             : }
    1133             : 
    1134             : /************************************************************************/
    1135             : /*                            CSVScanFile()                             */
    1136             : /*                                                                      */
    1137             : /*      Scan a whole file using criteria similar to above, but also     */
    1138             : /*      taking care of file opening and closing.                        */
    1139             : /************************************************************************/
    1140             : 
    1141         144 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
    1142             :                           const char *pszValue, CSVCompareCriteria eCriteria)
    1143             : {  // Find a record whose field iKeyField matches pszValue; the result is cached in psTable->papszRecFields.
    1144         144 :     CSVIngest(psTable->pszFilename);
    1145             : 
    1146             :     /* -------------------------------------------------------------------- */
    1147             :     /*      Does the current record match the criteria?  If so, just        */
    1148             :     /*      return it again.                                                */
    1149             :     /* -------------------------------------------------------------------- */
    1150         144 :     if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
    1151         402 :         CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
    1152         114 :         !psTable->bNonUniqueKey)  // flag is set by CSVGetNextLine(); the cached record is then not reused
    1153             :     {
    1154         114 :         return psTable->papszRecFields;
    1155             :     }
    1156             : 
    1157             :     /* -------------------------------------------------------------------- */
    1158             :     /*      Scan the file from the beginning, replacing the ``current       */
    1159             :     /*      record'' in our structure with the one that is found.           */
    1160             :     /* -------------------------------------------------------------------- */
    1161          30 :     psTable->iLastLine = -1;
    1162          30 :     CSLDestroy(psTable->papszRecFields);  // reassigned on both branches below
    1163             :     // NOTE(review): a non-null pszRawData presumably means the file is held in-core - confirm where it is set.
    1164          30 :     if (psTable->pszRawData != nullptr)
    1165          30 :         psTable->papszRecFields =
    1166          30 :             CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
    1167             :     else
    1168             :     {
    1169           0 :         VSIRewindL(psTable->fp);
    1170           0 :         CPLReadLineL(psTable->fp); /* throw away the header line */
    1171             : 
    1172           0 :         psTable->papszRecFields =
    1173           0 :             CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
    1174             :     }
    1175             : 
    1176          30 :     return psTable->papszRecFields;
    1177             : }
    1178             : 
    1179           4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
    1180             :                    CSVCompareCriteria eCriteria)
    1181             : // Filename-based overload: looks the table up with CSVAccess() and delegates to the CSVTable overload.
    1182             : {
    1183             :     /* -------------------------------------------------------------------- */
    1184             :     /*      Get access to the table.                                        */
    1185             :     /* -------------------------------------------------------------------- */
    1186           4 :     CPLAssert(pszFilename != nullptr);
    1187             : 
    1188           4 :     if (iKeyField < 0)
    1189           0 :         return nullptr;  // negative field index is rejected before the table lookup
    1190             : 
    1191           4 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1192           4 :     if (psTable == nullptr)
    1193           0 :         return nullptr;
    1194             :     // The returned list is whatever the CSVTable overload returns (psTable->papszRecFields).
    1195           4 :     return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
    1196             : }
    1197             : 
    1198             : /************************************************************************/
    1199             : /*                           CSVGetFieldId()                            */
    1200             : /*                                                                      */
    1201             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1202             : /*      and find the field with the indicated name.  Returns -1 if      */
    1203             : /*      it fails to find the field name.  Comparison is case            */
    1204             : /*      insensitive, but otherwise exact.  After this function has      */
    1205             : /*      been called the file pointer will be positioned just after      */
    1206             : /*      the first record.                                               */
    1207             : /*                                                                      */
    1208             : /*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
    1209             : /************************************************************************/
    1210             : 
    1211           0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
    1212             : // Rewind fp, parse one line with CSVReadParseLine() and return the zero-based index of pszFieldName, or -1.
    1213             : {
    1214           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1215             : 
    1216           0 :     VSIRewind(fp);
    1217             :     // A nullptr result from CSVReadParseLine() skips the loop below and yields -1.
    1218           0 :     char **papszFields = CSVReadParseLine(fp);
    1219           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1220             :     {
    1221           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1222             :         {
    1223           0 :             CSLDestroy(papszFields);  // release the parsed list before the early return
    1224           0 :             return i;
    1225             :         }
    1226             :     }
    1227             : 
    1228           0 :     CSLDestroy(papszFields);
    1229             :     // No field compared equal to pszFieldName.
    1230           0 :     return -1;
    1231             : }
    1232             : 
    1233             : /************************************************************************/
    1234             : /*                           CSVGetFieldIdL()                           */
    1235             : /*                                                                      */
    1236             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1237             : /*      and find the field with the indicated name.  Returns -1 if      */
    1238             : /*      it fails to find the field name.  Comparison is case            */
    1239             : /*      insensitive, but otherwise exact.  After this function has      */
    1240             : /*      been called the file pointer will be positioned just after      */
    1241             : /*      the first record.                                               */
    1242             : /************************************************************************/
    1243             : 
    1244           0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
    1245             : // VSILFILE variant: rewind fp, parse one line with CSVReadParseLineL() and return the index of pszFieldName, or -1.
    1246             : {
    1247           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1248             : 
    1249           0 :     VSIRewindL(fp);
    1250             :     // A nullptr result from CSVReadParseLineL() skips the loop below and yields -1.
    1251           0 :     char **papszFields = CSVReadParseLineL(fp);
    1252           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1253             :     {
    1254           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1255             :         {
    1256           0 :             CSLDestroy(papszFields);  // release the parsed list before the early return
    1257           0 :             return i;
    1258             :         }
    1259             :     }
    1260             : 
    1261           0 :     CSLDestroy(papszFields);
    1262             :     // No field compared equal to pszFieldName.
    1263           0 :     return -1;
    1264             : }
    1265             : 
    1266             : /************************************************************************/
    1267             : /*                         CSVGetFileFieldId()                          */
    1268             : /*                                                                      */
    1269             : /*      Same as CSVGetFieldId(), except that we get the file based      */
    1270             : /*      on filename, rather than having an existing handle.             */
    1271             : /************************************************************************/
    1272             : 
    1273        7430 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
    1274             : // Return the index of the entry in psTable->papszFieldNames matching pszFieldName, or -1 if none matches.
    1275             : {
    1276             :     /* -------------------------------------------------------------------- */
    1277             :     /*      Find the requested field.                                       */
    1278             :     /* -------------------------------------------------------------------- */
    1279        7430 :     const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
    1280       18696 :     for (int i = 0; psTable->papszFieldNames != nullptr &&
    1281       18696 :                     psTable->papszFieldNames[i] != nullptr;
    1282             :          i++)
    1283             :     {  // NOTE(review): panFieldNamesLength[i] presumably caches strlen(papszFieldNames[i]) - confirm where it is filled.
    1284       18696 :         if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
    1285       10433 :             EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
    1286             :         {
    1287        7430 :             return i;
    1288             :         }
    1289             :     }
    1290             :     // Also reached when papszFieldNames is nullptr.
    1291           0 :     return -1;
    1292             : }
    1293             : 
    1294        7150 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
    1295             : // Filename-based overload: returns -1 when CSVAccess() yields no table, else defers to the CSVTable overload.
    1296             : {
    1297             :     /* -------------------------------------------------------------------- */
    1298             :     /*      Get access to the table.                                        */
    1299             :     /* -------------------------------------------------------------------- */
    1300        7150 :     CPLAssert(pszFilename != nullptr);
    1301             : 
    1302        7150 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1303        7150 :     if (psTable == nullptr)
    1304           0 :         return -1;
    1305        7150 :     return CSVGetFileFieldId(psTable, pszFieldName);
    1306             : }
    1307             : 
    1308             : /************************************************************************/
    1309             : /*                         CSVScanFileByName()                          */
    1310             : /*                                                                      */
    1311             : /*      Same as CSVScanFile(), but using a field name instead of a      */
    1312             : /*      field number.                                                   */
    1313             : /************************************************************************/
    1314             : 
    1315           4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
    1316             :                          const char *pszValue, CSVCompareCriteria eCriteria)
    1317             : // Resolve pszKeyFieldName to a field index, then scan with the filename-based CSVScanFile().
    1318             : {
    1319           4 :     const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
    1320           4 :     if (iKeyField == -1)
    1321           0 :         return nullptr;  // field name not found, or no table for pszFilename
    1322             :     // Both helpers call CSVAccess(pszFilename), so the table is looked up twice on this path.
    1323           4 :     return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
    1324             : }
    1325             : 
    1326             : /************************************************************************/
    1327             : /*                            CSVGetField()                             */
    1328             : /*                                                                      */
    1329             : /*      The all-in-one function to fetch a particular field value       */
    1330             : /*      from a CSV file.  Note this function will return an empty       */
    1331             : /*      string, rather than NULL if it fails to find the desired        */
    1332             : /*      value for some reason.  The caller can't establish that the     */
    1333             : /*      fetch failed.                                                   */
    1334             : /************************************************************************/
    1335             : 
    1336         140 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
    1337             :                         const char *pszKeyFieldValue,
    1338             :                         CSVCompareCriteria eCriteria,
    1339             :                         const char *pszTargetField)
    1340             : // Return the pszTargetField value of the record matched on pszKeyFieldName; every failure path returns "".
    1341             : {
    1342             :     /* -------------------------------------------------------------------- */
    1343             :     /*      Find the table.                                                 */
    1344             :     /* -------------------------------------------------------------------- */
    1345         140 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1346         140 :     if (psTable == nullptr)
    1347           0 :         return "";
    1348             : 
    1349         140 :     const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
    1350         140 :     if (iKeyField == -1)
    1351           0 :         return "";
    1352             : 
    1353             :     /* -------------------------------------------------------------------- */
    1354             :     /*      Find the correct record.                                        */
    1355             :     /* -------------------------------------------------------------------- */
    1356             :     char **papszRecord =
    1357         140 :         CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
    1358         140 :     if (papszRecord == nullptr)
    1359           0 :         return "";
    1360             :     // papszRecord is psTable->papszRecFields, kept by the table; it is not freed here.
    1361             :     /* -------------------------------------------------------------------- */
    1362             :     /*      Figure out which field we want out of this.                     */
    1363             :     /* -------------------------------------------------------------------- */
    1364         140 :     const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
    1365         140 :     if (iTargetField < 0)
    1366           0 :         return "";
    1367             :     // Walk to the target index instead of indexing directly: the record may hold fewer fields than iTargetField.
    1368         388 :     for (int i = 0; papszRecord[i] != nullptr; ++i)
    1369             :     {
    1370         388 :         if (i == iTargetField)
    1371         140 :             return papszRecord[iTargetField];
    1372             :     }
    1373           0 :     return "";
    1374             : }
    1375             : 
    1376             : /************************************************************************/
    1377             : /*                       GDALDefaultCSVFilename()                       */
    1378             : /************************************************************************/
    1379             : 
    1380             : typedef struct  // state block that GDALDefaultCSVFilename() stores under CTLS_CSVDEFAULTFILENAME
    1381             : {
    1382             :     char szPath[512];  // receives the copy of the basename returned as last resort
    1383             :     bool bCSVFinderInitialized;  // set once the GDAL_DATA finder fallback has been attempted
    1384             : } DefaultCSVFileNameTLS;
    1385             : 
    1386        2488 : const char *GDALDefaultCSVFilename(const char *pszBasename)
    1387             : // Resolve pszBasename to a path: an already-accessed table, a CPLFindFile("gdal", ...) hit, or a copy of pszBasename.
    1388             : {
    1389             :     /* -------------------------------------------------------------------- */
    1390             :     /*      Do we already have this file accessed?  If so, just return      */
    1391             :     /*      the existing path without any further probing.                  */
    1392             :     /* -------------------------------------------------------------------- */
    1393        2488 :     int bMemoryError = FALSE;
    1394             :     CSVTable **ppsCSVTableList =
    1395        2488 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
    1396        2488 :     if (ppsCSVTableList != nullptr)
    1397             :     {
    1398        2482 :         const size_t nBasenameLen = strlen(pszBasename);
    1399             :         // Accept a table whose path ends with pszBasename right after a slash or backslash separator.
    1400       23118 :         for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
    1401       20636 :              psTable = psTable->psNext)
    1402             :         {
    1403       22566 :             const size_t nFullLen = strlen(psTable->pszFilename);
    1404             :             // nFullLen > nBasenameLen keeps both the suffix offset and the separator index in range.
    1405       22566 :             if (nFullLen > nBasenameLen &&
    1406       22566 :                 strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
    1407        1930 :                        pszBasename) == 0 &&
    1408        1930 :                 strchr("/\\",
    1409        1930 :                        psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
    1410             :                     nullptr)
    1411             :             {
    1412        1930 :                 return psTable->pszFilename;  // the table entry's own string, not a copy
    1413             :             }
    1414             :         }
    1415             :     }
    1416             : 
    1417             :     /* -------------------------------------------------------------------- */
    1418             :     /*      Otherwise we need to look harder for it.                        */
    1419             :     /* -------------------------------------------------------------------- */
    1420             :     DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1421         558 :         CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
    1422         558 :     if (pTLSData == nullptr && !bMemoryError)  // no state block yet: allocate one and register it
    1423             :     {
    1424             :         pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1425           5 :             VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
    1426           5 :         if (pTLSData)
    1427           5 :             CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
    1428             :     }
    1429         558 :     if (pTLSData == nullptr)
    1430           0 :         return "/not_existing_dir/not_existing_path";  // no state block available: placeholder path
    1431             : 
    1432         558 :     const char *pszResult = CPLFindFile("gdal", pszBasename);
    1433             : 
    1434         558 :     if (pszResult != nullptr)
    1435          43 :         return pszResult;
    1436             :     // One-time fallback per state block: push GDAL_DATA (when set) as a finder location and retry.
    1437         515 :     if (!pTLSData->bCSVFinderInitialized)
    1438             :     {
    1439           2 :         pTLSData->bCSVFinderInitialized = true;
    1440             : 
    1441           2 :         if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
    1442           2 :             CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
    1443             : 
    1444           2 :         pszResult = CPLFindFile("gdal", pszBasename);
    1445             : 
    1446           2 :         if (pszResult != nullptr)
    1447           0 :             return pszResult;
    1448             :     }
    1449             :     // Last resort: copy pszBasename into szPath (bounded by sizeof(szPath)) and return that buffer.
    1450             :     // For systems like sandboxes that do not allow other checks.
    1451         515 :     CPLDebug("CPL_CSV",
    1452             :              "Failed to find file in GDALDefaultCSVFilename.  "
    1453             :              "Returning original basename: %s",
    1454             :              pszBasename);
    1455         515 :     CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
    1456         515 :     return pTLSData->szPath;
    1457             : }
    1458             : 
    1459             : /************************************************************************/
    1460             : /*                            CSVFilename()                             */
    1461             : /*                                                                      */
    1462             : /*      Return the full path to a particular CSV file.  This will       */
    1463             : /*      eventually be something the application can override.           */
    1464             : /************************************************************************/
    1465             : 
    1466             : CPL_C_START
    1467             : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;  // assigned by SetCSVFilenameHook(); nullptr selects GDALDefaultCSVFilename()
    1468             : CPL_C_END
    1469             : 
    1470        2488 : const char *CSVFilename(const char *pszBasename)
    1471             : // Map pszBasename to a path through the installed hook, or GDALDefaultCSVFilename() when no hook is set.
    1472             : {
    1473        2488 :     if (pfnCSVFilenameHook == nullptr)
    1474        2488 :         return GDALDefaultCSVFilename(pszBasename);
    1475             :     // The hook's return value is passed through unchanged.
    1476           0 :     return pfnCSVFilenameHook(pszBasename);
    1477             : }
    1478             : 
    1479             : /************************************************************************/
    1480             : /*                         SetCSVFilenameHook()                         */
    1481             : /*                                                                      */
    1482             : /*      Applications can use this to set a function that will           */
    1483             : /*      massage CSV filenames.                                          */
    1484             : /************************************************************************/
    1485             : 
    1486             : /**
    1487             :  * Override CSV file search method.
    1488             :  *
    1489             :  * @param pfnNewHook The pointer to a function which will return the
    1490             :  * full path for a given filename.
    1491             :  *
    1492             : 
    1493             : This function allows an application to override how the GTIFGetDefn()
    1494             : and related functions find the CSV (Comma Separated Value) values
    1495             : required. The pfnNewHook argument should be a pointer to a function that
    1496             : will take in a CSV filename and return a full path to the file. The
    1497             : returned string should point to an internal static buffer so that the
    1498             : caller doesn't have to free the result.
    1499             : 
    1500             : Example:
    1501             : 
    1502             : The listgeo utility uses the following override function if the user
    1503             : specified a CSV file directory with the -t commandline switch (argument
    1504             : put into CSVDirName).
    1505             : 
    1506             : \code{.cpp}
    1507             : 
    1508             :     ...
    1509             :     SetCSVFilenameHook( CSVFileOverride );
    1510             :     ...
    1511             : 
    1512             : static const char *CSVFileOverride( const char * pszInput )
    1513             : 
    1514             : {
    1515             :     static char szPath[1024] = {};
    1516             : 
    1517             :     sprintf( szPath, "%s/%s", CSVDirName, pszInput );
    1518             : 
    1519             :     return szPath;
    1520             : }
    1521             : \endcode
    1522             : 
    1523             : */
    1524             : 
    1525             : CPL_C_START
    1526           0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
    1527             : // Store pfnNewHook in the file-scope hook pointer read by CSVFilename(); nullptr restores the default lookup.
    1528             : {
    1529           0 :     pfnCSVFilenameHook = pfnNewHook;
    1530           0 : }
    1531             : // NOTE(review): the assignment above is a plain store with no locking visible in this function.
    1532             : CPL_C_END

Generated by: LCOV version 1.14