LCOV - code coverage report
Current view: top level - port - cpl_csv.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 394 525 75.0 %
Date: 2024-11-21 22:18:42 Functions: 27 35 77.1 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  CPL - Common Portability Library
       4             :  * Purpose:  CSV (comma separated value) file access.
       5             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 1999, Frank Warmerdam
       9             :  * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * SPDX-License-Identifier: MIT
      12             :  ****************************************************************************/
      13             : 
      14             : #include "cpl_port.h"
      15             : #include "cpl_csv.h"
      16             : 
      17             : #include <cstddef>
      18             : #include <cstdlib>
      19             : #include <cstring>
      20             : #if HAVE_FCNTL_H
      21             : #include <fcntl.h>
      22             : #endif
      23             : 
      24             : #include "cpl_conv.h"
      25             : #include "cpl_error.h"
      26             : #include "cpl_multiproc.h"
      27             : #include "gdal_csv.h"
      28             : 
      29             : #include <algorithm>
      30             : 
      31             : /* ==================================================================== */
      32             : /*      The CSVTable is a persistent set of info about an open CSV      */
      33             : /*      table.  After CSVIngest() it also holds an in-memory copy of    */
      34             : /*      the whole file, pointers to each data line and, when the        */
      35             : /*      leading integers are ascending, an index of those values.       */
      36             : /* ==================================================================== */
      37             : typedef struct ctb
      38             : {
      39             :     VSILFILE *fp;  // Open handle; closed and set to nullptr by CSVIngest().
      40             :     struct ctb *psNext;  // Next table in the per-thread linked list.
      41             :     char *pszFilename;  // Owned copy of the file name; key used for list lookups.
      42             :     char **papszFieldNames;  // Header record as parsed by CSVReadParseLineL().
      43             :     int *panFieldNamesLength;  // strlen() of each entry of papszFieldNames.
      44             :     char **papszRecFields;  // String list released with CSLDestroy() on deaccess.
      45             :     int nFields;  // CSLCount() of papszFieldNames.
      46             :     int iLastLine;  // Set to -1 at the end of CSVIngest().
      47             :     bool bNonUniqueKey;  // Initialised to false when the table is opened.
      48             : 
      49             :     /* Cache for whole file (filled in by CSVIngest()) */
      50             :     int nLineCount;  // Number of valid entries in papszLines.
      51             :     char **papszLines;  // Start of each kept data line, pointing into pszRawData.
      52             :     int *panLineIndex;  // atoi() of each line; nullptr if the values are not ascending.
      53             :     char *pszRawData;  // Entire file contents, NUL-terminated.
      54             : } CSVTable;
      55             : 
      56             : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
      57             :                                 const char *pszFilename);  // Declared early: CSVFreeTLS() calls it ahead of its definition.
      58             : 
      59             : /************************************************************************/
      60             : /*                            CSVFreeTLS()                              */
      61             : /************************************************************************/
      62           2 : static void CSVFreeTLS(void *pData)  // Passed to CPLSetTLSWithFreeFunc() by CSVAccess().
      63             : {
      64           2 :     CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);  // nullptr file name: release every table; bCanUseTLS is false here.
      65           2 :     CPLFree(pData);  // Release the list-head slot that CSVAccess() allocated.
      66           2 : }
      67             : 
      68             : /* It would likely be better to share this list between threads, but
      69             :    that will require some rework. */
      70             : 
      71             : /************************************************************************/
      72             : /*                             CSVAccess()                              */
      73             : /*                                                                      */
      74             : /*      This function will fetch a handle to the requested table.       */
      75             : /*      If not found in the ``open table list'' the table will be       */
      76             : /*      opened and added to the list.  Eventually this function may     */
      77             : /*      become public with an abstracted return type so that            */
      78             : /*      applications can set options about the table.  For now this     */
      79             : /*      isn't done.                                                     */
      80             : /************************************************************************/
      81             : 
      82      129591 : static CSVTable *CSVAccess(const char *pszFilename)
      83             : // Returns the already-listed or newly opened table, or nullptr on failure.
      84             : {
      85             :     /* -------------------------------------------------------------------- */
      86             :     /*      Fetch the table, and allocate the thread-local pointer to it    */
      87             :     /*      if there isn't already one.                                     */
      88             :     /* -------------------------------------------------------------------- */
      89      129591 :     int bMemoryError = FALSE;
      90             :     CSVTable **ppsCSVTableList =
      91      129591 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
      92      129591 :     if (bMemoryError)
      93           0 :         return nullptr;  // CPLGetTLSEx() flagged a memory error.
      94      129591 :     if (ppsCSVTableList == nullptr)
      95             :     {
      96             :         ppsCSVTableList =
      97           5 :             static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));  // Zeroed slot, i.e. an empty list head.
      98           5 :         if (ppsCSVTableList == nullptr)
      99           0 :             return nullptr;
     100           5 :         CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);  // CSVFreeTLS is registered as the free function for this slot.
     101             :     }
     102             : 
     103             :     /* -------------------------------------------------------------------- */
     104             :     /*      Is the table already in the list.                               */
     105             :     /* -------------------------------------------------------------------- */
     106     1000290 :     for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
     107      870703 :          psTable = psTable->psNext)
     108             :     {
     109     1000260 :         if (EQUAL(psTable->pszFilename, pszFilename))
     110             :         {
     111             :             /*
     112             :              * Eventually we should consider promoting to the front of
     113             :              * the list to accelerate frequently accessed tables.
     114             :              */
     115      129561 :             return psTable;
     116             :         }
     117             :     }
     118             : 
     119             :     /* -------------------------------------------------------------------- */
     120             :     /*      If not, try to open it.                                         */
     121             :     /* -------------------------------------------------------------------- */
     122          30 :     VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
     123          30 :     if (fp == nullptr)
     124           0 :         return nullptr;  // Open failed; the list is left unchanged.
     125             : 
     126             :     /* -------------------------------------------------------------------- */
     127             :     /*      Create an information structure about this table, and add to    */
     128             :     /*      the front of the list.                                          */
     129             :     /* -------------------------------------------------------------------- */
     130             :     CSVTable *const psTable =
     131          30 :         static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));  // Zero-filled, so every field not set below starts as 0/nullptr.
     132          30 :     if (psTable == nullptr)
     133             :     {
     134           0 :         VSIFCloseL(fp);
     135           0 :         return nullptr;
     136             :     }
     137             : 
     138          30 :     psTable->fp = fp;
     139          30 :     psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
     140          30 :     if (psTable->pszFilename == nullptr)
     141             :     {
     142           0 :         VSIFree(psTable);  // Not linked into the list yet, so it can simply be freed.
     143           0 :         VSIFCloseL(fp);
     144           0 :         return nullptr;
     145             :     }
     146          30 :     psTable->bNonUniqueKey = false;  // As far as we know now.
     147          30 :     psTable->psNext = *ppsCSVTableList;  // Old head becomes the second element.
     148             : 
     149          30 :     *ppsCSVTableList = psTable;  // New table is now the list head.
     150             : 
     151             :     /* -------------------------------------------------------------------- */
     152             :     /*      Read the table header record containing the field names.        */
     153             :     /* -------------------------------------------------------------------- */
     154          30 :     psTable->papszFieldNames = CSVReadParseLineL(fp);
     155          30 :     psTable->nFields = CSLCount(psTable->papszFieldNames);
     156          30 :     psTable->panFieldNamesLength =
     157          30 :         static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));  // One cached length per field name.
     158          30 :     for (int i = 0;
     159         185 :          i < psTable->nFields &&
     160             :          /* null-pointer check to avoid a false positive from CLang S.A. */
     161         155 :          psTable->papszFieldNames != nullptr;
     162             :          i++)
     163             :     {
     164         155 :         psTable->panFieldNamesLength[i] =
     165         155 :             static_cast<int>(strlen(psTable->papszFieldNames[i]));
     166             :     }
     167             : 
     168          30 :     return psTable;
     169             : }
     170             : 
     171             : /************************************************************************/
     172             : /*                            CSVDeaccess()                             */
     173             : /************************************************************************/
     174             : 
     175         945 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
     176             :                                 const char *pszFilename)
     177             : // Unlinks and frees the table named pszFilename, or every table when it is nullptr.
     178             : {
     179         945 :     if (ppsCSVTableList == nullptr)
     180         936 :         return;  // No list was supplied: nothing to release.
     181             : 
     182             :     /* -------------------------------------------------------------------- */
     183             :     /*      A NULL means deaccess all tables.                               */
     184             :     /* -------------------------------------------------------------------- */
     185           9 :     if (pszFilename == nullptr)
     186             :     {
     187           9 :         while (*ppsCSVTableList != nullptr)  // Each call below unlinks the current head, so the list shrinks to empty.
     188           5 :             CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
     189           5 :                                 (*ppsCSVTableList)->pszFilename);
     190             : 
     191           4 :         return;
     192             :     }
     193             : 
     194             :     /* -------------------------------------------------------------------- */
     195             :     /*      Find this table.                                                */
     196             :     /* -------------------------------------------------------------------- */
     197           5 :     CSVTable *psLast = nullptr;  // Trails one node behind psTable; stays nullptr if the match is the head.
     198           5 :     CSVTable *psTable = *ppsCSVTableList;
     199           5 :     for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
     200           0 :          psTable = psTable->psNext)
     201             :     {
     202           0 :         psLast = psTable;
     203             :     }
     204             : 
     205           5 :     if (psTable == nullptr)
     206             :     {
     207           0 :         if (bCanUseTLS)  // NOTE(review): presumably CPLDebug() relies on TLS and must be avoided from CSVFreeTLS() - confirm.
     208           0 :             CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);  // NOTE(review): message says CPLDeaccess, the public function is CSVDeaccess.
     209           0 :         return;
     210             :     }
     211             : 
     212             :     /* -------------------------------------------------------------------- */
     213             :     /*      Remove the link from the list.                                  */
     214             :     /* -------------------------------------------------------------------- */
     215           5 :     if (psLast != nullptr)
     216           0 :         psLast->psNext = psTable->psNext;
     217             :     else
     218           5 :         *ppsCSVTableList = psTable->psNext;
     219             : 
     220             :     /* -------------------------------------------------------------------- */
     221             :     /*      Free the table.                                                 */
     222             :     /* -------------------------------------------------------------------- */
     223           5 :     if (psTable->fp != nullptr)  // fp is already nullptr when CSVIngest() ran to completion.
     224           0 :         VSIFCloseL(psTable->fp);
     225             : 
     226           5 :     CSLDestroy(psTable->papszFieldNames);
     227           5 :     CPLFree(psTable->panFieldNamesLength);
     228           5 :     CSLDestroy(psTable->papszRecFields);
     229           5 :     CPLFree(psTable->pszFilename);
     230           5 :     CPLFree(psTable->panLineIndex);
     231           5 :     CPLFree(psTable->pszRawData);
     232           5 :     CPLFree(psTable->papszLines);  // Only the pointer array; the lines themselves live inside pszRawData.
     233             : 
     234           5 :     CPLFree(psTable);
     235             : 
     236           5 :     if (bCanUseTLS)
     237           5 :         CPLReadLine(nullptr);  // NOTE(review): presumably releases CPLReadLine()'s internal buffer - confirm.
     238             : }
     239             : 
     240         938 : void CSVDeaccess(const char *pszFilename)
     241             : {
     242             :     /* -------------------------------------------------------------------- */
     243             :     /*      Fetch the thread-local table list.  Nothing is allocated here; a*/
     244             :     /*      missing list (nullptr) is handled by CSVDeaccessInternal().     */
     245             :     /* -------------------------------------------------------------------- */
     246         938 :     int bMemoryError = FALSE;  // Filled in by CPLGetTLSEx(); not inspected in this function.
     247             :     CSVTable **ppsCSVTableList =
     248         938 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
     249             : 
     250         938 :     CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);  // true is bCanUseTLS; a nullptr file name releases every table.
     251         938 : }
     252             : 
     253             : /************************************************************************/
     254             : /*                            CSVSplitLine()                            */
     255             : /*                                                                      */
     256             : /*      Tokenize a CSV line into fields in the form of a string         */
     257             : /*      list.  This is used instead of the CPLTokenizeString()          */
     258             : /*      because it provides correct CSV escaping and quoting            */
     259             : /*      semantics.                                                      */
     260             : /************************************************************************/
     261             : 
     262      115950 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
     263             :                            bool bKeepLeadingAndClosingQuotes,
     264             :                            bool bMergeDelimiter)
     265             : // Returns a string list of the fields; a list holding only the NULL terminator for null or empty input.
     266             : {
     267      231900 :     CPLStringList aosRetList;
     268      115950 :     if (pszString == nullptr)
     269           0 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     270             : 
     271      115950 :     char *pszToken = static_cast<char *>(CPLCalloc(10, 1));  // Scratch buffer for the current field; grown on demand below.
     272      115950 :     int nTokenMax = 10;
     273      115950 :     const size_t nDelimiterLength = strlen(pszDelimiter);  // NOTE(review): an empty delimiter would match everywhere without advancing - confirm callers never pass "".
     274             : 
     275      115950 :     const char *pszIter = pszString;
     276      667368 :     while (*pszIter != '\0')
     277             :     {
     278      551418 :         bool bInString = false;  // Quote state is reset for every field.
     279             : 
     280      551418 :         int nTokenLen = 0;
     281             : 
     282             :         // Try to find the next delimiter, marking end of token.
     283     4633940 :         do
     284             :         {
     285             :             // Outside quotes, a delimiter ends the token: skip it and break.
     286     5185360 :             if (!bInString &&
     287     2706880 :                 strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
     288             :             {
     289      435788 :                 pszIter += nDelimiterLength;
     290      435788 :                 if (bMergeDelimiter)
     291             :                 {
     292           9 :                     while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
     293             :                            0)
     294           5 :                         pszIter += nDelimiterLength;  // Swallow a run of consecutive delimiters.
     295             :                 }
     296      435788 :                 break;
     297             :             }
     298             : 
     299     4749570 :             if (*pszIter == '"')
     300             :             {
     301      394207 :                 if (!bInString && nTokenLen > 0)
     302             :                 {
     303             :                     // do not treat in a special way double quotes that appear
     304             :                     // in the middle of a field (similarly to OpenOffice)
     305             :                     // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
     306             :                 }
     307      394122 :                 else if (!bInString || pszIter[1] != '"')
     308             :                 {
     309      393456 :                     bInString = !bInString;  // Opening quote, or a closing quote not followed by another quote.
     310      393456 :                     if (!bKeepLeadingAndClosingQuotes)
     311      393422 :                         continue;  // Quote is dropped; in a do/while, continue still evaluates the ++pszIter condition.
     312             :                 }
     313             :                 else  // Doubled quotes in string resolve to one quote.
     314             :                 {
     315         666 :                     pszIter++;  // Skip the first quote; the second one is copied below.
     316             :                 }
     317             :             }
     318             : 
     319     4356140 :             if (nTokenLen >= nTokenMax - 2)  // Grow early so the character and the terminator always fit.
     320             :             {
     321      122636 :                 nTokenMax = nTokenMax * 2 + 10;
     322      122636 :                 pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
     323             :             }
     324             : 
     325     4356140 :             pszToken[nTokenLen] = *pszIter;
     326     4356140 :             nTokenLen++;
     327     4749570 :         } while (*(++pszIter) != '\0');
     328             : 
     329      551418 :         pszToken[nTokenLen] = '\0';
     330      551418 :         aosRetList.AddString(pszToken);  // AddString() is given the scratch buffer, which is reused for the next field.
     331             : 
     332             :         // If the last token is an empty token, then we have to catch
     333             :         // it now, otherwise we won't reenter the loop and it will be lost.
     334      551418 :         if (*pszIter == '\0' &&
     335      115910 :             pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
     336      115910 :             strncmp(pszIter - nDelimiterLength, pszDelimiter,
     337             :                     nDelimiterLength) == 0)
     338             :         {
     339         280 :             aosRetList.AddString("");  // The input ended right after a delimiter: emit the trailing empty field.
     340             :         }
     341             :     }
     342             : 
     343      115950 :     CPLFree(pszToken);
     344             : 
     345      115950 :     if (aosRetList.Count() == 0)
     346          40 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));  // Empty input: a list holding just the NULL terminator.
     347             :     else
     348      115910 :         return aosRetList.StealList();
     349             : }
     350             : 
     351             : /************************************************************************/
     352             : /*                          CSVFindNextLine()                           */
     353             : /*                                                                      */
     354             : /*      Find the start of the next line, while at the same time zero    */
     355             : /*      terminating this line.  Take into account that there may be     */
     356             : /*      newline indicators within quoted strings, and that quotes       */
     357             : /*      can be escaped with a backslash.                                */
     358             : /************************************************************************/
     359             : 
     360        6329 : static char *CSVFindNextLine(char *pszThisLine)
     361             : // Returns the start of the following line, or nullptr when nothing follows the terminators.
     362             : {
     363        6329 :     int i = 0;  // i is used after the for loop.
     364             : 
     365      275213 :     for (int nQuoteCount = 0; pszThisLine[i] != '\0'; i++)
     366             :     {
     367      275213 :         if (pszThisLine[i] == '\"' && (i == 0 || pszThisLine[i - 1] != '\\'))  // Count quotes that are not preceded by a backslash.
     368       35430 :             nQuoteCount++;
     369             : 
     370      275213 :         if ((pszThisLine[i] == 10 || pszThisLine[i] == 13) &&  // 10 is LF, 13 is CR.
     371        6329 :             (nQuoteCount % 2) == 0)  // Even count: the line break is outside any quoted string.
     372        6329 :             break;
     373             :     }
     374             : 
     375       16573 :     while (pszThisLine[i] == 10 || pszThisLine[i] == 13)
     376       10244 :         pszThisLine[i++] = '\0';  // Overwrite the whole run of CR/LF, which terminates this line in place.
     377             : 
     378        6329 :     if (pszThisLine[i] == '\0')
     379          30 :         return nullptr;  // End of buffer reached.
     380             : 
     381        6299 :     return pszThisLine + i;
     382             : }
     383             : 
     384             : /************************************************************************/
     385             : /*                             CSVIngest()                              */
     386             : /*                                                                      */
     387             : /*      Load entire file into memory and setup index if possible.       */
     388             : /************************************************************************/
     389             : 
     390             : // TODO(schwehr): Clean up all the casting in CSVIngest.
     391       60312 : static void CSVIngest(CSVTable *psTable)
     392             : // Does nothing when pszRawData is already set; the early returns below leave the table only partly filled in.
     393             : {
     394       60312 :     if (psTable->pszRawData != nullptr)
     395       60282 :         return;
     396             : 
     397             :     /* -------------------------------------------------------------------- */
     398             :     /*      Ingest whole file.                                              */
     399             :     /* -------------------------------------------------------------------- */
     400          30 :     if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
     401             :     {
     402           0 :         CPLError(CE_Failure, CPLE_FileIO,
     403             :                  "Failed using seek end and tell to get file length: %s",
     404             :                  psTable->pszFilename);
     405           0 :         return;
     406             :     }
     407          30 :     const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);  // Offset after seeking to the end, i.e. the file size.
     408          30 :     if (static_cast<long>(nFileLen) == -1)
     409             :     {
     410           0 :         CPLError(CE_Failure, CPLE_FileIO,
     411             :                  "Failed using seek end and tell to get file length: %s",
     412             :                  psTable->pszFilename);
     413           0 :         return;
     414             :     }
     415          30 :     VSIRewindL(psTable->fp);
     416             : 
     417          30 :     psTable->pszRawData = static_cast<char *>(
     418          30 :         VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));  // +1 leaves room for the NUL terminator written below.
     419          30 :     if (psTable->pszRawData == nullptr)
     420           0 :         return;
     421          30 :     if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
     422          30 :                   psTable->fp) != static_cast<size_t>(nFileLen))
     423             :     {
     424           0 :         CPLFree(psTable->pszRawData);
     425           0 :         psTable->pszRawData = nullptr;  // Reset so that a later call attempts the ingest again.
     426             : 
     427           0 :         CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
     428             :                  psTable->pszFilename);
     429           0 :         return;
     430             :     }
     431             : 
     432          30 :     psTable->pszRawData[nFileLen] = '\0';
     433             : 
     434             :     /* -------------------------------------------------------------------- */
     435             :     /*      Get count of newlines so we can allocate line array.            */
     436             :     /* -------------------------------------------------------------------- */
     437          30 :     int nMaxLineCount = 0;
     438      279158 :     for (int i = 0; i < static_cast<int>(nFileLen); i++)  // NOTE(review): the int cast truncates for files larger than INT_MAX bytes.
     439             :     {
     440      279128 :         if (psTable->pszRawData[i] == 10)  // Only LF (10) is counted here, while CSVFindNextLine() also splits on CR.
     441        6329 :             nMaxLineCount++;
     442             :     }
     443             : 
     444          30 :     psTable->papszLines =
     445          30 :         static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));  // nMaxLineCount is the capacity; the fill loop below never exceeds it.
     446          30 :     if (psTable->papszLines == nullptr)
     447           0 :         return;
     448             : 
     449             :     /* -------------------------------------------------------------------- */
     450             :     /*      Build a list of record pointers into the raw data buffer        */
     451             :     /*      based on line terminators.  Zero terminate the line             */
     452             :     /*      strings.                                                        */
     453             :     /* -------------------------------------------------------------------- */
     454             :     /* skip header line */
     455          30 :     char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
     456             : 
     457          30 :     int iLine = 0;
     458        6329 :     while (pszThisLine != nullptr && iLine < nMaxLineCount)
     459             :     {
     460        6299 :         if (pszThisLine[0] != '#')  // Lines starting with '#' are not recorded.
     461        6288 :             psTable->papszLines[iLine++] = pszThisLine;
     462        6299 :         pszThisLine = CSVFindNextLine(pszThisLine);
     463             :     }
     464             : 
     465          30 :     psTable->nLineCount = iLine;
     466             : 
     467             :     /* -------------------------------------------------------------------- */
     468             :     /*      Allocate and populate index array.  Ensure they are in          */
     469             :     /*      ascending order so that binary searches can be done on the      */
     470             :     /*      array.                                                          */
     471             :     /* -------------------------------------------------------------------- */
     472          30 :     psTable->panLineIndex = static_cast<int *>(
     473          30 :         VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
     474          30 :     if (psTable->panLineIndex == nullptr)  // NOTE(review): with nLineCount == 0 the request is 0 bytes; if that yields nullptr, this return skips the iLastLine reset and the close below - confirm.
     475           0 :         return;
     476             : 
     477        6232 :     for (int i = 0; i < psTable->nLineCount; i++)
     478             :     {
     479        6204 :         psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);  // Integer value parsed from the start of the line.
     480             : 
     481        6204 :         if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
     482             :         {
     483           2 :             CPLFree(psTable->panLineIndex);  // Values are not ascending: drop the index altogether.
     484           2 :             psTable->panLineIndex = nullptr;
     485           2 :             break;
     486             :         }
     487             :     }
     488             : 
     489          30 :     psTable->iLastLine = -1;
     490             : 
     491             :     /* -------------------------------------------------------------------- */
     492             :     /*      We should never need the file handle again, so close it.        */
     493             :     /* -------------------------------------------------------------------- */
     494          30 :     VSIFCloseL(psTable->fp);
     495          30 :     psTable->fp = nullptr;  // CSVDeaccessInternal() checks this before closing.
     496             : }
     497             : 
     498       60312 : static void CSVIngest(const char *pszFilename)
     499             : // File-name overload: obtains the table through CSVAccess() and ingests it.
     500             : {
     501       60312 :     CSVTable *psTable = CSVAccess(pszFilename);
     502       60312 :     if (psTable == nullptr)
     503             :     {
     504           0 :         CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
     505             :                  pszFilename);
     506           0 :         return;
     507             :     }
     508       60312 :     CSVIngest(psTable);  // Returns immediately when the table was already ingested.
     509             : }
     510             : 
     511             : /************************************************************************/
     512             : /*                        CSVDetectSeperator()                          */
     513             : /************************************************************************/
     514             : 
     515             : /** Detect which field separator is used.
     516             :  *
     517             :  * Currently, it can detect comma, semicolon, space, tabulation or pipe.
     518             :  * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
     519             :  * most occurrences will be selected (and a warning emitted).
     520             :  * If no separator found, comma will be considered as the separator.
     521             :  *
     522             :  * @return ',', ';', ' ', tabulation character or '|'.
     523             :  */
     524         581 : char CSVDetectSeperator(const char *pszLine)
     525             : {
     526         581 :     bool bInString = false;
     527         581 :     int nCountComma = 0;
     528         581 :     int nCountSemicolon = 0;
     529         581 :     int nCountTab = 0;
     530         581 :     int nCountPipe = 0;
     531         581 :     int nCountSpace = 0;
     532             : 
     533       26359 :     for (; *pszLine != '\0'; pszLine++)
     534             :     {
     535       25778 :         if (!bInString && *pszLine == ',')
     536             :         {
     537        2075 :             nCountComma++;
     538             :         }
     539       23703 :         else if (!bInString && *pszLine == ';')
     540             :         {
     541          10 :             nCountSemicolon++;
     542             :         }
     543       23693 :         else if (!bInString && *pszLine == '\t')
     544             :         {
     545          29 :             nCountTab++;
     546             :         }
     547       23664 :         else if (!bInString && *pszLine == '|')
     548             :         {
     549           9 :             nCountPipe++;
     550             :         }
     551       23655 :         else if (!bInString && *pszLine == ' ')
     552             :         {
     553         290 :             nCountSpace++;
     554             :         }
     555       23365 :         else if (*pszLine == '"')
     556             :         {
     557         519 :             if (!bInString || pszLine[1] != '"')
     558             :             {
     559         519 :                 bInString = !bInString;
     560         519 :                 continue;
     561             :             }
     562             :             else /* doubled quotes in string resolve to one quote */
     563             :             {
     564           0 :                 pszLine++;
     565             :             }
     566             :         }
     567             :     }
     568             : 
     569             :     const int nMaxCountExceptSpace =
     570             :         std::max(std::max(nCountComma, nCountSemicolon),
     571         581 :                  std::max(nCountTab, nCountPipe));
     572         581 :     char chDelimiter = ',';
     573         581 :     if (nMaxCountExceptSpace == 0)
     574             :     {
     575          35 :         if (nCountSpace > 0)
     576           9 :             chDelimiter = ' ';
     577             :     }
     578             :     else
     579             :     {
     580         546 :         bool bWarn = false;
     581         546 :         if (nCountComma == nMaxCountExceptSpace)
     582             :         {
     583         530 :             chDelimiter = ',';
     584         530 :             bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
     585             :         }
     586          16 :         else if (nCountSemicolon == nMaxCountExceptSpace)
     587             :         {
     588           5 :             chDelimiter = ';';
     589           5 :             bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
     590             :         }
     591          11 :         else if (nCountTab == nMaxCountExceptSpace)
     592             :         {
     593           6 :             chDelimiter = '\t';
     594           6 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
     595             :         }
     596             :         else /* if( nCountPipe == nMaxCountExceptSpace ) */
     597             :         {
     598           5 :             chDelimiter = '|';
     599           5 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
     600             :         }
     601         546 :         if (bWarn)
     602             :         {
     603           6 :             CPLError(CE_Warning, CPLE_AppDefined,
     604             :                      "Selecting '%c' as CSV field separator, but "
     605             :                      "other candidate separator(s) have been found.",
     606             :                      chDelimiter);
     607             :         }
     608             :     }
     609             : 
     610         581 :     return chDelimiter;
     611             : }
     612             : 
     613             : /************************************************************************/
     614             : /*                      CSVReadParseLineGeneric()                       */
     615             : /*                                                                      */
     616             : /*      Read one line, and return split into fields.  The return        */
     617             : /*      result is a stringlist, in the sense of the CSL functions.      */
     618             : /************************************************************************/
     619             : 
     620             : static char **
     621       57273 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
     622             :                         size_t nMaxLineSize, const char *pszDelimiter,
     623             :                         bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
     624             :                         bool bMergeDelimiter, bool bSkipBOM)
     625             : {
     626       57273 :     const char *pszLine = pfnReadLine(fp, nMaxLineSize);
     627       57273 :     if (pszLine == nullptr)
     628        1361 :         return nullptr;
     629             : 
     630       55912 :     if (bSkipBOM)
     631             :     {
     632             :         // Skip BOM.
     633       55545 :         const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
     634       55545 :         if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
     635           4 :             pszLine += 3;
     636             :     }
     637             : 
     638             :     // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
     639       55912 :     if (!bHonourStrings)
     640             :     {
     641           2 :         return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
     642             :     }
     643             : 
     644             :     // If there are no quotes, then this is the simple case.
     645             :     // Parse, and return tokens.
     646       55910 :     if (strchr(pszLine, '\"') == nullptr)
     647       48293 :         return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
     648       48293 :                             bMergeDelimiter);
     649             : 
     650             :     try
     651             :     {
     652             :         // We must now count the quotes in our working string, and as
     653             :         // long as it is odd, keep adding new lines.
     654        7617 :         std::string osWorkLine(pszLine);
     655             : 
     656        7617 :         size_t i = 0;
     657        7617 :         int nCount = 0;
     658             : 
     659             :         while (true)
     660             :         {
     661      791978 :             for (; i < osWorkLine.size(); i++)
     662             :             {
     663      783606 :                 if (osWorkLine[i] == '\"')
     664       59591 :                     nCount++;
     665             :             }
     666             : 
     667        8372 :             if (nCount % 2 == 0)
     668        7616 :                 break;
     669             : 
     670         756 :             pszLine = pfnReadLine(fp, nMaxLineSize);
     671         756 :             if (pszLine == nullptr)
     672           1 :                 break;
     673             : 
     674         755 :             osWorkLine.append("\n");
     675         755 :             osWorkLine.append(pszLine);
     676             :         }
     677             : 
     678             :         char **papszReturn =
     679        7617 :             CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
     680             :                          bKeepLeadingAndClosingQuotes, bMergeDelimiter);
     681             : 
     682        7617 :         return papszReturn;
     683             :     }
     684           0 :     catch (const std::exception &e)
     685             :     {
     686           0 :         CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
     687           0 :         return nullptr;
     688             :     }
     689             : }
     690             : 
     691             : /************************************************************************/
     692             : /*                          CSVReadParseLine()                          */
     693             : /*                                                                      */
     694             : /*      Read one line, and return split into fields.  The return        */
     695             : /*      result is a stringlist, in the sense of the CSL functions.      */
     696             : /*                                                                      */
     697             : /*      Deprecated.  Replaced by CSVReadParseLineL().                   */
     698             : /************************************************************************/
     699             : 
     700           0 : char **CSVReadParseLine(FILE *fp)
     701             : {
     702           0 :     return CSVReadParseLine2(fp, ',');
     703             : }
     704             : 
     705           0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
     706             : {
     707           0 :     return CPLReadLine(static_cast<FILE *>(fp));
     708             : }
     709             : 
     710           0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
     711             : {
     712           0 :     CPLAssert(fp != nullptr);
     713           0 :     if (fp == nullptr)
     714           0 :         return nullptr;
     715             : 
     716           0 :     char szDelimiter[2] = {chDelimiter, 0};
     717           0 :     return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
     718             :                                    0,  // nMaxLineSize,
     719             :                                    szDelimiter,
     720             :                                    true,   // bHonourStrings
     721             :                                    false,  // bKeepLeadingAndClosingQuotes
     722             :                                    false,  // bMergeDelimiter
     723           0 :                                    true /* bSkipBOM */);
     724             : }
     725             : 
     726             : /************************************************************************/
     727             : /*                          CSVReadParseLineL()                         */
     728             : /*                                                                      */
     729             : /*      Read one line, and return split into fields.  The return        */
     730             : /*      result is a stringlist, in the sense of the CSL functions.      */
     731             : /*                                                                      */
     732             : /*      Replaces CSVReadParseLine().  These functions use the VSI       */
     733             : /*      layer to allow reading from other file containers.              */
     734             : /************************************************************************/
     735             : 
     736        3910 : char **CSVReadParseLineL(VSILFILE *fp)
     737             : {
     738        3910 :     return CSVReadParseLine2L(fp, ',');
     739             : }
     740             : 
     741        3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
     742             : 
     743             : {
     744        3910 :     CPLAssert(fp != nullptr);
     745        3910 :     if (fp == nullptr)
     746           0 :         return nullptr;
     747             : 
     748        3910 :     char szDelimiter[2] = {chDelimiter, 0};
     749        3910 :     return CSVReadParseLine3L(fp,
     750             :                               0,  // nMaxLineSize
     751             :                               szDelimiter,
     752             :                               true,   // bHonourStrings
     753             :                               false,  // bKeepLeadingAndClosingQuotes
     754             :                               false,  // bMergeDelimiter
     755        3910 :                               true /* bSkipBOM */);
     756             : }
     757             : 
     758             : /************************************************************************/
     759             : /*                      ReadLineLargeFile()                             */
     760             : /************************************************************************/
     761             : 
     762       58029 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
     763             : {
     764       58029 :     int nBufLength = 0;
     765       58029 :     return CPLReadLine3L(static_cast<VSILFILE *>(fp),
     766             :                          nMaxLineSize == 0 ? -1
     767             :                                            : static_cast<int>(nMaxLineSize),
     768      116058 :                          &nBufLength, nullptr);
     769             : }
     770             : 
     771             : /************************************************************************/
     772             : /*                      CSVReadParseLine3L()                            */
     773             : /*                                                                      */
     774             : /*      Read one line, and return split into fields.  The return        */
     775             : /*      result is a stringlist, in the sense of the CSL functions.      */
     776             : /************************************************************************/
     777             : 
     778             : /** Read one line, and return split into fields.
     779             :  * The return result is a stringlist, in the sense of the CSL functions.
     780             :  *
     781             :  * @param fp File handle. Must not be NULL
     782             :  * @param nMaxLineSize Maximum line size, or 0 for unlimited.
     783             :  * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
     784             :  * @param bHonourStrings Should be true, unless double quotes should not be
     785             :  *                       considered when separating fields.
     786             :  * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
     787             :  *                                     quote characters should be kept.
     788             :  * @param bMergeDelimiter Whether consecutive delimiters should be considered
     789             :  *                        as a single one. Should generally be set to false.
     790             :  * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
     791             :  */
     792       57273 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
     793             :                           const char *pszDelimiter, bool bHonourStrings,
     794             :                           bool bKeepLeadingAndClosingQuotes,
     795             :                           bool bMergeDelimiter, bool bSkipBOM)
     796             : 
     797             : {
     798       57273 :     return CSVReadParseLineGeneric(
     799             :         fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
     800       57273 :         bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
     801             : }
     802             : 
     803             : /************************************************************************/
     804             : /*                             CSVCompare()                             */
     805             : /*                                                                      */
     806             : /*      Compare a field to a search value using a particular            */
     807             : /*      criteria.                                                       */
     808             : /************************************************************************/
     809             : 
     810         609 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
     811             :                        CSVCompareCriteria eCriteria)
     812             : 
     813             : {
     814         609 :     if (eCriteria == CC_ExactString)
     815             :     {
     816           0 :         return (strcmp(pszFieldValue, pszTarget) == 0);
     817             :     }
     818         609 :     else if (eCriteria == CC_ApproxString)
     819             :     {
     820         270 :         return EQUAL(pszFieldValue, pszTarget);
     821             :     }
     822         339 :     else if (eCriteria == CC_Integer)
     823             :     {
     824         638 :         return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
     825         638 :                 atoi(pszFieldValue) == atoi(pszTarget));
     826             :     }
     827             : 
     828           0 :     return false;
     829             : }
     830             : 
     831             : /************************************************************************/
     832             : /*                            CSVScanLines()                            */
     833             : /*                                                                      */
     834             : /*      Read the file scanline for lines where the key field equals     */
     835             : /*      the indicated value with the suggested comparison criteria.     */
     836             : /*      Return the first matching line split into fields.               */
     837             : /*                                                                      */
     838             : /*      Deprecated.  Replaced by CSVScanLinesL().                       */
     839             : /************************************************************************/
     840             : 
     841           0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
     842             :                     CSVCompareCriteria eCriteria)
     843             : 
     844             : {
     845           0 :     CPLAssert(pszValue != nullptr);
     846           0 :     CPLAssert(iKeyField >= 0);
     847           0 :     CPLAssert(fp != nullptr);
     848             : 
     849           0 :     bool bSelected = false;
     850           0 :     const int nTestValue = atoi(pszValue);
     851           0 :     char **papszFields = nullptr;
     852             : 
     853           0 :     while (!bSelected)
     854             :     {
     855           0 :         papszFields = CSVReadParseLine(fp);
     856           0 :         if (papszFields == nullptr)
     857           0 :             return nullptr;
     858             : 
     859           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     860             :         {
     861             :             /* not selected */
     862             :         }
     863           0 :         else if (eCriteria == CC_Integer &&
     864           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     865             :         {
     866           0 :             bSelected = true;
     867             :         }
     868             :         else
     869             :         {
     870           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     871             :         }
     872             : 
     873           0 :         if (!bSelected)
     874             :         {
     875           0 :             CSLDestroy(papszFields);
     876           0 :             papszFields = nullptr;
     877             :         }
     878             :     }
     879             : 
     880           0 :     return papszFields;
     881             : }
     882             : 
     883             : /************************************************************************/
     884             : /*                            CSVScanLinesL()                           */
     885             : /*                                                                      */
     886             : /*      Read the file scanline for lines where the key field equals     */
     887             : /*      the indicated value with the suggested comparison criteria.     */
     888             : /*      Return the first matching line split into fields.               */
     889             : /************************************************************************/
     890             : 
     891           0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
     892             :                      CSVCompareCriteria eCriteria)
     893             : 
     894             : {
     895           0 :     CPLAssert(pszValue != nullptr);
     896           0 :     CPLAssert(iKeyField >= 0);
     897           0 :     CPLAssert(fp != nullptr);
     898             : 
     899           0 :     bool bSelected = false;
     900           0 :     const int nTestValue = atoi(pszValue);
     901           0 :     char **papszFields = nullptr;
     902             : 
     903           0 :     while (!bSelected)
     904             :     {
     905           0 :         papszFields = CSVReadParseLineL(fp);
     906           0 :         if (papszFields == nullptr)
     907           0 :             return nullptr;
     908             : 
     909           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     910             :         {
     911             :             /* not selected */
     912             :         }
     913           0 :         else if (eCriteria == CC_Integer &&
     914           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     915             :         {
     916           0 :             bSelected = true;
     917             :         }
     918             :         else
     919             :         {
     920           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     921             :         }
     922             : 
     923           0 :         if (!bSelected)
     924             :         {
     925           0 :             CSLDestroy(papszFields);
     926           0 :             papszFields = nullptr;
     927             :         }
     928             :     }
     929             : 
     930           0 :     return papszFields;
     931             : }
     932             : 
     933             : /************************************************************************/
     934             : /*                        CSVScanLinesIndexed()                         */
     935             : /*                                                                      */
     936             : /*      Read the file scanline for lines where the key field equals     */
     937             : /*      the indicated value with the suggested comparison criteria.     */
     938             : /*      Return the first matching line split into fields.               */
     939             : /************************************************************************/
     940             : 
     941          22 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
     942             : 
     943             : {
     944          22 :     CPLAssert(psTable->panLineIndex != nullptr);
     945             : 
     946             :     /* -------------------------------------------------------------------- */
     947             :     /*      Find target record with binary search.                          */
     948             :     /* -------------------------------------------------------------------- */
     949          22 :     int iTop = psTable->nLineCount - 1;
     950          22 :     int iBottom = 0;
     951          22 :     int iResult = -1;
     952             : 
     953         159 :     while (iTop >= iBottom)
     954             :     {
     955         159 :         const int iMiddle = (iTop + iBottom) / 2;
     956         159 :         if (psTable->panLineIndex[iMiddle] > nKeyValue)
     957          93 :             iTop = iMiddle - 1;
     958          66 :         else if (psTable->panLineIndex[iMiddle] < nKeyValue)
     959          44 :             iBottom = iMiddle + 1;
     960             :         else
     961             :         {
     962          22 :             iResult = iMiddle;
     963             :             // if a key is not unique, select the first instance of it.
     964          22 :             while (iResult > 0 &&
     965          22 :                    psTable->panLineIndex[iResult - 1] == nKeyValue)
     966             :             {
     967           0 :                 psTable->bNonUniqueKey = true;
     968           0 :                 iResult--;
     969             :             }
     970          22 :             break;
     971             :         }
     972             :     }
     973             : 
     974          22 :     if (iResult == -1)
     975           0 :         return nullptr;
     976             : 
     977             :     /* -------------------------------------------------------------------- */
     978             :     /*      Parse target line, and update iLastLine indicator.              */
     979             :     /* -------------------------------------------------------------------- */
     980          22 :     psTable->iLastLine = iResult;
     981             : 
     982          22 :     return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
     983             : }
     984             : 
     985             : /************************************************************************/
     986             : /*                        CSVScanLinesIngested()                        */
     987             : /*                                                                      */
     988             : /*      Read the file scanline for lines where the key field equals     */
     989             : /*      the indicated value with the suggested comparison criteria.     */
     990             : /*      Return the first matching line split into fields.               */
     991             : /************************************************************************/
     992             : 
     993          29 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
     994             :                                    const char *pszValue,
     995             :                                    CSVCompareCriteria eCriteria)
     996             : 
     997             : {
     998          29 :     CPLAssert(pszValue != nullptr);
     999          29 :     CPLAssert(iKeyField >= 0);
    1000             : 
    1001          29 :     const int nTestValue = atoi(pszValue);
    1002             : 
    1003             :     /* -------------------------------------------------------------------- */
    1004             :     /*      Short cut for indexed files.                                    */
    1005             :     /* -------------------------------------------------------------------- */
    1006          29 :     if (iKeyField == 0 && eCriteria == CC_Integer &&
    1007          22 :         psTable->panLineIndex != nullptr)
    1008          22 :         return CSVScanLinesIndexed(psTable, nTestValue);
    1009             : 
    1010             :     /* -------------------------------------------------------------------- */
    1011             :     /*      Scan from in-core lines.                                        */
    1012             :     /* -------------------------------------------------------------------- */
    1013           7 :     char **papszFields = nullptr;
    1014           7 :     bool bSelected = false;
    1015             : 
    1016         484 :     while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
    1017             :     {
    1018         477 :         psTable->iLastLine++;
    1019         477 :         papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
    1020             :                                    false, false);
    1021             : 
    1022         477 :         if (CSLCount(papszFields) < iKeyField + 1)
    1023             :         {
    1024             :             /* not selected */
    1025             :         }
    1026         477 :         else if (eCriteria == CC_Integer &&
    1027         242 :                  atoi(papszFields[iKeyField]) == nTestValue)
    1028             :         {
    1029           2 :             bSelected = true;
    1030             :         }
    1031             :         else
    1032             :         {
    1033         475 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
    1034             :         }
    1035             : 
    1036         477 :         if (!bSelected)
    1037             :         {
    1038         470 :             CSLDestroy(papszFields);
    1039         470 :             papszFields = nullptr;
    1040             :         }
    1041             :     }
    1042             : 
    1043           7 :     return papszFields;
    1044             : }
    1045             : 
    1046             : /************************************************************************/
    1047             : /*                            CSVRewind()                               */
    1048             : /*                                                                      */
    1049             : /*      Rewind a CSV file based on a passed in filename.                */
    1050             : /*      This is aimed at being used with CSVGetNextLine().              */
    1051             : /************************************************************************/
    1052             : 
    1053        1838 : void CSVRewind(const char *pszFilename)
    1054             : 
    1055             : {
    1056             :     /* -------------------------------------------------------------------- */
    1057             :     /*      Get access to the table.                                        */
    1058             :     /* -------------------------------------------------------------------- */
    1059        1838 :     CPLAssert(pszFilename != nullptr);
    1060             : 
    1061        1838 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1062        1838 :     if (psTable != nullptr)
    1063        1838 :         psTable->iLastLine = -1;
    1064        1838 : }
    1065             : 
    1066             : /************************************************************************/
    1067             : /*                           CSVGetNextLine()                           */
    1068             : /*                                                                      */
    1069             : /*      Fetch the next line of a CSV file based on a passed in          */
    1070             : /*      filename.  Returns NULL at end of file, or if file is not       */
    1071             : /*      really established.                                             */
    1072             : /*      This ingests the whole file into memory if not already done.    */
    1073             : /*      When reaching end of file, CSVRewind() may be used to read      */
    1074             : /*      again from the beginning.                                       */
    1075             : /************************************************************************/
    1076             : 
    1077       60169 : char **CSVGetNextLine(const char *pszFilename)
    1078             : // Returns the split fields of the next line of the table, or nullptr when no table or no further line exists.
    1079             : {
    1080             : 
    1081             :     /* -------------------------------------------------------------------- */
    1082             :     /*      Get access to the table.                                        */
    1083             :     /* -------------------------------------------------------------------- */
    1084       60169 :     CPLAssert(pszFilename != nullptr);
    1085             : 
    1086       60169 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1087       60169 :     if (psTable == nullptr)
    1088           0 :         return nullptr;
    1089             : 
    1090       60169 :     CSVIngest(psTable->pszFilename);  // The lookup below reads psTable->papszLines / nLineCount.
    1091             : 
    1092             :     /* -------------------------------------------------------------------- */
    1093             :     /*      If we use CSVGetNextLine() we can pretty much assume we have    */
    1094             :     /*      a non-unique key.                                               */
    1095             :     /* -------------------------------------------------------------------- */
    1096       60169 :     psTable->bNonUniqueKey = true;  // Disables the cached-record shortcut in CSVScanFile().
    1097             : 
    1098             :     /* -------------------------------------------------------------------- */
    1099             :     /*      Do we have a next line available?  This only works for          */
    1100             :     /*      ingested tables I believe.                                      */
    1101             :     /* -------------------------------------------------------------------- */
    1102       60169 :     if (psTable->iLastLine + 1 >= psTable->nLineCount)
    1103         628 :         return nullptr;  // Cursor is left untouched; CSVRewind() resets it.
    1104             : 
    1105       59541 :     psTable->iLastLine++;
    1106       59541 :     CSLDestroy(psTable->papszRecFields);  // Drops the record handed out by the previous call.
    1107      119082 :     psTable->papszRecFields = CSVSplitLine(
    1108       59541 :         psTable->papszLines[psTable->iLastLine], ",", false, false);
    1109             : 
    1110       59541 :     return psTable->papszRecFields;  // Stored in the table, which destroys it on the next fetch or scan.
    1111             : }
    1112             : 
    1113             : /************************************************************************/
    1114             : /*                            CSVScanFile()                             */
    1115             : /*                                                                      */
    1116             : /*      Scan a whole file using criteria similar to above, but also     */
    1117             : /*      taking care of file opening and closing.                        */
    1118             : /************************************************************************/
    1119             : 
    1120         143 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
    1121             :                           const char *pszValue, CSVCompareCriteria eCriteria)
    1122             : {  // Looks up a record whose field iKeyField matches pszValue under eCriteria; the result is cached in psTable->papszRecFields.
    1123         143 :     CSVIngest(psTable->pszFilename);
    1124             : 
    1125             :     /* -------------------------------------------------------------------- */
    1126             :     /*      Does the current record match the criteria?  If so, just        */
    1127             :     /*      return it again.                                                */
    1128             :     /* -------------------------------------------------------------------- */
    1129         143 :     if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
    1130         400 :         CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
    1131         114 :         !psTable->bNonUniqueKey)  // The shortcut is skipped once CSVGetNextLine() has flagged the key as non-unique.
    1132             :     {
    1133         114 :         return psTable->papszRecFields;
    1134             :     }
    1135             : 
    1136             :     /* -------------------------------------------------------------------- */
    1137             :     /*      Scan the file from the beginning, replacing the ``current       */
    1138             :     /*      record'' in our structure with the one that is found.           */
    1139             :     /* -------------------------------------------------------------------- */
    1140          29 :     psTable->iLastLine = -1;
    1141          29 :     CSLDestroy(psTable->papszRecFields);  // Always reassigned by one of the two branches below.
    1142             : 
    1143          29 :     if (psTable->pszRawData != nullptr)
    1144          29 :         psTable->papszRecFields =
    1145          29 :             CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
    1146             :     else
    1147             :     {
    1148             :         // No raw data buffer: scan through the open file handle instead.
    1149           0 :         VSIRewindL(psTable->fp);
    1150           0 :         CPLReadLineL(psTable->fp); /* throw away the header line */
    1151             : 
    1152           0 :         psTable->papszRecFields =
    1153           0 :             CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
    1154             :     }
    1155             : 
    1156          29 :     return psTable->papszRecFields;
    1157             : }
    1157             : 
    1158           4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
    1159             :                    CSVCompareCriteria eCriteria)
    1160             : // Filename-based entry point: resolves the table with CSVAccess() and delegates to the CSVTable overload.
    1161             : {
    1162             :     /* -------------------------------------------------------------------- */
    1163             :     /*      Get access to the table.                                        */
    1164             :     /* -------------------------------------------------------------------- */
    1165           4 :     CPLAssert(pszFilename != nullptr);
    1166             : 
    1167           4 :     if (iKeyField < 0)
    1168           0 :         return nullptr;  // Negative field indexes are rejected before the table is accessed.
    1169             : 
    1170           4 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1171           4 :     if (psTable == nullptr)
    1172           0 :         return nullptr;
    1173             : 
    1174           4 :     return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
    1175             : }
    1176             : 
    1177             : /************************************************************************/
    1178             : /*                           CSVGetFieldId()                            */
    1179             : /*                                                                      */
    1180             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1181             : /*      and find the field with the indicated name.  Returns -1 if      */
    1182             : /*      it fails to find the field name.  Comparison is case            */
    1183             : /*      insensitive, but otherwise exact.  After this function has      */
    1184             : /*      been called the file pointer will be positioned just after      */
    1185             : /*      the first record.                                               */
    1186             : /*                                                                      */
    1187             : /*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
    1188             : /************************************************************************/
    1189             : 
    1190           0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
    1191             : // Returns the zero-based index of pszFieldName within the first record of fp, or -1 if it is not present.
    1192             : {
    1193           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1194             : 
    1195           0 :     VSIRewind(fp);  // Always parse the first record, whatever the current file position.
    1196             : 
    1197           0 :     char **papszFields = CSVReadParseLine(fp);
    1198           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1199             :     {
    1200           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1201             :         {
    1202           0 :             CSLDestroy(papszFields);  // Release the parsed record on the early-return path too.
    1203           0 :             return i;
    1204             :         }
    1205             :     }
    1206             : 
    1207           0 :     CSLDestroy(papszFields);
    1208             : 
    1209           0 :     return -1;
    1210             : }
    1211             : 
    1212             : /************************************************************************/
    1213             : /*                           CSVGetFieldIdL()                           */
    1214             : /*                                                                      */
    1215             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1216             : /*      and find the field with the indicated name.  Returns -1 if      */
    1217             : /*      it fails to find the field name.  Comparison is case            */
    1218             : /*      insensitive, but otherwise exact.  After this function has      */
    1219             : /*      been called the file pointer will be positioned just after      */
    1220             : /*      the first record.                                               */
    1221             : /************************************************************************/
    1222             : 
    1223           0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
    1224             : // VSILFILE variant: returns the zero-based index of pszFieldName within the first record of fp, or -1.
    1225             : {
    1226           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1227             : 
    1228           0 :     VSIRewindL(fp);  // Always parse the first record, whatever the current file position.
    1229             : 
    1230           0 :     char **papszFields = CSVReadParseLineL(fp);
    1231           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1232             :     {
    1233           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1234             :         {
    1235           0 :             CSLDestroy(papszFields);  // Release the parsed record on the early-return path too.
    1236           0 :             return i;
    1237             :         }
    1238             :     }
    1239             : 
    1240           0 :     CSLDestroy(papszFields);
    1241             : 
    1242           0 :     return -1;
    1243             : }
    1244             : 
    1245             : /************************************************************************/
    1246             : /*                         CSVGetFileFieldId()                          */
    1247             : /*                                                                      */
    1248             : /*      Same as CSVGetFieldId(), except that we get the file based      */
    1249             : /*      on filename, rather than having an existing handle.             */
    1250             : /************************************************************************/
    1251             : 
    1252        7407 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
    1253             : // Returns the index of pszFieldName in psTable->papszFieldNames, or -1 if the list is null or has no match.
    1254             : {
    1255             :     /* -------------------------------------------------------------------- */
    1256             :     /*      Find the requested field.                                       */
    1257             :     /* -------------------------------------------------------------------- */
    1258        7407 :     const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
    1259       18637 :     for (int i = 0; psTable->papszFieldNames != nullptr &&
    1260       18637 :                     psTable->papszFieldNames[i] != nullptr;
    1261             :          i++)
    1262             :     {
    1263             :         // Integer length test first; panFieldNamesLength[i] is presumably the cached strlen of name i (TODO confirm).
    1264       18637 :         if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
    1265       10401 :             EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
    1266             :         {
    1267        7407 :             return i;
    1268             :         }
    1269             :     }
    1270             : 
    1271           0 :     return -1;
    1272             : }
    1272             : 
    1273        7129 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
    1274             : // Filename-based entry point: returns the field index from the CSVTable overload, or -1 if the table is unavailable.
    1275             : {
    1276             :     /* -------------------------------------------------------------------- */
    1277             :     /*      Get access to the table.                                        */
    1278             :     /* -------------------------------------------------------------------- */
    1279        7129 :     CPLAssert(pszFilename != nullptr);
    1280             : 
    1281        7129 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1282        7129 :     if (psTable == nullptr)
    1283           0 :         return -1;
    1284        7129 :     return CSVGetFileFieldId(psTable, pszFieldName);
    1285             : }
    1286             : 
    1287             : /************************************************************************/
    1288             : /*                         CSVScanFileByName()                          */
    1289             : /*                                                                      */
    1290             : /*      Same as CSVScanFile(), but using a field name instead of a      */
    1291             : /*      field number.                                                   */
    1292             : /************************************************************************/
    1293             : 
    1294           4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
    1295             :                          const char *pszValue, CSVCompareCriteria eCriteria)
    1296             : // Resolves pszKeyFieldName to a field index, then scans with CSVScanFile(); nullptr if the field is unknown.
    1297             : {
    1298           4 :     const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
    1299           4 :     if (iKeyField == -1)
    1300           0 :         return nullptr;
    1301             : 
    1302           4 :     return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
    1303             : }
    1304             : 
    1305             : /************************************************************************/
    1306             : /*                            CSVGetField()                             */
    1307             : /*                                                                      */
    1308             : /*      The all-in-one function to fetch a particular field value       */
    1309             : /*      from a CSV file.  Note this function will return an empty       */
    1310             : /*      string, rather than NULL if it fails to find the desired        */
    1311             : /*      value for some reason.  The caller can't establish that the     */
    1312             : /*      fetch failed.                                                   */
    1313             : /************************************************************************/
    1314             : 
    1315         139 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
    1316             :                         const char *pszKeyFieldValue,
    1317             :                         CSVCompareCriteria eCriteria,
    1318             :                         const char *pszTargetField)
    1319             : // Returns the pszTargetField value of the record matched on pszKeyFieldName, or "" on any failure.
    1320             : {
    1321             :     /* -------------------------------------------------------------------- */
    1322             :     /*      Find the table.                                                 */
    1323             :     /* -------------------------------------------------------------------- */
    1324         139 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1325         139 :     if (psTable == nullptr)
    1326           0 :         return "";
    1327             : 
    1328         139 :     const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
    1329         139 :     if (iKeyField == -1)
    1330           0 :         return "";
    1331             : 
    1332             :     /* -------------------------------------------------------------------- */
    1333             :     /*      Find the correct record.                                        */
    1334             :     /* -------------------------------------------------------------------- */
    1335             :     char **papszRecord =
    1336         139 :         CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
    1337         139 :     if (papszRecord == nullptr)
    1338           0 :         return "";
    1339             : 
    1340             :     /* -------------------------------------------------------------------- */
    1341             :     /*      Figure out which field we want out of this.                     */
    1342             :     /* -------------------------------------------------------------------- */
    1343         139 :     const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
    1344         139 :     if (iTargetField < 0)
    1345           0 :         return "";
    1346             : 
    1347             :     // Walk to the target instead of indexing directly: a record shorter than iTargetField ends the loop at its null terminator.
    1348         386 :     for (int i = 0; papszRecord[i] != nullptr; ++i)
    1349             :     {
    1350         386 :         if (i == iTargetField)
    1351         139 :             return papszRecord[iTargetField];  // Points into the record cached in psTable.
    1352             :     }
    1353           0 :     return "";
    1354             : }
    1354             : 
    1355             : /************************************************************************/
    1356             : /*                       GDALDefaultCSVFilename()                       */
    1357             : /************************************************************************/
    1358             : 
    1359             : typedef struct  // State that GDALDefaultCSVFilename() keeps in the CTLS_CSVDEFAULTFILENAME TLS slot.
    1360             : {
    1361             :     char szPath[512];  // Receives a copy of the basename when the file cannot be located.
    1362             :     bool bCSVFinderInitialized;  // Set once the GDAL_DATA finder-location fallback has been attempted.
    1363             : } DefaultCSVFileNameTLS;
    1364             : 
    1365        2482 : const char *GDALDefaultCSVFilename(const char *pszBasename)
    1366             : // Default lookup of a CSV file path for pszBasename; never returns nullptr (falls back to the basename itself).
    1367             : {
    1368             :     /* -------------------------------------------------------------------- */
    1369             :     /*      Do we already have this file accessed?  If so, just return      */
    1370             :     /*      the existing path without any further probing.                  */
    1371             :     /* -------------------------------------------------------------------- */
    1372        2482 :     int bMemoryError = FALSE;
    1373             :     CSVTable **ppsCSVTableList =
    1374        2482 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
    1375        2482 :     if (ppsCSVTableList != nullptr)
    1376             :     {
    1377        2476 :         const size_t nBasenameLen = strlen(pszBasename);
    1378             : 
    1379       23033 :         for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
    1380       20557 :              psTable = psTable->psNext)
    1381             :         {
    1382       22481 :             const size_t nFullLen = strlen(psTable->pszFilename);
    1383             : 
    1384             :             // Match when pszFilename ends with pszBasename and the character just before that suffix is '/' or '\\'.
    1385       22481 :             if (nFullLen > nBasenameLen &&
    1386       22481 :                 strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
    1387        1924 :                        pszBasename) == 0 &&
    1388        1924 :                 strchr("/\\",
    1389        1924 :                        psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
    1390             :                     nullptr)
    1391             :             {
    1392        1924 :                 return psTable->pszFilename;
    1393             :             }
    1394             :         }
    1395             :     }
    1396             : 
    1397             :     /* -------------------------------------------------------------------- */
    1398             :     /*      Otherwise we need to look harder for it.                        */
    1399             :     /* -------------------------------------------------------------------- */
    1400             :     DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1401         558 :         CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
    1402         558 :     if (pTLSData == nullptr && !bMemoryError)  // Slot not populated yet: allocate its storage.
    1403             :     {
    1404             :         pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1405           5 :             VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
    1406           5 :         if (pTLSData)
    1407           5 :             CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
    1408             :     }
    1409         558 :     if (pTLSData == nullptr)
    1410           0 :         return "/not_existing_dir/not_existing_path";  // No TLS storage available: return a placeholder path.
    1411             : 
    1412         558 :     const char *pszResult = CPLFindFile("gdal", pszBasename);
    1413             : 
    1414         558 :     if (pszResult != nullptr)
    1415          43 :         return pszResult;
    1416             : 
    1417         515 :     if (!pTLSData->bCSVFinderInitialized)  // Done at most once per TLS slot.
    1418             :     {
    1419           2 :         pTLSData->bCSVFinderInitialized = true;
    1420             : 
    1421             :         // Register GDAL_DATA, when configured, as an extra finder location and retry the lookup.
    1422           2 :         if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
    1423           2 :             CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
    1424             : 
    1425           2 :         pszResult = CPLFindFile("gdal", pszBasename);
    1426             : 
    1427           2 :         if (pszResult != nullptr)
    1428           0 :             return pszResult;
    1429             :     }
    1430             : 
    1431             :     // For systems like sandboxes that do not allow other checks.
    1432         515 :     CPLDebug("CPL_CSV",
    1433             :              "Failed to find file in GDALDefaultCSVFilename.  "
    1434             :              "Returning original basename: %s",
    1435             :              pszBasename);
    1436         515 :     CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));  // Bounded copy into the TLS-held buffer.
    1437         515 :     return pTLSData->szPath;
    1438             : }
    1437             : 
    1438             : /************************************************************************/
    1439             : /*                            CSVFilename()                             */
    1440             : /*                                                                      */
    1441             : /*      Return the full path to a particular CSV file.  This will       */
    1442             : /*      eventually be something the application can override.           */
    1443             : /************************************************************************/
    1444             : 
    1445             : CPL_C_START
    1446             : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;  // Set by SetCSVFilenameHook(); nullptr makes CSVFilename() use GDALDefaultCSVFilename().
    1447             : CPL_C_END
    1448             : 
    1449        2482 : const char *CSVFilename(const char *pszBasename)
    1450             : // Returns the path for pszBasename from the installed hook, or from GDALDefaultCSVFilename() when no hook is set.
    1451             : {
    1452        2482 :     if (pfnCSVFilenameHook == nullptr)
    1453        2482 :         return GDALDefaultCSVFilename(pszBasename);
    1454             : 
    1455           0 :     return pfnCSVFilenameHook(pszBasename);
    1456             : }
    1457             : 
    1458             : /************************************************************************/
    1459             : /*                         SetCSVFilenameHook()                         */
    1460             : /*                                                                      */
    1461             : /*      Applications can use this to set a function that will           */
    1462             : /*      massage CSV filenames.                                          */
    1463             : /************************************************************************/
    1464             : 
    1465             : /**
    1466             :  * Override CSV file search method.
    1467             :  *
    1468             :  * @param pfnNewHook The pointer to a function which will return the
    1469             :  * full path for a given filename.
    1470             :  *
    1471             : 
    1472             : This function allows an application to override how GTIFGetDefn()
    1473             : and related functions find the CSV (Comma Separated Value) files
    1474             : required. The pfnNewHook argument should be a pointer to a function that
    1475             : takes a CSV filename and returns a full path to the file. The
    1476             : returned string should point to an internal static buffer so that the
    1477             : caller doesn't have to free the result.
    1478             : 
    1479             : <b>Example:</b><br>
    1480             : 
    1481             : The listgeo utility uses the following override function if the user
    1482             : specified a CSV file directory with the -t commandline switch (argument
    1483             : put into CSVDirName).  <p>
    1484             : 
    1485             : <pre>
    1486             : 
    1487             :     ...
    1488             :     SetCSVFilenameHook( CSVFileOverride );
    1489             :     ...
    1490             : 
    1491             : static const char *CSVFileOverride( const char * pszInput )
    1492             : 
    1493             : {
    1494             :     static char szPath[1024] = {};
    1495             : 
    1496             :     sprintf( szPath, "%s/%s", CSVDirName, pszInput );
    1497             : 
    1498             :     return szPath;
    1499             : }
    1500             : </pre>
    1501             : 
    1502             : */
    1503             : 
    1504             : CPL_C_START
    1505           0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
    1506             : // Stores pfnNewHook for use by CSVFilename(); passing nullptr restores the default lookup.
    1507             : {
    1508           0 :     pfnCSVFilenameHook = pfnNewHook;
    1509           0 : }
    1510             : 
    1511             : CPL_C_END

Generated by: LCOV version 1.14