LCOV - code coverage report
Current view: top level - port - cpl_csv.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 394 525 75.0 %
Date: 2024-04-28 23:18:46 Functions: 27 35 77.1 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  CPL - Common Portability Library
       4             :  * Purpose:  CSV (comma separated value) file access.
       5             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 1999, Frank Warmerdam
       9             :  * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * Permission is hereby granted, free of charge, to any person obtaining a
      12             :  * copy of this software and associated documentation files (the "Software"),
      13             :  * to deal in the Software without restriction, including without limitation
      14             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      15             :  * and/or sell copies of the Software, and to permit persons to whom the
      16             :  * Software is furnished to do so, subject to the following conditions:
      17             :  *
      18             :  * The above copyright notice and this permission notice shall be included
      19             :  * in all copies or substantial portions of the Software.
      20             :  *
      21             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      22             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      23             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
      24             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      25             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      26             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      27             :  * DEALINGS IN THE SOFTWARE.
      28             :  ****************************************************************************/
      29             : 
      30             : #include "cpl_port.h"
      31             : #include "cpl_csv.h"
      32             : 
      33             : #include <cstddef>
      34             : #include <cstdlib>
      35             : #include <cstring>
      36             : #if HAVE_FCNTL_H
      37             : #include <fcntl.h>
      38             : #endif
      39             : 
      40             : #include "cpl_conv.h"
      41             : #include "cpl_error.h"
      42             : #include "cpl_multiproc.h"
      43             : #include "gdal_csv.h"
      44             : 
      45             : #include <algorithm>
      46             : 
      47             : /* ==================================================================== */
      48             : /*      The CSVTable is a persistent set of info about an open CSV      */
      49             : /*      table.  While it doesn't currently maintain a record index,     */
      50             : /*      or in-memory copy of the table, it could be changed to do so    */
      51             : /*      in the future.                                                  */
      52             : /* ==================================================================== */
      53             : typedef struct ctb
      54             : {
      55             :     VSILFILE *fp;
      56             :     struct ctb *psNext;
      57             :     char *pszFilename;
      58             :     char **papszFieldNames;
      59             :     int *panFieldNamesLength;
      60             :     char **papszRecFields;
      61             :     int nFields;
      62             :     int iLastLine;
      63             :     bool bNonUniqueKey;
      64             : 
      65             :     /* Cache for whole file */
      66             :     int nLineCount;
      67             :     char **papszLines;
      68             :     int *panLineIndex;
      69             :     char *pszRawData;
      70             : } CSVTable;
      71             : 
      72             : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
      73             :                                 const char *pszFilename);
      74             : 
      75             : /************************************************************************/
      76             : /*                            CSVFreeTLS()                              */
      77             : /************************************************************************/
      78           2 : static void CSVFreeTLS(void *pData)
      79             : {
      80           2 :     CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
      81           2 :     CPLFree(pData);
      82           2 : }
      83             : 
      84             : /* It would likely be better to share this list between threads, but
      85             :    that will require some rework. */
      86             : 
      87             : /************************************************************************/
      88             : /*                             CSVAccess()                              */
      89             : /*                                                                      */
      90             : /*      This function will fetch a handle to the requested table.       */
      91             : /*      If not found in the ``open table list'' the table will be       */
      92             : /*      opened and added to the list.  Eventually this function may     */
      93             : /*      become public with an abstracted return type so that            */
      94             : /*      applications can set options about the table.  For now this     */
      95             : /*      isn't done.                                                     */
      96             : /************************************************************************/
      97             : 
      98      115981 : static CSVTable *CSVAccess(const char *pszFilename)
      99             : 
     100             : {
     101             :     /* -------------------------------------------------------------------- */
     102             :     /*      Fetch the table, and allocate the thread-local pointer to it    */
     103             :     /*      if there isn't already one.                                     */
     104             :     /* -------------------------------------------------------------------- */
     105      115981 :     int bMemoryError = FALSE;
     106             :     CSVTable **ppsCSVTableList =
     107      115981 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
     108      115981 :     if (bMemoryError)
     109           0 :         return nullptr;
     110      115981 :     if (ppsCSVTableList == nullptr)
     111             :     {
     112             :         ppsCSVTableList =
     113           5 :             static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
     114           5 :         if (ppsCSVTableList == nullptr)
     115           0 :             return nullptr;
     116           5 :         CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
     117             :     }
     118             : 
     119             :     /* -------------------------------------------------------------------- */
     120             :     /*      Is the table already in the list.                               */
     121             :     /* -------------------------------------------------------------------- */
     122      928290 :     for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
     123      812309 :          psTable = psTable->psNext)
     124             :     {
     125      928260 :         if (EQUAL(psTable->pszFilename, pszFilename))
     126             :         {
     127             :             /*
     128             :              * Eventually we should consider promoting to the front of
     129             :              * the list to accelerate frequently accessed tables.
     130             :              */
     131      115951 :             return psTable;
     132             :         }
     133             :     }
     134             : 
     135             :     /* -------------------------------------------------------------------- */
     136             :     /*      If not, try to open it.                                         */
     137             :     /* -------------------------------------------------------------------- */
     138          30 :     VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
     139          30 :     if (fp == nullptr)
     140           0 :         return nullptr;
     141             : 
     142             :     /* -------------------------------------------------------------------- */
     143             :     /*      Create an information structure about this table, and add to    */
     144             :     /*      the front of the list.                                          */
     145             :     /* -------------------------------------------------------------------- */
     146             :     CSVTable *const psTable =
     147          30 :         static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
     148          30 :     if (psTable == nullptr)
     149             :     {
     150           0 :         VSIFCloseL(fp);
     151           0 :         return nullptr;
     152             :     }
     153             : 
     154          30 :     psTable->fp = fp;
     155          30 :     psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
     156          30 :     if (psTable->pszFilename == nullptr)
     157             :     {
     158           0 :         VSIFree(psTable);
     159           0 :         VSIFCloseL(fp);
     160           0 :         return nullptr;
     161             :     }
     162          30 :     psTable->bNonUniqueKey = false;  // As far as we know now.
     163          30 :     psTable->psNext = *ppsCSVTableList;
     164             : 
     165          30 :     *ppsCSVTableList = psTable;
     166             : 
     167             :     /* -------------------------------------------------------------------- */
     168             :     /*      Read the table header record containing the field names.        */
     169             :     /* -------------------------------------------------------------------- */
     170          30 :     psTable->papszFieldNames = CSVReadParseLineL(fp);
     171          30 :     psTable->nFields = CSLCount(psTable->papszFieldNames);
     172          30 :     psTable->panFieldNamesLength =
     173          30 :         static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
     174          30 :     for (int i = 0;
     175         185 :          i < psTable->nFields &&
     176             :          /* null-pointer check to avoid a false positive from CLang S.A. */
     177         155 :          psTable->papszFieldNames != nullptr;
     178             :          i++)
     179             :     {
     180         155 :         psTable->panFieldNamesLength[i] =
     181         155 :             static_cast<int>(strlen(psTable->papszFieldNames[i]));
     182             :     }
     183             : 
     184          30 :     return psTable;
     185             : }
     186             : 
     187             : /************************************************************************/
     188             : /*                            CSVDeaccess()                             */
     189             : /************************************************************************/
     190             : 
     191         863 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
     192             :                                 const char *pszFilename)
     193             : 
     194             : {
     195         863 :     if (ppsCSVTableList == nullptr)
     196         854 :         return;
     197             : 
     198             :     /* -------------------------------------------------------------------- */
     199             :     /*      A NULL means deaccess all tables.                               */
     200             :     /* -------------------------------------------------------------------- */
     201           9 :     if (pszFilename == nullptr)
     202             :     {
     203           9 :         while (*ppsCSVTableList != nullptr)
     204           5 :             CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
     205           5 :                                 (*ppsCSVTableList)->pszFilename);
     206             : 
     207           4 :         return;
     208             :     }
     209             : 
     210             :     /* -------------------------------------------------------------------- */
     211             :     /*      Find this table.                                                */
     212             :     /* -------------------------------------------------------------------- */
     213           5 :     CSVTable *psLast = nullptr;
     214           5 :     CSVTable *psTable = *ppsCSVTableList;
     215           5 :     for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
     216           0 :          psTable = psTable->psNext)
     217             :     {
     218           0 :         psLast = psTable;
     219             :     }
     220             : 
     221           5 :     if (psTable == nullptr)
     222             :     {
     223           0 :         if (bCanUseTLS)
     224           0 :             CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
     225           0 :         return;
     226             :     }
     227             : 
     228             :     /* -------------------------------------------------------------------- */
     229             :     /*      Remove the link from the list.                                  */
     230             :     /* -------------------------------------------------------------------- */
     231           5 :     if (psLast != nullptr)
     232           0 :         psLast->psNext = psTable->psNext;
     233             :     else
     234           5 :         *ppsCSVTableList = psTable->psNext;
     235             : 
     236             :     /* -------------------------------------------------------------------- */
     237             :     /*      Free the table.                                                 */
     238             :     /* -------------------------------------------------------------------- */
     239           5 :     if (psTable->fp != nullptr)
     240           0 :         VSIFCloseL(psTable->fp);
     241             : 
     242           5 :     CSLDestroy(psTable->papszFieldNames);
     243           5 :     CPLFree(psTable->panFieldNamesLength);
     244           5 :     CSLDestroy(psTable->papszRecFields);
     245           5 :     CPLFree(psTable->pszFilename);
     246           5 :     CPLFree(psTable->panLineIndex);
     247           5 :     CPLFree(psTable->pszRawData);
     248           5 :     CPLFree(psTable->papszLines);
     249             : 
     250           5 :     CPLFree(psTable);
     251             : 
     252           5 :     if (bCanUseTLS)
     253           5 :         CPLReadLine(nullptr);
     254             : }
     255             : 
     256         856 : void CSVDeaccess(const char *pszFilename)
     257             : {
     258             :     /* -------------------------------------------------------------------- */
     259             :     /*      Fetch the table, and allocate the thread-local pointer to it    */
     260             :     /*      if there isn't already one.                                     */
     261             :     /* -------------------------------------------------------------------- */
     262         856 :     int bMemoryError = FALSE;
     263             :     CSVTable **ppsCSVTableList =
     264         856 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
     265             : 
     266         856 :     CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
     267         856 : }
     268             : 
     269             : /************************************************************************/
     270             : /*                            CSVSplitLine()                            */
     271             : /*                                                                      */
     272             : /*      Tokenize a CSV line into fields in the form of a string         */
     273             : /*      list.  This is used instead of the CPLTokenizeString()          */
     274             : /*      because it provides correct CSV escaping and quoting            */
     275             : /*      semantics.                                                      */
     276             : /************************************************************************/
     277             : 
     278      108480 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
     279             :                            bool bKeepLeadingAndClosingQuotes,
     280             :                            bool bMergeDelimiter)
     281             : 
     282             : {
     283      216960 :     CPLStringList aosRetList;
     284      108480 :     if (pszString == nullptr)
     285           0 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     286             : 
     287      108480 :     char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
     288      108480 :     int nTokenMax = 10;
     289      108480 :     const size_t nDelimiterLength = strlen(pszDelimiter);
     290             : 
     291      108480 :     const char *pszIter = pszString;
     292      618741 :     while (*pszIter != '\0')
     293             :     {
     294      510261 :         bool bInString = false;
     295             : 
     296      510261 :         int nTokenLen = 0;
     297             : 
     298             :         // Try to find the next delimiter, marking end of token.
     299     4332680 :         do
     300             :         {
     301             :             // End if this is a delimiter skip it and break.
     302     4842940 :             if (!bInString &&
     303     2579290 :                 strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
     304             :             {
     305      402094 :                 pszIter += nDelimiterLength;
     306      402094 :                 if (bMergeDelimiter)
     307             :                 {
     308           9 :                     while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
     309             :                            0)
     310           5 :                         pszIter += nDelimiterLength;
     311             :                 }
     312      402094 :                 break;
     313             :             }
     314             : 
     315     4440840 :             if (*pszIter == '"')
     316             :             {
     317      350303 :                 if (!bInString && nTokenLen > 0)
     318             :                 {
     319             :                     // do not treat in a special way double quotes that appear
     320             :                     // in the middle of a field (similarly to OpenOffice)
     321             :                     // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
     322             :                 }
     323      350218 :                 else if (!bInString || pszIter[1] != '"')
     324             :                 {
     325      349552 :                     bInString = !bInString;
     326      349552 :                     if (!bKeepLeadingAndClosingQuotes)
     327      349518 :                         continue;
     328             :                 }
     329             :                 else  // Doubled quotes in string resolve to one quote.
     330             :                 {
     331         666 :                     pszIter++;
     332             :                 }
     333             :             }
     334             : 
     335     4091330 :             if (nTokenLen >= nTokenMax - 2)
     336             :             {
     337      116098 :                 nTokenMax = nTokenMax * 2 + 10;
     338      116098 :                 pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
     339             :             }
     340             : 
     341     4091330 :             pszToken[nTokenLen] = *pszIter;
     342     4091330 :             nTokenLen++;
     343     4440840 :         } while (*(++pszIter) != '\0');
     344             : 
     345      510261 :         pszToken[nTokenLen] = '\0';
     346      510261 :         aosRetList.AddString(pszToken);
     347             : 
     348             :         // If the last token is an empty token, then we have to catch
     349             :         // it now, otherwise we won't reenter the loop and it will be lost.
     350      510261 :         if (*pszIter == '\0' &&
     351      108440 :             pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
     352      108440 :             strncmp(pszIter - nDelimiterLength, pszDelimiter,
     353             :                     nDelimiterLength) == 0)
     354             :         {
     355         273 :             aosRetList.AddString("");
     356             :         }
     357             :     }
     358             : 
     359      108480 :     CPLFree(pszToken);
     360             : 
     361      108480 :     if (aosRetList.Count() == 0)
     362          40 :         return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
     363             :     else
     364      108440 :         return aosRetList.StealList();
     365             : }
     366             : 
     367             : /************************************************************************/
     368             : /*                          CSVFindNextLine()                           */
     369             : /*                                                                      */
     370             : /*      Find the start of the next line, while at the same time zero    */
     371             : /*      terminating this line.  Take into account that there may be     */
     372             : /*      newline indicators within quoted strings, and that quotes       */
     373             : /*      can be escaped with a backslash.                                */
     374             : /************************************************************************/
     375             : 
     376        6329 : static char *CSVFindNextLine(char *pszThisLine)
     377             : 
     378             : {
     379        6329 :     int i = 0;  // i is used after the for loop.
     380             : 
     381      275213 :     for (int nQuoteCount = 0; pszThisLine[i] != '\0'; i++)
     382             :     {
     383      275213 :         if (pszThisLine[i] == '\"' && (i == 0 || pszThisLine[i - 1] != '\\'))
     384       35430 :             nQuoteCount++;
     385             : 
     386      275213 :         if ((pszThisLine[i] == 10 || pszThisLine[i] == 13) &&
     387        6329 :             (nQuoteCount % 2) == 0)
     388        6329 :             break;
     389             :     }
     390             : 
     391       16573 :     while (pszThisLine[i] == 10 || pszThisLine[i] == 13)
     392       10244 :         pszThisLine[i++] = '\0';
     393             : 
     394        6329 :     if (pszThisLine[i] == '\0')
     395          30 :         return nullptr;
     396             : 
     397        6299 :     return pszThisLine + i;
     398             : }
     399             : 
     400             : /************************************************************************/
     401             : /*                             CSVIngest()                              */
     402             : /*                                                                      */
     403             : /*      Load entire file into memory and setup index if possible.       */
     404             : /************************************************************************/
     405             : 
     406             : // TODO(schwehr): Clean up all the casting in CSVIngest.
     407       53685 : static void CSVIngest(CSVTable *psTable)
     408             : 
     409             : {
     410       53685 :     if (psTable->pszRawData != nullptr)
     411       53655 :         return;
     412             : 
     413             :     /* -------------------------------------------------------------------- */
     414             :     /*      Ingest whole file.                                              */
     415             :     /* -------------------------------------------------------------------- */
     416          30 :     if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
     417             :     {
     418           0 :         CPLError(CE_Failure, CPLE_FileIO,
     419             :                  "Failed using seek end and tell to get file length: %s",
     420             :                  psTable->pszFilename);
     421           0 :         return;
     422             :     }
     423          30 :     const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
     424          30 :     if (static_cast<long>(nFileLen) == -1)
     425             :     {
     426           0 :         CPLError(CE_Failure, CPLE_FileIO,
     427             :                  "Failed using seek end and tell to get file length: %s",
     428             :                  psTable->pszFilename);
     429           0 :         return;
     430             :     }
     431          30 :     VSIRewindL(psTable->fp);
     432             : 
     433          30 :     psTable->pszRawData = static_cast<char *>(
     434          30 :         VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
     435          30 :     if (psTable->pszRawData == nullptr)
     436           0 :         return;
     437          30 :     if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
     438          30 :                   psTable->fp) != static_cast<size_t>(nFileLen))
     439             :     {
     440           0 :         CPLFree(psTable->pszRawData);
     441           0 :         psTable->pszRawData = nullptr;
     442             : 
     443           0 :         CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
     444             :                  psTable->pszFilename);
     445           0 :         return;
     446             :     }
     447             : 
     448          30 :     psTable->pszRawData[nFileLen] = '\0';
     449             : 
     450             :     /* -------------------------------------------------------------------- */
     451             :     /*      Get count of newlines so we can allocate line array.            */
     452             :     /* -------------------------------------------------------------------- */
     453          30 :     int nMaxLineCount = 0;
     454      279158 :     for (int i = 0; i < static_cast<int>(nFileLen); i++)
     455             :     {
     456      279128 :         if (psTable->pszRawData[i] == 10)
     457        6329 :             nMaxLineCount++;
     458             :     }
     459             : 
     460          30 :     psTable->papszLines =
     461          30 :         static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
     462          30 :     if (psTable->papszLines == nullptr)
     463           0 :         return;
     464             : 
     465             :     /* -------------------------------------------------------------------- */
     466             :     /*      Build a list of record pointers into the raw data buffer        */
     467             :     /*      based on line terminators.  Zero terminate the line             */
     468             :     /*      strings.                                                        */
     469             :     /* -------------------------------------------------------------------- */
     470             :     /* skip header line */
     471          30 :     char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
     472             : 
     473          30 :     int iLine = 0;
     474        6329 :     while (pszThisLine != nullptr && iLine < nMaxLineCount)
     475             :     {
     476        6299 :         if (pszThisLine[0] != '#')
     477        6288 :             psTable->papszLines[iLine++] = pszThisLine;
     478        6299 :         pszThisLine = CSVFindNextLine(pszThisLine);
     479             :     }
     480             : 
     481          30 :     psTable->nLineCount = iLine;
     482             : 
     483             :     /* -------------------------------------------------------------------- */
     484             :     /*      Allocate and populate index array.  Ensure they are in          */
     485             :     /*      ascending order so that binary searches can be done on the      */
     486             :     /*      array.                                                          */
     487             :     /* -------------------------------------------------------------------- */
     488          30 :     psTable->panLineIndex = static_cast<int *>(
     489          30 :         VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
     490          30 :     if (psTable->panLineIndex == nullptr)
     491           0 :         return;
     492             : 
     493        6232 :     for (int i = 0; i < psTable->nLineCount; i++)
     494             :     {
     495        6204 :         psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
     496             : 
     497        6204 :         if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
     498             :         {
     499           2 :             CPLFree(psTable->panLineIndex);
     500           2 :             psTable->panLineIndex = nullptr;
     501           2 :             break;
     502             :         }
     503             :     }
     504             : 
     505          30 :     psTable->iLastLine = -1;
     506             : 
     507             :     /* -------------------------------------------------------------------- */
     508             :     /*      We should never need the file handle against, so close it.      */
     509             :     /* -------------------------------------------------------------------- */
     510          30 :     VSIFCloseL(psTable->fp);
     511          30 :     psTable->fp = nullptr;
     512             : }
     513             : 
     514       53685 : static void CSVIngest(const char *pszFilename)
     515             : 
     516             : {
     517       53685 :     CSVTable *psTable = CSVAccess(pszFilename);
     518       53685 :     if (psTable == nullptr)
     519             :     {
     520           0 :         CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
     521             :                  pszFilename);
     522           0 :         return;
     523             :     }
     524       53685 :     CSVIngest(psTable);
     525             : }
     526             : 
     527             : /************************************************************************/
     528             : /*                        CSVDetectSeperator()                          */
     529             : /************************************************************************/
     530             : 
     531             : /** Detect which field separator is used.
     532             :  *
     533             :  * Currently, it can detect comma, semicolon, space, tabulation or pipe.
     534             :  * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
     535             :  * most occurrences will be selected (and a warning emitted).
     536             :  * If no separator found, comma will be considered as the separator.
     537             :  *
     538             :  * @return ',', ';', ' ', tabulation character or '|'.
     539             :  */
     540         568 : char CSVDetectSeperator(const char *pszLine)
     541             : {
     542         568 :     bool bInString = false;
     543         568 :     int nCountComma = 0;
     544         568 :     int nCountSemicolon = 0;
     545         568 :     int nCountTab = 0;
     546         568 :     int nCountPipe = 0;
     547         568 :     int nCountSpace = 0;
     548             : 
     549       25353 :     for (; *pszLine != '\0'; pszLine++)
     550             :     {
     551       24785 :         if (!bInString && *pszLine == ',')
     552             :         {
     553        2011 :             nCountComma++;
     554             :         }
     555       22774 :         else if (!bInString && *pszLine == ';')
     556             :         {
     557          10 :             nCountSemicolon++;
     558             :         }
     559       22764 :         else if (!bInString && *pszLine == '\t')
     560             :         {
     561          29 :             nCountTab++;
     562             :         }
     563       22735 :         else if (!bInString && *pszLine == '|')
     564             :         {
     565           9 :             nCountPipe++;
     566             :         }
     567       22726 :         else if (!bInString && *pszLine == ' ')
     568             :         {
     569         258 :             nCountSpace++;
     570             :         }
     571       22468 :         else if (*pszLine == '"')
     572             :         {
     573         519 :             if (!bInString || pszLine[1] != '"')
     574             :             {
     575         519 :                 bInString = !bInString;
     576         519 :                 continue;
     577             :             }
     578             :             else /* doubled quotes in string resolve to one quote */
     579             :             {
     580           0 :                 pszLine++;
     581             :             }
     582             :         }
     583             :     }
     584             : 
     585             :     const int nMaxCountExceptSpace =
     586             :         std::max(std::max(nCountComma, nCountSemicolon),
     587         568 :                  std::max(nCountTab, nCountPipe));
     588         568 :     char chDelimiter = ',';
     589         568 :     if (nMaxCountExceptSpace == 0)
     590             :     {
     591          33 :         if (nCountSpace > 0)
     592           8 :             chDelimiter = ' ';
     593             :     }
     594             :     else
     595             :     {
     596         535 :         bool bWarn = false;
     597         535 :         if (nCountComma == nMaxCountExceptSpace)
     598             :         {
     599         519 :             chDelimiter = ',';
     600         519 :             bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
     601             :         }
     602          16 :         else if (nCountSemicolon == nMaxCountExceptSpace)
     603             :         {
     604           5 :             chDelimiter = ';';
     605           5 :             bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
     606             :         }
     607          11 :         else if (nCountTab == nMaxCountExceptSpace)
     608             :         {
     609           6 :             chDelimiter = '\t';
     610           6 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
     611             :         }
     612             :         else /* if( nCountPipe == nMaxCountExceptSpace ) */
     613             :         {
     614           5 :             chDelimiter = '|';
     615           5 :             bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
     616             :         }
     617         535 :         if (bWarn)
     618             :         {
     619           6 :             CPLError(CE_Warning, CPLE_AppDefined,
     620             :                      "Selecting '%c' as CSV field separator, but "
     621             :                      "other candidate separator(s) have been found.",
     622             :                      chDelimiter);
     623             :         }
     624             :     }
     625             : 
     626         568 :     return chDelimiter;
     627             : }
     628             : 
     629             : /************************************************************************/
     630             : /*                      CSVReadParseLineGeneric()                       */
     631             : /*                                                                      */
     632             : /*      Read one line, and return split into fields.  The return        */
     633             : /*      result is a stringlist, in the sense of the CSL functions.      */
     634             : /************************************************************************/
     635             : 
     636             : static char **
     637       56410 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
     638             :                         size_t nMaxLineSize, const char *pszDelimiter,
     639             :                         bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
     640             :                         bool bMergeDelimiter, bool bSkipBOM)
     641             : {
     642       56410 :     const char *pszLine = pfnReadLine(fp, nMaxLineSize);
     643       56410 :     if (pszLine == nullptr)
     644        1352 :         return nullptr;
     645             : 
     646       55058 :     if (bSkipBOM)
     647             :     {
     648             :         // Skip BOM.
     649       54702 :         const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
     650       54702 :         if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
     651           4 :             pszLine += 3;
     652             :     }
     653             : 
     654             :     // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
     655       55058 :     if (!bHonourStrings)
     656             :     {
     657           2 :         return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
     658             :     }
     659             : 
     660             :     // If there are no quotes, then this is the simple case.
     661             :     // Parse, and return tokens.
     662       55056 :     if (strchr(pszLine, '\"') == nullptr)
     663       47550 :         return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
     664       47550 :                             bMergeDelimiter);
     665             : 
     666             :     try
     667             :     {
     668             :         // We must now count the quotes in our working string, and as
     669             :         // long as it is odd, keep adding new lines.
     670        7506 :         std::string osWorkLine(pszLine);
     671             : 
     672        7506 :         size_t i = 0;
     673        7506 :         int nCount = 0;
     674             : 
     675             :         while (true)
     676             :         {
     677      783060 :             for (; i < osWorkLine.size(); i++)
     678             :             {
     679      774799 :                 if (osWorkLine[i] == '\"')
     680       58975 :                     nCount++;
     681             :             }
     682             : 
     683        8261 :             if (nCount % 2 == 0)
     684        7505 :                 break;
     685             : 
     686         756 :             pszLine = pfnReadLine(fp, nMaxLineSize);
     687         756 :             if (pszLine == nullptr)
     688           1 :                 break;
     689             : 
     690         755 :             osWorkLine.append("\n");
     691         755 :             osWorkLine.append(pszLine);
     692             :         }
     693             : 
     694             :         char **papszReturn =
     695        7506 :             CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
     696             :                          bKeepLeadingAndClosingQuotes, bMergeDelimiter);
     697             : 
     698        7506 :         return papszReturn;
     699             :     }
     700           0 :     catch (const std::exception &e)
     701             :     {
     702           0 :         CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
     703           0 :         return nullptr;
     704             :     }
     705             : }
     706             : 
     707             : /************************************************************************/
     708             : /*                          CSVReadParseLine()                          */
     709             : /*                                                                      */
     710             : /*      Read one line, and return split into fields.  The return        */
     711             : /*      result is a stringlist, in the sense of the CSL functions.      */
     712             : /*                                                                      */
     713             : /*      Deprecated.  Replaced by CSVReadParseLineL().                   */
     714             : /************************************************************************/
     715             : 
     716           0 : char **CSVReadParseLine(FILE *fp)
     717             : {
     718           0 :     return CSVReadParseLine2(fp, ',');
     719             : }
     720             : 
     721           0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
     722             : {
     723           0 :     return CPLReadLine(static_cast<FILE *>(fp));
     724             : }
     725             : 
     726           0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
     727             : {
     728           0 :     CPLAssert(fp != nullptr);
     729           0 :     if (fp == nullptr)
     730           0 :         return nullptr;
     731             : 
     732           0 :     char szDelimiter[2] = {chDelimiter, 0};
     733           0 :     return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
     734             :                                    0,  // nMaxLineSize,
     735             :                                    szDelimiter,
     736             :                                    true,   // bHonourStrings
     737             :                                    false,  // bKeepLeadingAndClosingQuotes
     738             :                                    false,  // bMergeDelimiter
     739           0 :                                    true /* bSkipBOM */);
     740             : }
     741             : 
     742             : /************************************************************************/
     743             : /*                          CSVReadParseLineL()                         */
     744             : /*                                                                      */
     745             : /*      Read one line, and return split into fields.  The return        */
     746             : /*      result is a stringlist, in the sense of the CSL functions.      */
     747             : /*                                                                      */
     748             : /*      Replaces CSVReadParseLine().  These functions use the VSI       */
     749             : /*      layer to allow reading from other file containers.              */
     750             : /************************************************************************/
     751             : 
     752        3910 : char **CSVReadParseLineL(VSILFILE *fp)
     753             : {
     754        3910 :     return CSVReadParseLine2L(fp, ',');
     755             : }
     756             : 
     757        3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
     758             : 
     759             : {
     760        3910 :     CPLAssert(fp != nullptr);
     761        3910 :     if (fp == nullptr)
     762           0 :         return nullptr;
     763             : 
     764        3910 :     char szDelimiter[2] = {chDelimiter, 0};
     765        3910 :     return CSVReadParseLine3L(fp,
     766             :                               0,  // nMaxLineSize
     767             :                               szDelimiter,
     768             :                               true,   // bHonourStrings
     769             :                               false,  // bKeepLeadingAndClosingQuotes
     770             :                               false,  // bMergeDelimiter
     771        3910 :                               true /* bSkipBOM */);
     772             : }
     773             : 
     774             : /************************************************************************/
     775             : /*                      ReadLineLargeFile()                             */
     776             : /************************************************************************/
     777             : 
     778       57166 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
     779             : {
     780       57166 :     int nBufLength = 0;
     781       57166 :     return CPLReadLine3L(static_cast<VSILFILE *>(fp),
     782             :                          nMaxLineSize == 0 ? -1
     783             :                                            : static_cast<int>(nMaxLineSize),
     784      114332 :                          &nBufLength, nullptr);
     785             : }
     786             : 
     787             : /************************************************************************/
     788             : /*                      CSVReadParseLine3L()                            */
     789             : /*                                                                      */
     790             : /*      Read one line, and return split into fields.  The return        */
     791             : /*      result is a stringlist, in the sense of the CSL functions.      */
     792             : /************************************************************************/
     793             : 
     794             : /** Read one line, and return split into fields.
     795             :  * The return result is a stringlist, in the sense of the CSL functions.
     796             :  *
     797             :  * @param fp File handle. Must not be NULL
     798             :  * @param nMaxLineSize Maximum line size, or 0 for unlimited.
     799             :  * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
     800             :  * @param bHonourStrings Should be true, unless double quotes should not be
     801             :  *                       considered when separating fields.
     802             :  * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
     803             :  *                                     quote characters should be kept.
     804             :  * @param bMergeDelimiter Whether consecutive delimiters should be considered
     805             :  *                        as a single one. Should generally be set to false.
     806             :  * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
     807             :  */
     808       56410 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
     809             :                           const char *pszDelimiter, bool bHonourStrings,
     810             :                           bool bKeepLeadingAndClosingQuotes,
     811             :                           bool bMergeDelimiter, bool bSkipBOM)
     812             : 
     813             : {
     814       56410 :     return CSVReadParseLineGeneric(
     815             :         fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
     816       56410 :         bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
     817             : }
     818             : 
     819             : /************************************************************************/
     820             : /*                             CSVCompare()                             */
     821             : /*                                                                      */
     822             : /*      Compare a field to a search value using a particular            */
     823             : /*      criteria.                                                       */
     824             : /************************************************************************/
     825             : 
     826         603 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
     827             :                        CSVCompareCriteria eCriteria)
     828             : 
     829             : {
     830         603 :     if (eCriteria == CC_ExactString)
     831             :     {
     832           0 :         return (strcmp(pszFieldValue, pszTarget) == 0);
     833             :     }
     834         603 :     else if (eCriteria == CC_ApproxString)
     835             :     {
     836         270 :         return EQUAL(pszFieldValue, pszTarget);
     837             :     }
     838         333 :     else if (eCriteria == CC_Integer)
     839             :     {
     840         626 :         return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
     841         626 :                 atoi(pszFieldValue) == atoi(pszTarget));
     842             :     }
     843             : 
     844           0 :     return false;
     845             : }
     846             : 
     847             : /************************************************************************/
     848             : /*                            CSVScanLines()                            */
     849             : /*                                                                      */
     850             : /*      Scan the file for lines where the key field equals              */
     851             : /*      the indicated value with the suggested comparison criteria.     */
     852             : /*      Return the first matching line split into fields.               */
     853             : /*                                                                      */
     854             : /*      Deprecated.  Replaced by CSVScanLinesL().                       */
     855             : /************************************************************************/
     856             : 
     857           0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
     858             :                     CSVCompareCriteria eCriteria)
     859             : 
     860             : {
     861           0 :     CPLAssert(pszValue != nullptr);
     862           0 :     CPLAssert(iKeyField >= 0);
     863           0 :     CPLAssert(fp != nullptr);
     864             : 
     865           0 :     bool bSelected = false;
     866           0 :     const int nTestValue = atoi(pszValue);
     867           0 :     char **papszFields = nullptr;
     868             : 
     869           0 :     while (!bSelected)
     870             :     {
     871           0 :         papszFields = CSVReadParseLine(fp);
     872           0 :         if (papszFields == nullptr)
     873           0 :             return nullptr;
     874             : 
     875           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     876             :         {
     877             :             /* not selected */
     878             :         }
     879           0 :         else if (eCriteria == CC_Integer &&
     880           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     881             :         {
     882           0 :             bSelected = true;
     883             :         }
     884             :         else
     885             :         {
     886           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     887             :         }
     888             : 
     889           0 :         if (!bSelected)
     890             :         {
     891           0 :             CSLDestroy(papszFields);
     892           0 :             papszFields = nullptr;
     893             :         }
     894             :     }
     895             : 
     896           0 :     return papszFields;
     897             : }
     898             : 
     899             : /************************************************************************/
     900             : /*                            CSVScanLinesL()                           */
     901             : /*                                                                      */
     902             : /*      Scan the file for lines where the key field equals              */
     903             : /*      the indicated value with the suggested comparison criteria.     */
     904             : /*      Return the first matching line split into fields.               */
     905             : /************************************************************************/
     906             : 
     907           0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
     908             :                      CSVCompareCriteria eCriteria)
     909             : 
     910             : {
     911           0 :     CPLAssert(pszValue != nullptr);
     912           0 :     CPLAssert(iKeyField >= 0);
     913           0 :     CPLAssert(fp != nullptr);
     914             : 
     915           0 :     bool bSelected = false;
     916           0 :     const int nTestValue = atoi(pszValue);
     917           0 :     char **papszFields = nullptr;
     918             : 
     919           0 :     while (!bSelected)
     920             :     {
     921           0 :         papszFields = CSVReadParseLineL(fp);
     922           0 :         if (papszFields == nullptr)
     923           0 :             return nullptr;
     924             : 
     925           0 :         if (CSLCount(papszFields) < iKeyField + 1)
     926             :         {
     927             :             /* not selected */
     928             :         }
     929           0 :         else if (eCriteria == CC_Integer &&
     930           0 :                  atoi(papszFields[iKeyField]) == nTestValue)
     931             :         {
     932           0 :             bSelected = true;
     933             :         }
     934             :         else
     935             :         {
     936           0 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
     937             :         }
     938             : 
     939           0 :         if (!bSelected)
     940             :         {
     941           0 :             CSLDestroy(papszFields);
     942           0 :             papszFields = nullptr;
     943             :         }
     944             :     }
     945             : 
     946           0 :     return papszFields;
     947             : }
     948             : 
     949             : /************************************************************************/
     950             : /*                        CSVScanLinesIndexed()                         */
     951             : /*                                                                      */
     952             : /*      Binary search the line index for the integer key value and      */
     953             : /*      return the first matching line split into fields, or NULL if    */
     954             : /*      the key is not present in the index.                            */
     955             : /************************************************************************/
     956             : 
     957          21 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
     958             : 
     959             : {
     960          21 :     CPLAssert(psTable->panLineIndex != nullptr);
     961             : 
     962             :     /* -------------------------------------------------------------------- */
     963             :     /*      Find target record with binary search.                          */
     964             :     /* -------------------------------------------------------------------- */
     965          21 :     int iTop = psTable->nLineCount - 1;
     966          21 :     int iBottom = 0;
     967          21 :     int iResult = -1;
     968             : 
     969         151 :     while (iTop >= iBottom)
     970             :     {
     971         151 :         const int iMiddle = (iTop + iBottom) / 2;
     972         151 :         if (psTable->panLineIndex[iMiddle] > nKeyValue)
     973          90 :             iTop = iMiddle - 1;
     974          61 :         else if (psTable->panLineIndex[iMiddle] < nKeyValue)
     975          40 :             iBottom = iMiddle + 1;
     976             :         else
     977             :         {
     978          21 :             iResult = iMiddle;
     979             :             // if a key is not unique, select the first instance of it.
     980          21 :             while (iResult > 0 &&
     981          21 :                    psTable->panLineIndex[iResult - 1] == nKeyValue)
     982             :             {
     983           0 :                 psTable->bNonUniqueKey = true;
     984           0 :                 iResult--;
     985             :             }
     986          21 :             break;
     987             :         }
     988             :     }
     989             : 
     990          21 :     if (iResult == -1)
     991           0 :         return nullptr;
     992             : 
     993             :     /* -------------------------------------------------------------------- */
     994             :     /*      Parse target line, and update iLastLine indicator.              */
     995             :     /* -------------------------------------------------------------------- */
     996          21 :     psTable->iLastLine = iResult;
     997             : 
     998          21 :     return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
     999             : }
    1000             : 
    1001             : /************************************************************************/
    1002             : /*                        CSVScanLinesIngested()                        */
    1003             : /*                                                                      */
    1004             : /*      Scan the in-memory lines for one where the key field equals     */
    1005             : /*      the indicated value with the suggested comparison criteria.     */
    1006             : /*      Return the first matching line split into fields.               */
    1007             : /************************************************************************/
    1008             : 
    1009          28 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
    1010             :                                    const char *pszValue,
    1011             :                                    CSVCompareCriteria eCriteria)
    1012             : 
    1013             : {
    1014          28 :     CPLAssert(pszValue != nullptr);
    1015          28 :     CPLAssert(iKeyField >= 0);
    1016             : 
    1017          28 :     const int nTestValue = atoi(pszValue);
    1018             : 
    1019             :     /* -------------------------------------------------------------------- */
    1020             :     /*      Short cut for indexed files.                                    */
    1021             :     /* -------------------------------------------------------------------- */
    1022          28 :     if (iKeyField == 0 && eCriteria == CC_Integer &&
    1023          21 :         psTable->panLineIndex != nullptr)
    1024          21 :         return CSVScanLinesIndexed(psTable, nTestValue);
    1025             : 
    1026             :     /* -------------------------------------------------------------------- */
    1027             :     /*      Scan from in-core lines.                                        */
    1028             :     /* -------------------------------------------------------------------- */
    1029           7 :     char **papszFields = nullptr;
    1030           7 :     bool bSelected = false;
    1031             : 
    1032         484 :     while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
    1033             :     {
    1034         477 :         psTable->iLastLine++;
    1035         477 :         papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
    1036             :                                    false, false);
    1037             : 
    1038         477 :         if (CSLCount(papszFields) < iKeyField + 1)
    1039             :         {
    1040             :             /* not selected */
    1041             :         }
    1042         477 :         else if (eCriteria == CC_Integer &&
    1043         242 :                  atoi(papszFields[iKeyField]) == nTestValue)
    1044             :         {
    1045           2 :             bSelected = true;
    1046             :         }
    1047             :         else
    1048             :         {
    1049         475 :             bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
    1050             :         }
    1051             : 
    1052         477 :         if (!bSelected)
    1053             :         {
    1054         470 :             CSLDestroy(papszFields);
    1055         470 :             papszFields = nullptr;
    1056             :         }
    1057             :     }
    1058             : 
    1059           7 :     return papszFields;
    1060             : }
    1061             : 
    1062             : /************************************************************************/
    1063             : /*                            CSVRewind()                               */
    1064             : /*                                                                      */
    1065             : /*      Rewind a CSV file based on a passed in filename.                */
    1066             : /*      This is aimed at being used with CSVGetNextLine().              */
    1067             : /************************************************************************/
    1068             : 
                       /**
                        * Reset the read position of a CSV table.
                        *
                        * Looks the table up with CSVAccess() and, if one is returned, sets its
                        * iLastLine to -1 so that the next CSVGetNextLine() call starts again at
                        * papszLines[0].  Nothing happens when CSVAccess() returns nullptr.
                        *
                        * @param pszFilename name of the CSV file; must not be nullptr (checked
                        * with CPLAssert()).
                        */
    1069        1774 : void CSVRewind(const char *pszFilename)
    1070             : 
    1071             : {
    1072             :     /* -------------------------------------------------------------------- */
    1073             :     /*      Get access to the table.                                        */
    1074             :     /* -------------------------------------------------------------------- */
    1075        1774 :     CPLAssert(pszFilename != nullptr);
    1076             : 
    1077        1774 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1078        1774 :     if (psTable != nullptr)
                               // -1 means "before the first line": readers pre-increment
                               // iLastLine before indexing papszLines.
    1079        1774 :         psTable->iLastLine = -1;
    1080        1774 : }
    1081             : 
    1082             : /************************************************************************/
    1083             : /*                           CSVGetNextLine()                           */
    1084             : /*                                                                      */
    1085             : /*      Fetch the next line of a CSV file based on a passed in          */
    1086             : /*      filename.  Returns NULL at end of file, or if file is not       */
    1087             : /*      really established.                                             */
    1088             : /*      This ingests the whole file into memory if not already done.    */
    1089             : /*      When reaching end of file, CSVRewind() may be used to read      */
    1090             : /*      again from the beginning.                                       */
    1091             : /************************************************************************/
    1092             : 
                       /**
                        * Fetch the next line of a CSV table as a list of fields.
                        *
                        * The table is ingested with CSVIngest(), flagged as having a non-unique
                        * key, and its iLastLine cursor is advanced by one.  The line at the new
                        * cursor position is split on "," with CSVSplitLine().
                        *
                        * @param pszFilename name of the CSV file; must not be nullptr (checked
                        * with CPLAssert()).
                        *
                        * @return the field list, which is stored in the table's papszRecFields,
                        * or nullptr when CSVAccess() returns no table or when no further line is
                        * available.  The list is destroyed the next time papszRecFields is
                        * replaced (for instance by the next successful call), so callers should
                        * not free it.
                        */
    1093       53548 : char **CSVGetNextLine(const char *pszFilename)
    1094             : 
    1095             : {
    1096             : 
    1097             :     /* -------------------------------------------------------------------- */
    1098             :     /*      Get access to the table.                                        */
    1099             :     /* -------------------------------------------------------------------- */
    1100       53548 :     CPLAssert(pszFilename != nullptr);
    1101             : 
    1102       53548 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1103       53548 :     if (psTable == nullptr)
    1104           0 :         return nullptr;
    1105             : 
    1106       53548 :     CSVIngest(psTable->pszFilename);
    1107             : 
    1108             :     /* -------------------------------------------------------------------- */
    1109             :     /*      If we use CSVGetNextLine() we can pretty much assume we have    */
    1110             :     /*      a non-unique key.                                               */
    1111             :     /* -------------------------------------------------------------------- */
    1112       53548 :     psTable->bNonUniqueKey = true;
    1113             : 
    1114             :     /* -------------------------------------------------------------------- */
    1115             :     /*      Do we have a next line available?  This only works for          */
    1116             :     /*      ingested tables I believe.                                      */
    1117             :     /* -------------------------------------------------------------------- */
    1118       53548 :     if (psTable->iLastLine + 1 >= psTable->nLineCount)
    1119         622 :         return nullptr;
    1120             : 
                           // Advance the cursor, drop the previously returned record and
                           // replace it with the freshly split line.
    1121       52926 :     psTable->iLastLine++;
    1122       52926 :     CSLDestroy(psTable->papszRecFields);
    1123      105852 :     psTable->papszRecFields = CSVSplitLine(
    1124       52926 :         psTable->papszLines[psTable->iLastLine], ",", false, false);
    1125             : 
    1126       52926 :     return psTable->papszRecFields;
    1127             : }
    1128             : 
    1129             : /************************************************************************/
    1130             : /*                            CSVScanFile()                             */
    1131             : /*                                                                      */
    1132             : /*      Scan a whole file using criteria similar to above, but also     */
    1133             : /*      taking care of file opening and closing.                        */
    1134             : /************************************************************************/
    1135             : 
                       /**
                        * Find a record of an already accessed table whose key field matches.
                        *
                        * After calling CSVIngest(), the record cached in papszRecFields is
                        * returned as is when iKeyField is a valid index into it, CSVCompare()
                        * accepts its key field, and the table is not flagged bNonUniqueKey.
                        * Otherwise iLastLine is reset to -1, the cached record is destroyed and
                        * the table is scanned again: with CSVScanLinesIngested() when pszRawData
                        * is set, else by rewinding fp, discarding the header line and calling
                        * CSVScanLinesL().
                        *
                        * @param psTable table to scan.
                        * @param iKeyField index of the field to compare.
                        * @param pszValue value to compare the key field against.
                        * @param eCriteria comparison criteria passed on to the scan helpers.
                        *
                        * @return the table's papszRecFields after the scan (presumably nullptr
                        * when nothing matched -- confirm against the scan helpers).  The list
                        * stays owned by the table.
                        */
    1136         137 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
    1137             :                           const char *pszValue, CSVCompareCriteria eCriteria)
    1138             : {
    1139         137 :     CSVIngest(psTable->pszFilename);
    1140             : 
    1141             :     /* -------------------------------------------------------------------- */
    1142             :     /*      Does the current record match the criteria?  If so, just        */
    1143             :     /*      return it again.                                                */
    1144             :     /* -------------------------------------------------------------------- */
    1145         137 :     if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
    1146         383 :         CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
    1147         109 :         !psTable->bNonUniqueKey)
    1148             :     {
    1149         109 :         return psTable->papszRecFields;
    1150             :     }
    1151             : 
    1152             :     /* -------------------------------------------------------------------- */
    1153             :     /*      Scan the file from the beginning, replacing the ``current       */
    1154             :     /*      record'' in our structure with the one that is found.           */
    1155             :     /* -------------------------------------------------------------------- */
    1156          28 :     psTable->iLastLine = -1;
    1157          28 :     CSLDestroy(psTable->papszRecFields);
    1158             : 
                           // pszRawData being set selects the in-memory scan; otherwise the
                           // file handle is read directly.
    1159          28 :     if (psTable->pszRawData != nullptr)
    1160          28 :         psTable->papszRecFields =
    1161          28 :             CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
    1162             :     else
    1163             :     {
    1164           0 :         VSIRewindL(psTable->fp);
    1165           0 :         CPLReadLineL(psTable->fp); /* throw away the header line */
    1166             : 
    1167           0 :         psTable->papszRecFields =
    1168           0 :             CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
    1169             :     }
    1170             : 
    1171          28 :     return psTable->papszRecFields;
    1172             : }
    1173             : 
                       /**
                        * Find a record of a CSV file whose key field matches, by file name.
                        *
                        * Thin wrapper that resolves the table with CSVAccess() and forwards to
                        * the table-based CSVScanFile() overload.
                        *
                        * @param pszFilename name of the CSV file; must not be nullptr (checked
                        * with CPLAssert()).
                        * @param iKeyField index of the field to compare; negative values are
                        * rejected.
                        * @param pszValue value to compare the key field against.
                        * @param eCriteria comparison criteria.
                        *
                        * @return the record returned by the table-based overload, or nullptr
                        * when iKeyField is negative or CSVAccess() returns no table.
                        */
    1174           4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
    1175             :                    CSVCompareCriteria eCriteria)
    1176             : 
    1177             : {
    1178             :     /* -------------------------------------------------------------------- */
    1179             :     /*      Get access to the table.                                        */
    1180             :     /* -------------------------------------------------------------------- */
    1181           4 :     CPLAssert(pszFilename != nullptr);
    1182             : 
    1183           4 :     if (iKeyField < 0)
    1184           0 :         return nullptr;
    1185             : 
    1186           4 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1187           4 :     if (psTable == nullptr)
    1188           0 :         return nullptr;
    1189             : 
    1190           4 :     return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
    1191             : }
    1192             : 
    1193             : /************************************************************************/
    1194             : /*                           CSVGetFieldId()                            */
    1195             : /*                                                                      */
    1196             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1197             : /*      and find the field with the indicated name.  Returns -1 if      */
    1198             : /*      it fails to find the field name.  Comparison is case            */
    1199             : /*      insensitive, but otherwise exact.  After this function has      */
    1200             : /*      been called the file pointer will be positioned just after      */
    1201             : /*      the first record.                                               */
    1202             : /*                                                                      */
    1203             : /*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
    1204             : /************************************************************************/
    1205             : 
                       /**
                        * Return the index of a named field in the first record of a FILE stream.
                        *
                        * The stream is rewound with VSIRewind(), one record is read with
                        * CSVReadParseLine(), and its fields are compared to pszFieldName with
                        * EQUAL().  The parsed field list is destroyed before returning.
                        *
                        * @param fp open file; must not be nullptr (checked with CPLAssert()).
                        * @param pszFieldName field name to look for; must not be nullptr
                        * (checked with CPLAssert()).
                        *
                        * @return the zero-based index of the first matching field, or -1 when
                        * no record could be read or no field matches.
                        */
    1206           0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
    1207             : 
    1208             : {
    1209           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1210             : 
    1211           0 :     VSIRewind(fp);
    1212             : 
    1213           0 :     char **papszFields = CSVReadParseLine(fp);
    1214           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1215             :     {
    1216           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1217             :         {
    1218           0 :             CSLDestroy(papszFields);
    1219           0 :             return i;
    1220             :         }
    1221             :     }
    1222             : 
    1223           0 :     CSLDestroy(papszFields);
    1224             : 
    1225           0 :     return -1;
    1226             : }
    1227             : 
    1228             : /************************************************************************/
    1229             : /*                           CSVGetFieldIdL()                           */
    1230             : /*                                                                      */
    1231             : /*      Read the first record of a CSV file (rewinding to be sure),     */
    1232             : /*      and find the field with the indicated name.  Returns -1 if      */
    1233             : /*      it fails to find the field name.  Comparison is case            */
    1234             : /*      insensitive, but otherwise exact.  After this function has      */
    1235             : /*      been called the file pointer will be positioned just after      */
    1236             : /*      the first record.                                               */
    1237             : /************************************************************************/
    1238             : 
                       /**
                        * Return the index of a named field in the first record of a VSILFILE.
                        *
                        * The file is rewound with VSIRewindL(), one record is read with
                        * CSVReadParseLineL(), and its fields are compared to pszFieldName with
                        * EQUAL().  The parsed field list is destroyed before returning.
                        *
                        * @param fp open file; must not be nullptr (checked with CPLAssert()).
                        * @param pszFieldName field name to look for; must not be nullptr
                        * (checked with CPLAssert()).
                        *
                        * @return the zero-based index of the first matching field, or -1 when
                        * no record could be read or no field matches.
                        */
    1239           0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
    1240             : 
    1241             : {
    1242           0 :     CPLAssert(fp != nullptr && pszFieldName != nullptr);
    1243             : 
    1244           0 :     VSIRewindL(fp);
    1245             : 
    1246           0 :     char **papszFields = CSVReadParseLineL(fp);
    1247           0 :     for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
    1248             :     {
    1249           0 :         if (EQUAL(papszFields[i], pszFieldName))
    1250             :         {
    1251           0 :             CSLDestroy(papszFields);
    1252           0 :             return i;
    1253             :         }
    1254             :     }
    1255             : 
    1256           0 :     CSLDestroy(papszFields);
    1257             : 
    1258           0 :     return -1;
    1259             : }
    1260             : 
    1261             : /************************************************************************/
    1262             : /*                         CSVGetFileFieldId()                          */
    1263             : /*                                                                      */
    1264             : /*      Same as CSVGetFieldId(), except that we get the file based      */
    1265             : /*      on filename, rather than having an existing handle.             */
    1266             : /************************************************************************/
    1267             : 
                       /**
                        * Return the index of a named field in an already accessed table.
                        *
                        * Walks psTable->papszFieldNames (no file I/O happens here).  A name
                        * matches when its length recorded in panFieldNamesLength equals the
                        * length of pszFieldName and EQUALN() reports the two names equal over
                        * that length.
                        *
                        * @param psTable table whose field names are searched.
                        * @param pszFieldName field name to look for; passed to strlen(), so it
                        * must not be nullptr.
                        *
                        * @return the zero-based index of the first matching field name, or -1
                        * when papszFieldNames is nullptr or no name matches.
                        */
    1268        7103 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
    1269             : 
    1270             : {
    1271             :     /* -------------------------------------------------------------------- */
    1272             :     /*      Find the requested field.                                       */
    1273             :     /* -------------------------------------------------------------------- */
    1274        7103 :     const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
    1275       17741 :     for (int i = 0; psTable->papszFieldNames != nullptr &&
    1276       17741 :                     psTable->papszFieldNames[i] != nullptr;
    1277             :          i++)
    1278             :     {
                               // Compare the recorded length first so that EQUALN() only runs
                               // on names of the right size.
    1279       17741 :         if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
    1280        9988 :             EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
    1281             :         {
    1282        7103 :             return i;
    1283             :         }
    1284             :     }
    1285             : 
    1286           0 :     return -1;
    1287             : }
    1288             : 
                       /**
                        * Return the index of a named field of a CSV file, by file name.
                        *
                        * Resolves the table with CSVAccess() and forwards to the table-based
                        * CSVGetFileFieldId() overload.
                        *
                        * @param pszFilename name of the CSV file; must not be nullptr (checked
                        * with CPLAssert()).
                        * @param pszFieldName field name to look for.
                        *
                        * @return the zero-based field index, or -1 when CSVAccess() returns no
                        * table or the field name is not found.
                        */
    1289        6837 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
    1290             : 
    1291             : {
    1292             :     /* -------------------------------------------------------------------- */
    1293             :     /*      Get access to the table.                                        */
    1294             :     /* -------------------------------------------------------------------- */
    1295        6837 :     CPLAssert(pszFilename != nullptr);
    1296             : 
    1297        6837 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1298        6837 :     if (psTable == nullptr)
    1299           0 :         return -1;
    1300        6837 :     return CSVGetFileFieldId(psTable, pszFieldName);
    1301             : }
    1302             : 
    1303             : /************************************************************************/
    1304             : /*                         CSVScanFileByName()                          */
    1305             : /*                                                                      */
    1306             : /*      Same as CSVScanFile(), but using a field name instead of a      */
    1307             : /*      field number.                                                   */
    1308             : /************************************************************************/
    1309             : 
                       /**
                        * Find a record of a CSV file whose named key field matches.
                        *
                        * The key field name is turned into an index with CSVGetFileFieldId() and
                        * the search itself is done by CSVScanFile().
                        *
                        * @param pszFilename name of the CSV file.
                        * @param pszKeyFieldName name of the field to compare.
                        * @param pszValue value to compare the key field against.
                        * @param eCriteria comparison criteria.
                        *
                        * @return the record returned by CSVScanFile(), or nullptr when the key
                        * field name cannot be resolved.
                        */
    1310           4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
    1311             :                          const char *pszValue, CSVCompareCriteria eCriteria)
    1312             : 
    1313             : {
    1314           4 :     const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
    1315           4 :     if (iKeyField == -1)
    1316           0 :         return nullptr;
    1317             : 
    1318           4 :     return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
    1319             : }
    1320             : 
    1321             : /************************************************************************/
    1322             : /*                            CSVGetField()                             */
    1323             : /*                                                                      */
    1324             : /*      The all-in-one function to fetch a particular field value       */
    1325             : /*      from a CSV file.  Note this function will return an empty       */
    1326             : /*      string, rather than NULL if it fails to find the desired        */
    1327             : /*      value for some reason.  The caller can't establish that the     */
    1328             : /*      fetch failed.                                                   */
    1329             : /************************************************************************/
    1330             : 
                       /**
                        * Fetch one field of the record whose key field matches a value.
                        *
                        * Steps: access the table, resolve pszKeyFieldName to an index, locate
                        * the record with the table-based CSVScanFile(), resolve pszTargetField
                        * to an index, then return that field of the record.
                        *
                        * @param pszFilename name of the CSV file.
                        * @param pszKeyFieldName name of the field used to select the record.
                        * @param pszKeyFieldValue value the key field is compared against.
                        * @param eCriteria comparison criteria.
                        * @param pszTargetField name of the field whose value is wanted.
                        *
                        * @return the requested field, which points into the record returned by
                        * CSVScanFile() (owned by the table, not to be freed), or the literal ""
                        * when the table, either field name or the record cannot be found, or
                        * when the record has fewer fields than the target index requires.
                        */
    1331         133 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
    1332             :                         const char *pszKeyFieldValue,
    1333             :                         CSVCompareCriteria eCriteria,
    1334             :                         const char *pszTargetField)
    1335             : 
    1336             : {
    1337             :     /* -------------------------------------------------------------------- */
    1338             :     /*      Find the table.                                                 */
    1339             :     /* -------------------------------------------------------------------- */
    1340         133 :     CSVTable *const psTable = CSVAccess(pszFilename);
    1341         133 :     if (psTable == nullptr)
    1342           0 :         return "";
    1343             : 
    1344         133 :     const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
    1345         133 :     if (iKeyField == -1)
    1346           0 :         return "";
    1347             : 
    1348             :     /* -------------------------------------------------------------------- */
    1349             :     /*      Find the correct record.                                        */
    1350             :     /* -------------------------------------------------------------------- */
    1351             :     char **papszRecord =
    1352         133 :         CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
    1353         133 :     if (papszRecord == nullptr)
    1354           0 :         return "";
    1355             : 
    1356             :     /* -------------------------------------------------------------------- */
    1357             :     /*      Figure out which field we want out of this.                     */
    1358             :     /* -------------------------------------------------------------------- */
    1359         133 :     const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
    1360         133 :     if (iTargetField < 0)
    1361           0 :         return "";
    1362             : 
                           // Walk the record up to the target index instead of indexing it
                           // directly: a record shorter than the header ends the loop at its
                           // terminating nullptr and falls through to the "" result.
    1363         374 :     for (int i = 0; papszRecord[i] != nullptr; ++i)
    1364             :     {
    1365         374 :         if (i == iTargetField)
    1366         133 :             return papszRecord[iTargetField];
    1367             :     }
    1368           0 :     return "";
    1369             : }
    1370             : 
    1371             : /************************************************************************/
    1372             : /*                       GDALDefaultCSVFilename()                       */
    1373             : /************************************************************************/
    1374             : 
                       // State that GDALDefaultCSVFilename() keeps under the
                       // CTLS_CSVDEFAULTFILENAME TLS key.
    1375             : typedef struct
    1376             : {
                           // Buffer that receives a copy of the requested base name when
                           // GDALDefaultCSVFilename() cannot resolve it to a path.
    1377             :     char szPath[512];
                           // Set once GDALDefaultCSVFilename() has tried pushing the GDAL_DATA
                           // configuration option as a finder location.
    1378             :     bool bCSVFinderInitialized;
    1379             : } DefaultCSVFileNameTLS;
    1380             : 
                       /**
                        * Default resolution of a CSV base name to a path.
                        *
                        * Candidates are tried in this order:
                        *  - the pszFilename of a table in the CTLS_CSVTABLEPTR list whose path
                        *    ends with a '/' or '\\' separator followed by pszBasename;
                        *  - the result of CPLFindFile("gdal", pszBasename);
                        *  - the first time that lookup fails for a given TLS record, the same
                        *    lookup repeated after pushing the GDAL_DATA configuration option
                        *    (when set) as a finder location;
                        *  - pszBasename itself, copied into the TLS szPath buffer.
                        *
                        * @param pszBasename base name of the CSV file; passed to strlen(), so it
                        * must not be nullptr.
                        *
                        * @return the first candidate that applies, or the literal
                        * "/not_existing_dir/not_existing_path" when no TLS record could be
                        * obtained.
                        */
    1381        2412 : const char *GDALDefaultCSVFilename(const char *pszBasename)
    1382             : 
    1383             : {
    1384             :     /* -------------------------------------------------------------------- */
    1385             :     /*      Do we already have this file accessed?  If so, just return      */
    1386             :     /*      the existing path without any further probing.                  */
    1387             :     /* -------------------------------------------------------------------- */
    1388        2412 :     int bMemoryError = FALSE;
    1389             :     CSVTable **ppsCSVTableList =
    1390        2412 :         static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
    1391        2412 :     if (ppsCSVTableList != nullptr)
    1392             :     {
    1393        2406 :         const size_t nBasenameLen = strlen(pszBasename);
    1394             : 
    1395       22378 :         for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
    1396       19972 :              psTable = psTable->psNext)
    1397             :         {
    1398       21826 :             const size_t nFullLen = strlen(psTable->pszFilename);
    1399             : 
                                   // Match when the table path ends with pszBasename and the
                                   // character just before that suffix is a path separator.
                                   // nFullLen > nBasenameLen guarantees that character exists.
    1400       21826 :             if (nFullLen > nBasenameLen &&
    1401       21826 :                 strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
    1402        1854 :                        pszBasename) == 0 &&
    1403        1854 :                 strchr("/\\",
    1404        1854 :                        psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
    1405             :                     nullptr)
    1406             :             {
    1407        1854 :                 return psTable->pszFilename;
    1408             :             }
    1409             :         }
    1410             :     }
    1411             : 
    1412             :     /* -------------------------------------------------------------------- */
    1413             :     /*      Otherwise we need to look harder for it.                        */
    1414             :     /* -------------------------------------------------------------------- */
    1415             :     DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1416         558 :         CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
                           // Create the TLS record on first use, unless the lookup itself
                           // flagged bMemoryError (presumably an allocation failure inside
                           // CPLGetTLSEx() -- confirm).
    1417         558 :     if (pTLSData == nullptr && !bMemoryError)
    1418             :     {
    1419             :         pTLSData = static_cast<DefaultCSVFileNameTLS *>(
    1420           5 :             VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
    1421           5 :         if (pTLSData)
    1422           5 :             CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
    1423             :     }
                           // Without a TLS record there is no buffer for the fallback result,
                           // so a fixed placeholder path is returned instead.
    1424         558 :     if (pTLSData == nullptr)
    1425           0 :         return "/not_existing_dir/not_existing_path";
    1426             : 
    1427         558 :     const char *pszResult = CPLFindFile("gdal", pszBasename);
    1428             : 
    1429         558 :     if (pszResult != nullptr)
    1430          43 :         return pszResult;
    1431             : 
                           // Done only once per TLS record: add GDAL_DATA (when set) to the
                           // finder locations and retry the lookup.
    1432         515 :     if (!pTLSData->bCSVFinderInitialized)
    1433             :     {
    1434           2 :         pTLSData->bCSVFinderInitialized = true;
    1435             : 
    1436           2 :         if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
    1437           2 :             CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
    1438             : 
    1439           2 :         pszResult = CPLFindFile("gdal", pszBasename);
    1440             : 
    1441           2 :         if (pszResult != nullptr)
    1442           0 :             return pszResult;
    1443             :     }
    1444             : 
    1445             :     // For systems like sandboxes that do not allow other checks.
    1446         515 :     CPLDebug("CPL_CSV",
    1447             :              "Failed to find file in GDALDefaultCSVFilename.  "
    1448             :              "Returning original basename: %s",
    1449             :              pszBasename);
                           // The copy is bounded by sizeof(szPath) (512 bytes); longer base
                           // names are presumably truncated by CPLStrlcpy() -- confirm.
    1450         515 :     CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
    1451         515 :     return pTLSData->szPath;
    1452             : }
    1453             : 
    1454             : /************************************************************************/
    1455             : /*                            CSVFilename()                             */
    1456             : /*                                                                      */
    1457             : /*      Return the full path to a particular CSV file.  This will       */
    1458             : /*      eventually be something the application can override.           */
    1459             : /************************************************************************/
    1460             : 
                       // Optional application hook consulted by CSVFilename().  It stays
                       // nullptr until SetCSVFilenameHook() installs a function, and while it
                       // is nullptr CSVFilename() uses GDALDefaultCSVFilename().
    1461             : CPL_C_START
    1462             : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
    1463             : CPL_C_END
    1464             : 
                       /**
                        * Return the path to use for a CSV file given its base name.
                        *
                        * @param pszBasename base name of the CSV file.
                        *
                        * @return the result of the hook installed with SetCSVFilenameHook() when
                        * there is one, otherwise the result of GDALDefaultCSVFilename().
                        */
    1465        2412 : const char *CSVFilename(const char *pszBasename)
    1466             : 
    1467             : {
    1468        2412 :     if (pfnCSVFilenameHook == nullptr)
    1469        2412 :         return GDALDefaultCSVFilename(pszBasename);
    1470             : 
    1471           0 :     return pfnCSVFilenameHook(pszBasename);
    1472             : }
    1473             : 
    1474             : /************************************************************************/
    1475             : /*                         SetCSVFilenameHook()                         */
    1476             : /*                                                                      */
    1477             : /*      Applications can use this to set a function that will           */
    1478             : /*      massage CSV filenames.                                          */
    1479             : /************************************************************************/
    1480             : 
    1481             : /**
    1482             :  * Override CSV file search method.
    1483             :  *
    1484             :  * @param pfnNewHook The pointer to a function which will return the
    1485             :  * full path for a given filename.
    1486             :  *
    1487             : 
    1488             : This function allows an application to override how the GTIFGetDefn()
    1489             : and related functions find the CSV (Comma Separated Value) values
    1490             : required. The pfnNewHook argument should be a pointer to a function that
    1491             : will take in a CSV filename and return a full path to the file. The
    1492             : returned string should point to an internal static buffer so that the
    1493             : caller doesn't have to free the result.
    1494             : 
    1495             : <b>Example:</b><br>
    1496             : 
    1497             : The listgeo utility uses the following override function if the user
    1498             : specified a CSV file directory with the -t commandline switch (argument
    1499             : put into CSVDirName).  <p>
    1500             : 
    1501             : <pre>
    1502             : 
    1503             :     ...
    1504             :     SetCSVFilenameHook( CSVFileOverride );
    1505             :     ...
    1506             : 
    1507             : static const char *CSVFileOverride( const char * pszInput )
    1508             : 
    1509             : {
    1510             :     static char szPath[1024] = {};
    1511             : 
    1512             :     sprintf( szPath, "%s/%s", CSVDirName, pszInput );
    1513             : 
    1514             :     return szPath;
    1515             : }
    1516             : </pre>
    1517             : 
    1518             : */
    1519             : 
    1520             : CPL_C_START
    1521           0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
    1522             : 
    1523             : {
                           // Store the hook that CSVFilename() consults.  Passing nullptr makes
                           // CSVFilename() fall back to GDALDefaultCSVFilename() again.
    1524           0 :     pfnCSVFilenameHook = pfnNewHook;
    1525           0 : }
    1526             : 
    1527             : CPL_C_END

Generated by: LCOV version 1.14