Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: CSV (comma separated value) file access.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 1999, Frank Warmerdam
9 : * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "cpl_port.h"
15 : #include "cpl_csv.h"
16 :
17 : #include <cstddef>
18 : #include <cstdlib>
19 : #include <cstring>
20 : #include <fcntl.h>
21 :
22 : #include "cpl_conv.h"
23 : #include "cpl_error.h"
24 : #include "cpl_multiproc.h"
25 : #include "gdal_csv.h"
26 :
27 : #include <algorithm>
28 :
29 : /* ==================================================================== */
30 : /* The CSVTable is a persistent set of info about an open CSV */
31 : /* table. While it doesn't currently maintain a record index, */
32 : /* or in-memory copy of the table, it could be changed to do so */
33 : /* in the future. */
34 : /* ==================================================================== */
35 : typedef struct ctb
36 : {
37 : VSILFILE *fp;
38 : struct ctb *psNext;
39 : char *pszFilename;
40 : char **papszFieldNames;
41 : int *panFieldNamesLength;
42 : char **papszRecFields;
43 : int nFields;
44 : int iLastLine;
45 : bool bNonUniqueKey;
46 :
47 : /* Cache for whole file */
48 : int nLineCount;
49 : char **papszLines;
50 : int *panLineIndex;
51 : char *pszRawData;
52 : } CSVTable;
53 :
54 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
55 : const char *pszFilename);
56 :
57 : /************************************************************************/
58 : /* CSVFreeTLS() */
59 : /************************************************************************/
60 2 : static void CSVFreeTLS(void *pData)
61 : {
62 2 : CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
63 2 : CPLFree(pData);
64 2 : }
65 :
66 : /* It would likely be better to share this list between threads, but
67 : that will require some rework. */
68 :
69 : /************************************************************************/
70 : /* CSVAccess() */
71 : /* */
72 : /* This function will fetch a handle to the requested table. */
73 : /* If not found in the ``open table list'' the table will be */
74 : /* opened and added to the list. Eventually this function may */
75 : /* become public with an abstracted return type so that */
76 : /* applications can set options about the table. For now this */
77 : /* isn't done. */
78 : /************************************************************************/
79 :
80 130337 : static CSVTable *CSVAccess(const char *pszFilename)
81 :
82 : {
83 : /* -------------------------------------------------------------------- */
84 : /* Fetch the table, and allocate the thread-local pointer to it */
85 : /* if there isn't already one. */
86 : /* -------------------------------------------------------------------- */
87 130337 : int bMemoryError = FALSE;
88 : CSVTable **ppsCSVTableList =
89 130337 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
90 130337 : if (bMemoryError)
91 0 : return nullptr;
92 130337 : if (ppsCSVTableList == nullptr)
93 : {
94 : ppsCSVTableList =
95 5 : static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
96 5 : if (ppsCSVTableList == nullptr)
97 0 : return nullptr;
98 5 : CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
99 : }
100 :
101 : /* -------------------------------------------------------------------- */
102 : /* Is the table already in the list. */
103 : /* -------------------------------------------------------------------- */
104 1011440 : for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
105 881107 : psTable = psTable->psNext)
106 : {
107 1011410 : if (EQUAL(psTable->pszFilename, pszFilename))
108 : {
109 : /*
110 : * Eventually we should consider promoting to the front of
111 : * the list to accelerate frequently accessed tables.
112 : */
113 130307 : return psTable;
114 : }
115 : }
116 :
117 : /* -------------------------------------------------------------------- */
118 : /* If not, try to open it. */
119 : /* -------------------------------------------------------------------- */
120 30 : VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
121 30 : if (fp == nullptr)
122 0 : return nullptr;
123 :
124 : /* -------------------------------------------------------------------- */
125 : /* Create an information structure about this table, and add to */
126 : /* the front of the list. */
127 : /* -------------------------------------------------------------------- */
128 : CSVTable *const psTable =
129 30 : static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
130 30 : if (psTable == nullptr)
131 : {
132 0 : VSIFCloseL(fp);
133 0 : return nullptr;
134 : }
135 :
136 30 : psTable->fp = fp;
137 30 : psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
138 30 : if (psTable->pszFilename == nullptr)
139 : {
140 0 : VSIFree(psTable);
141 0 : VSIFCloseL(fp);
142 0 : return nullptr;
143 : }
144 30 : psTable->bNonUniqueKey = false; // As far as we know now.
145 30 : psTable->psNext = *ppsCSVTableList;
146 :
147 30 : *ppsCSVTableList = psTable;
148 :
149 : /* -------------------------------------------------------------------- */
150 : /* Read the table header record containing the field names. */
151 : /* -------------------------------------------------------------------- */
152 30 : psTable->papszFieldNames = CSVReadParseLineL(fp);
153 30 : psTable->nFields = CSLCount(psTable->papszFieldNames);
154 30 : psTable->panFieldNamesLength =
155 30 : static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
156 30 : for (int i = 0;
157 185 : i < psTable->nFields &&
158 : /* null-pointer check to avoid a false positive from CLang S.A. */
159 155 : psTable->papszFieldNames != nullptr;
160 : i++)
161 : {
162 155 : psTable->panFieldNamesLength[i] =
163 155 : static_cast<int>(strlen(psTable->papszFieldNames[i]));
164 : }
165 :
166 30 : return psTable;
167 : }
168 :
169 : /************************************************************************/
170 : /* CSVDeaccess() */
171 : /************************************************************************/
172 :
173 1133 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
174 : const char *pszFilename)
175 :
176 : {
177 1133 : if (ppsCSVTableList == nullptr)
178 1124 : return;
179 :
180 : /* -------------------------------------------------------------------- */
181 : /* A NULL means deaccess all tables. */
182 : /* -------------------------------------------------------------------- */
183 9 : if (pszFilename == nullptr)
184 : {
185 9 : while (*ppsCSVTableList != nullptr)
186 5 : CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
187 5 : (*ppsCSVTableList)->pszFilename);
188 :
189 4 : return;
190 : }
191 :
192 : /* -------------------------------------------------------------------- */
193 : /* Find this table. */
194 : /* -------------------------------------------------------------------- */
195 5 : CSVTable *psLast = nullptr;
196 5 : CSVTable *psTable = *ppsCSVTableList;
197 5 : for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
198 0 : psTable = psTable->psNext)
199 : {
200 0 : psLast = psTable;
201 : }
202 :
203 5 : if (psTable == nullptr)
204 : {
205 0 : if (bCanUseTLS)
206 0 : CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
207 0 : return;
208 : }
209 :
210 : /* -------------------------------------------------------------------- */
211 : /* Remove the link from the list. */
212 : /* -------------------------------------------------------------------- */
213 5 : if (psLast != nullptr)
214 0 : psLast->psNext = psTable->psNext;
215 : else
216 5 : *ppsCSVTableList = psTable->psNext;
217 :
218 : /* -------------------------------------------------------------------- */
219 : /* Free the table. */
220 : /* -------------------------------------------------------------------- */
221 5 : if (psTable->fp != nullptr)
222 0 : VSIFCloseL(psTable->fp);
223 :
224 5 : CSLDestroy(psTable->papszFieldNames);
225 5 : CPLFree(psTable->panFieldNamesLength);
226 5 : CSLDestroy(psTable->papszRecFields);
227 5 : CPLFree(psTable->pszFilename);
228 5 : CPLFree(psTable->panLineIndex);
229 5 : CPLFree(psTable->pszRawData);
230 5 : CPLFree(psTable->papszLines);
231 :
232 5 : CPLFree(psTable);
233 :
234 5 : if (bCanUseTLS)
235 5 : CPLReadLine(nullptr);
236 : }
237 :
238 1126 : void CSVDeaccess(const char *pszFilename)
239 : {
240 : /* -------------------------------------------------------------------- */
241 : /* Fetch the table, and allocate the thread-local pointer to it */
242 : /* if there isn't already one. */
243 : /* -------------------------------------------------------------------- */
244 1126 : int bMemoryError = FALSE;
245 : CSVTable **ppsCSVTableList =
246 1126 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
247 :
248 1126 : CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
249 1126 : }
250 :
251 : /************************************************************************/
252 : /* CSVSplitLine() */
253 : /* */
254 : /* Tokenize a CSV line into fields in the form of a string */
255 : /* list. This is used instead of the CPLTokenizeString() */
256 : /* because it provides correct CSV escaping and quoting */
257 : /* semantics. */
258 : /************************************************************************/
259 :
260 117449 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
261 : bool bKeepLeadingAndClosingQuotes,
262 : bool bMergeDelimiter)
263 :
264 : {
265 234898 : CPLStringList aosRetList;
266 117449 : if (pszString == nullptr)
267 0 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
268 :
269 117449 : char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
270 117449 : int nTokenMax = 10;
271 117449 : const size_t nDelimiterLength = strlen(pszDelimiter);
272 :
273 117449 : const char *pszIter = pszString;
274 676277 : while (*pszIter != '\0')
275 : {
276 558828 : bool bInString = false;
277 :
278 558828 : int nTokenLen = 0;
279 :
280 : // Try to find the next delimiter, marking end of token.
281 4710510 : do
282 : {
283 : // End if this is a delimiter skip it and break.
284 5269340 : if (!bInString &&
285 2752100 : strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
286 : {
287 441704 : pszIter += nDelimiterLength;
288 441704 : if (bMergeDelimiter)
289 : {
290 9 : while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
291 : 0)
292 5 : pszIter += nDelimiterLength;
293 : }
294 441704 : break;
295 : }
296 :
297 4827630 : if (*pszIter == '"')
298 : {
299 396853 : if (!bInString && nTokenLen > 0)
300 : {
301 : // do not treat in a special way double quotes that appear
302 : // in the middle of a field (similarly to OpenOffice)
303 : // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
304 : }
305 396766 : else if (!bInString || pszIter[1] != '"')
306 : {
307 396086 : bInString = !bInString;
308 396086 : if (!bKeepLeadingAndClosingQuotes)
309 396052 : continue;
310 : }
311 : else // Doubled quotes in string resolve to one quote.
312 : {
313 680 : pszIter++;
314 : }
315 : }
316 :
317 4431580 : if (nTokenLen >= nTokenMax - 2)
318 : {
319 125166 : nTokenMax = nTokenMax * 2 + 10;
320 125166 : pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
321 : }
322 :
323 4431580 : pszToken[nTokenLen] = *pszIter;
324 4431580 : nTokenLen++;
325 4827630 : } while (*(++pszIter) != '\0');
326 :
327 558828 : pszToken[nTokenLen] = '\0';
328 558828 : aosRetList.AddString(pszToken);
329 :
330 : // If the last token is an empty token, then we have to catch
331 : // it now, otherwise we won't reenter the loop and it will be lost.
332 558828 : if (*pszIter == '\0' &&
333 117409 : pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
334 117409 : strncmp(pszIter - nDelimiterLength, pszDelimiter,
335 : nDelimiterLength) == 0)
336 : {
337 285 : aosRetList.AddString("");
338 : }
339 : }
340 :
341 117449 : CPLFree(pszToken);
342 :
343 117449 : if (aosRetList.Count() == 0)
344 40 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
345 : else
346 117409 : return aosRetList.StealList();
347 : }
348 :
349 : /************************************************************************/
350 : /* CSVFindNextLine() */
351 : /* */
352 : /* Find the start of the next line, while at the same time zero */
353 : /* terminating this line. Take into account that there may be */
354 : /* newline indicators within quoted strings, and that quotes */
355 : /* can be escaped with a backslash. */
356 : /************************************************************************/
357 :
/**
 * Nul-terminate the record starting at pszThisLine and locate the next one.
 *
 * A CR or LF ends the record only when it is outside double quotes; a
 * quote preceded by a backslash does not toggle the quoting state. Every
 * consecutive CR/LF byte at the break is overwritten with '\0'.
 *
 * @param pszThisLine writable, nul-terminated buffer.
 * @return pointer to the first byte after the line terminator(s), or
 *         nullptr when the buffer ends there.
 */
static char *CSVFindNextLine(char *pszThisLine)
{
    // size_t (rather than int) so that a very long buffer cannot overflow
    // the index; likewise only the parity of the quote count matters, so a
    // flag replaces the counter.
    size_t i = 0;
    bool bInsideQuotes = false;

    for (; pszThisLine[i] != '\0'; ++i)
    {
        const char ch = pszThisLine[i];

        if (ch == '\"' && (i == 0 || pszThisLine[i - 1] != '\\'))
            bInsideQuotes = !bInsideQuotes;

        if ((ch == 10 || ch == 13) && !bInsideQuotes)
            break;
    }

    // Blank out the whole run of terminators (LF, CR, CRLF, empty lines).
    while (pszThisLine[i] == 10 || pszThisLine[i] == 13)
        pszThisLine[i++] = '\0';

    if (pszThisLine[i] == '\0')
        return nullptr;

    return pszThisLine + i;
}
381 :
382 : /************************************************************************/
383 : /* CSVIngest() */
384 : /* */
385 : /* Load entire file into memory and setup index if possible. */
386 : /************************************************************************/
387 :
388 : // TODO(schwehr): Clean up all the casting in CSVIngest.
389 60654 : static void CSVIngest(CSVTable *psTable)
390 :
391 : {
392 60654 : if (psTable->pszRawData != nullptr)
393 60624 : return;
394 :
395 : /* -------------------------------------------------------------------- */
396 : /* Ingest whole file. */
397 : /* -------------------------------------------------------------------- */
398 30 : if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
399 : {
400 0 : CPLError(CE_Failure, CPLE_FileIO,
401 : "Failed using seek end and tell to get file length: %s",
402 : psTable->pszFilename);
403 0 : return;
404 : }
405 30 : const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
406 30 : if (static_cast<long>(nFileLen) == -1)
407 : {
408 0 : CPLError(CE_Failure, CPLE_FileIO,
409 : "Failed using seek end and tell to get file length: %s",
410 : psTable->pszFilename);
411 0 : return;
412 : }
413 30 : VSIRewindL(psTable->fp);
414 :
415 30 : psTable->pszRawData = static_cast<char *>(
416 30 : VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
417 30 : if (psTable->pszRawData == nullptr)
418 0 : return;
419 30 : if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
420 30 : psTable->fp) != static_cast<size_t>(nFileLen))
421 : {
422 0 : CPLFree(psTable->pszRawData);
423 0 : psTable->pszRawData = nullptr;
424 :
425 0 : CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
426 : psTable->pszFilename);
427 0 : return;
428 : }
429 :
430 30 : psTable->pszRawData[nFileLen] = '\0';
431 :
432 : /* -------------------------------------------------------------------- */
433 : /* Get count of newlines so we can allocate line array. */
434 : /* -------------------------------------------------------------------- */
435 30 : int nMaxLineCount = 0;
436 282925 : for (int i = 0; i < static_cast<int>(nFileLen); i++)
437 : {
438 282895 : if (psTable->pszRawData[i] == 10)
439 6329 : nMaxLineCount++;
440 : }
441 :
442 30 : psTable->papszLines =
443 30 : static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
444 30 : if (psTable->papszLines == nullptr)
445 0 : return;
446 :
447 : /* -------------------------------------------------------------------- */
448 : /* Build a list of record pointers into the raw data buffer */
449 : /* based on line terminators. Zero terminate the line */
450 : /* strings. */
451 : /* -------------------------------------------------------------------- */
452 : /* skip header line */
453 30 : char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
454 :
455 30 : int iLine = 0;
456 6329 : while (pszThisLine != nullptr && iLine < nMaxLineCount)
457 : {
458 6299 : if (pszThisLine[0] != '#')
459 6288 : psTable->papszLines[iLine++] = pszThisLine;
460 6299 : pszThisLine = CSVFindNextLine(pszThisLine);
461 : }
462 :
463 30 : psTable->nLineCount = iLine;
464 :
465 : /* -------------------------------------------------------------------- */
466 : /* Allocate and populate index array. Ensure they are in */
467 : /* ascending order so that binary searches can be done on the */
468 : /* array. */
469 : /* -------------------------------------------------------------------- */
470 30 : psTable->panLineIndex = static_cast<int *>(
471 30 : VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
472 30 : if (psTable->panLineIndex == nullptr)
473 0 : return;
474 :
475 6232 : for (int i = 0; i < psTable->nLineCount; i++)
476 : {
477 6204 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
478 :
479 6204 : if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
480 : {
481 2 : CPLFree(psTable->panLineIndex);
482 2 : psTable->panLineIndex = nullptr;
483 2 : break;
484 : }
485 : }
486 :
487 30 : psTable->iLastLine = -1;
488 :
489 : /* -------------------------------------------------------------------- */
490 : /* We should never need the file handle against, so close it. */
491 : /* -------------------------------------------------------------------- */
492 30 : VSIFCloseL(psTable->fp);
493 30 : psTable->fp = nullptr;
494 : }
495 :
496 60654 : static void CSVIngest(const char *pszFilename)
497 :
498 : {
499 60654 : CSVTable *psTable = CSVAccess(pszFilename);
500 60654 : if (psTable == nullptr)
501 : {
502 0 : CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
503 : pszFilename);
504 0 : return;
505 : }
506 60654 : CSVIngest(psTable);
507 : }
508 :
509 : /************************************************************************/
510 : /* CSVDetectSeperator() */
511 : /************************************************************************/
512 :
513 : /** Detect which field separator is used.
514 : *
515 : * Currently, it can detect comma, semicolon, space, tabulation or pipe.
516 : * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
517 : * most occurrences will be selected (and a warning emitted).
518 : * If no separator found, comma will be considered as the separator.
519 : *
520 : * @return ',', ';', ' ', tabulation character or '|'.
521 : */
522 634 : char CSVDetectSeperator(const char *pszLine)
523 : {
524 634 : bool bInString = false;
525 634 : int nCountComma = 0;
526 634 : int nCountSemicolon = 0;
527 634 : int nCountTab = 0;
528 634 : int nCountPipe = 0;
529 634 : int nCountSpace = 0;
530 :
531 27498 : for (; *pszLine != '\0'; pszLine++)
532 : {
533 26864 : if (!bInString && *pszLine == ',')
534 : {
535 2205 : nCountComma++;
536 : }
537 24659 : else if (!bInString && *pszLine == ';')
538 : {
539 10 : nCountSemicolon++;
540 : }
541 24649 : else if (!bInString && *pszLine == '\t')
542 : {
543 29 : nCountTab++;
544 : }
545 24620 : else if (!bInString && *pszLine == '|')
546 : {
547 9 : nCountPipe++;
548 : }
549 24611 : else if (!bInString && *pszLine == ' ')
550 : {
551 290 : nCountSpace++;
552 : }
553 24321 : else if (*pszLine == '"')
554 : {
555 521 : if (!bInString || pszLine[1] != '"')
556 : {
557 521 : bInString = !bInString;
558 521 : continue;
559 : }
560 : else /* doubled quotes in string resolve to one quote */
561 : {
562 0 : pszLine++;
563 : }
564 : }
565 : }
566 :
567 : const int nMaxCountExceptSpace =
568 : std::max(std::max(nCountComma, nCountSemicolon),
569 634 : std::max(nCountTab, nCountPipe));
570 634 : char chDelimiter = ',';
571 634 : if (nMaxCountExceptSpace == 0)
572 : {
573 38 : if (nCountSpace > 0)
574 9 : chDelimiter = ' ';
575 : }
576 : else
577 : {
578 596 : bool bWarn = false;
579 596 : if (nCountComma == nMaxCountExceptSpace)
580 : {
581 580 : chDelimiter = ',';
582 580 : bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
583 : }
584 16 : else if (nCountSemicolon == nMaxCountExceptSpace)
585 : {
586 5 : chDelimiter = ';';
587 5 : bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
588 : }
589 11 : else if (nCountTab == nMaxCountExceptSpace)
590 : {
591 6 : chDelimiter = '\t';
592 6 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
593 : }
594 : else /* if( nCountPipe == nMaxCountExceptSpace ) */
595 : {
596 5 : chDelimiter = '|';
597 5 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
598 : }
599 596 : if (bWarn)
600 : {
601 6 : CPLError(CE_Warning, CPLE_AppDefined,
602 : "Selecting '%c' as CSV field separator, but "
603 : "other candidate separator(s) have been found.",
604 : chDelimiter);
605 : }
606 : }
607 :
608 634 : return chDelimiter;
609 : }
610 :
611 : /************************************************************************/
612 : /* CSVReadParseLineGeneric() */
613 : /* */
614 : /* Read one line, and return split into fields. The return */
615 : /* result is a stringlist, in the sense of the CSL functions. */
616 : /************************************************************************/
617 :
618 : static char **
619 58522 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
620 : size_t nMaxLineSize, const char *pszDelimiter,
621 : bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
622 : bool bMergeDelimiter, bool bSkipBOM)
623 : {
624 58522 : const char *pszLine = pfnReadLine(fp, nMaxLineSize);
625 58522 : if (pszLine == nullptr)
626 1447 : return nullptr;
627 :
628 57075 : if (bSkipBOM)
629 : {
630 : // Skip BOM.
631 56697 : const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
632 56697 : if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
633 4 : pszLine += 3;
634 : }
635 :
636 : // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
637 57075 : if (!bHonourStrings)
638 : {
639 2 : return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
640 : }
641 :
642 : // If there are no quotes, then this is the simple case.
643 : // Parse, and return tokens.
644 57073 : if (strchr(pszLine, '\"') == nullptr)
645 49229 : return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
646 49229 : bMergeDelimiter);
647 :
648 7844 : const size_t nDelimiterLength = strlen(pszDelimiter);
649 7844 : bool bInString = false; // keep in that scope !
650 15688 : std::string osWorkLine(pszLine); // keep in that scope !
651 7844 : size_t i = 0; // keep in that scope !
652 :
653 : try
654 : {
655 : while (true)
656 : {
657 808176 : for (; i < osWorkLine.size(); ++i)
658 : {
659 799576 : if (osWorkLine[i] == '\"')
660 : {
661 59919 : if (!bInString)
662 : {
663 : // Only consider " as the start of a quoted string
664 : // if it is the first character of the line, or
665 : // if it is immediately after the field delimiter.
666 53044 : if (i == 0 ||
667 23381 : (i >= nDelimiterLength &&
668 23381 : osWorkLine.compare(i - nDelimiterLength,
669 : nDelimiterLength, pszDelimiter,
670 : nDelimiterLength) == 0))
671 : {
672 29576 : bInString = true;
673 : }
674 : }
675 57495 : else if (i + 1 < osWorkLine.size() &&
676 27239 : osWorkLine[i + 1] == '"')
677 : {
678 : // Escaped double quote in a quoted string
679 681 : ++i;
680 : }
681 : else
682 : {
683 29575 : bInString = false;
684 : }
685 : }
686 : }
687 :
688 8600 : if (!bInString)
689 : {
690 7843 : return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
691 : bKeepLeadingAndClosingQuotes,
692 7843 : bMergeDelimiter);
693 : }
694 :
695 757 : const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
696 757 : if (pszNewLine == nullptr)
697 1 : break;
698 :
699 756 : osWorkLine.append("\n");
700 756 : osWorkLine.append(pszNewLine);
701 756 : }
702 : }
703 0 : catch (const std::exception &e)
704 : {
705 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
706 : }
707 :
708 1 : if (bInString)
709 : {
710 1 : CPLError(CE_Failure, CPLE_AppDefined,
711 : "CSV file has unbalanced number of double-quotes. Corrupted "
712 : "data will likely be returned");
713 : }
714 :
715 1 : return nullptr;
716 : }
717 :
718 : /************************************************************************/
719 : /* CSVReadParseLine() */
720 : /* */
721 : /* Read one line, and return split into fields. The return */
722 : /* result is a stringlist, in the sense of the CSL functions. */
723 : /* */
724 : /* Deprecated. Replaced by CSVReadParseLineL(). */
725 : /************************************************************************/
726 :
727 0 : char **CSVReadParseLine(FILE *fp)
728 : {
729 0 : return CSVReadParseLine2(fp, ',');
730 : }
731 :
732 0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
733 : {
734 0 : return CPLReadLine(static_cast<FILE *>(fp));
735 : }
736 :
737 0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
738 : {
739 0 : CPLAssert(fp != nullptr);
740 0 : if (fp == nullptr)
741 0 : return nullptr;
742 :
743 0 : char szDelimiter[2] = {chDelimiter, 0};
744 0 : return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
745 : 0, // nMaxLineSize,
746 : szDelimiter,
747 : true, // bHonourStrings
748 : false, // bKeepLeadingAndClosingQuotes
749 : false, // bMergeDelimiter
750 0 : true /* bSkipBOM */);
751 : }
752 :
753 : /************************************************************************/
754 : /* CSVReadParseLineL() */
755 : /* */
756 : /* Read one line, and return split into fields. The return */
757 : /* result is a stringlist, in the sense of the CSL functions. */
758 : /* */
759 : /* Replaces CSVReadParseLine(). These functions use the VSI */
760 : /* layer to allow reading from other file containers. */
761 : /************************************************************************/
762 :
763 3910 : char **CSVReadParseLineL(VSILFILE *fp)
764 : {
765 3910 : return CSVReadParseLine2L(fp, ',');
766 : }
767 :
768 3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
769 :
770 : {
771 3910 : CPLAssert(fp != nullptr);
772 3910 : if (fp == nullptr)
773 0 : return nullptr;
774 :
775 3910 : char szDelimiter[2] = {chDelimiter, 0};
776 3910 : return CSVReadParseLine3L(fp,
777 : 0, // nMaxLineSize
778 : szDelimiter,
779 : true, // bHonourStrings
780 : false, // bKeepLeadingAndClosingQuotes
781 : false, // bMergeDelimiter
782 3910 : true /* bSkipBOM */);
783 : }
784 :
785 : /************************************************************************/
786 : /* ReadLineLargeFile() */
787 : /************************************************************************/
788 :
789 59279 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
790 : {
791 59279 : int nBufLength = 0;
792 59279 : return CPLReadLine3L(static_cast<VSILFILE *>(fp),
793 : nMaxLineSize == 0 ? -1
794 : : static_cast<int>(nMaxLineSize),
795 118558 : &nBufLength, nullptr);
796 : }
797 :
798 : /************************************************************************/
799 : /* CSVReadParseLine3L() */
800 : /* */
801 : /* Read one line, and return split into fields. The return */
802 : /* result is a stringlist, in the sense of the CSL functions. */
803 : /************************************************************************/
804 :
805 : /** Read one line, and return split into fields.
806 : * The return result is a stringlist, in the sense of the CSL functions.
807 : *
808 : * @param fp File handle. Must not be NULL
809 : * @param nMaxLineSize Maximum line size, or 0 for unlimited.
810 : * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
811 : * @param bHonourStrings Should be true, unless double quotes should not be
812 : * considered when separating fields.
813 : * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
814 : * quote characters should be kept.
815 : * @param bMergeDelimiter Whether consecutive delimiters should be considered
816 : * as a single one. Should generally be set to false.
817 : * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
818 : */
819 58522 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
820 : const char *pszDelimiter, bool bHonourStrings,
821 : bool bKeepLeadingAndClosingQuotes,
822 : bool bMergeDelimiter, bool bSkipBOM)
823 :
824 : {
825 58522 : return CSVReadParseLineGeneric(
826 : fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
827 58522 : bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
828 : }
829 :
830 : /************************************************************************/
831 : /* CSVCompare() */
832 : /* */
833 : /* Compare a field to a search value using a particular */
834 : /* criteria. */
835 : /************************************************************************/
836 :
837 610 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
838 : CSVCompareCriteria eCriteria)
839 :
840 : {
841 610 : if (eCriteria == CC_ExactString)
842 : {
843 0 : return (strcmp(pszFieldValue, pszTarget) == 0);
844 : }
845 610 : else if (eCriteria == CC_ApproxString)
846 : {
847 270 : return EQUAL(pszFieldValue, pszTarget);
848 : }
849 340 : else if (eCriteria == CC_Integer)
850 : {
851 640 : return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
852 640 : atoi(pszFieldValue) == atoi(pszTarget));
853 : }
854 :
855 0 : return false;
856 : }
857 :
858 : /************************************************************************/
859 : /* CSVScanLines() */
860 : /* */
861 : /* Read the file scanline for lines where the key field equals */
862 : /* the indicated value with the suggested comparison criteria. */
863 : /* Return the first matching line split into fields. */
864 : /* */
865 : /* Deprecated. Replaced by CSVScanLinesL(). */
866 : /************************************************************************/
867 :
868 0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
869 : CSVCompareCriteria eCriteria)
870 :
871 : {
872 0 : CPLAssert(pszValue != nullptr);
873 0 : CPLAssert(iKeyField >= 0);
874 0 : CPLAssert(fp != nullptr);
875 :
876 0 : bool bSelected = false;
877 0 : const int nTestValue = atoi(pszValue);
878 0 : char **papszFields = nullptr;
879 :
880 0 : while (!bSelected)
881 : {
882 0 : papszFields = CSVReadParseLine(fp);
883 0 : if (papszFields == nullptr)
884 0 : return nullptr;
885 :
886 0 : if (CSLCount(papszFields) < iKeyField + 1)
887 : {
888 : /* not selected */
889 : }
890 0 : else if (eCriteria == CC_Integer &&
891 0 : atoi(papszFields[iKeyField]) == nTestValue)
892 : {
893 0 : bSelected = true;
894 : }
895 : else
896 : {
897 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
898 : }
899 :
900 0 : if (!bSelected)
901 : {
902 0 : CSLDestroy(papszFields);
903 0 : papszFields = nullptr;
904 : }
905 : }
906 :
907 0 : return papszFields;
908 : }
909 :
910 : /************************************************************************/
911 : /* CSVScanLinesL() */
912 : /* */
913 : /* Read the file scanline for lines where the key field equals */
914 : /* the indicated value with the suggested comparison criteria. */
915 : /* Return the first matching line split into fields. */
916 : /************************************************************************/
917 :
918 0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
919 : CSVCompareCriteria eCriteria)
920 :
921 : {
922 0 : CPLAssert(pszValue != nullptr);
923 0 : CPLAssert(iKeyField >= 0);
924 0 : CPLAssert(fp != nullptr);
925 :
926 0 : bool bSelected = false;
927 0 : const int nTestValue = atoi(pszValue);
928 0 : char **papszFields = nullptr;
929 :
930 0 : while (!bSelected)
931 : {
932 0 : papszFields = CSVReadParseLineL(fp);
933 0 : if (papszFields == nullptr)
934 0 : return nullptr;
935 :
936 0 : if (CSLCount(papszFields) < iKeyField + 1)
937 : {
938 : /* not selected */
939 : }
940 0 : else if (eCriteria == CC_Integer &&
941 0 : atoi(papszFields[iKeyField]) == nTestValue)
942 : {
943 0 : bSelected = true;
944 : }
945 : else
946 : {
947 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
948 : }
949 :
950 0 : if (!bSelected)
951 : {
952 0 : CSLDestroy(papszFields);
953 0 : papszFields = nullptr;
954 : }
955 : }
956 :
957 0 : return papszFields;
958 : }
959 :
/************************************************************************/
/*                        CSVScanLinesIndexed()                         */
/*                                                                      */
/*      Binary search the in-memory integer index of an ingested        */
/*      table for nKeyValue. Return the first matching line split       */
/*      into fields, or NULL if the key is not present.                 */
/************************************************************************/
967 :
968 23 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
969 :
970 : {
971 23 : CPLAssert(psTable->panLineIndex != nullptr);
972 :
973 : /* -------------------------------------------------------------------- */
974 : /* Find target record with binary search. */
975 : /* -------------------------------------------------------------------- */
976 23 : int iTop = psTable->nLineCount - 1;
977 23 : int iBottom = 0;
978 23 : int iResult = -1;
979 :
980 167 : while (iTop >= iBottom)
981 : {
982 167 : const int iMiddle = (iTop + iBottom) / 2;
983 167 : if (psTable->panLineIndex[iMiddle] > nKeyValue)
984 98 : iTop = iMiddle - 1;
985 69 : else if (psTable->panLineIndex[iMiddle] < nKeyValue)
986 46 : iBottom = iMiddle + 1;
987 : else
988 : {
989 23 : iResult = iMiddle;
990 : // if a key is not unique, select the first instance of it.
991 23 : while (iResult > 0 &&
992 23 : psTable->panLineIndex[iResult - 1] == nKeyValue)
993 : {
994 0 : psTable->bNonUniqueKey = true;
995 0 : iResult--;
996 : }
997 23 : break;
998 : }
999 : }
1000 :
1001 23 : if (iResult == -1)
1002 0 : return nullptr;
1003 :
1004 : /* -------------------------------------------------------------------- */
1005 : /* Parse target line, and update iLastLine indicator. */
1006 : /* -------------------------------------------------------------------- */
1007 23 : psTable->iLastLine = iResult;
1008 :
1009 23 : return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
1010 : }
1011 :
/************************************************************************/
/*                        CSVScanLinesIngested()                        */
/*                                                                      */
/*      Scan the in-memory lines of an ingested table, starting         */
/*      after iLastLine, for a line whose key field matches the         */
/*      indicated value with the suggested comparison criteria.         */
/*      Return the first matching line split into fields.               */
/************************************************************************/
1019 :
1020 30 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
1021 : const char *pszValue,
1022 : CSVCompareCriteria eCriteria)
1023 :
1024 : {
1025 30 : CPLAssert(pszValue != nullptr);
1026 30 : CPLAssert(iKeyField >= 0);
1027 :
1028 30 : const int nTestValue = atoi(pszValue);
1029 :
1030 : /* -------------------------------------------------------------------- */
1031 : /* Short cut for indexed files. */
1032 : /* -------------------------------------------------------------------- */
1033 30 : if (iKeyField == 0 && eCriteria == CC_Integer &&
1034 23 : psTable->panLineIndex != nullptr)
1035 23 : return CSVScanLinesIndexed(psTable, nTestValue);
1036 :
1037 : /* -------------------------------------------------------------------- */
1038 : /* Scan from in-core lines. */
1039 : /* -------------------------------------------------------------------- */
1040 7 : char **papszFields = nullptr;
1041 7 : bool bSelected = false;
1042 :
1043 484 : while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
1044 : {
1045 477 : psTable->iLastLine++;
1046 477 : papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
1047 : false, false);
1048 :
1049 477 : if (CSLCount(papszFields) < iKeyField + 1)
1050 : {
1051 : /* not selected */
1052 : }
1053 477 : else if (eCriteria == CC_Integer &&
1054 242 : atoi(papszFields[iKeyField]) == nTestValue)
1055 : {
1056 2 : bSelected = true;
1057 : }
1058 : else
1059 : {
1060 475 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
1061 : }
1062 :
1063 477 : if (!bSelected)
1064 : {
1065 470 : CSLDestroy(papszFields);
1066 470 : papszFields = nullptr;
1067 : }
1068 : }
1069 :
1070 7 : return papszFields;
1071 : }
1072 :
1073 : /************************************************************************/
1074 : /* CSVRewind() */
1075 : /* */
1076 : /* Rewind a CSV file based on a passed in filename. */
1077 : /* This is aimed at being used with CSVGetNextLine(). */
1078 : /************************************************************************/
1079 :
1080 1851 : void CSVRewind(const char *pszFilename)
1081 :
1082 : {
1083 : /* -------------------------------------------------------------------- */
1084 : /* Get access to the table. */
1085 : /* -------------------------------------------------------------------- */
1086 1851 : CPLAssert(pszFilename != nullptr);
1087 :
1088 1851 : CSVTable *const psTable = CSVAccess(pszFilename);
1089 1851 : if (psTable != nullptr)
1090 1851 : psTable->iLastLine = -1;
1091 1851 : }
1092 :
1093 : /************************************************************************/
1094 : /* CSVGetNextLine() */
1095 : /* */
1096 : /* Fetch the next line of a CSV file based on a passed in */
1097 : /* filename. Returns NULL at end of file, or if file is not */
1098 : /* really established. */
1099 : /* This ingests the whole file into memory if not already done. */
1100 : /* When reaching end of file, CSVRewind() may be used to read */
1101 : /* again from the beginning. */
1102 : /************************************************************************/
1103 :
1104 60510 : char **CSVGetNextLine(const char *pszFilename)
1105 :
1106 : {
1107 :
1108 : /* -------------------------------------------------------------------- */
1109 : /* Get access to the table. */
1110 : /* -------------------------------------------------------------------- */
1111 60510 : CPLAssert(pszFilename != nullptr);
1112 :
1113 60510 : CSVTable *const psTable = CSVAccess(pszFilename);
1114 60510 : if (psTable == nullptr)
1115 0 : return nullptr;
1116 :
1117 60510 : CSVIngest(psTable->pszFilename);
1118 :
1119 : /* -------------------------------------------------------------------- */
1120 : /* If we use CSVGetNextLine() we can pretty much assume we have */
1121 : /* a non-unique key. */
1122 : /* -------------------------------------------------------------------- */
1123 60510 : psTable->bNonUniqueKey = true;
1124 :
1125 : /* -------------------------------------------------------------------- */
1126 : /* Do we have a next line available? This only works for */
1127 : /* ingested tables I believe. */
1128 : /* -------------------------------------------------------------------- */
1129 60510 : if (psTable->iLastLine + 1 >= psTable->nLineCount)
1130 633 : return nullptr;
1131 :
1132 59877 : psTable->iLastLine++;
1133 59877 : CSLDestroy(psTable->papszRecFields);
1134 119754 : psTable->papszRecFields = CSVSplitLine(
1135 59877 : psTable->papszLines[psTable->iLastLine], ",", false, false);
1136 :
1137 59877 : return psTable->papszRecFields;
1138 : }
1139 :
1140 : /************************************************************************/
1141 : /* CSVScanFile() */
1142 : /* */
1143 : /* Scan a whole file using criteria similar to above, but also */
1144 : /* taking care of file opening and closing. */
1145 : /************************************************************************/
1146 :
1147 144 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
1148 : const char *pszValue, CSVCompareCriteria eCriteria)
1149 : {
1150 144 : CSVIngest(psTable->pszFilename);
1151 :
1152 : /* -------------------------------------------------------------------- */
1153 : /* Does the current record match the criteria? If so, just */
1154 : /* return it again. */
1155 : /* -------------------------------------------------------------------- */
1156 144 : if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
1157 402 : CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
1158 114 : !psTable->bNonUniqueKey)
1159 : {
1160 114 : return psTable->papszRecFields;
1161 : }
1162 :
1163 : /* -------------------------------------------------------------------- */
1164 : /* Scan the file from the beginning, replacing the ``current */
1165 : /* record'' in our structure with the one that is found. */
1166 : /* -------------------------------------------------------------------- */
1167 30 : psTable->iLastLine = -1;
1168 30 : CSLDestroy(psTable->papszRecFields);
1169 :
1170 30 : if (psTable->pszRawData != nullptr)
1171 30 : psTable->papszRecFields =
1172 30 : CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
1173 : else
1174 : {
1175 0 : VSIRewindL(psTable->fp);
1176 0 : CPLReadLineL(psTable->fp); /* throw away the header line */
1177 :
1178 0 : psTable->papszRecFields =
1179 0 : CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
1180 : }
1181 :
1182 30 : return psTable->papszRecFields;
1183 : }
1184 :
1185 4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
1186 : CSVCompareCriteria eCriteria)
1187 :
1188 : {
1189 : /* -------------------------------------------------------------------- */
1190 : /* Get access to the table. */
1191 : /* -------------------------------------------------------------------- */
1192 4 : CPLAssert(pszFilename != nullptr);
1193 :
1194 4 : if (iKeyField < 0)
1195 0 : return nullptr;
1196 :
1197 4 : CSVTable *const psTable = CSVAccess(pszFilename);
1198 4 : if (psTable == nullptr)
1199 0 : return nullptr;
1200 :
1201 4 : return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
1202 : }
1203 :
/************************************************************************/
/*                           CSVGetFieldId()                            */
/*                                                                      */
/*      Read the first record of a CSV file (rewinding to be sure),     */
/*      and find the field with the indicated name.  Returns -1 if      */
/*      it fails to find the field name.  Comparison is case            */
/*      insensitive, but otherwise exact.  After this function has      */
/*      been called the file pointer will be positioned just after      */
/*      the first record.                                               */
/*                                                                      */
/*      Deprecated.  Replaced by CSVGetFieldIdL().                      */
/************************************************************************/
1216 :
1217 0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
1218 :
1219 : {
1220 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1221 :
1222 0 : VSIRewind(fp);
1223 :
1224 0 : char **papszFields = CSVReadParseLine(fp);
1225 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1226 : {
1227 0 : if (EQUAL(papszFields[i], pszFieldName))
1228 : {
1229 0 : CSLDestroy(papszFields);
1230 0 : return i;
1231 : }
1232 : }
1233 :
1234 0 : CSLDestroy(papszFields);
1235 :
1236 0 : return -1;
1237 : }
1238 :
/************************************************************************/
/*                           CSVGetFieldIdL()                           */
/*                                                                      */
/*      Read the first record of a CSV file (rewinding to be sure),     */
/*      and find the field with the indicated name.  Returns -1 if      */
/*      it fails to find the field name.  Comparison is case            */
/*      insensitive, but otherwise exact.  After this function has      */
/*      been called the file pointer will be positioned just after      */
/*      the first record.                                               */
/************************************************************************/
1249 :
1250 0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
1251 :
1252 : {
1253 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1254 :
1255 0 : VSIRewindL(fp);
1256 :
1257 0 : char **papszFields = CSVReadParseLineL(fp);
1258 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1259 : {
1260 0 : if (EQUAL(papszFields[i], pszFieldName))
1261 : {
1262 0 : CSLDestroy(papszFields);
1263 0 : return i;
1264 : }
1265 : }
1266 :
1267 0 : CSLDestroy(papszFields);
1268 :
1269 0 : return -1;
1270 : }
1271 :
/************************************************************************/
/*                         CSVGetFileFieldId()                          */
/*                                                                      */
/*      Same as CSVGetFieldId(), except that we get the file based      */
/*      on filename, rather than having an existing handle.             */
/************************************************************************/
1278 :
1279 7458 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
1280 :
1281 : {
1282 : /* -------------------------------------------------------------------- */
1283 : /* Find the requested field. */
1284 : /* -------------------------------------------------------------------- */
1285 7458 : const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
1286 18760 : for (int i = 0; psTable->papszFieldNames != nullptr &&
1287 18760 : psTable->papszFieldNames[i] != nullptr;
1288 : i++)
1289 : {
1290 18760 : if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
1291 10473 : EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
1292 : {
1293 7458 : return i;
1294 : }
1295 : }
1296 :
1297 0 : return -1;
1298 : }
1299 :
1300 7178 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
1301 :
1302 : {
1303 : /* -------------------------------------------------------------------- */
1304 : /* Get access to the table. */
1305 : /* -------------------------------------------------------------------- */
1306 7178 : CPLAssert(pszFilename != nullptr);
1307 :
1308 7178 : CSVTable *const psTable = CSVAccess(pszFilename);
1309 7178 : if (psTable == nullptr)
1310 0 : return -1;
1311 7178 : return CSVGetFileFieldId(psTable, pszFieldName);
1312 : }
1313 :
1314 : /************************************************************************/
1315 : /* CSVScanFileByName() */
1316 : /* */
1317 : /* Same as CSVScanFile(), but using a field name instead of a */
1318 : /* field number. */
1319 : /************************************************************************/
1320 :
1321 4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
1322 : const char *pszValue, CSVCompareCriteria eCriteria)
1323 :
1324 : {
1325 4 : const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
1326 4 : if (iKeyField == -1)
1327 0 : return nullptr;
1328 :
1329 4 : return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
1330 : }
1331 :
1332 : /************************************************************************/
1333 : /* CSVGetField() */
1334 : /* */
1335 : /* The all-in-one function to fetch a particular field value */
1336 : /* from a CSV file. Note this function will return an empty */
1337 : /* string, rather than NULL if it fails to find the desired */
1338 : /* value for some reason. The caller can't establish that the */
1339 : /* fetch failed. */
1340 : /************************************************************************/
1341 :
1342 140 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
1343 : const char *pszKeyFieldValue,
1344 : CSVCompareCriteria eCriteria,
1345 : const char *pszTargetField)
1346 :
1347 : {
1348 : /* -------------------------------------------------------------------- */
1349 : /* Find the table. */
1350 : /* -------------------------------------------------------------------- */
1351 140 : CSVTable *const psTable = CSVAccess(pszFilename);
1352 140 : if (psTable == nullptr)
1353 0 : return "";
1354 :
1355 140 : const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
1356 140 : if (iKeyField == -1)
1357 0 : return "";
1358 :
1359 : /* -------------------------------------------------------------------- */
1360 : /* Find the correct record. */
1361 : /* -------------------------------------------------------------------- */
1362 : char **papszRecord =
1363 140 : CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
1364 140 : if (papszRecord == nullptr)
1365 0 : return "";
1366 :
1367 : /* -------------------------------------------------------------------- */
1368 : /* Figure out which field we want out of this. */
1369 : /* -------------------------------------------------------------------- */
1370 140 : const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
1371 140 : if (iTargetField < 0)
1372 0 : return "";
1373 :
1374 388 : for (int i = 0; papszRecord[i] != nullptr; ++i)
1375 : {
1376 388 : if (i == iTargetField)
1377 140 : return papszRecord[iTargetField];
1378 : }
1379 0 : return "";
1380 : }
1381 :
1382 : /************************************************************************/
1383 : /* GDALDefaultCSVFilename() */
1384 : /************************************************************************/
1385 :
// Per-thread state used by GDALDefaultCSVFilename().
struct DefaultCSVFileNameTLS
{
    // Buffer holding the fallback path handed back to the caller.
    char szPath[512];
    // Set once the one-time finder initialization has been attempted.
    bool bCSVFinderInitialized;
};
1391 :
1392 2498 : const char *GDALDefaultCSVFilename(const char *pszBasename)
1393 :
1394 : {
1395 : /* -------------------------------------------------------------------- */
1396 : /* Do we already have this file accessed? If so, just return */
1397 : /* the existing path without any further probing. */
1398 : /* -------------------------------------------------------------------- */
1399 2498 : int bMemoryError = FALSE;
1400 : CSVTable **ppsCSVTableList =
1401 2498 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
1402 2498 : if (ppsCSVTableList != nullptr)
1403 : {
1404 2492 : const size_t nBasenameLen = strlen(pszBasename);
1405 :
1406 23272 : for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
1407 20780 : psTable = psTable->psNext)
1408 : {
1409 22718 : const size_t nFullLen = strlen(psTable->pszFilename);
1410 :
1411 22718 : if (nFullLen > nBasenameLen &&
1412 22718 : strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
1413 1938 : pszBasename) == 0 &&
1414 1938 : strchr("/\\",
1415 1938 : psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
1416 : nullptr)
1417 : {
1418 1938 : return psTable->pszFilename;
1419 : }
1420 : }
1421 : }
1422 :
1423 : /* -------------------------------------------------------------------- */
1424 : /* Otherwise we need to look harder for it. */
1425 : /* -------------------------------------------------------------------- */
1426 : DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1427 560 : CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
1428 560 : if (pTLSData == nullptr && !bMemoryError)
1429 : {
1430 : pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1431 5 : VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
1432 5 : if (pTLSData)
1433 5 : CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
1434 : }
1435 560 : if (pTLSData == nullptr)
1436 0 : return "/not_existing_dir/not_existing_path";
1437 :
1438 560 : const char *pszResult = CPLFindFile("gdal", pszBasename);
1439 :
1440 560 : if (pszResult != nullptr)
1441 43 : return pszResult;
1442 :
1443 517 : if (!pTLSData->bCSVFinderInitialized)
1444 : {
1445 2 : pTLSData->bCSVFinderInitialized = true;
1446 :
1447 2 : if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
1448 2 : CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
1449 :
1450 2 : pszResult = CPLFindFile("gdal", pszBasename);
1451 :
1452 2 : if (pszResult != nullptr)
1453 0 : return pszResult;
1454 : }
1455 :
1456 : // For systems like sandboxes that do not allow other checks.
1457 517 : CPLDebug("CPL_CSV",
1458 : "Failed to find file in GDALDefaultCSVFilename. "
1459 : "Returning original basename: %s",
1460 : pszBasename);
1461 517 : CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
1462 517 : return pTLSData->szPath;
1463 : }
1464 :
1465 : /************************************************************************/
1466 : /* CSVFilename() */
1467 : /* */
1468 : /* Return the full path to a particular CSV file. This will */
1469 : /* eventually be something the application can override. */
1470 : /************************************************************************/
1471 :
1472 : CPL_C_START
1473 : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
1474 : CPL_C_END
1475 :
1476 2498 : const char *CSVFilename(const char *pszBasename)
1477 :
1478 : {
1479 2498 : if (pfnCSVFilenameHook == nullptr)
1480 2498 : return GDALDefaultCSVFilename(pszBasename);
1481 :
1482 0 : return pfnCSVFilenameHook(pszBasename);
1483 : }
1484 :
1485 : /************************************************************************/
1486 : /* SetCSVFilenameHook() */
1487 : /* */
1488 : /* Applications can use this to set a function that will */
1489 : /* massage CSV filenames. */
1490 : /************************************************************************/
1491 :
1492 : /**
1493 : * Override CSV file search method.
1494 : *
1495 : * @param pfnNewHook The pointer to a function which will return the
1496 : * full path for a given filename.
1497 : *
1498 :
This function allows an application to override how GTIFGetDefn()
and related functions find the CSV (Comma Separated Value) files
they require. The pfnNewHook argument should be a pointer to a function
that takes in a CSV filename and returns a full path to the file. The
returned string should point to an internal static buffer so that the
caller doesn't have to free the result.
1505 :
1506 : Example:
1507 :
1508 : The listgeo utility uses the following override function if the user
1509 : specified a CSV file directory with the -t commandline switch (argument
1510 : put into CSVDirName).
1511 :
1512 : \code{.cpp}
1513 :
1514 : ...
1515 : SetCSVFilenameHook( CSVFileOverride );
1516 : ...
1517 :
1518 : static const char *CSVFileOverride( const char * pszInput )
1519 :
1520 : {
1521 : static char szPath[1024] = {};
1522 :
1523 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1524 :
1525 : return szPath;
1526 : }
1527 : \endcode
1528 :
1529 : */
1530 :
1531 : CPL_C_START
1532 0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
1533 :
1534 : {
1535 0 : pfnCSVFilenameHook = pfnNewHook;
1536 0 : }
1537 :
1538 : CPL_C_END
|