Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: CSV (comma separated value) file access.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 1999, Frank Warmerdam
9 : * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "cpl_port.h"
15 : #include "cpl_csv.h"
16 :
17 : #include <cstddef>
18 : #include <cstdlib>
19 : #include <cstring>
20 : #if HAVE_FCNTL_H
21 : #include <fcntl.h>
22 : #endif
23 :
24 : #include "cpl_conv.h"
25 : #include "cpl_error.h"
26 : #include "cpl_multiproc.h"
27 : #include "gdal_csv.h"
28 :
29 : #include <algorithm>
30 :
31 : /* ==================================================================== */
32 : /* The CSVTable is a persistent set of info about an open CSV */
33 : /* table. While it doesn't currently maintain a record index, */
34 : /* or in-memory copy of the table, it could be changed to do so */
35 : /* in the future. */
36 : /* ==================================================================== */
37 : typedef struct ctb
38 : {
39 : VSILFILE *fp;
40 : struct ctb *psNext;
41 : char *pszFilename;
42 : char **papszFieldNames;
43 : int *panFieldNamesLength;
44 : char **papszRecFields;
45 : int nFields;
46 : int iLastLine;
47 : bool bNonUniqueKey;
48 :
49 : /* Cache for whole file */
50 : int nLineCount;
51 : char **papszLines;
52 : int *panLineIndex;
53 : char *pszRawData;
54 : } CSVTable;
55 :
56 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
57 : const char *pszFilename);
58 :
59 : /************************************************************************/
60 : /* CSVFreeTLS() */
61 : /************************************************************************/
62 2 : static void CSVFreeTLS(void *pData)
63 : {
64 2 : CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
65 2 : CPLFree(pData);
66 2 : }
67 :
68 : /* It would likely be better to share this list between threads, but
69 : that will require some rework. */
70 :
71 : /************************************************************************/
72 : /* CSVAccess() */
73 : /* */
74 : /* This function will fetch a handle to the requested table. */
75 : /* If not found in the ``open table list'' the table will be */
76 : /* opened and added to the list. Eventually this function may */
77 : /* become public with an abstracted return type so that */
78 : /* applications can set options about the table. For now this */
79 : /* isn't done. */
80 : /************************************************************************/
81 :
82 130205 : static CSVTable *CSVAccess(const char *pszFilename)
83 :
84 : {
85 : /* -------------------------------------------------------------------- */
86 : /* Fetch the table, and allocate the thread-local pointer to it */
87 : /* if there isn't already one. */
88 : /* -------------------------------------------------------------------- */
89 130205 : int bMemoryError = FALSE;
90 : CSVTable **ppsCSVTableList =
91 130205 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
92 130205 : if (bMemoryError)
93 0 : return nullptr;
94 130205 : if (ppsCSVTableList == nullptr)
95 : {
96 : ppsCSVTableList =
97 5 : static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
98 5 : if (ppsCSVTableList == nullptr)
99 0 : return nullptr;
100 5 : CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
101 : }
102 :
103 : /* -------------------------------------------------------------------- */
104 : /* Is the table already in the list. */
105 : /* -------------------------------------------------------------------- */
106 1009480 : for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
107 879271 : psTable = psTable->psNext)
108 : {
109 1009450 : if (EQUAL(psTable->pszFilename, pszFilename))
110 : {
111 : /*
112 : * Eventually we should consider promoting to the front of
113 : * the list to accelerate frequently accessed tables.
114 : */
115 130175 : return psTable;
116 : }
117 : }
118 :
119 : /* -------------------------------------------------------------------- */
120 : /* If not, try to open it. */
121 : /* -------------------------------------------------------------------- */
122 30 : VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
123 30 : if (fp == nullptr)
124 0 : return nullptr;
125 :
126 : /* -------------------------------------------------------------------- */
127 : /* Create an information structure about this table, and add to */
128 : /* the front of the list. */
129 : /* -------------------------------------------------------------------- */
130 : CSVTable *const psTable =
131 30 : static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
132 30 : if (psTable == nullptr)
133 : {
134 0 : VSIFCloseL(fp);
135 0 : return nullptr;
136 : }
137 :
138 30 : psTable->fp = fp;
139 30 : psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
140 30 : if (psTable->pszFilename == nullptr)
141 : {
142 0 : VSIFree(psTable);
143 0 : VSIFCloseL(fp);
144 0 : return nullptr;
145 : }
146 30 : psTable->bNonUniqueKey = false; // As far as we know now.
147 30 : psTable->psNext = *ppsCSVTableList;
148 :
149 30 : *ppsCSVTableList = psTable;
150 :
151 : /* -------------------------------------------------------------------- */
152 : /* Read the table header record containing the field names. */
153 : /* -------------------------------------------------------------------- */
154 30 : psTable->papszFieldNames = CSVReadParseLineL(fp);
155 30 : psTable->nFields = CSLCount(psTable->papszFieldNames);
156 30 : psTable->panFieldNamesLength =
157 30 : static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
158 30 : for (int i = 0;
159 185 : i < psTable->nFields &&
160 : /* null-pointer check to avoid a false positive from CLang S.A. */
161 155 : psTable->papszFieldNames != nullptr;
162 : i++)
163 : {
164 155 : psTable->panFieldNamesLength[i] =
165 155 : static_cast<int>(strlen(psTable->papszFieldNames[i]));
166 : }
167 :
168 30 : return psTable;
169 : }
170 :
171 : /************************************************************************/
172 : /* CSVDeaccess() */
173 : /************************************************************************/
174 :
175 953 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
176 : const char *pszFilename)
177 :
178 : {
179 953 : if (ppsCSVTableList == nullptr)
180 944 : return;
181 :
182 : /* -------------------------------------------------------------------- */
183 : /* A NULL means deaccess all tables. */
184 : /* -------------------------------------------------------------------- */
185 9 : if (pszFilename == nullptr)
186 : {
187 9 : while (*ppsCSVTableList != nullptr)
188 5 : CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
189 5 : (*ppsCSVTableList)->pszFilename);
190 :
191 4 : return;
192 : }
193 :
194 : /* -------------------------------------------------------------------- */
195 : /* Find this table. */
196 : /* -------------------------------------------------------------------- */
197 5 : CSVTable *psLast = nullptr;
198 5 : CSVTable *psTable = *ppsCSVTableList;
199 5 : for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
200 0 : psTable = psTable->psNext)
201 : {
202 0 : psLast = psTable;
203 : }
204 :
205 5 : if (psTable == nullptr)
206 : {
207 0 : if (bCanUseTLS)
208 0 : CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
209 0 : return;
210 : }
211 :
212 : /* -------------------------------------------------------------------- */
213 : /* Remove the link from the list. */
214 : /* -------------------------------------------------------------------- */
215 5 : if (psLast != nullptr)
216 0 : psLast->psNext = psTable->psNext;
217 : else
218 5 : *ppsCSVTableList = psTable->psNext;
219 :
220 : /* -------------------------------------------------------------------- */
221 : /* Free the table. */
222 : /* -------------------------------------------------------------------- */
223 5 : if (psTable->fp != nullptr)
224 0 : VSIFCloseL(psTable->fp);
225 :
226 5 : CSLDestroy(psTable->papszFieldNames);
227 5 : CPLFree(psTable->panFieldNamesLength);
228 5 : CSLDestroy(psTable->papszRecFields);
229 5 : CPLFree(psTable->pszFilename);
230 5 : CPLFree(psTable->panLineIndex);
231 5 : CPLFree(psTable->pszRawData);
232 5 : CPLFree(psTable->papszLines);
233 :
234 5 : CPLFree(psTable);
235 :
236 5 : if (bCanUseTLS)
237 5 : CPLReadLine(nullptr);
238 : }
239 :
240 946 : void CSVDeaccess(const char *pszFilename)
241 : {
242 : /* -------------------------------------------------------------------- */
243 : /* Fetch the table, and allocate the thread-local pointer to it */
244 : /* if there isn't already one. */
245 : /* -------------------------------------------------------------------- */
246 946 : int bMemoryError = FALSE;
247 : CSVTable **ppsCSVTableList =
248 946 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
249 :
250 946 : CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
251 946 : }
252 :
253 : /************************************************************************/
254 : /* CSVSplitLine() */
255 : /* */
256 : /* Tokenize a CSV line into fields in the form of a string */
257 : /* list. This is used instead of the CPLTokenizeString() */
258 : /* because it provides correct CSV escaping and quoting */
259 : /* semantics. */
260 : /************************************************************************/
261 :
262 116303 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
263 : bool bKeepLeadingAndClosingQuotes,
264 : bool bMergeDelimiter)
265 :
266 : {
267 232606 : CPLStringList aosRetList;
268 116303 : if (pszString == nullptr)
269 0 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
270 :
271 116303 : char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
272 116303 : int nTokenMax = 10;
273 116303 : const size_t nDelimiterLength = strlen(pszDelimiter);
274 :
275 116303 : const char *pszIter = pszString;
276 669210 : while (*pszIter != '\0')
277 : {
278 552907 : bool bInString = false;
279 :
280 552907 : int nTokenLen = 0;
281 :
282 : // Try to find the next delimiter, marking end of token.
283 4646140 : do
284 : {
285 : // End if this is a delimiter skip it and break.
286 5199050 : if (!bInString &&
287 2711060 : strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
288 : {
289 436925 : pszIter += nDelimiterLength;
290 436925 : if (bMergeDelimiter)
291 : {
292 9 : while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
293 : 0)
294 5 : pszIter += nDelimiterLength;
295 : }
296 436925 : break;
297 : }
298 :
299 4762120 : if (*pszIter == '"')
300 : {
301 395873 : if (!bInString && nTokenLen > 0)
302 : {
303 : // do not treat in a special way double quotes that appear
304 : // in the middle of a field (similarly to OpenOffice)
305 : // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
306 : }
307 395786 : else if (!bInString || pszIter[1] != '"')
308 : {
309 395100 : bInString = !bInString;
310 395100 : if (!bKeepLeadingAndClosingQuotes)
311 395066 : continue;
312 : }
313 : else // Doubled quotes in string resolve to one quote.
314 : {
315 686 : pszIter++;
316 : }
317 : }
318 :
319 4367060 : if (nTokenLen >= nTokenMax - 2)
320 : {
321 122920 : nTokenMax = nTokenMax * 2 + 10;
322 122920 : pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
323 : }
324 :
325 4367060 : pszToken[nTokenLen] = *pszIter;
326 4367060 : nTokenLen++;
327 4762120 : } while (*(++pszIter) != '\0');
328 :
329 552907 : pszToken[nTokenLen] = '\0';
330 552907 : aosRetList.AddString(pszToken);
331 :
332 : // If the last token is an empty token, then we have to catch
333 : // it now, otherwise we won't reenter the loop and it will be lost.
334 552907 : if (*pszIter == '\0' &&
335 116263 : pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
336 116263 : strncmp(pszIter - nDelimiterLength, pszDelimiter,
337 : nDelimiterLength) == 0)
338 : {
339 281 : aosRetList.AddString("");
340 : }
341 : }
342 :
343 116303 : CPLFree(pszToken);
344 :
345 116303 : if (aosRetList.Count() == 0)
346 40 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
347 : else
348 116263 : return aosRetList.StealList();
349 : }
350 :
351 : /************************************************************************/
352 : /* CSVFindNextLine() */
353 : /* */
354 : /* Find the start of the next line, while at the same time zero */
355 : /* terminating this line. Take into account that there may be */
356 : /* newline indicators within quoted strings, and that quotes */
357 : /* can be escaped with a backslash. */
358 : /************************************************************************/
359 :
/**
 * Terminate the current line in place and locate the start of the next one.
 *
 * The first line-feed (10) or carriage-return (13) found outside a double
 * quoted section ends the line. A double quote preceded by a backslash does
 * not open or close a quoted section. That terminator and every terminator
 * byte immediately following it are overwritten with nul bytes.
 *
 * The scan uses a pointer and a parity flag rather than int counters, so
 * buffers larger than INT_MAX bytes do not overflow an index.
 *
 * @param pszThisLine start of the current line inside a writable, nul
 *                    terminated buffer.
 * @return pointer to the first byte after the terminator run, or nullptr
 *         when the end of the buffer is reached.
 */
static char *CSVFindNextLine(char *pszThisLine)
{
    const char chLF = 10;
    const char chCR = 13;

    char *pszCur = pszThisLine;
    bool bInsideQuotes = false;

    for (; *pszCur != '\0'; ++pszCur)
    {
        // Only the parity of the unescaped quotes seen so far matters.
        if (*pszCur == '\"' &&
            (pszCur == pszThisLine || pszCur[-1] != '\\'))
        {
            bInsideQuotes = !bInsideQuotes;
        }

        if ((*pszCur == chLF || *pszCur == chCR) && !bInsideQuotes)
            break;
    }

    // Wipe the whole run of terminator bytes (covers CR LF and blank lines).
    while (*pszCur == chLF || *pszCur == chCR)
        *pszCur++ = '\0';

    if (*pszCur == '\0')
        return nullptr;

    return pszCur;
}
383 :
384 : /************************************************************************/
385 : /* CSVIngest() */
386 : /* */
387 : /* Load entire file into memory and setup index if possible. */
388 : /************************************************************************/
389 :
390 : // TODO(schwehr): Clean up all the casting in CSVIngest.
391 60606 : static void CSVIngest(CSVTable *psTable)
392 :
393 : {
394 60606 : if (psTable->pszRawData != nullptr)
395 60576 : return;
396 :
397 : /* -------------------------------------------------------------------- */
398 : /* Ingest whole file. */
399 : /* -------------------------------------------------------------------- */
400 30 : if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
401 : {
402 0 : CPLError(CE_Failure, CPLE_FileIO,
403 : "Failed using seek end and tell to get file length: %s",
404 : psTable->pszFilename);
405 0 : return;
406 : }
407 30 : const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
408 30 : if (static_cast<long>(nFileLen) == -1)
409 : {
410 0 : CPLError(CE_Failure, CPLE_FileIO,
411 : "Failed using seek end and tell to get file length: %s",
412 : psTable->pszFilename);
413 0 : return;
414 : }
415 30 : VSIRewindL(psTable->fp);
416 :
417 30 : psTable->pszRawData = static_cast<char *>(
418 30 : VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
419 30 : if (psTable->pszRawData == nullptr)
420 0 : return;
421 30 : if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
422 30 : psTable->fp) != static_cast<size_t>(nFileLen))
423 : {
424 0 : CPLFree(psTable->pszRawData);
425 0 : psTable->pszRawData = nullptr;
426 :
427 0 : CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
428 : psTable->pszFilename);
429 0 : return;
430 : }
431 :
432 30 : psTable->pszRawData[nFileLen] = '\0';
433 :
434 : /* -------------------------------------------------------------------- */
435 : /* Get count of newlines so we can allocate line array. */
436 : /* -------------------------------------------------------------------- */
437 30 : int nMaxLineCount = 0;
438 279158 : for (int i = 0; i < static_cast<int>(nFileLen); i++)
439 : {
440 279128 : if (psTable->pszRawData[i] == 10)
441 6329 : nMaxLineCount++;
442 : }
443 :
444 30 : psTable->papszLines =
445 30 : static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
446 30 : if (psTable->papszLines == nullptr)
447 0 : return;
448 :
449 : /* -------------------------------------------------------------------- */
450 : /* Build a list of record pointers into the raw data buffer */
451 : /* based on line terminators. Zero terminate the line */
452 : /* strings. */
453 : /* -------------------------------------------------------------------- */
454 : /* skip header line */
455 30 : char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
456 :
457 30 : int iLine = 0;
458 6329 : while (pszThisLine != nullptr && iLine < nMaxLineCount)
459 : {
460 6299 : if (pszThisLine[0] != '#')
461 6288 : psTable->papszLines[iLine++] = pszThisLine;
462 6299 : pszThisLine = CSVFindNextLine(pszThisLine);
463 : }
464 :
465 30 : psTable->nLineCount = iLine;
466 :
467 : /* -------------------------------------------------------------------- */
468 : /* Allocate and populate index array. Ensure they are in */
469 : /* ascending order so that binary searches can be done on the */
470 : /* array. */
471 : /* -------------------------------------------------------------------- */
472 30 : psTable->panLineIndex = static_cast<int *>(
473 30 : VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
474 30 : if (psTable->panLineIndex == nullptr)
475 0 : return;
476 :
477 6232 : for (int i = 0; i < psTable->nLineCount; i++)
478 : {
479 6204 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
480 :
481 6204 : if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
482 : {
483 2 : CPLFree(psTable->panLineIndex);
484 2 : psTable->panLineIndex = nullptr;
485 2 : break;
486 : }
487 : }
488 :
489 30 : psTable->iLastLine = -1;
490 :
491 : /* -------------------------------------------------------------------- */
492 : /* We should never need the file handle against, so close it. */
493 : /* -------------------------------------------------------------------- */
494 30 : VSIFCloseL(psTable->fp);
495 30 : psTable->fp = nullptr;
496 : }
497 :
498 60606 : static void CSVIngest(const char *pszFilename)
499 :
500 : {
501 60606 : CSVTable *psTable = CSVAccess(pszFilename);
502 60606 : if (psTable == nullptr)
503 : {
504 0 : CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
505 : pszFilename);
506 0 : return;
507 : }
508 60606 : CSVIngest(psTable);
509 : }
510 :
511 : /************************************************************************/
512 : /* CSVDetectSeperator() */
513 : /************************************************************************/
514 :
515 : /** Detect which field separator is used.
516 : *
517 : * Currently, it can detect comma, semicolon, space, tabulation or pipe.
518 : * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
519 : * most occurrences will be selected (and a warning emitted).
520 : * If no separator found, comma will be considered as the separator.
521 : *
522 : * @return ',', ';', ' ', tabulation character or '|'.
523 : */
524 594 : char CSVDetectSeperator(const char *pszLine)
525 : {
526 594 : bool bInString = false;
527 594 : int nCountComma = 0;
528 594 : int nCountSemicolon = 0;
529 594 : int nCountTab = 0;
530 594 : int nCountPipe = 0;
531 594 : int nCountSpace = 0;
532 :
533 26771 : for (; *pszLine != '\0'; pszLine++)
534 : {
535 26177 : if (!bInString && *pszLine == ',')
536 : {
537 2130 : nCountComma++;
538 : }
539 24047 : else if (!bInString && *pszLine == ';')
540 : {
541 10 : nCountSemicolon++;
542 : }
543 24037 : else if (!bInString && *pszLine == '\t')
544 : {
545 29 : nCountTab++;
546 : }
547 24008 : else if (!bInString && *pszLine == '|')
548 : {
549 9 : nCountPipe++;
550 : }
551 23999 : else if (!bInString && *pszLine == ' ')
552 : {
553 290 : nCountSpace++;
554 : }
555 23709 : else if (*pszLine == '"')
556 : {
557 519 : if (!bInString || pszLine[1] != '"')
558 : {
559 519 : bInString = !bInString;
560 519 : continue;
561 : }
562 : else /* doubled quotes in string resolve to one quote */
563 : {
564 0 : pszLine++;
565 : }
566 : }
567 : }
568 :
569 : const int nMaxCountExceptSpace =
570 : std::max(std::max(nCountComma, nCountSemicolon),
571 594 : std::max(nCountTab, nCountPipe));
572 594 : char chDelimiter = ',';
573 594 : if (nMaxCountExceptSpace == 0)
574 : {
575 35 : if (nCountSpace > 0)
576 9 : chDelimiter = ' ';
577 : }
578 : else
579 : {
580 559 : bool bWarn = false;
581 559 : if (nCountComma == nMaxCountExceptSpace)
582 : {
583 543 : chDelimiter = ',';
584 543 : bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
585 : }
586 16 : else if (nCountSemicolon == nMaxCountExceptSpace)
587 : {
588 5 : chDelimiter = ';';
589 5 : bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
590 : }
591 11 : else if (nCountTab == nMaxCountExceptSpace)
592 : {
593 6 : chDelimiter = '\t';
594 6 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
595 : }
596 : else /* if( nCountPipe == nMaxCountExceptSpace ) */
597 : {
598 5 : chDelimiter = '|';
599 5 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
600 : }
601 559 : if (bWarn)
602 : {
603 6 : CPLError(CE_Warning, CPLE_AppDefined,
604 : "Selecting '%c' as CSV field separator, but "
605 : "other candidate separator(s) have been found.",
606 : chDelimiter);
607 : }
608 : }
609 :
610 594 : return chDelimiter;
611 : }
612 :
613 : /************************************************************************/
614 : /* CSVReadParseLineGeneric() */
615 : /* */
616 : /* Read one line, and return split into fields. The return */
617 : /* result is a stringlist, in the sense of the CSL functions. */
618 : /************************************************************************/
619 :
620 : static char **
621 57338 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
622 : size_t nMaxLineSize, const char *pszDelimiter,
623 : bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
624 : bool bMergeDelimiter, bool bSkipBOM)
625 : {
626 57338 : const char *pszLine = pfnReadLine(fp, nMaxLineSize);
627 57338 : if (pszLine == nullptr)
628 1366 : return nullptr;
629 :
630 55972 : if (bSkipBOM)
631 : {
632 : // Skip BOM.
633 55605 : const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
634 55605 : if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
635 4 : pszLine += 3;
636 : }
637 :
638 : // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
639 55972 : if (!bHonourStrings)
640 : {
641 2 : return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
642 : }
643 :
644 : // If there are no quotes, then this is the simple case.
645 : // Parse, and return tokens.
646 55970 : if (strchr(pszLine, '\"') == nullptr)
647 48336 : return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
648 48336 : bMergeDelimiter);
649 :
650 7634 : const size_t nDelimiterLength = strlen(pszDelimiter);
651 7634 : bool bInString = false; // keep in that scope !
652 15268 : std::string osWorkLine(pszLine); // keep in that scope !
653 7634 : size_t i = 0; // keep in that scope !
654 :
655 : try
656 : {
657 : while (true)
658 : {
659 792187 : for (; i < osWorkLine.size(); ++i)
660 : {
661 783798 : if (osWorkLine[i] == '\"')
662 : {
663 59057 : if (!bInString)
664 : {
665 : // Only consider " as the start of a quoted string
666 : // if it is the first character of the line, or
667 : // if it is immediately after the field delimiter.
668 52233 : if (i == 0 ||
669 23004 : (i >= nDelimiterLength &&
670 23004 : osWorkLine.compare(i - nDelimiterLength,
671 : nDelimiterLength, pszDelimiter,
672 : nDelimiterLength) == 0))
673 : {
674 29142 : bInString = true;
675 : }
676 : }
677 56793 : else if (i + 1 < osWorkLine.size() &&
678 26965 : osWorkLine[i + 1] == '"')
679 : {
680 : // Escaped double quote in a quoted string
681 686 : ++i;
682 : }
683 : else
684 : {
685 29142 : bInString = false;
686 : }
687 : }
688 : }
689 :
690 8389 : if (!bInString)
691 : {
692 7634 : return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
693 : bKeepLeadingAndClosingQuotes,
694 7634 : bMergeDelimiter);
695 : }
696 :
697 755 : const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
698 755 : if (pszNewLine == nullptr)
699 0 : break;
700 :
701 755 : osWorkLine.append("\n");
702 755 : osWorkLine.append(pszNewLine);
703 755 : }
704 : }
705 0 : catch (const std::exception &e)
706 : {
707 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
708 : }
709 0 : return nullptr;
710 : }
711 :
712 : /************************************************************************/
713 : /* CSVReadParseLine() */
714 : /* */
715 : /* Read one line, and return split into fields. The return */
716 : /* result is a stringlist, in the sense of the CSL functions. */
717 : /* */
718 : /* Deprecated. Replaced by CSVReadParseLineL(). */
719 : /************************************************************************/
720 :
721 0 : char **CSVReadParseLine(FILE *fp)
722 : {
723 0 : return CSVReadParseLine2(fp, ',');
724 : }
725 :
726 0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
727 : {
728 0 : return CPLReadLine(static_cast<FILE *>(fp));
729 : }
730 :
731 0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
732 : {
733 0 : CPLAssert(fp != nullptr);
734 0 : if (fp == nullptr)
735 0 : return nullptr;
736 :
737 0 : char szDelimiter[2] = {chDelimiter, 0};
738 0 : return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
739 : 0, // nMaxLineSize,
740 : szDelimiter,
741 : true, // bHonourStrings
742 : false, // bKeepLeadingAndClosingQuotes
743 : false, // bMergeDelimiter
744 0 : true /* bSkipBOM */);
745 : }
746 :
747 : /************************************************************************/
748 : /* CSVReadParseLineL() */
749 : /* */
750 : /* Read one line, and return split into fields. The return */
751 : /* result is a stringlist, in the sense of the CSL functions. */
752 : /* */
753 : /* Replaces CSVReadParseLine(). These functions use the VSI */
754 : /* layer to allow reading from other file containers. */
755 : /************************************************************************/
756 :
757 3910 : char **CSVReadParseLineL(VSILFILE *fp)
758 : {
759 3910 : return CSVReadParseLine2L(fp, ',');
760 : }
761 :
762 3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
763 :
764 : {
765 3910 : CPLAssert(fp != nullptr);
766 3910 : if (fp == nullptr)
767 0 : return nullptr;
768 :
769 3910 : char szDelimiter[2] = {chDelimiter, 0};
770 3910 : return CSVReadParseLine3L(fp,
771 : 0, // nMaxLineSize
772 : szDelimiter,
773 : true, // bHonourStrings
774 : false, // bKeepLeadingAndClosingQuotes
775 : false, // bMergeDelimiter
776 3910 : true /* bSkipBOM */);
777 : }
778 :
779 : /************************************************************************/
780 : /* ReadLineLargeFile() */
781 : /************************************************************************/
782 :
783 58093 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
784 : {
785 58093 : int nBufLength = 0;
786 58093 : return CPLReadLine3L(static_cast<VSILFILE *>(fp),
787 : nMaxLineSize == 0 ? -1
788 : : static_cast<int>(nMaxLineSize),
789 116186 : &nBufLength, nullptr);
790 : }
791 :
792 : /************************************************************************/
793 : /* CSVReadParseLine3L() */
794 : /* */
795 : /* Read one line, and return split into fields. The return */
796 : /* result is a stringlist, in the sense of the CSL functions. */
797 : /************************************************************************/
798 :
799 : /** Read one line, and return split into fields.
800 : * The return result is a stringlist, in the sense of the CSL functions.
801 : *
802 : * @param fp File handle. Must not be NULL
803 : * @param nMaxLineSize Maximum line size, or 0 for unlimited.
804 : * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
805 : * @param bHonourStrings Should be true, unless double quotes should not be
806 : * considered when separating fields.
807 : * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
808 : * quote characters should be kept.
809 : * @param bMergeDelimiter Whether consecutive delimiters should be considered
810 : * as a single one. Should generally be set to false.
811 : * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
812 : */
813 57338 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
814 : const char *pszDelimiter, bool bHonourStrings,
815 : bool bKeepLeadingAndClosingQuotes,
816 : bool bMergeDelimiter, bool bSkipBOM)
817 :
818 : {
819 57338 : return CSVReadParseLineGeneric(
820 : fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
821 57338 : bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
822 : }
823 :
824 : /************************************************************************/
825 : /* CSVCompare() */
826 : /* */
827 : /* Compare a field to a search value using a particular */
828 : /* criteria. */
829 : /************************************************************************/
830 :
831 610 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
832 : CSVCompareCriteria eCriteria)
833 :
834 : {
835 610 : if (eCriteria == CC_ExactString)
836 : {
837 0 : return (strcmp(pszFieldValue, pszTarget) == 0);
838 : }
839 610 : else if (eCriteria == CC_ApproxString)
840 : {
841 270 : return EQUAL(pszFieldValue, pszTarget);
842 : }
843 340 : else if (eCriteria == CC_Integer)
844 : {
845 640 : return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
846 640 : atoi(pszFieldValue) == atoi(pszTarget));
847 : }
848 :
849 0 : return false;
850 : }
851 :
852 : /************************************************************************/
853 : /* CSVScanLines() */
854 : /* */
855 : /* Read the file scanline for lines where the key field equals */
856 : /* the indicated value with the suggested comparison criteria. */
857 : /* Return the first matching line split into fields. */
858 : /* */
859 : /* Deprecated. Replaced by CSVScanLinesL(). */
860 : /************************************************************************/
861 :
862 0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
863 : CSVCompareCriteria eCriteria)
864 :
865 : {
866 0 : CPLAssert(pszValue != nullptr);
867 0 : CPLAssert(iKeyField >= 0);
868 0 : CPLAssert(fp != nullptr);
869 :
870 0 : bool bSelected = false;
871 0 : const int nTestValue = atoi(pszValue);
872 0 : char **papszFields = nullptr;
873 :
874 0 : while (!bSelected)
875 : {
876 0 : papszFields = CSVReadParseLine(fp);
877 0 : if (papszFields == nullptr)
878 0 : return nullptr;
879 :
880 0 : if (CSLCount(papszFields) < iKeyField + 1)
881 : {
882 : /* not selected */
883 : }
884 0 : else if (eCriteria == CC_Integer &&
885 0 : atoi(papszFields[iKeyField]) == nTestValue)
886 : {
887 0 : bSelected = true;
888 : }
889 : else
890 : {
891 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
892 : }
893 :
894 0 : if (!bSelected)
895 : {
896 0 : CSLDestroy(papszFields);
897 0 : papszFields = nullptr;
898 : }
899 : }
900 :
901 0 : return papszFields;
902 : }
903 :
904 : /************************************************************************/
905 : /* CSVScanLinesL() */
906 : /* */
907 : /* Read the file scanline for lines where the key field equals */
908 : /* the indicated value with the suggested comparison criteria. */
909 : /* Return the first matching line split into fields. */
910 : /************************************************************************/
911 :
912 0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
913 : CSVCompareCriteria eCriteria)
914 :
915 : {
916 0 : CPLAssert(pszValue != nullptr);
917 0 : CPLAssert(iKeyField >= 0);
918 0 : CPLAssert(fp != nullptr);
919 :
920 0 : bool bSelected = false;
921 0 : const int nTestValue = atoi(pszValue);
922 0 : char **papszFields = nullptr;
923 :
924 0 : while (!bSelected)
925 : {
926 0 : papszFields = CSVReadParseLineL(fp);
927 0 : if (papszFields == nullptr)
928 0 : return nullptr;
929 :
930 0 : if (CSLCount(papszFields) < iKeyField + 1)
931 : {
932 : /* not selected */
933 : }
934 0 : else if (eCriteria == CC_Integer &&
935 0 : atoi(papszFields[iKeyField]) == nTestValue)
936 : {
937 0 : bSelected = true;
938 : }
939 : else
940 : {
941 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
942 : }
943 :
944 0 : if (!bSelected)
945 : {
946 0 : CSLDestroy(papszFields);
947 0 : papszFields = nullptr;
948 : }
949 : }
950 :
951 0 : return papszFields;
952 : }
953 :
954 : /************************************************************************/
955 : /* CSVScanLinesIndexed() */
956 : /* */
957 : /* Binary search the in-memory line index for the line whose */
958 : /* integer key equals nKeyValue. Return the first matching */
959 : /* line split into fields, or NULL if the key is not found. */
960 : /************************************************************************/
961 :
962 23 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
963 :
964 : {
965 23 : CPLAssert(psTable->panLineIndex != nullptr);
966 :
967 : /* -------------------------------------------------------------------- */
968 : /* Find target record with binary search. */
969 : /* -------------------------------------------------------------------- */
970 23 : int iTop = psTable->nLineCount - 1;
971 23 : int iBottom = 0;
972 23 : int iResult = -1;
973 :
974 167 : while (iTop >= iBottom)
975 : {
976 167 : const int iMiddle = (iTop + iBottom) / 2;
977 167 : if (psTable->panLineIndex[iMiddle] > nKeyValue)
978 98 : iTop = iMiddle - 1;
979 69 : else if (psTable->panLineIndex[iMiddle] < nKeyValue)
980 46 : iBottom = iMiddle + 1;
981 : else
982 : {
983 23 : iResult = iMiddle;
984 : // if a key is not unique, select the first instance of it.
985 23 : while (iResult > 0 &&
986 23 : psTable->panLineIndex[iResult - 1] == nKeyValue)
987 : {
988 0 : psTable->bNonUniqueKey = true;
989 0 : iResult--;
990 : }
991 23 : break;
992 : }
993 : }
994 :
995 23 : if (iResult == -1)
996 0 : return nullptr;
997 :
998 : /* -------------------------------------------------------------------- */
999 : /* Parse target line, and update iLastLine indicator. */
1000 : /* -------------------------------------------------------------------- */
1001 23 : psTable->iLastLine = iResult;
1002 :
1003 23 : return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
1004 : }
1005 :
1006 : /************************************************************************/
1007 : /* CSVScanLinesIngested() */
1008 : /* */
1009 : /* Read the file scanline for lines where the key field equals */
1010 : /* the indicated value with the suggested comparison criteria. */
1011 : /* Return the first matching line split into fields. */
1012 : /************************************************************************/
1013 :
1014 30 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
1015 : const char *pszValue,
1016 : CSVCompareCriteria eCriteria)
1017 :
1018 : {
1019 30 : CPLAssert(pszValue != nullptr);
1020 30 : CPLAssert(iKeyField >= 0);
1021 :
1022 30 : const int nTestValue = atoi(pszValue);
1023 :
1024 : /* -------------------------------------------------------------------- */
1025 : /* Short cut for indexed files. */
1026 : /* -------------------------------------------------------------------- */
1027 30 : if (iKeyField == 0 && eCriteria == CC_Integer &&
1028 23 : psTable->panLineIndex != nullptr)
1029 23 : return CSVScanLinesIndexed(psTable, nTestValue);
1030 :
1031 : /* -------------------------------------------------------------------- */
1032 : /* Scan from in-core lines. */
1033 : /* -------------------------------------------------------------------- */
1034 7 : char **papszFields = nullptr;
1035 7 : bool bSelected = false;
1036 :
1037 484 : while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
1038 : {
1039 477 : psTable->iLastLine++;
1040 477 : papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
1041 : false, false);
1042 :
1043 477 : if (CSLCount(papszFields) < iKeyField + 1)
1044 : {
1045 : /* not selected */
1046 : }
1047 477 : else if (eCriteria == CC_Integer &&
1048 242 : atoi(papszFields[iKeyField]) == nTestValue)
1049 : {
1050 2 : bSelected = true;
1051 : }
1052 : else
1053 : {
1054 475 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
1055 : }
1056 :
1057 477 : if (!bSelected)
1058 : {
1059 470 : CSLDestroy(papszFields);
1060 470 : papszFields = nullptr;
1061 : }
1062 : }
1063 :
1064 7 : return papszFields;
1065 : }
1066 :
1067 : /************************************************************************/
1068 : /* CSVRewind() */
1069 : /* */
1070 : /* Rewind a CSV file based on a passed in filename. */
1071 : /* This is aimed at being used with CSVGetNextLine(). */
1072 : /************************************************************************/
1073 :
1074 1843 : void CSVRewind(const char *pszFilename)
1075 :
1076 : {
1077 : /* -------------------------------------------------------------------- */
1078 : /* Get access to the table. */
1079 : /* -------------------------------------------------------------------- */
1080 1843 : CPLAssert(pszFilename != nullptr);
1081 :
1082 1843 : CSVTable *const psTable = CSVAccess(pszFilename);
1083 1843 : if (psTable != nullptr)
1084 1843 : psTable->iLastLine = -1;
1085 1843 : }
1086 :
1087 : /************************************************************************/
1088 : /* CSVGetNextLine() */
1089 : /* */
1090 : /* Fetch the next line of a CSV file based on a passed in */
1091 : /* filename. Returns NULL at end of file, or if file is not */
1092 : /* really established. */
1093 : /* This ingests the whole file into memory if not already done. */
1094 : /* When reaching end of file, CSVRewind() may be used to read */
1095 : /* again from the beginning. */
1096 : /************************************************************************/
1097 :
1098 60462 : char **CSVGetNextLine(const char *pszFilename)
1099 :
1100 : {
1101 :
1102 : /* -------------------------------------------------------------------- */
1103 : /* Get access to the table. */
1104 : /* -------------------------------------------------------------------- */
1105 60462 : CPLAssert(pszFilename != nullptr);
1106 :
1107 60462 : CSVTable *const psTable = CSVAccess(pszFilename);
1108 60462 : if (psTable == nullptr)
1109 0 : return nullptr;
1110 :
1111 60462 : CSVIngest(psTable->pszFilename);
1112 :
1113 : /* -------------------------------------------------------------------- */
1114 : /* If we use CSVGetNextLine() we can pretty much assume we have */
1115 : /* a non-unique key. */
1116 : /* -------------------------------------------------------------------- */
1117 60462 : psTable->bNonUniqueKey = true;
1118 :
1119 : /* -------------------------------------------------------------------- */
1120 : /* Do we have a next line available? This only works for */
1121 : /* ingested tables I believe. */
1122 : /* -------------------------------------------------------------------- */
1123 60462 : if (psTable->iLastLine + 1 >= psTable->nLineCount)
1124 629 : return nullptr;
1125 :
1126 59833 : psTable->iLastLine++;
1127 59833 : CSLDestroy(psTable->papszRecFields);
1128 119666 : psTable->papszRecFields = CSVSplitLine(
1129 59833 : psTable->papszLines[psTable->iLastLine], ",", false, false);
1130 :
1131 59833 : return psTable->papszRecFields;
1132 : }
1133 :
1134 : /************************************************************************/
1135 : /* CSVScanFile() */
1136 : /* */
1137 : /* Scan a whole file using criteria similar to above, but also */
1138 : /* taking care of file opening and closing. */
1139 : /************************************************************************/
1140 :
1141 144 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
1142 : const char *pszValue, CSVCompareCriteria eCriteria)
1143 : {
1144 144 : CSVIngest(psTable->pszFilename);
1145 :
1146 : /* -------------------------------------------------------------------- */
1147 : /* Does the current record match the criteria? If so, just */
1148 : /* return it again. */
1149 : /* -------------------------------------------------------------------- */
1150 144 : if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
1151 402 : CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
1152 114 : !psTable->bNonUniqueKey)
1153 : {
1154 114 : return psTable->papszRecFields;
1155 : }
1156 :
1157 : /* -------------------------------------------------------------------- */
1158 : /* Scan the file from the beginning, replacing the ``current */
1159 : /* record'' in our structure with the one that is found. */
1160 : /* -------------------------------------------------------------------- */
1161 30 : psTable->iLastLine = -1;
1162 30 : CSLDestroy(psTable->papszRecFields);
1163 :
1164 30 : if (psTable->pszRawData != nullptr)
1165 30 : psTable->papszRecFields =
1166 30 : CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
1167 : else
1168 : {
1169 0 : VSIRewindL(psTable->fp);
1170 0 : CPLReadLineL(psTable->fp); /* throw away the header line */
1171 :
1172 0 : psTable->papszRecFields =
1173 0 : CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
1174 : }
1175 :
1176 30 : return psTable->papszRecFields;
1177 : }
1178 :
1179 4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
1180 : CSVCompareCriteria eCriteria)
1181 :
1182 : {
1183 : /* -------------------------------------------------------------------- */
1184 : /* Get access to the table. */
1185 : /* -------------------------------------------------------------------- */
1186 4 : CPLAssert(pszFilename != nullptr);
1187 :
1188 4 : if (iKeyField < 0)
1189 0 : return nullptr;
1190 :
1191 4 : CSVTable *const psTable = CSVAccess(pszFilename);
1192 4 : if (psTable == nullptr)
1193 0 : return nullptr;
1194 :
1195 4 : return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
1196 : }
1197 :
1198 : /************************************************************************/
1199 : /* CSVGetFieldId() */
1200 : /* */
1201 : /* Read the first record of a CSV file (rewinding to be sure), */
1202 : /* and find the field with the indicated name. Returns -1 if */
1203 : /* it fails to find the field name. Comparison is case */
1204 : /* insensitive, but otherwise exact. After this function has */
1205 : /* been called the file pointer will be positioned just after */
1206 : /* the first record. */
1207 : /* */
1208 : /* Deprecated. Replaced by CSVGetFieldIdL(). */
1209 : /************************************************************************/
1210 :
1211 0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
1212 :
1213 : {
1214 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1215 :
1216 0 : VSIRewind(fp);
1217 :
1218 0 : char **papszFields = CSVReadParseLine(fp);
1219 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1220 : {
1221 0 : if (EQUAL(papszFields[i], pszFieldName))
1222 : {
1223 0 : CSLDestroy(papszFields);
1224 0 : return i;
1225 : }
1226 : }
1227 :
1228 0 : CSLDestroy(papszFields);
1229 :
1230 0 : return -1;
1231 : }
1232 :
1233 : /************************************************************************/
1234 : /* CSVGetFieldIdL() */
1235 : /* */
1236 : /* Read the first record of a CSV file (rewinding to be sure), */
1237 : /* and find the field with the indicated name. Returns -1 if */
1238 : /* it fails to find the field name. Comparison is case */
1239 : /* insensitive, but otherwise exact. After this function has */
1240 : /* been called the file pointer will be positioned just after */
1241 : /* the first record. */
1242 : /************************************************************************/
1243 :
1244 0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
1245 :
1246 : {
1247 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1248 :
1249 0 : VSIRewindL(fp);
1250 :
1251 0 : char **papszFields = CSVReadParseLineL(fp);
1252 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1253 : {
1254 0 : if (EQUAL(papszFields[i], pszFieldName))
1255 : {
1256 0 : CSLDestroy(papszFields);
1257 0 : return i;
1258 : }
1259 : }
1260 :
1261 0 : CSLDestroy(papszFields);
1262 :
1263 0 : return -1;
1264 : }
1265 :
1266 : /************************************************************************/
1267 : /* CSVGetFileFieldId() */
1268 : /* */
1269 : /* Same as CSVGetFieldId(), except that we get the file based */
1270 : /* on filename, rather than having an existing handle. */
1271 : /************************************************************************/
1272 :
1273 7430 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
1274 :
1275 : {
1276 : /* -------------------------------------------------------------------- */
1277 : /* Find the requested field. */
1278 : /* -------------------------------------------------------------------- */
1279 7430 : const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
1280 18696 : for (int i = 0; psTable->papszFieldNames != nullptr &&
1281 18696 : psTable->papszFieldNames[i] != nullptr;
1282 : i++)
1283 : {
1284 18696 : if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
1285 10433 : EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
1286 : {
1287 7430 : return i;
1288 : }
1289 : }
1290 :
1291 0 : return -1;
1292 : }
1293 :
1294 7150 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
1295 :
1296 : {
1297 : /* -------------------------------------------------------------------- */
1298 : /* Get access to the table. */
1299 : /* -------------------------------------------------------------------- */
1300 7150 : CPLAssert(pszFilename != nullptr);
1301 :
1302 7150 : CSVTable *const psTable = CSVAccess(pszFilename);
1303 7150 : if (psTable == nullptr)
1304 0 : return -1;
1305 7150 : return CSVGetFileFieldId(psTable, pszFieldName);
1306 : }
1307 :
1308 : /************************************************************************/
1309 : /* CSVScanFileByName() */
1310 : /* */
1311 : /* Same as CSVScanFile(), but using a field name instead of a */
1312 : /* field number. */
1313 : /************************************************************************/
1314 :
1315 4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
1316 : const char *pszValue, CSVCompareCriteria eCriteria)
1317 :
1318 : {
1319 4 : const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
1320 4 : if (iKeyField == -1)
1321 0 : return nullptr;
1322 :
1323 4 : return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
1324 : }
1325 :
1326 : /************************************************************************/
1327 : /* CSVGetField() */
1328 : /* */
1329 : /* The all-in-one function to fetch a particular field value */
1330 : /* from a CSV file. Note this function will return an empty */
1331 : /* string, rather than NULL if it fails to find the desired */
1332 : /* value for some reason. The caller can't establish that the */
1333 : /* fetch failed. */
1334 : /************************************************************************/
1335 :
1336 140 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
1337 : const char *pszKeyFieldValue,
1338 : CSVCompareCriteria eCriteria,
1339 : const char *pszTargetField)
1340 :
1341 : {
1342 : /* -------------------------------------------------------------------- */
1343 : /* Find the table. */
1344 : /* -------------------------------------------------------------------- */
1345 140 : CSVTable *const psTable = CSVAccess(pszFilename);
1346 140 : if (psTable == nullptr)
1347 0 : return "";
1348 :
1349 140 : const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
1350 140 : if (iKeyField == -1)
1351 0 : return "";
1352 :
1353 : /* -------------------------------------------------------------------- */
1354 : /* Find the correct record. */
1355 : /* -------------------------------------------------------------------- */
1356 : char **papszRecord =
1357 140 : CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
1358 140 : if (papszRecord == nullptr)
1359 0 : return "";
1360 :
1361 : /* -------------------------------------------------------------------- */
1362 : /* Figure out which field we want out of this. */
1363 : /* -------------------------------------------------------------------- */
1364 140 : const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
1365 140 : if (iTargetField < 0)
1366 0 : return "";
1367 :
1368 388 : for (int i = 0; papszRecord[i] != nullptr; ++i)
1369 : {
1370 388 : if (i == iTargetField)
1371 140 : return papszRecord[iTargetField];
1372 : }
1373 0 : return "";
1374 : }
1375 :
1376 : /************************************************************************/
1377 : /* GDALDefaultCSVFilename() */
1378 : /************************************************************************/
1379 :
// State used by GDALDefaultCSVFilename(), which stores one instance under
// the CTLS_CSVDEFAULTFILENAME thread-local slot.
typedef struct
{
    // Buffer holding the basename returned when the file is not found.
    char szPath[512];

    // Set once GDALDefaultCSVFilename() has attempted the GDAL_DATA
    // finder-location setup.
    bool bCSVFinderInitialized;
} DefaultCSVFileNameTLS;
1385 :
1386 2488 : const char *GDALDefaultCSVFilename(const char *pszBasename)
1387 :
1388 : {
1389 : /* -------------------------------------------------------------------- */
1390 : /* Do we already have this file accessed? If so, just return */
1391 : /* the existing path without any further probing. */
1392 : /* -------------------------------------------------------------------- */
1393 2488 : int bMemoryError = FALSE;
1394 : CSVTable **ppsCSVTableList =
1395 2488 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
1396 2488 : if (ppsCSVTableList != nullptr)
1397 : {
1398 2482 : const size_t nBasenameLen = strlen(pszBasename);
1399 :
1400 23118 : for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
1401 20636 : psTable = psTable->psNext)
1402 : {
1403 22566 : const size_t nFullLen = strlen(psTable->pszFilename);
1404 :
1405 22566 : if (nFullLen > nBasenameLen &&
1406 22566 : strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
1407 1930 : pszBasename) == 0 &&
1408 1930 : strchr("/\\",
1409 1930 : psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
1410 : nullptr)
1411 : {
1412 1930 : return psTable->pszFilename;
1413 : }
1414 : }
1415 : }
1416 :
1417 : /* -------------------------------------------------------------------- */
1418 : /* Otherwise we need to look harder for it. */
1419 : /* -------------------------------------------------------------------- */
1420 : DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1421 558 : CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
1422 558 : if (pTLSData == nullptr && !bMemoryError)
1423 : {
1424 : pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1425 5 : VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
1426 5 : if (pTLSData)
1427 5 : CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
1428 : }
1429 558 : if (pTLSData == nullptr)
1430 0 : return "/not_existing_dir/not_existing_path";
1431 :
1432 558 : const char *pszResult = CPLFindFile("gdal", pszBasename);
1433 :
1434 558 : if (pszResult != nullptr)
1435 43 : return pszResult;
1436 :
1437 515 : if (!pTLSData->bCSVFinderInitialized)
1438 : {
1439 2 : pTLSData->bCSVFinderInitialized = true;
1440 :
1441 2 : if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
1442 2 : CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
1443 :
1444 2 : pszResult = CPLFindFile("gdal", pszBasename);
1445 :
1446 2 : if (pszResult != nullptr)
1447 0 : return pszResult;
1448 : }
1449 :
1450 : // For systems like sandboxes that do not allow other checks.
1451 515 : CPLDebug("CPL_CSV",
1452 : "Failed to find file in GDALDefaultCSVFilename. "
1453 : "Returning original basename: %s",
1454 : pszBasename);
1455 515 : CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
1456 515 : return pTLSData->szPath;
1457 : }
1458 :
1459 : /************************************************************************/
1460 : /* CSVFilename() */
1461 : /* */
1462 : /* Return the full path to a particular CSV file. This will */
1463 : /* eventually be something the application can override. */
1464 : /************************************************************************/
1465 :
1466 : CPL_C_START
1467 : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
1468 : CPL_C_END
1469 :
1470 2488 : const char *CSVFilename(const char *pszBasename)
1471 :
1472 : {
1473 2488 : if (pfnCSVFilenameHook == nullptr)
1474 2488 : return GDALDefaultCSVFilename(pszBasename);
1475 :
1476 0 : return pfnCSVFilenameHook(pszBasename);
1477 : }
1478 :
1479 : /************************************************************************/
1480 : /* SetCSVFilenameHook() */
1481 : /* */
1482 : /* Applications can use this to set a function that will */
1483 : /* massage CSV filenames. */
1484 : /************************************************************************/
1485 :
1486 : /**
1487 : * Override CSV file search method.
1488 : *
1489 : * @param pfnNewHook The pointer to a function which will return the
1490 : * full path for a given filename.
1491 : *
1492 :
1493 : This function allows an application to override how the GTIFGetDefn()
1494 : and related functions find the CSV (Comma Separated Value) values
1495 : required. The pfnNewHook argument should be a pointer to a function that
1496 : will take in a CSV filename and return a full path to the file. The
1497 : returned string should point to an internal static buffer so that the
1498 : caller doesn't have to free the result.
1499 :
1500 : Example:
1501 :
1502 : The listgeo utility uses the following override function if the user
1503 : specified a CSV file directory with the -t commandline switch (argument
1504 : put into CSVDirName).
1505 :
1506 : \code{.cpp}
1507 :
1508 : ...
1509 : SetCSVFilenameHook( CSVFileOverride );
1510 : ...
1511 :
1512 : static const char *CSVFileOverride( const char * pszInput )
1513 :
1514 : {
1515 : static char szPath[1024] = {};
1516 :
1517 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1518 :
1519 : return szPath;
1520 : }
1521 : \endcode
1522 :
1523 : */
1524 :
1525 : CPL_C_START
1526 0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
1527 :
1528 : {
1529 0 : pfnCSVFilenameHook = pfnNewHook;
1530 0 : }
1531 :
1532 : CPL_C_END
|