Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: CSV (comma separated value) file access.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 1999, Frank Warmerdam
9 : * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "cpl_port.h"
15 : #include "cpl_csv.h"
16 :
17 : #include <cstddef>
18 : #include <cstdlib>
19 : #include <cstring>
20 : #if HAVE_FCNTL_H
21 : #include <fcntl.h>
22 : #endif
23 :
24 : #include "cpl_conv.h"
25 : #include "cpl_error.h"
26 : #include "cpl_multiproc.h"
27 : #include "gdal_csv.h"
28 :
29 : #include <algorithm>
30 :
31 : /* ==================================================================== */
32 : /* The CSVTable is a persistent set of info about an open CSV */
33 : /* table. While it doesn't currently maintain a record index, */
34 : /* or in-memory copy of the table, it could be changed to do so */
35 : /* in the future. */
36 : /* ==================================================================== */
37 : typedef struct ctb
38 : {
39 : VSILFILE *fp;
40 : struct ctb *psNext;
41 : char *pszFilename;
42 : char **papszFieldNames;
43 : int *panFieldNamesLength;
44 : char **papszRecFields;
45 : int nFields;
46 : int iLastLine;
47 : bool bNonUniqueKey;
48 :
49 : /* Cache for whole file */
50 : int nLineCount;
51 : char **papszLines;
52 : int *panLineIndex;
53 : char *pszRawData;
54 : } CSVTable;
55 :
56 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
57 : const char *pszFilename);
58 :
59 : /************************************************************************/
60 : /* CSVFreeTLS() */
61 : /************************************************************************/
62 2 : static void CSVFreeTLS(void *pData)
63 : {
64 2 : CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
65 2 : CPLFree(pData);
66 2 : }
67 :
68 : /* It would likely be better to share this list between threads, but
69 : that will require some rework. */
70 :
71 : /************************************************************************/
72 : /* CSVAccess() */
73 : /* */
74 : /* This function will fetch a handle to the requested table. */
75 : /* If not found in the ``open table list'' the table will be */
76 : /* opened and added to the list. Eventually this function may */
77 : /* become public with an abstracted return type so that */
78 : /* applications can set options about the table. For now this */
79 : /* isn't done. */
80 : /************************************************************************/
81 :
82 130205 : static CSVTable *CSVAccess(const char *pszFilename)
83 :
84 : {
85 : /* -------------------------------------------------------------------- */
86 : /* Fetch the table, and allocate the thread-local pointer to it */
87 : /* if there isn't already one. */
88 : /* -------------------------------------------------------------------- */
89 130205 : int bMemoryError = FALSE;
90 : CSVTable **ppsCSVTableList =
91 130205 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
92 130205 : if (bMemoryError)
93 0 : return nullptr;
94 130205 : if (ppsCSVTableList == nullptr)
95 : {
96 : ppsCSVTableList =
97 5 : static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
98 5 : if (ppsCSVTableList == nullptr)
99 0 : return nullptr;
100 5 : CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
101 : }
102 :
103 : /* -------------------------------------------------------------------- */
104 : /* Is the table already in the list. */
105 : /* -------------------------------------------------------------------- */
106 1009480 : for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
107 879271 : psTable = psTable->psNext)
108 : {
109 1009450 : if (EQUAL(psTable->pszFilename, pszFilename))
110 : {
111 : /*
112 : * Eventually we should consider promoting to the front of
113 : * the list to accelerate frequently accessed tables.
114 : */
115 130175 : return psTable;
116 : }
117 : }
118 :
119 : /* -------------------------------------------------------------------- */
120 : /* If not, try to open it. */
121 : /* -------------------------------------------------------------------- */
122 30 : VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
123 30 : if (fp == nullptr)
124 0 : return nullptr;
125 :
126 : /* -------------------------------------------------------------------- */
127 : /* Create an information structure about this table, and add to */
128 : /* the front of the list. */
129 : /* -------------------------------------------------------------------- */
130 : CSVTable *const psTable =
131 30 : static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
132 30 : if (psTable == nullptr)
133 : {
134 0 : VSIFCloseL(fp);
135 0 : return nullptr;
136 : }
137 :
138 30 : psTable->fp = fp;
139 30 : psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
140 30 : if (psTable->pszFilename == nullptr)
141 : {
142 0 : VSIFree(psTable);
143 0 : VSIFCloseL(fp);
144 0 : return nullptr;
145 : }
146 30 : psTable->bNonUniqueKey = false; // As far as we know now.
147 30 : psTable->psNext = *ppsCSVTableList;
148 :
149 30 : *ppsCSVTableList = psTable;
150 :
151 : /* -------------------------------------------------------------------- */
152 : /* Read the table header record containing the field names. */
153 : /* -------------------------------------------------------------------- */
154 30 : psTable->papszFieldNames = CSVReadParseLineL(fp);
155 30 : psTable->nFields = CSLCount(psTable->papszFieldNames);
156 30 : psTable->panFieldNamesLength =
157 30 : static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
158 30 : for (int i = 0;
159 185 : i < psTable->nFields &&
160 : /* null-pointer check to avoid a false positive from CLang S.A. */
161 155 : psTable->papszFieldNames != nullptr;
162 : i++)
163 : {
164 155 : psTable->panFieldNamesLength[i] =
165 155 : static_cast<int>(strlen(psTable->papszFieldNames[i]));
166 : }
167 :
168 30 : return psTable;
169 : }
170 :
171 : /************************************************************************/
172 : /* CSVDeaccess() */
173 : /************************************************************************/
174 :
175 955 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
176 : const char *pszFilename)
177 :
178 : {
179 955 : if (ppsCSVTableList == nullptr)
180 946 : return;
181 :
182 : /* -------------------------------------------------------------------- */
183 : /* A NULL means deaccess all tables. */
184 : /* -------------------------------------------------------------------- */
185 9 : if (pszFilename == nullptr)
186 : {
187 9 : while (*ppsCSVTableList != nullptr)
188 5 : CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
189 5 : (*ppsCSVTableList)->pszFilename);
190 :
191 4 : return;
192 : }
193 :
194 : /* -------------------------------------------------------------------- */
195 : /* Find this table. */
196 : /* -------------------------------------------------------------------- */
197 5 : CSVTable *psLast = nullptr;
198 5 : CSVTable *psTable = *ppsCSVTableList;
199 5 : for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
200 0 : psTable = psTable->psNext)
201 : {
202 0 : psLast = psTable;
203 : }
204 :
205 5 : if (psTable == nullptr)
206 : {
207 0 : if (bCanUseTLS)
208 0 : CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
209 0 : return;
210 : }
211 :
212 : /* -------------------------------------------------------------------- */
213 : /* Remove the link from the list. */
214 : /* -------------------------------------------------------------------- */
215 5 : if (psLast != nullptr)
216 0 : psLast->psNext = psTable->psNext;
217 : else
218 5 : *ppsCSVTableList = psTable->psNext;
219 :
220 : /* -------------------------------------------------------------------- */
221 : /* Free the table. */
222 : /* -------------------------------------------------------------------- */
223 5 : if (psTable->fp != nullptr)
224 0 : VSIFCloseL(psTable->fp);
225 :
226 5 : CSLDestroy(psTable->papszFieldNames);
227 5 : CPLFree(psTable->panFieldNamesLength);
228 5 : CSLDestroy(psTable->papszRecFields);
229 5 : CPLFree(psTable->pszFilename);
230 5 : CPLFree(psTable->panLineIndex);
231 5 : CPLFree(psTable->pszRawData);
232 5 : CPLFree(psTable->papszLines);
233 :
234 5 : CPLFree(psTable);
235 :
236 5 : if (bCanUseTLS)
237 5 : CPLReadLine(nullptr);
238 : }
239 :
240 948 : void CSVDeaccess(const char *pszFilename)
241 : {
242 : /* -------------------------------------------------------------------- */
243 : /* Fetch the table, and allocate the thread-local pointer to it */
244 : /* if there isn't already one. */
245 : /* -------------------------------------------------------------------- */
246 948 : int bMemoryError = FALSE;
247 : CSVTable **ppsCSVTableList =
248 948 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
249 :
250 948 : CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
251 948 : }
252 :
253 : /************************************************************************/
254 : /* CSVSplitLine() */
255 : /* */
256 : /* Tokenize a CSV line into fields in the form of a string */
257 : /* list. This is used instead of the CPLTokenizeString() */
258 : /* because it provides correct CSV escaping and quoting */
259 : /* semantics. */
260 : /************************************************************************/
261 :
262 116329 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
263 : bool bKeepLeadingAndClosingQuotes,
264 : bool bMergeDelimiter)
265 :
266 : {
267 232658 : CPLStringList aosRetList;
268 116329 : if (pszString == nullptr)
269 0 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
270 :
271 116329 : char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
272 116329 : int nTokenMax = 10;
273 116329 : const size_t nDelimiterLength = strlen(pszDelimiter);
274 :
275 116329 : const char *pszIter = pszString;
276 669294 : while (*pszIter != '\0')
277 : {
278 552965 : bool bInString = false;
279 :
280 552965 : int nTokenLen = 0;
281 :
282 : // Try to find the next delimiter, marking end of token.
283 4647150 : do
284 : {
285 : // End if this is a delimiter skip it and break.
286 5200120 : if (!bInString &&
287 2711680 : strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
288 : {
289 436957 : pszIter += nDelimiterLength;
290 436957 : if (bMergeDelimiter)
291 : {
292 9 : while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
293 : 0)
294 5 : pszIter += nDelimiterLength;
295 : }
296 436957 : break;
297 : }
298 :
299 4763160 : if (*pszIter == '"')
300 : {
301 395871 : if (!bInString && nTokenLen > 0)
302 : {
303 : // do not treat in a special way double quotes that appear
304 : // in the middle of a field (similarly to OpenOffice)
305 : // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
306 : }
307 395784 : else if (!bInString || pszIter[1] != '"')
308 : {
309 395098 : bInString = !bInString;
310 395098 : if (!bKeepLeadingAndClosingQuotes)
311 395064 : continue;
312 : }
313 : else // Doubled quotes in string resolve to one quote.
314 : {
315 686 : pszIter++;
316 : }
317 : }
318 :
319 4368100 : if (nTokenLen >= nTokenMax - 2)
320 : {
321 122941 : nTokenMax = nTokenMax * 2 + 10;
322 122941 : pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
323 : }
324 :
325 4368100 : pszToken[nTokenLen] = *pszIter;
326 4368100 : nTokenLen++;
327 4763160 : } while (*(++pszIter) != '\0');
328 :
329 552965 : pszToken[nTokenLen] = '\0';
330 552965 : aosRetList.AddString(pszToken);
331 :
332 : // If the last token is an empty token, then we have to catch
333 : // it now, otherwise we won't reenter the loop and it will be lost.
334 552965 : if (*pszIter == '\0' &&
335 116289 : pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
336 116289 : strncmp(pszIter - nDelimiterLength, pszDelimiter,
337 : nDelimiterLength) == 0)
338 : {
339 281 : aosRetList.AddString("");
340 : }
341 : }
342 :
343 116329 : CPLFree(pszToken);
344 :
345 116329 : if (aosRetList.Count() == 0)
346 40 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
347 : else
348 116289 : return aosRetList.StealList();
349 : }
350 :
351 : /************************************************************************/
352 : /* CSVFindNextLine() */
353 : /* */
354 : /* Find the start of the next line, while at the same time zero */
355 : /* terminating this line. Take into account that there may be */
356 : /* newline indicators within quoted strings, and that quotes */
357 : /* can be escaped with a backslash. */
358 : /************************************************************************/
359 :
/** Terminate the current line in place and return the start of the next one.
 *
 *  A CR or LF only ends the line when an even number of unescaped double
 *  quotes precedes it (a quote directly preceded by a backslash is not
 *  counted). Every CR/LF of the terminator run is overwritten with NUL.
 *
 *  @param pszThisLine writable, NUL-terminated buffer positioned on a line.
 *  @return pointer to the first character after the terminator run, or
 *          nullptr when the end of the buffer follows.
 */
static char *CSVFindNextLine(char *pszThisLine)

{
    const char chLF = 10;
    const char chCR = 13;

    int iPos = 0;
    bool bOddQuoteCount = false;  // true while inside a quoted section

    while (pszThisLine[iPos] != '\0')
    {
        const char chCur = pszThisLine[iPos];

        if (chCur == '\"' && (iPos == 0 || pszThisLine[iPos - 1] != '\\'))
            bOddQuoteCount = !bOddQuoteCount;

        if (!bOddQuoteCount && (chCur == chLF || chCur == chCR))
            break;

        ++iPos;
    }

    // Wipe the whole CR/LF run so the current line becomes a C string.
    while (pszThisLine[iPos] == chLF || pszThisLine[iPos] == chCR)
    {
        pszThisLine[iPos] = '\0';
        ++iPos;
    }

    return pszThisLine[iPos] == '\0' ? nullptr : pszThisLine + iPos;
}
383 :
384 : /************************************************************************/
385 : /* CSVIngest() */
386 : /* */
387 : /* Load entire file into memory and setup index if possible. */
388 : /************************************************************************/
389 :
390 : // TODO(schwehr): Clean up all the casting in CSVIngest.
391 60606 : static void CSVIngest(CSVTable *psTable)
392 :
393 : {
394 60606 : if (psTable->pszRawData != nullptr)
395 60576 : return;
396 :
397 : /* -------------------------------------------------------------------- */
398 : /* Ingest whole file. */
399 : /* -------------------------------------------------------------------- */
400 30 : if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
401 : {
402 0 : CPLError(CE_Failure, CPLE_FileIO,
403 : "Failed using seek end and tell to get file length: %s",
404 : psTable->pszFilename);
405 0 : return;
406 : }
407 30 : const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
408 30 : if (static_cast<long>(nFileLen) == -1)
409 : {
410 0 : CPLError(CE_Failure, CPLE_FileIO,
411 : "Failed using seek end and tell to get file length: %s",
412 : psTable->pszFilename);
413 0 : return;
414 : }
415 30 : VSIRewindL(psTable->fp);
416 :
417 30 : psTable->pszRawData = static_cast<char *>(
418 30 : VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
419 30 : if (psTable->pszRawData == nullptr)
420 0 : return;
421 30 : if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
422 30 : psTable->fp) != static_cast<size_t>(nFileLen))
423 : {
424 0 : CPLFree(psTable->pszRawData);
425 0 : psTable->pszRawData = nullptr;
426 :
427 0 : CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
428 : psTable->pszFilename);
429 0 : return;
430 : }
431 :
432 30 : psTable->pszRawData[nFileLen] = '\0';
433 :
434 : /* -------------------------------------------------------------------- */
435 : /* Get count of newlines so we can allocate line array. */
436 : /* -------------------------------------------------------------------- */
437 30 : int nMaxLineCount = 0;
438 279158 : for (int i = 0; i < static_cast<int>(nFileLen); i++)
439 : {
440 279128 : if (psTable->pszRawData[i] == 10)
441 6329 : nMaxLineCount++;
442 : }
443 :
444 30 : psTable->papszLines =
445 30 : static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
446 30 : if (psTable->papszLines == nullptr)
447 0 : return;
448 :
449 : /* -------------------------------------------------------------------- */
450 : /* Build a list of record pointers into the raw data buffer */
451 : /* based on line terminators. Zero terminate the line */
452 : /* strings. */
453 : /* -------------------------------------------------------------------- */
454 : /* skip header line */
455 30 : char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
456 :
457 30 : int iLine = 0;
458 6329 : while (pszThisLine != nullptr && iLine < nMaxLineCount)
459 : {
460 6299 : if (pszThisLine[0] != '#')
461 6288 : psTable->papszLines[iLine++] = pszThisLine;
462 6299 : pszThisLine = CSVFindNextLine(pszThisLine);
463 : }
464 :
465 30 : psTable->nLineCount = iLine;
466 :
467 : /* -------------------------------------------------------------------- */
468 : /* Allocate and populate index array. Ensure they are in */
469 : /* ascending order so that binary searches can be done on the */
470 : /* array. */
471 : /* -------------------------------------------------------------------- */
472 30 : psTable->panLineIndex = static_cast<int *>(
473 30 : VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
474 30 : if (psTable->panLineIndex == nullptr)
475 0 : return;
476 :
477 6232 : for (int i = 0; i < psTable->nLineCount; i++)
478 : {
479 6204 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
480 :
481 6204 : if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
482 : {
483 2 : CPLFree(psTable->panLineIndex);
484 2 : psTable->panLineIndex = nullptr;
485 2 : break;
486 : }
487 : }
488 :
489 30 : psTable->iLastLine = -1;
490 :
491 : /* -------------------------------------------------------------------- */
492 : /* We should never need the file handle against, so close it. */
493 : /* -------------------------------------------------------------------- */
494 30 : VSIFCloseL(psTable->fp);
495 30 : psTable->fp = nullptr;
496 : }
497 :
498 60606 : static void CSVIngest(const char *pszFilename)
499 :
500 : {
501 60606 : CSVTable *psTable = CSVAccess(pszFilename);
502 60606 : if (psTable == nullptr)
503 : {
504 0 : CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
505 : pszFilename);
506 0 : return;
507 : }
508 60606 : CSVIngest(psTable);
509 : }
510 :
511 : /************************************************************************/
512 : /* CSVDetectSeperator() */
513 : /************************************************************************/
514 :
515 : /** Detect which field separator is used.
516 : *
517 : * Currently, it can detect comma, semicolon, space, tabulation or pipe.
518 : * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
519 : * most occurrences will be selected (and a warning emitted).
520 : * If no separator found, comma will be considered as the separator.
521 : *
522 : * @return ',', ';', ' ', tabulation character or '|'.
523 : */
524 597 : char CSVDetectSeperator(const char *pszLine)
525 : {
526 597 : bool bInString = false;
527 597 : int nCountComma = 0;
528 597 : int nCountSemicolon = 0;
529 597 : int nCountTab = 0;
530 597 : int nCountPipe = 0;
531 597 : int nCountSpace = 0;
532 :
533 26802 : for (; *pszLine != '\0'; pszLine++)
534 : {
535 26205 : if (!bInString && *pszLine == ',')
536 : {
537 2133 : nCountComma++;
538 : }
539 24072 : else if (!bInString && *pszLine == ';')
540 : {
541 10 : nCountSemicolon++;
542 : }
543 24062 : else if (!bInString && *pszLine == '\t')
544 : {
545 29 : nCountTab++;
546 : }
547 24033 : else if (!bInString && *pszLine == '|')
548 : {
549 9 : nCountPipe++;
550 : }
551 24024 : else if (!bInString && *pszLine == ' ')
552 : {
553 290 : nCountSpace++;
554 : }
555 23734 : else if (*pszLine == '"')
556 : {
557 519 : if (!bInString || pszLine[1] != '"')
558 : {
559 519 : bInString = !bInString;
560 519 : continue;
561 : }
562 : else /* doubled quotes in string resolve to one quote */
563 : {
564 0 : pszLine++;
565 : }
566 : }
567 : }
568 :
569 : const int nMaxCountExceptSpace =
570 : std::max(std::max(nCountComma, nCountSemicolon),
571 597 : std::max(nCountTab, nCountPipe));
572 597 : char chDelimiter = ',';
573 597 : if (nMaxCountExceptSpace == 0)
574 : {
575 35 : if (nCountSpace > 0)
576 9 : chDelimiter = ' ';
577 : }
578 : else
579 : {
580 562 : bool bWarn = false;
581 562 : if (nCountComma == nMaxCountExceptSpace)
582 : {
583 546 : chDelimiter = ',';
584 546 : bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
585 : }
586 16 : else if (nCountSemicolon == nMaxCountExceptSpace)
587 : {
588 5 : chDelimiter = ';';
589 5 : bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
590 : }
591 11 : else if (nCountTab == nMaxCountExceptSpace)
592 : {
593 6 : chDelimiter = '\t';
594 6 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
595 : }
596 : else /* if( nCountPipe == nMaxCountExceptSpace ) */
597 : {
598 5 : chDelimiter = '|';
599 5 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
600 : }
601 562 : if (bWarn)
602 : {
603 6 : CPLError(CE_Warning, CPLE_AppDefined,
604 : "Selecting '%c' as CSV field separator, but "
605 : "other candidate separator(s) have been found.",
606 : chDelimiter);
607 : }
608 : }
609 :
610 597 : return chDelimiter;
611 : }
612 :
613 : /************************************************************************/
614 : /* CSVReadParseLineGeneric() */
615 : /* */
616 : /* Read one line, and return split into fields. The return */
617 : /* result is a stringlist, in the sense of the CSL functions. */
618 : /************************************************************************/
619 :
620 : static char **
621 57364 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
622 : size_t nMaxLineSize, const char *pszDelimiter,
623 : bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
624 : bool bMergeDelimiter, bool bSkipBOM)
625 : {
626 57364 : const char *pszLine = pfnReadLine(fp, nMaxLineSize);
627 57364 : if (pszLine == nullptr)
628 1365 : return nullptr;
629 :
630 55999 : if (bSkipBOM)
631 : {
632 : // Skip BOM.
633 55632 : const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
634 55632 : if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
635 4 : pszLine += 3;
636 : }
637 :
638 : // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
639 55999 : if (!bHonourStrings)
640 : {
641 2 : return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
642 : }
643 :
644 : // If there are no quotes, then this is the simple case.
645 : // Parse, and return tokens.
646 55997 : if (strchr(pszLine, '\"') == nullptr)
647 48355 : return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
648 48355 : bMergeDelimiter);
649 :
650 7642 : const size_t nDelimiterLength = strlen(pszDelimiter);
651 7642 : bool bInString = false; // keep in that scope !
652 15284 : std::string osWorkLine(pszLine); // keep in that scope !
653 7642 : size_t i = 0; // keep in that scope !
654 :
655 : try
656 : {
657 : while (true)
658 : {
659 792644 : for (; i < osWorkLine.size(); ++i)
660 : {
661 784246 : if (osWorkLine[i] == '\"')
662 : {
663 59057 : if (!bInString)
664 : {
665 : // Only consider " as the start of a quoted string
666 : // if it is the first character of the line, or
667 : // if it is immediately after the field delimiter.
668 52238 : if (i == 0 ||
669 23009 : (i >= nDelimiterLength &&
670 23009 : osWorkLine.compare(i - nDelimiterLength,
671 : nDelimiterLength, pszDelimiter,
672 : nDelimiterLength) == 0))
673 : {
674 29142 : bInString = true;
675 : }
676 : }
677 56781 : else if (i + 1 < osWorkLine.size() &&
678 26953 : osWorkLine[i + 1] == '"')
679 : {
680 : // Escaped double quote in a quoted string
681 687 : ++i;
682 : }
683 : else
684 : {
685 29141 : bInString = false;
686 : }
687 : }
688 : }
689 :
690 8398 : if (!bInString)
691 : {
692 7641 : return CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
693 : bKeepLeadingAndClosingQuotes,
694 7641 : bMergeDelimiter);
695 : }
696 :
697 757 : const char *pszNewLine = pfnReadLine(fp, nMaxLineSize);
698 757 : if (pszNewLine == nullptr)
699 1 : break;
700 :
701 756 : osWorkLine.append("\n");
702 756 : osWorkLine.append(pszNewLine);
703 756 : }
704 : }
705 0 : catch (const std::exception &e)
706 : {
707 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
708 : }
709 :
710 1 : if (bInString)
711 : {
712 1 : CPLError(CE_Failure, CPLE_AppDefined,
713 : "CSV file has unbalanced number of double-quotes. Corrupted "
714 : "data will likely be returned");
715 : }
716 :
717 1 : return nullptr;
718 : }
719 :
720 : /************************************************************************/
721 : /* CSVReadParseLine() */
722 : /* */
723 : /* Read one line, and return split into fields. The return */
724 : /* result is a stringlist, in the sense of the CSL functions. */
725 : /* */
726 : /* Deprecated. Replaced by CSVReadParseLineL(). */
727 : /************************************************************************/
728 :
729 0 : char **CSVReadParseLine(FILE *fp)
730 : {
731 0 : return CSVReadParseLine2(fp, ',');
732 : }
733 :
734 0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
735 : {
736 0 : return CPLReadLine(static_cast<FILE *>(fp));
737 : }
738 :
739 0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
740 : {
741 0 : CPLAssert(fp != nullptr);
742 0 : if (fp == nullptr)
743 0 : return nullptr;
744 :
745 0 : char szDelimiter[2] = {chDelimiter, 0};
746 0 : return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
747 : 0, // nMaxLineSize,
748 : szDelimiter,
749 : true, // bHonourStrings
750 : false, // bKeepLeadingAndClosingQuotes
751 : false, // bMergeDelimiter
752 0 : true /* bSkipBOM */);
753 : }
754 :
755 : /************************************************************************/
756 : /* CSVReadParseLineL() */
757 : /* */
758 : /* Read one line, and return split into fields. The return */
759 : /* result is a stringlist, in the sense of the CSL functions. */
760 : /* */
761 : /* Replaces CSVReadParseLine(). These functions use the VSI */
762 : /* layer to allow reading from other file containers. */
763 : /************************************************************************/
764 :
765 3910 : char **CSVReadParseLineL(VSILFILE *fp)
766 : {
767 3910 : return CSVReadParseLine2L(fp, ',');
768 : }
769 :
770 3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
771 :
772 : {
773 3910 : CPLAssert(fp != nullptr);
774 3910 : if (fp == nullptr)
775 0 : return nullptr;
776 :
777 3910 : char szDelimiter[2] = {chDelimiter, 0};
778 3910 : return CSVReadParseLine3L(fp,
779 : 0, // nMaxLineSize
780 : szDelimiter,
781 : true, // bHonourStrings
782 : false, // bKeepLeadingAndClosingQuotes
783 : false, // bMergeDelimiter
784 3910 : true /* bSkipBOM */);
785 : }
786 :
787 : /************************************************************************/
788 : /* ReadLineLargeFile() */
789 : /************************************************************************/
790 :
791 58121 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
792 : {
793 58121 : int nBufLength = 0;
794 58121 : return CPLReadLine3L(static_cast<VSILFILE *>(fp),
795 : nMaxLineSize == 0 ? -1
796 : : static_cast<int>(nMaxLineSize),
797 116242 : &nBufLength, nullptr);
798 : }
799 :
800 : /************************************************************************/
801 : /* CSVReadParseLine3L() */
802 : /* */
803 : /* Read one line, and return split into fields. The return */
804 : /* result is a stringlist, in the sense of the CSL functions. */
805 : /************************************************************************/
806 :
807 : /** Read one line, and return split into fields.
808 : * The return result is a stringlist, in the sense of the CSL functions.
809 : *
810 : * @param fp File handle. Must not be NULL
811 : * @param nMaxLineSize Maximum line size, or 0 for unlimited.
812 : * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
813 : * @param bHonourStrings Should be true, unless double quotes should not be
814 : * considered when separating fields.
815 : * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
816 : * quote characters should be kept.
817 : * @param bMergeDelimiter Whether consecutive delimiters should be considered
818 : * as a single one. Should generally be set to false.
819 : * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
820 : */
821 57364 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
822 : const char *pszDelimiter, bool bHonourStrings,
823 : bool bKeepLeadingAndClosingQuotes,
824 : bool bMergeDelimiter, bool bSkipBOM)
825 :
826 : {
827 57364 : return CSVReadParseLineGeneric(
828 : fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
829 57364 : bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
830 : }
831 :
832 : /************************************************************************/
833 : /* CSVCompare() */
834 : /* */
835 : /* Compare a field to a search value using a particular */
836 : /* criteria. */
837 : /************************************************************************/
838 :
839 610 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
840 : CSVCompareCriteria eCriteria)
841 :
842 : {
843 610 : if (eCriteria == CC_ExactString)
844 : {
845 0 : return (strcmp(pszFieldValue, pszTarget) == 0);
846 : }
847 610 : else if (eCriteria == CC_ApproxString)
848 : {
849 270 : return EQUAL(pszFieldValue, pszTarget);
850 : }
851 340 : else if (eCriteria == CC_Integer)
852 : {
853 640 : return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
854 640 : atoi(pszFieldValue) == atoi(pszTarget));
855 : }
856 :
857 0 : return false;
858 : }
859 :
860 : /************************************************************************/
861 : /* CSVScanLines() */
862 : /* */
863 : /* Read the file scanline for lines where the key field equals */
864 : /* the indicated value with the suggested comparison criteria. */
865 : /* Return the first matching line split into fields. */
866 : /* */
867 : /* Deprecated. Replaced by CSVScanLinesL(). */
868 : /************************************************************************/
869 :
870 0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
871 : CSVCompareCriteria eCriteria)
872 :
873 : {
874 0 : CPLAssert(pszValue != nullptr);
875 0 : CPLAssert(iKeyField >= 0);
876 0 : CPLAssert(fp != nullptr);
877 :
878 0 : bool bSelected = false;
879 0 : const int nTestValue = atoi(pszValue);
880 0 : char **papszFields = nullptr;
881 :
882 0 : while (!bSelected)
883 : {
884 0 : papszFields = CSVReadParseLine(fp);
885 0 : if (papszFields == nullptr)
886 0 : return nullptr;
887 :
888 0 : if (CSLCount(papszFields) < iKeyField + 1)
889 : {
890 : /* not selected */
891 : }
892 0 : else if (eCriteria == CC_Integer &&
893 0 : atoi(papszFields[iKeyField]) == nTestValue)
894 : {
895 0 : bSelected = true;
896 : }
897 : else
898 : {
899 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
900 : }
901 :
902 0 : if (!bSelected)
903 : {
904 0 : CSLDestroy(papszFields);
905 0 : papszFields = nullptr;
906 : }
907 : }
908 :
909 0 : return papszFields;
910 : }
911 :
912 : /************************************************************************/
913 : /* CSVScanLinesL() */
914 : /* */
915 : /* Read the file scanline for lines where the key field equals */
916 : /* the indicated value with the suggested comparison criteria. */
917 : /* Return the first matching line split into fields. */
918 : /************************************************************************/
919 :
920 0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
921 : CSVCompareCriteria eCriteria)
922 :
923 : {
924 0 : CPLAssert(pszValue != nullptr);
925 0 : CPLAssert(iKeyField >= 0);
926 0 : CPLAssert(fp != nullptr);
927 :
928 0 : bool bSelected = false;
929 0 : const int nTestValue = atoi(pszValue);
930 0 : char **papszFields = nullptr;
931 :
932 0 : while (!bSelected)
933 : {
934 0 : papszFields = CSVReadParseLineL(fp);
935 0 : if (papszFields == nullptr)
936 0 : return nullptr;
937 :
938 0 : if (CSLCount(papszFields) < iKeyField + 1)
939 : {
940 : /* not selected */
941 : }
942 0 : else if (eCriteria == CC_Integer &&
943 0 : atoi(papszFields[iKeyField]) == nTestValue)
944 : {
945 0 : bSelected = true;
946 : }
947 : else
948 : {
949 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
950 : }
951 :
952 0 : if (!bSelected)
953 : {
954 0 : CSLDestroy(papszFields);
955 0 : papszFields = nullptr;
956 : }
957 : }
958 :
959 0 : return papszFields;
960 : }
961 :
962 : /************************************************************************/
963 : /* CSVScanLinesIndexed() */
964 : /* */
965 : /* Binary search the table's line index (panLineIndex) for an */
966 : /* entry equal to the indicated integer key value. */
967 : /* Return the first matching line split into fields. */
968 : /************************************************************************/
969 :
970 23 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
971 :
972 : {
973 23 : CPLAssert(psTable->panLineIndex != nullptr);
974 :
975 : /* -------------------------------------------------------------------- */
976 : /* Find target record with binary search. */
977 : /* -------------------------------------------------------------------- */
978 23 : int iTop = psTable->nLineCount - 1;
979 23 : int iBottom = 0;
980 23 : int iResult = -1;
981 :
982 167 : while (iTop >= iBottom)
983 : {
984 167 : const int iMiddle = (iTop + iBottom) / 2;
985 167 : if (psTable->panLineIndex[iMiddle] > nKeyValue)
986 98 : iTop = iMiddle - 1;
987 69 : else if (psTable->panLineIndex[iMiddle] < nKeyValue)
988 46 : iBottom = iMiddle + 1;
989 : else
990 : {
991 23 : iResult = iMiddle;
992 : // if a key is not unique, select the first instance of it.
993 23 : while (iResult > 0 &&
994 23 : psTable->panLineIndex[iResult - 1] == nKeyValue)
995 : {
996 0 : psTable->bNonUniqueKey = true;
997 0 : iResult--;
998 : }
999 23 : break;
1000 : }
1001 : }
1002 :
1003 23 : if (iResult == -1)
1004 0 : return nullptr;
1005 :
1006 : /* -------------------------------------------------------------------- */
1007 : /* Parse target line, and update iLastLine indicator. */
1008 : /* -------------------------------------------------------------------- */
1009 23 : psTable->iLastLine = iResult;
1010 :
1011 23 : return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
1012 : }
1013 :
1014 : /************************************************************************/
1015 : /* CSVScanLinesIngested() */
1016 : /* */
1017 : /* Read the file scanline for lines where the key field equals */
1018 : /* the indicated value with the suggested comparison criteria. */
1019 : /* Return the first matching line split into fields. */
1020 : /************************************************************************/
1021 :
1022 30 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
1023 : const char *pszValue,
1024 : CSVCompareCriteria eCriteria)
1025 :
1026 : {
1027 30 : CPLAssert(pszValue != nullptr);
1028 30 : CPLAssert(iKeyField >= 0);
1029 :
1030 30 : const int nTestValue = atoi(pszValue);
1031 :
1032 : /* -------------------------------------------------------------------- */
1033 : /* Short cut for indexed files. */
1034 : /* -------------------------------------------------------------------- */
1035 30 : if (iKeyField == 0 && eCriteria == CC_Integer &&
1036 23 : psTable->panLineIndex != nullptr)
1037 23 : return CSVScanLinesIndexed(psTable, nTestValue);
1038 :
1039 : /* -------------------------------------------------------------------- */
1040 : /* Scan from in-core lines. */
1041 : /* -------------------------------------------------------------------- */
1042 7 : char **papszFields = nullptr;
1043 7 : bool bSelected = false;
1044 :
1045 484 : while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
1046 : {
1047 477 : psTable->iLastLine++;
1048 477 : papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
1049 : false, false);
1050 :
1051 477 : if (CSLCount(papszFields) < iKeyField + 1)
1052 : {
1053 : /* not selected */
1054 : }
1055 477 : else if (eCriteria == CC_Integer &&
1056 242 : atoi(papszFields[iKeyField]) == nTestValue)
1057 : {
1058 2 : bSelected = true;
1059 : }
1060 : else
1061 : {
1062 475 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
1063 : }
1064 :
1065 477 : if (!bSelected)
1066 : {
1067 470 : CSLDestroy(papszFields);
1068 470 : papszFields = nullptr;
1069 : }
1070 : }
1071 :
1072 7 : return papszFields;
1073 : }
1074 :
1075 : /************************************************************************/
1076 : /* CSVRewind() */
1077 : /* */
1078 : /* Rewind a CSV file based on a passed in filename. */
1079 : /* This is aimed at being used with CSVGetNextLine(). */
1080 : /************************************************************************/
1081 :
1082 1843 : void CSVRewind(const char *pszFilename)
1083 :
1084 : {
1085 : /* -------------------------------------------------------------------- */
1086 : /* Get access to the table. */
1087 : /* -------------------------------------------------------------------- */
1088 1843 : CPLAssert(pszFilename != nullptr);
1089 :
1090 1843 : CSVTable *const psTable = CSVAccess(pszFilename);
1091 1843 : if (psTable != nullptr)
1092 1843 : psTable->iLastLine = -1;
1093 1843 : }
1094 :
1095 : /************************************************************************/
1096 : /* CSVGetNextLine() */
1097 : /* */
1098 : /* Fetch the next line of a CSV file based on a passed in */
1099 : /* filename. Returns NULL at end of file, or if file is not */
1100 : /* really established. */
1101 : /* This ingests the whole file into memory if not already done. */
1102 : /* When reaching end of file, CSVRewind() may be used to read */
1103 : /* again from the beginning. */
1104 : /************************************************************************/
1105 :
1106 60462 : char **CSVGetNextLine(const char *pszFilename)
1107 :
1108 : {
1109 :
1110 : /* -------------------------------------------------------------------- */
1111 : /* Get access to the table. */
1112 : /* -------------------------------------------------------------------- */
1113 60462 : CPLAssert(pszFilename != nullptr);
1114 :
1115 60462 : CSVTable *const psTable = CSVAccess(pszFilename);
1116 60462 : if (psTable == nullptr)
1117 0 : return nullptr;
1118 :
1119 60462 : CSVIngest(psTable->pszFilename);
1120 :
1121 : /* -------------------------------------------------------------------- */
1122 : /* If we use CSVGetNextLine() we can pretty much assume we have */
1123 : /* a non-unique key. */
1124 : /* -------------------------------------------------------------------- */
1125 60462 : psTable->bNonUniqueKey = true;
1126 :
1127 : /* -------------------------------------------------------------------- */
1128 : /* Do we have a next line available? This only works for */
1129 : /* ingested tables I believe. */
1130 : /* -------------------------------------------------------------------- */
1131 60462 : if (psTable->iLastLine + 1 >= psTable->nLineCount)
1132 629 : return nullptr;
1133 :
1134 59833 : psTable->iLastLine++;
1135 59833 : CSLDestroy(psTable->papszRecFields);
1136 119666 : psTable->papszRecFields = CSVSplitLine(
1137 59833 : psTable->papszLines[psTable->iLastLine], ",", false, false);
1138 :
1139 59833 : return psTable->papszRecFields;
1140 : }
1141 :
1142 : /************************************************************************/
1143 : /* CSVScanFile() */
1144 : /* */
1145 : /* Scan a whole file using criteria similar to above, but also */
1146 : /* taking care of file opening and closing. */
1147 : /************************************************************************/
1148 :
1149 144 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
1150 : const char *pszValue, CSVCompareCriteria eCriteria)
1151 : {
1152 144 : CSVIngest(psTable->pszFilename);
1153 :
1154 : /* -------------------------------------------------------------------- */
1155 : /* Does the current record match the criteria? If so, just */
1156 : /* return it again. */
1157 : /* -------------------------------------------------------------------- */
1158 144 : if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
1159 402 : CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
1160 114 : !psTable->bNonUniqueKey)
1161 : {
1162 114 : return psTable->papszRecFields;
1163 : }
1164 :
1165 : /* -------------------------------------------------------------------- */
1166 : /* Scan the file from the beginning, replacing the ``current */
1167 : /* record'' in our structure with the one that is found. */
1168 : /* -------------------------------------------------------------------- */
1169 30 : psTable->iLastLine = -1;
1170 30 : CSLDestroy(psTable->papszRecFields);
1171 :
1172 30 : if (psTable->pszRawData != nullptr)
1173 30 : psTable->papszRecFields =
1174 30 : CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
1175 : else
1176 : {
1177 0 : VSIRewindL(psTable->fp);
1178 0 : CPLReadLineL(psTable->fp); /* throw away the header line */
1179 :
1180 0 : psTable->papszRecFields =
1181 0 : CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
1182 : }
1183 :
1184 30 : return psTable->papszRecFields;
1185 : }
1186 :
1187 4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
1188 : CSVCompareCriteria eCriteria)
1189 :
1190 : {
1191 : /* -------------------------------------------------------------------- */
1192 : /* Get access to the table. */
1193 : /* -------------------------------------------------------------------- */
1194 4 : CPLAssert(pszFilename != nullptr);
1195 :
1196 4 : if (iKeyField < 0)
1197 0 : return nullptr;
1198 :
1199 4 : CSVTable *const psTable = CSVAccess(pszFilename);
1200 4 : if (psTable == nullptr)
1201 0 : return nullptr;
1202 :
1203 4 : return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
1204 : }
1205 :
1206 : /************************************************************************/
1207 : /* CSVGetFieldId() */
1208 : /* */
1209 : /* Read the first record of a CSV file (rewinding to be sure), */
1210 : /* and find the field with the indicated name. Returns -1 if */
1211 : /* it fails to find the field name. Comparison is case */
1212 : /* insensitive, but otherwise exact. After this function has */
1213 : /* been called the file pointer will be positioned just after */
1214 : /* the first record. */
1215 : /* */
1216 : /* Deprecated. Replaced by CSVGetFieldIdL(). */
1217 : /************************************************************************/
1218 :
1219 0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
1220 :
1221 : {
1222 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1223 :
1224 0 : VSIRewind(fp);
1225 :
1226 0 : char **papszFields = CSVReadParseLine(fp);
1227 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1228 : {
1229 0 : if (EQUAL(papszFields[i], pszFieldName))
1230 : {
1231 0 : CSLDestroy(papszFields);
1232 0 : return i;
1233 : }
1234 : }
1235 :
1236 0 : CSLDestroy(papszFields);
1237 :
1238 0 : return -1;
1239 : }
1240 :
1241 : /************************************************************************/
1242 : /* CSVGetFieldIdL() */
1243 : /* */
1244 : /* Read the first record of a CSV file (rewinding to be sure), */
1245 : /* and find the field with the indicated name. Returns -1 if */
1246 : /* it fails to find the field name. Comparison is case */
1247 : /* insensitive, but otherwise exact. After this function has */
1248 : /* been called the file pointer will be positioned just after */
1249 : /* the first record. */
1250 : /************************************************************************/
1251 :
1252 0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
1253 :
1254 : {
1255 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1256 :
1257 0 : VSIRewindL(fp);
1258 :
1259 0 : char **papszFields = CSVReadParseLineL(fp);
1260 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1261 : {
1262 0 : if (EQUAL(papszFields[i], pszFieldName))
1263 : {
1264 0 : CSLDestroy(papszFields);
1265 0 : return i;
1266 : }
1267 : }
1268 :
1269 0 : CSLDestroy(papszFields);
1270 :
1271 0 : return -1;
1272 : }
1273 :
1274 : /************************************************************************/
1275 : /* CSVGetFileFieldId() */
1276 : /* */
1277 : /* Same as CSVGetFieldId(), except that we get the file based */
1278 : /* on filename, rather than having an existing handle. */
1279 : /************************************************************************/
1280 :
1281 7430 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
1282 :
1283 : {
1284 : /* -------------------------------------------------------------------- */
1285 : /* Find the requested field. */
1286 : /* -------------------------------------------------------------------- */
1287 7430 : const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
1288 18696 : for (int i = 0; psTable->papszFieldNames != nullptr &&
1289 18696 : psTable->papszFieldNames[i] != nullptr;
1290 : i++)
1291 : {
1292 18696 : if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
1293 10433 : EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
1294 : {
1295 7430 : return i;
1296 : }
1297 : }
1298 :
1299 0 : return -1;
1300 : }
1301 :
1302 7150 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
1303 :
1304 : {
1305 : /* -------------------------------------------------------------------- */
1306 : /* Get access to the table. */
1307 : /* -------------------------------------------------------------------- */
1308 7150 : CPLAssert(pszFilename != nullptr);
1309 :
1310 7150 : CSVTable *const psTable = CSVAccess(pszFilename);
1311 7150 : if (psTable == nullptr)
1312 0 : return -1;
1313 7150 : return CSVGetFileFieldId(psTable, pszFieldName);
1314 : }
1315 :
1316 : /************************************************************************/
1317 : /* CSVScanFileByName() */
1318 : /* */
1319 : /* Same as CSVScanFile(), but using a field name instead of a */
1320 : /* field number. */
1321 : /************************************************************************/
1322 :
1323 4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
1324 : const char *pszValue, CSVCompareCriteria eCriteria)
1325 :
1326 : {
1327 4 : const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
1328 4 : if (iKeyField == -1)
1329 0 : return nullptr;
1330 :
1331 4 : return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
1332 : }
1333 :
1334 : /************************************************************************/
1335 : /* CSVGetField() */
1336 : /* */
1337 : /* The all-in-one function to fetch a particular field value */
1338 : /* from a CSV file. Note this function will return an empty */
1339 : /* string, rather than NULL if it fails to find the desired */
1340 : /* value for some reason. The caller can't establish that the */
1341 : /* fetch failed. */
1342 : /************************************************************************/
1343 :
1344 140 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
1345 : const char *pszKeyFieldValue,
1346 : CSVCompareCriteria eCriteria,
1347 : const char *pszTargetField)
1348 :
1349 : {
1350 : /* -------------------------------------------------------------------- */
1351 : /* Find the table. */
1352 : /* -------------------------------------------------------------------- */
1353 140 : CSVTable *const psTable = CSVAccess(pszFilename);
1354 140 : if (psTable == nullptr)
1355 0 : return "";
1356 :
1357 140 : const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
1358 140 : if (iKeyField == -1)
1359 0 : return "";
1360 :
1361 : /* -------------------------------------------------------------------- */
1362 : /* Find the correct record. */
1363 : /* -------------------------------------------------------------------- */
1364 : char **papszRecord =
1365 140 : CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
1366 140 : if (papszRecord == nullptr)
1367 0 : return "";
1368 :
1369 : /* -------------------------------------------------------------------- */
1370 : /* Figure out which field we want out of this. */
1371 : /* -------------------------------------------------------------------- */
1372 140 : const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
1373 140 : if (iTargetField < 0)
1374 0 : return "";
1375 :
1376 388 : for (int i = 0; papszRecord[i] != nullptr; ++i)
1377 : {
1378 388 : if (i == iTargetField)
1379 140 : return papszRecord[iTargetField];
1380 : }
1381 0 : return "";
1382 : }
1383 :
1384 : /************************************************************************/
1385 : /* GDALDefaultCSVFilename() */
1386 : /************************************************************************/
1387 :
1388 : typedef struct
1389 : {
1390 : char szPath[512];
1391 : bool bCSVFinderInitialized;
1392 : } DefaultCSVFileNameTLS;
1393 :
1394 2488 : const char *GDALDefaultCSVFilename(const char *pszBasename)
1395 :
1396 : {
1397 : /* -------------------------------------------------------------------- */
1398 : /* Do we already have this file accessed? If so, just return */
1399 : /* the existing path without any further probing. */
1400 : /* -------------------------------------------------------------------- */
1401 2488 : int bMemoryError = FALSE;
1402 : CSVTable **ppsCSVTableList =
1403 2488 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
1404 2488 : if (ppsCSVTableList != nullptr)
1405 : {
1406 2482 : const size_t nBasenameLen = strlen(pszBasename);
1407 :
1408 23118 : for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
1409 20636 : psTable = psTable->psNext)
1410 : {
1411 22566 : const size_t nFullLen = strlen(psTable->pszFilename);
1412 :
1413 22566 : if (nFullLen > nBasenameLen &&
1414 22566 : strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
1415 1930 : pszBasename) == 0 &&
1416 1930 : strchr("/\\",
1417 1930 : psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
1418 : nullptr)
1419 : {
1420 1930 : return psTable->pszFilename;
1421 : }
1422 : }
1423 : }
1424 :
1425 : /* -------------------------------------------------------------------- */
1426 : /* Otherwise we need to look harder for it. */
1427 : /* -------------------------------------------------------------------- */
1428 : DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1429 558 : CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
1430 558 : if (pTLSData == nullptr && !bMemoryError)
1431 : {
1432 : pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1433 5 : VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
1434 5 : if (pTLSData)
1435 5 : CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
1436 : }
1437 558 : if (pTLSData == nullptr)
1438 0 : return "/not_existing_dir/not_existing_path";
1439 :
1440 558 : const char *pszResult = CPLFindFile("gdal", pszBasename);
1441 :
1442 558 : if (pszResult != nullptr)
1443 43 : return pszResult;
1444 :
1445 515 : if (!pTLSData->bCSVFinderInitialized)
1446 : {
1447 2 : pTLSData->bCSVFinderInitialized = true;
1448 :
1449 2 : if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
1450 2 : CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
1451 :
1452 2 : pszResult = CPLFindFile("gdal", pszBasename);
1453 :
1454 2 : if (pszResult != nullptr)
1455 0 : return pszResult;
1456 : }
1457 :
1458 : // For systems like sandboxes that do not allow other checks.
1459 515 : CPLDebug("CPL_CSV",
1460 : "Failed to find file in GDALDefaultCSVFilename. "
1461 : "Returning original basename: %s",
1462 : pszBasename);
1463 515 : CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
1464 515 : return pTLSData->szPath;
1465 : }
1466 :
1467 : /************************************************************************/
1468 : /* CSVFilename() */
1469 : /* */
1470 : /* Return the full path to a particular CSV file. This will */
1471 : /* eventually be something the application can override. */
1472 : /************************************************************************/
1473 :
1474 : CPL_C_START
1475 : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
1476 : CPL_C_END
1477 :
1478 2488 : const char *CSVFilename(const char *pszBasename)
1479 :
1480 : {
1481 2488 : if (pfnCSVFilenameHook == nullptr)
1482 2488 : return GDALDefaultCSVFilename(pszBasename);
1483 :
1484 0 : return pfnCSVFilenameHook(pszBasename);
1485 : }
1486 :
1487 : /************************************************************************/
1488 : /* SetCSVFilenameHook() */
1489 : /* */
1490 : /* Applications can use this to set a function that will */
1491 : /* massage CSV filenames. */
1492 : /************************************************************************/
1493 :
1494 : /**
1495 : * Override CSV file search method.
1496 : *
1497 : * @param pfnNewHook The pointer to a function which will return the
1498 : * full path for a given filename.
1499 : *
1500 :
1501 : This function allows an application to override how GTIFGetDefn()
1502 : and related functions find the CSV (Comma Separated Value) values
1503 : required. The pfnNewHook argument should be a pointer to a function that
1504 : will take in a CSV filename and return a full path to the file. The
1505 : returned string should be to an internal static buffer so that the
1506 : caller doesn't have to free the result.
1507 :
1508 : Example:
1509 :
1510 : The listgeo utility uses the following override function if the user
1511 : specified a CSV file directory with the -t commandline switch (argument
1512 : put into CSVDirName).
1513 :
1514 : \code{.cpp}
1515 :
1516 : ...
1517 : SetCSVFilenameHook( CSVFileOverride );
1518 : ...
1519 :
1520 : static const char *CSVFileOverride( const char * pszInput )
1521 :
1522 : {
1523 : static char szPath[1024] = {};
1524 :
1525 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1526 :
1527 : return szPath;
1528 : }
1529 : \endcode
1530 :
1531 : */
1532 :
1533 : CPL_C_START
1534 0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
1535 :
1536 : {
1537 0 : pfnCSVFilenameHook = pfnNewHook;
1538 0 : }
1539 :
1540 : CPL_C_END
|