Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: CSV (comma separated value) file access.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 1999, Frank Warmerdam
9 : * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "cpl_port.h"
15 : #include "cpl_csv.h"
16 :
17 : #include <cstddef>
18 : #include <cstdlib>
19 : #include <cstring>
20 : #if HAVE_FCNTL_H
21 : #include <fcntl.h>
22 : #endif
23 :
24 : #include "cpl_conv.h"
25 : #include "cpl_error.h"
26 : #include "cpl_multiproc.h"
27 : #include "gdal_csv.h"
28 :
29 : #include <algorithm>
30 :
31 : /* ==================================================================== */
32 : /* The CSVTable is a persistent set of info about an open CSV */
33 : /* table. While it doesn't currently maintain a record index, */
34 : /* or in-memory copy of the table, it could be changed to do so */
35 : /* in the future. */
36 : /* ==================================================================== */
/* One node of the per-thread singly linked list of opened CSV tables. */
/* Nodes are created by CSVAccess() and released by CSVDeaccessInternal(). */
37 : typedef struct ctb
38 : {
/* Open file handle; CSVIngest() closes it and resets it to nullptr once */
/* the whole file has been loaded in memory. */
39 : VSILFILE *fp;
/* Next table in the list (nullptr for the last one). */
40 : struct ctb *psNext;
/* Owned copy of the file name; used as the (case-insensitive) lookup key. */
41 : char *pszFilename;
/* Fields of the header record, as a CSL string list. */
42 : char **papszFieldNames;
/* strlen() of each entry of papszFieldNames (nFields entries). */
43 : int *panFieldNamesLength;
/* String list owned by the table (destroyed on deaccess). */
/* NOTE(review): presumably the fields of the last fetched record - */
/* it is not populated in this part of the file; confirm. */
44 : char **papszRecFields;
/* Number of entries in papszFieldNames. */
45 : int nFields;
/* Set to -1 after ingestion and to the matched index by */
/* CSVScanLinesIndexed(). */
46 : int iLastLine;
/* Set to true when CSVScanLinesIndexed() meets a duplicated key. */
47 : bool bNonUniqueKey;
48 :
49 : /* Cache for whole file */
/* Number of valid entries in papszLines. */
50 : int nLineCount;
/* Pointers to the start of each non-comment data line inside pszRawData. */
51 : char **papszLines;
/* atoi() of each line (i.e. leading integer key); nullptr when the keys */
/* are not in ascending order, in which case no binary search is possible. */
52 : int *panLineIndex;
/* Whole file content, nul-terminated; line terminators are overwritten */
/* with '\0' during ingestion. */
53 : char *pszRawData;
54 : } CSVTable;
55 :
56 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
57 : const char *pszFilename);
58 :
59 : /************************************************************************/
60 : /* CSVFreeTLS() */
61 : /************************************************************************/
62 2 : static void CSVFreeTLS(void *pData)
63 : {
64 2 : CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
65 2 : CPLFree(pData);
66 2 : }
67 :
68 : /* It would likely be better to share this list between threads, but
69 : that will require some rework. */
70 :
71 : /************************************************************************/
72 : /* CSVAccess() */
73 : /* */
74 : /* This function will fetch a handle to the requested table. */
75 : /* If not found in the ``open table list'' the table will be */
76 : /* opened and added to the list. Eventually this function may */
77 : /* become public with an abstracted return type so that */
78 : /* applications can set options about the table. For now this */
79 : /* isn't done. */
80 : /************************************************************************/
81 :
82 129591 : static CSVTable *CSVAccess(const char *pszFilename)
83 :
84 : {
85 : /* -------------------------------------------------------------------- */
86 : /* Fetch the table, and allocate the thread-local pointer to it */
87 : /* if there isn't already one. */
88 : /* -------------------------------------------------------------------- */
89 129591 : int bMemoryError = FALSE;
90 : CSVTable **ppsCSVTableList =
91 129591 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
92 129591 : if (bMemoryError)
93 0 : return nullptr;
94 129591 : if (ppsCSVTableList == nullptr)
95 : {
96 : ppsCSVTableList =
97 5 : static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
98 5 : if (ppsCSVTableList == nullptr)
99 0 : return nullptr;
100 5 : CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
101 : }
102 :
103 : /* -------------------------------------------------------------------- */
104 : /* Is the table already in the list. */
105 : /* -------------------------------------------------------------------- */
106 1000290 : for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
107 870703 : psTable = psTable->psNext)
108 : {
109 1000260 : if (EQUAL(psTable->pszFilename, pszFilename))
110 : {
111 : /*
112 : * Eventually we should consider promoting to the front of
113 : * the list to accelerate frequently accessed tables.
114 : */
115 129561 : return psTable;
116 : }
117 : }
118 :
119 : /* -------------------------------------------------------------------- */
120 : /* If not, try to open it. */
121 : /* -------------------------------------------------------------------- */
122 30 : VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
123 30 : if (fp == nullptr)
124 0 : return nullptr;
125 :
126 : /* -------------------------------------------------------------------- */
127 : /* Create an information structure about this table, and add to */
128 : /* the front of the list. */
129 : /* -------------------------------------------------------------------- */
130 : CSVTable *const psTable =
131 30 : static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
132 30 : if (psTable == nullptr)
133 : {
134 0 : VSIFCloseL(fp);
135 0 : return nullptr;
136 : }
137 :
138 30 : psTable->fp = fp;
139 30 : psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
140 30 : if (psTable->pszFilename == nullptr)
141 : {
142 0 : VSIFree(psTable);
143 0 : VSIFCloseL(fp);
144 0 : return nullptr;
145 : }
146 30 : psTable->bNonUniqueKey = false; // As far as we know now.
147 30 : psTable->psNext = *ppsCSVTableList;
148 :
149 30 : *ppsCSVTableList = psTable;
150 :
151 : /* -------------------------------------------------------------------- */
152 : /* Read the table header record containing the field names. */
153 : /* -------------------------------------------------------------------- */
154 30 : psTable->papszFieldNames = CSVReadParseLineL(fp);
155 30 : psTable->nFields = CSLCount(psTable->papszFieldNames);
156 30 : psTable->panFieldNamesLength =
157 30 : static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
158 30 : for (int i = 0;
159 185 : i < psTable->nFields &&
160 : /* null-pointer check to avoid a false positive from CLang S.A. */
161 155 : psTable->papszFieldNames != nullptr;
162 : i++)
163 : {
164 155 : psTable->panFieldNamesLength[i] =
165 155 : static_cast<int>(strlen(psTable->papszFieldNames[i]));
166 : }
167 :
168 30 : return psTable;
169 : }
170 :
171 : /************************************************************************/
172 : /* CSVDeaccess() */
173 : /************************************************************************/
174 :
175 945 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
176 : const char *pszFilename)
177 :
178 : {
179 945 : if (ppsCSVTableList == nullptr)
180 936 : return;
181 :
182 : /* -------------------------------------------------------------------- */
183 : /* A NULL means deaccess all tables. */
184 : /* -------------------------------------------------------------------- */
185 9 : if (pszFilename == nullptr)
186 : {
187 9 : while (*ppsCSVTableList != nullptr)
188 5 : CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
189 5 : (*ppsCSVTableList)->pszFilename);
190 :
191 4 : return;
192 : }
193 :
194 : /* -------------------------------------------------------------------- */
195 : /* Find this table. */
196 : /* -------------------------------------------------------------------- */
197 5 : CSVTable *psLast = nullptr;
198 5 : CSVTable *psTable = *ppsCSVTableList;
199 5 : for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
200 0 : psTable = psTable->psNext)
201 : {
202 0 : psLast = psTable;
203 : }
204 :
205 5 : if (psTable == nullptr)
206 : {
207 0 : if (bCanUseTLS)
208 0 : CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
209 0 : return;
210 : }
211 :
212 : /* -------------------------------------------------------------------- */
213 : /* Remove the link from the list. */
214 : /* -------------------------------------------------------------------- */
215 5 : if (psLast != nullptr)
216 0 : psLast->psNext = psTable->psNext;
217 : else
218 5 : *ppsCSVTableList = psTable->psNext;
219 :
220 : /* -------------------------------------------------------------------- */
221 : /* Free the table. */
222 : /* -------------------------------------------------------------------- */
223 5 : if (psTable->fp != nullptr)
224 0 : VSIFCloseL(psTable->fp);
225 :
226 5 : CSLDestroy(psTable->papszFieldNames);
227 5 : CPLFree(psTable->panFieldNamesLength);
228 5 : CSLDestroy(psTable->papszRecFields);
229 5 : CPLFree(psTable->pszFilename);
230 5 : CPLFree(psTable->panLineIndex);
231 5 : CPLFree(psTable->pszRawData);
232 5 : CPLFree(psTable->papszLines);
233 :
234 5 : CPLFree(psTable);
235 :
236 5 : if (bCanUseTLS)
237 5 : CPLReadLine(nullptr);
238 : }
239 :
240 938 : void CSVDeaccess(const char *pszFilename)
241 : {
242 : /* -------------------------------------------------------------------- */
243 : /* Fetch the table, and allocate the thread-local pointer to it */
244 : /* if there isn't already one. */
245 : /* -------------------------------------------------------------------- */
246 938 : int bMemoryError = FALSE;
247 : CSVTable **ppsCSVTableList =
248 938 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
249 :
250 938 : CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
251 938 : }
252 :
253 : /************************************************************************/
254 : /* CSVSplitLine() */
255 : /* */
256 : /* Tokenize a CSV line into fields in the form of a string */
257 : /* list. This is used instead of the CPLTokenizeString() */
258 : /* because it provides correct CSV escaping and quoting */
259 : /* semantics. */
260 : /************************************************************************/
261 :
262 115950 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
263 : bool bKeepLeadingAndClosingQuotes,
264 : bool bMergeDelimiter)
265 :
266 : {
267 231900 : CPLStringList aosRetList;
268 115950 : if (pszString == nullptr)
269 0 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
270 :
271 115950 : char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
272 115950 : int nTokenMax = 10;
273 115950 : const size_t nDelimiterLength = strlen(pszDelimiter);
274 :
275 115950 : const char *pszIter = pszString;
276 667368 : while (*pszIter != '\0')
277 : {
278 551418 : bool bInString = false;
279 :
280 551418 : int nTokenLen = 0;
281 :
282 : // Try to find the next delimiter, marking end of token.
283 4633940 : do
284 : {
285 : // End if this is a delimiter skip it and break.
286 5185360 : if (!bInString &&
287 2706880 : strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
288 : {
289 435788 : pszIter += nDelimiterLength;
290 435788 : if (bMergeDelimiter)
291 : {
292 9 : while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
293 : 0)
294 5 : pszIter += nDelimiterLength;
295 : }
296 435788 : break;
297 : }
298 :
299 4749570 : if (*pszIter == '"')
300 : {
301 394207 : if (!bInString && nTokenLen > 0)
302 : {
303 : // do not treat in a special way double quotes that appear
304 : // in the middle of a field (similarly to OpenOffice)
305 : // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
306 : }
307 394122 : else if (!bInString || pszIter[1] != '"')
308 : {
309 393456 : bInString = !bInString;
310 393456 : if (!bKeepLeadingAndClosingQuotes)
311 393422 : continue;
312 : }
313 : else // Doubled quotes in string resolve to one quote.
314 : {
315 666 : pszIter++;
316 : }
317 : }
318 :
319 4356140 : if (nTokenLen >= nTokenMax - 2)
320 : {
321 122636 : nTokenMax = nTokenMax * 2 + 10;
322 122636 : pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
323 : }
324 :
325 4356140 : pszToken[nTokenLen] = *pszIter;
326 4356140 : nTokenLen++;
327 4749570 : } while (*(++pszIter) != '\0');
328 :
329 551418 : pszToken[nTokenLen] = '\0';
330 551418 : aosRetList.AddString(pszToken);
331 :
332 : // If the last token is an empty token, then we have to catch
333 : // it now, otherwise we won't reenter the loop and it will be lost.
334 551418 : if (*pszIter == '\0' &&
335 115910 : pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
336 115910 : strncmp(pszIter - nDelimiterLength, pszDelimiter,
337 : nDelimiterLength) == 0)
338 : {
339 280 : aosRetList.AddString("");
340 : }
341 : }
342 :
343 115950 : CPLFree(pszToken);
344 :
345 115950 : if (aosRetList.Count() == 0)
346 40 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
347 : else
348 115910 : return aosRetList.StealList();
349 : }
350 :
351 : /************************************************************************/
352 : /* CSVFindNextLine() */
353 : /* */
354 : /* Find the start of the next line, while at the same time zero */
355 : /* terminating this line. Take into account that there may be */
356 : /* newline indicators within quoted strings, and that quotes */
357 : /* can be escaped with a backslash. */
358 : /************************************************************************/
359 :
/**
 * Nul-terminate the line starting at pszThisLine and locate the next one.
 *
 * The line ends at the first CR or LF met while an even number of double
 * quotes has been seen; a double quote preceded by a backslash is not
 * counted. Every consecutive CR/LF character found at that point is
 * overwritten with '\0'.
 *
 * @param pszThisLine start of the current line, in a writable nul-terminated
 *                    buffer.
 * @return pointer to the first character after the line terminator(s), or
 *         nullptr when the end of the buffer has been reached.
 */
static char *CSVFindNextLine(char *pszThisLine)
{
    char *pszCur = pszThisLine;

    // Only the parity of the number of quotes seen so far matters.
    bool bInsideQuotes = false;
    while (*pszCur != '\0')
    {
        const char ch = *pszCur;
        if (ch == '\"' && (pszCur == pszThisLine || pszCur[-1] != '\\'))
            bInsideQuotes = !bInsideQuotes;
        else if ((ch == 10 || ch == 13) && !bInsideQuotes)
            break;
        ++pszCur;
    }

    // Wipe the whole run of line terminators.
    while (*pszCur == 10 || *pszCur == 13)
    {
        *pszCur = '\0';
        ++pszCur;
    }

    if (*pszCur == '\0')
        return nullptr;
    return pszCur;
}
383 :
384 : /************************************************************************/
385 : /* CSVIngest() */
386 : /* */
387 : /* Load entire file into memory and setup index if possible. */
388 : /************************************************************************/
389 :
390 : // TODO(schwehr): Clean up all the casting in CSVIngest.
391 60312 : static void CSVIngest(CSVTable *psTable)
392 :
393 : {
394 60312 : if (psTable->pszRawData != nullptr)
395 60282 : return;
396 :
397 : /* -------------------------------------------------------------------- */
398 : /* Ingest whole file. */
399 : /* -------------------------------------------------------------------- */
400 30 : if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
401 : {
402 0 : CPLError(CE_Failure, CPLE_FileIO,
403 : "Failed using seek end and tell to get file length: %s",
404 : psTable->pszFilename);
405 0 : return;
406 : }
407 30 : const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
408 30 : if (static_cast<long>(nFileLen) == -1)
409 : {
410 0 : CPLError(CE_Failure, CPLE_FileIO,
411 : "Failed using seek end and tell to get file length: %s",
412 : psTable->pszFilename);
413 0 : return;
414 : }
415 30 : VSIRewindL(psTable->fp);
416 :
417 30 : psTable->pszRawData = static_cast<char *>(
418 30 : VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
419 30 : if (psTable->pszRawData == nullptr)
420 0 : return;
421 30 : if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
422 30 : psTable->fp) != static_cast<size_t>(nFileLen))
423 : {
424 0 : CPLFree(psTable->pszRawData);
425 0 : psTable->pszRawData = nullptr;
426 :
427 0 : CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
428 : psTable->pszFilename);
429 0 : return;
430 : }
431 :
432 30 : psTable->pszRawData[nFileLen] = '\0';
433 :
434 : /* -------------------------------------------------------------------- */
435 : /* Get count of newlines so we can allocate line array. */
436 : /* -------------------------------------------------------------------- */
437 30 : int nMaxLineCount = 0;
438 279158 : for (int i = 0; i < static_cast<int>(nFileLen); i++)
439 : {
440 279128 : if (psTable->pszRawData[i] == 10)
441 6329 : nMaxLineCount++;
442 : }
443 :
444 30 : psTable->papszLines =
445 30 : static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
446 30 : if (psTable->papszLines == nullptr)
447 0 : return;
448 :
449 : /* -------------------------------------------------------------------- */
450 : /* Build a list of record pointers into the raw data buffer */
451 : /* based on line terminators. Zero terminate the line */
452 : /* strings. */
453 : /* -------------------------------------------------------------------- */
454 : /* skip header line */
455 30 : char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
456 :
457 30 : int iLine = 0;
458 6329 : while (pszThisLine != nullptr && iLine < nMaxLineCount)
459 : {
460 6299 : if (pszThisLine[0] != '#')
461 6288 : psTable->papszLines[iLine++] = pszThisLine;
462 6299 : pszThisLine = CSVFindNextLine(pszThisLine);
463 : }
464 :
465 30 : psTable->nLineCount = iLine;
466 :
467 : /* -------------------------------------------------------------------- */
468 : /* Allocate and populate index array. Ensure they are in */
469 : /* ascending order so that binary searches can be done on the */
470 : /* array. */
471 : /* -------------------------------------------------------------------- */
472 30 : psTable->panLineIndex = static_cast<int *>(
473 30 : VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
474 30 : if (psTable->panLineIndex == nullptr)
475 0 : return;
476 :
477 6232 : for (int i = 0; i < psTable->nLineCount; i++)
478 : {
479 6204 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
480 :
481 6204 : if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
482 : {
483 2 : CPLFree(psTable->panLineIndex);
484 2 : psTable->panLineIndex = nullptr;
485 2 : break;
486 : }
487 : }
488 :
489 30 : psTable->iLastLine = -1;
490 :
491 : /* -------------------------------------------------------------------- */
492 : /* We should never need the file handle against, so close it. */
493 : /* -------------------------------------------------------------------- */
494 30 : VSIFCloseL(psTable->fp);
495 30 : psTable->fp = nullptr;
496 : }
497 :
498 60312 : static void CSVIngest(const char *pszFilename)
499 :
500 : {
501 60312 : CSVTable *psTable = CSVAccess(pszFilename);
502 60312 : if (psTable == nullptr)
503 : {
504 0 : CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
505 : pszFilename);
506 0 : return;
507 : }
508 60312 : CSVIngest(psTable);
509 : }
510 :
511 : /************************************************************************/
512 : /* CSVDetectSeperator() */
513 : /************************************************************************/
514 :
515 : /** Detect which field separator is used.
516 : *
517 : * Currently, it can detect comma, semicolon, space, tabulation or pipe.
518 : * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
519 : * most occurrences will be selected (and a warning emitted).
520 : * If no separator found, comma will be considered as the separator.
521 : *
522 : * @return ',', ';', ' ', tabulation character or '|'.
523 : */
524 581 : char CSVDetectSeperator(const char *pszLine)
525 : {
526 581 : bool bInString = false;
527 581 : int nCountComma = 0;
528 581 : int nCountSemicolon = 0;
529 581 : int nCountTab = 0;
530 581 : int nCountPipe = 0;
531 581 : int nCountSpace = 0;
532 :
533 26359 : for (; *pszLine != '\0'; pszLine++)
534 : {
535 25778 : if (!bInString && *pszLine == ',')
536 : {
537 2075 : nCountComma++;
538 : }
539 23703 : else if (!bInString && *pszLine == ';')
540 : {
541 10 : nCountSemicolon++;
542 : }
543 23693 : else if (!bInString && *pszLine == '\t')
544 : {
545 29 : nCountTab++;
546 : }
547 23664 : else if (!bInString && *pszLine == '|')
548 : {
549 9 : nCountPipe++;
550 : }
551 23655 : else if (!bInString && *pszLine == ' ')
552 : {
553 290 : nCountSpace++;
554 : }
555 23365 : else if (*pszLine == '"')
556 : {
557 519 : if (!bInString || pszLine[1] != '"')
558 : {
559 519 : bInString = !bInString;
560 519 : continue;
561 : }
562 : else /* doubled quotes in string resolve to one quote */
563 : {
564 0 : pszLine++;
565 : }
566 : }
567 : }
568 :
569 : const int nMaxCountExceptSpace =
570 : std::max(std::max(nCountComma, nCountSemicolon),
571 581 : std::max(nCountTab, nCountPipe));
572 581 : char chDelimiter = ',';
573 581 : if (nMaxCountExceptSpace == 0)
574 : {
575 35 : if (nCountSpace > 0)
576 9 : chDelimiter = ' ';
577 : }
578 : else
579 : {
580 546 : bool bWarn = false;
581 546 : if (nCountComma == nMaxCountExceptSpace)
582 : {
583 530 : chDelimiter = ',';
584 530 : bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
585 : }
586 16 : else if (nCountSemicolon == nMaxCountExceptSpace)
587 : {
588 5 : chDelimiter = ';';
589 5 : bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
590 : }
591 11 : else if (nCountTab == nMaxCountExceptSpace)
592 : {
593 6 : chDelimiter = '\t';
594 6 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
595 : }
596 : else /* if( nCountPipe == nMaxCountExceptSpace ) */
597 : {
598 5 : chDelimiter = '|';
599 5 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
600 : }
601 546 : if (bWarn)
602 : {
603 6 : CPLError(CE_Warning, CPLE_AppDefined,
604 : "Selecting '%c' as CSV field separator, but "
605 : "other candidate separator(s) have been found.",
606 : chDelimiter);
607 : }
608 : }
609 :
610 581 : return chDelimiter;
611 : }
612 :
613 : /************************************************************************/
614 : /* CSVReadParseLineGeneric() */
615 : /* */
616 : /* Read one line, and return split into fields. The return */
617 : /* result is a stringlist, in the sense of the CSL functions. */
618 : /************************************************************************/
619 :
620 : static char **
621 57273 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
622 : size_t nMaxLineSize, const char *pszDelimiter,
623 : bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
624 : bool bMergeDelimiter, bool bSkipBOM)
625 : {
626 57273 : const char *pszLine = pfnReadLine(fp, nMaxLineSize);
627 57273 : if (pszLine == nullptr)
628 1361 : return nullptr;
629 :
630 55912 : if (bSkipBOM)
631 : {
632 : // Skip BOM.
633 55545 : const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
634 55545 : if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
635 4 : pszLine += 3;
636 : }
637 :
638 : // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
639 55912 : if (!bHonourStrings)
640 : {
641 2 : return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
642 : }
643 :
644 : // If there are no quotes, then this is the simple case.
645 : // Parse, and return tokens.
646 55910 : if (strchr(pszLine, '\"') == nullptr)
647 48293 : return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
648 48293 : bMergeDelimiter);
649 :
650 : try
651 : {
652 : // We must now count the quotes in our working string, and as
653 : // long as it is odd, keep adding new lines.
654 7617 : std::string osWorkLine(pszLine);
655 :
656 7617 : size_t i = 0;
657 7617 : int nCount = 0;
658 :
659 : while (true)
660 : {
661 791978 : for (; i < osWorkLine.size(); i++)
662 : {
663 783606 : if (osWorkLine[i] == '\"')
664 59591 : nCount++;
665 : }
666 :
667 8372 : if (nCount % 2 == 0)
668 7616 : break;
669 :
670 756 : pszLine = pfnReadLine(fp, nMaxLineSize);
671 756 : if (pszLine == nullptr)
672 1 : break;
673 :
674 755 : osWorkLine.append("\n");
675 755 : osWorkLine.append(pszLine);
676 : }
677 :
678 : char **papszReturn =
679 7617 : CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
680 : bKeepLeadingAndClosingQuotes, bMergeDelimiter);
681 :
682 7617 : return papszReturn;
683 : }
684 0 : catch (const std::exception &e)
685 : {
686 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
687 0 : return nullptr;
688 : }
689 : }
690 :
691 : /************************************************************************/
692 : /* CSVReadParseLine() */
693 : /* */
694 : /* Read one line, and return split into fields. The return */
695 : /* result is a stringlist, in the sense of the CSL functions. */
696 : /* */
697 : /* Deprecated. Replaced by CSVReadParseLineL(). */
698 : /************************************************************************/
699 :
700 0 : char **CSVReadParseLine(FILE *fp)
701 : {
702 0 : return CSVReadParseLine2(fp, ',');
703 : }
704 :
705 0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
706 : {
707 0 : return CPLReadLine(static_cast<FILE *>(fp));
708 : }
709 :
710 0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
711 : {
712 0 : CPLAssert(fp != nullptr);
713 0 : if (fp == nullptr)
714 0 : return nullptr;
715 :
716 0 : char szDelimiter[2] = {chDelimiter, 0};
717 0 : return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
718 : 0, // nMaxLineSize,
719 : szDelimiter,
720 : true, // bHonourStrings
721 : false, // bKeepLeadingAndClosingQuotes
722 : false, // bMergeDelimiter
723 0 : true /* bSkipBOM */);
724 : }
725 :
726 : /************************************************************************/
727 : /* CSVReadParseLineL() */
728 : /* */
729 : /* Read one line, and return split into fields. The return */
730 : /* result is a stringlist, in the sense of the CSL functions. */
731 : /* */
732 : /* Replaces CSVReadParseLine(). These functions use the VSI */
733 : /* layer to allow reading from other file containers. */
734 : /************************************************************************/
735 :
736 3910 : char **CSVReadParseLineL(VSILFILE *fp)
737 : {
738 3910 : return CSVReadParseLine2L(fp, ',');
739 : }
740 :
741 3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
742 :
743 : {
744 3910 : CPLAssert(fp != nullptr);
745 3910 : if (fp == nullptr)
746 0 : return nullptr;
747 :
748 3910 : char szDelimiter[2] = {chDelimiter, 0};
749 3910 : return CSVReadParseLine3L(fp,
750 : 0, // nMaxLineSize
751 : szDelimiter,
752 : true, // bHonourStrings
753 : false, // bKeepLeadingAndClosingQuotes
754 : false, // bMergeDelimiter
755 3910 : true /* bSkipBOM */);
756 : }
757 :
758 : /************************************************************************/
759 : /* ReadLineLargeFile() */
760 : /************************************************************************/
761 :
762 58029 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
763 : {
764 58029 : int nBufLength = 0;
765 58029 : return CPLReadLine3L(static_cast<VSILFILE *>(fp),
766 : nMaxLineSize == 0 ? -1
767 : : static_cast<int>(nMaxLineSize),
768 116058 : &nBufLength, nullptr);
769 : }
770 :
771 : /************************************************************************/
772 : /* CSVReadParseLine3L() */
773 : /* */
774 : /* Read one line, and return split into fields. The return */
775 : /* result is a stringlist, in the sense of the CSL functions. */
776 : /************************************************************************/
777 :
778 : /** Read one line, and return split into fields.
779 : * The return result is a stringlist, in the sense of the CSL functions.
780 : *
781 : * @param fp File handle. Must not be NULL
782 : * @param nMaxLineSize Maximum line size, or 0 for unlimited.
783 : * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
784 : * @param bHonourStrings Should be true, unless double quotes should not be
785 : * considered when separating fields.
786 : * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
787 : * quote characters should be kept.
788 : * @param bMergeDelimiter Whether consecutive delimiters should be considered
789 : * as a single one. Should generally be set to false.
790 : * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
791 : */
792 57273 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
793 : const char *pszDelimiter, bool bHonourStrings,
794 : bool bKeepLeadingAndClosingQuotes,
795 : bool bMergeDelimiter, bool bSkipBOM)
796 :
797 : {
798 57273 : return CSVReadParseLineGeneric(
799 : fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
800 57273 : bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
801 : }
802 :
803 : /************************************************************************/
804 : /* CSVCompare() */
805 : /* */
806 : /* Compare a field to a search value using a particular */
807 : /* criteria. */
808 : /************************************************************************/
809 :
810 609 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
811 : CSVCompareCriteria eCriteria)
812 :
813 : {
814 609 : if (eCriteria == CC_ExactString)
815 : {
816 0 : return (strcmp(pszFieldValue, pszTarget) == 0);
817 : }
818 609 : else if (eCriteria == CC_ApproxString)
819 : {
820 270 : return EQUAL(pszFieldValue, pszTarget);
821 : }
822 339 : else if (eCriteria == CC_Integer)
823 : {
824 638 : return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
825 638 : atoi(pszFieldValue) == atoi(pszTarget));
826 : }
827 :
828 0 : return false;
829 : }
830 :
831 : /************************************************************************/
832 : /* CSVScanLines() */
833 : /* */
834 : /* Read the file scanline for lines where the key field equals */
835 : /* the indicated value with the suggested comparison criteria. */
836 : /* Return the first matching line split into fields. */
837 : /* */
838 : /* Deprecated. Replaced by CSVScanLinesL(). */
839 : /************************************************************************/
840 :
841 0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
842 : CSVCompareCriteria eCriteria)
843 :
844 : {
845 0 : CPLAssert(pszValue != nullptr);
846 0 : CPLAssert(iKeyField >= 0);
847 0 : CPLAssert(fp != nullptr);
848 :
849 0 : bool bSelected = false;
850 0 : const int nTestValue = atoi(pszValue);
851 0 : char **papszFields = nullptr;
852 :
853 0 : while (!bSelected)
854 : {
855 0 : papszFields = CSVReadParseLine(fp);
856 0 : if (papszFields == nullptr)
857 0 : return nullptr;
858 :
859 0 : if (CSLCount(papszFields) < iKeyField + 1)
860 : {
861 : /* not selected */
862 : }
863 0 : else if (eCriteria == CC_Integer &&
864 0 : atoi(papszFields[iKeyField]) == nTestValue)
865 : {
866 0 : bSelected = true;
867 : }
868 : else
869 : {
870 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
871 : }
872 :
873 0 : if (!bSelected)
874 : {
875 0 : CSLDestroy(papszFields);
876 0 : papszFields = nullptr;
877 : }
878 : }
879 :
880 0 : return papszFields;
881 : }
882 :
883 : /************************************************************************/
884 : /* CSVScanLinesL() */
885 : /* */
886 : /* Read the file scanline for lines where the key field equals */
887 : /* the indicated value with the suggested comparison criteria. */
888 : /* Return the first matching line split into fields. */
889 : /************************************************************************/
890 :
891 0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
892 : CSVCompareCriteria eCriteria)
893 :
894 : {
895 0 : CPLAssert(pszValue != nullptr);
896 0 : CPLAssert(iKeyField >= 0);
897 0 : CPLAssert(fp != nullptr);
898 :
899 0 : bool bSelected = false;
900 0 : const int nTestValue = atoi(pszValue);
901 0 : char **papszFields = nullptr;
902 :
903 0 : while (!bSelected)
904 : {
905 0 : papszFields = CSVReadParseLineL(fp);
906 0 : if (papszFields == nullptr)
907 0 : return nullptr;
908 :
909 0 : if (CSLCount(papszFields) < iKeyField + 1)
910 : {
911 : /* not selected */
912 : }
913 0 : else if (eCriteria == CC_Integer &&
914 0 : atoi(papszFields[iKeyField]) == nTestValue)
915 : {
916 0 : bSelected = true;
917 : }
918 : else
919 : {
920 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
921 : }
922 :
923 0 : if (!bSelected)
924 : {
925 0 : CSLDestroy(papszFields);
926 0 : papszFields = nullptr;
927 : }
928 : }
929 :
930 0 : return papszFields;
931 : }
932 :
933 : /************************************************************************/
934 : /* CSVScanLinesIndexed() */
935 : /* */
936 : /* Read the file scanline for lines where the key field equals */
937 : /* the indicated value with the suggested comparison criteria. */
938 : /* Return the first matching line split into fields. */
939 : /************************************************************************/
940 :
941 22 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
942 :
943 : {
944 22 : CPLAssert(psTable->panLineIndex != nullptr);
945 :
946 : /* -------------------------------------------------------------------- */
947 : /* Find target record with binary search. */
948 : /* -------------------------------------------------------------------- */
949 22 : int iTop = psTable->nLineCount - 1;
950 22 : int iBottom = 0;
951 22 : int iResult = -1;
952 :
953 159 : while (iTop >= iBottom)
954 : {
955 159 : const int iMiddle = (iTop + iBottom) / 2;
956 159 : if (psTable->panLineIndex[iMiddle] > nKeyValue)
957 93 : iTop = iMiddle - 1;
958 66 : else if (psTable->panLineIndex[iMiddle] < nKeyValue)
959 44 : iBottom = iMiddle + 1;
960 : else
961 : {
962 22 : iResult = iMiddle;
963 : // if a key is not unique, select the first instance of it.
964 22 : while (iResult > 0 &&
965 22 : psTable->panLineIndex[iResult - 1] == nKeyValue)
966 : {
967 0 : psTable->bNonUniqueKey = true;
968 0 : iResult--;
969 : }
970 22 : break;
971 : }
972 : }
973 :
974 22 : if (iResult == -1)
975 0 : return nullptr;
976 :
977 : /* -------------------------------------------------------------------- */
978 : /* Parse target line, and update iLastLine indicator. */
979 : /* -------------------------------------------------------------------- */
980 22 : psTable->iLastLine = iResult;
981 :
982 22 : return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
983 : }
984 :
985 : /************************************************************************/
986 : /* CSVScanLinesIngested() */
987 : /* */
988 : /* Read the file scanline for lines where the key field equals */
989 : /* the indicated value with the suggested comparison criteria. */
990 : /* Return the first matching line split into fields. */
991 : /************************************************************************/
992 :
993 29 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
994 : const char *pszValue,
995 : CSVCompareCriteria eCriteria)
996 :
997 : {
998 29 : CPLAssert(pszValue != nullptr);
999 29 : CPLAssert(iKeyField >= 0);
1000 :
1001 29 : const int nTestValue = atoi(pszValue);
1002 :
1003 : /* -------------------------------------------------------------------- */
1004 : /* Short cut for indexed files. */
1005 : /* -------------------------------------------------------------------- */
1006 29 : if (iKeyField == 0 && eCriteria == CC_Integer &&
1007 22 : psTable->panLineIndex != nullptr)
1008 22 : return CSVScanLinesIndexed(psTable, nTestValue);
1009 :
1010 : /* -------------------------------------------------------------------- */
1011 : /* Scan from in-core lines. */
1012 : /* -------------------------------------------------------------------- */
1013 7 : char **papszFields = nullptr;
1014 7 : bool bSelected = false;
1015 :
1016 484 : while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
1017 : {
1018 477 : psTable->iLastLine++;
1019 477 : papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
1020 : false, false);
1021 :
1022 477 : if (CSLCount(papszFields) < iKeyField + 1)
1023 : {
1024 : /* not selected */
1025 : }
1026 477 : else if (eCriteria == CC_Integer &&
1027 242 : atoi(papszFields[iKeyField]) == nTestValue)
1028 : {
1029 2 : bSelected = true;
1030 : }
1031 : else
1032 : {
1033 475 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
1034 : }
1035 :
1036 477 : if (!bSelected)
1037 : {
1038 470 : CSLDestroy(papszFields);
1039 470 : papszFields = nullptr;
1040 : }
1041 : }
1042 :
1043 7 : return papszFields;
1044 : }
1045 :
1046 : /************************************************************************/
1047 : /* CSVRewind() */
1048 : /* */
1049 : /* Rewind a CSV file based on a passed in filename. */
1050 : /* This is aimed at being used with CSVGetNextLine(). */
1051 : /************************************************************************/
1052 :
1053 1838 : void CSVRewind(const char *pszFilename)
1054 :
1055 : {
1056 : /* -------------------------------------------------------------------- */
1057 : /* Get access to the table. */
1058 : /* -------------------------------------------------------------------- */
1059 1838 : CPLAssert(pszFilename != nullptr);
1060 :
1061 1838 : CSVTable *const psTable = CSVAccess(pszFilename);
1062 1838 : if (psTable != nullptr)
1063 1838 : psTable->iLastLine = -1;
1064 1838 : }
1065 :
1066 : /************************************************************************/
1067 : /* CSVGetNextLine() */
1068 : /* */
1069 : /* Fetch the next line of a CSV file based on a passed in */
1070 : /* filename. Returns NULL at end of file, or if file is not */
1071 : /* really established. */
1072 : /* This ingests the whole file into memory if not already done. */
1073 : /* When reaching end of file, CSVRewind() may be used to read */
1074 : /* again from the beginning. */
1075 : /************************************************************************/
1076 :
1077 60169 : char **CSVGetNextLine(const char *pszFilename)
1078 :
1079 : {
1080 :
1081 : /* -------------------------------------------------------------------- */
1082 : /* Get access to the table. */
1083 : /* -------------------------------------------------------------------- */
1084 60169 : CPLAssert(pszFilename != nullptr);
1085 :
1086 60169 : CSVTable *const psTable = CSVAccess(pszFilename);
1087 60169 : if (psTable == nullptr)
1088 0 : return nullptr;
1089 :
1090 60169 : CSVIngest(psTable->pszFilename);
1091 :
1092 : /* -------------------------------------------------------------------- */
1093 : /* If we use CSVGetNextLine() we can pretty much assume we have */
1094 : /* a non-unique key. */
1095 : /* -------------------------------------------------------------------- */
1096 60169 : psTable->bNonUniqueKey = true;
1097 :
1098 : /* -------------------------------------------------------------------- */
1099 : /* Do we have a next line available? This only works for */
1100 : /* ingested tables I believe. */
1101 : /* -------------------------------------------------------------------- */
1102 60169 : if (psTable->iLastLine + 1 >= psTable->nLineCount)
1103 628 : return nullptr;
1104 :
1105 59541 : psTable->iLastLine++;
1106 59541 : CSLDestroy(psTable->papszRecFields);
1107 119082 : psTable->papszRecFields = CSVSplitLine(
1108 59541 : psTable->papszLines[psTable->iLastLine], ",", false, false);
1109 :
1110 59541 : return psTable->papszRecFields;
1111 : }
1112 :
1113 : /************************************************************************/
1114 : /* CSVScanFile() */
1115 : /* */
1116 : /* Scan a whole file using criteria similar to above, but also */
1117 : /* taking care of file opening and closing. */
1118 : /************************************************************************/
1119 :
1120 143 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
1121 : const char *pszValue, CSVCompareCriteria eCriteria)
1122 : {
1123 143 : CSVIngest(psTable->pszFilename);
1124 :
1125 : /* -------------------------------------------------------------------- */
1126 : /* Does the current record match the criteria? If so, just */
1127 : /* return it again. */
1128 : /* -------------------------------------------------------------------- */
1129 143 : if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
1130 400 : CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
1131 114 : !psTable->bNonUniqueKey)
1132 : {
1133 114 : return psTable->papszRecFields;
1134 : }
1135 :
1136 : /* -------------------------------------------------------------------- */
1137 : /* Scan the file from the beginning, replacing the ``current */
1138 : /* record'' in our structure with the one that is found. */
1139 : /* -------------------------------------------------------------------- */
1140 29 : psTable->iLastLine = -1;
1141 29 : CSLDestroy(psTable->papszRecFields);
1142 :
1143 29 : if (psTable->pszRawData != nullptr)
1144 29 : psTable->papszRecFields =
1145 29 : CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
1146 : else
1147 : {
1148 0 : VSIRewindL(psTable->fp);
1149 0 : CPLReadLineL(psTable->fp); /* throw away the header line */
1150 :
1151 0 : psTable->papszRecFields =
1152 0 : CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
1153 : }
1154 :
1155 29 : return psTable->papszRecFields;
1156 : }
1157 :
1158 4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
1159 : CSVCompareCriteria eCriteria)
1160 :
1161 : {
1162 : /* -------------------------------------------------------------------- */
1163 : /* Get access to the table. */
1164 : /* -------------------------------------------------------------------- */
1165 4 : CPLAssert(pszFilename != nullptr);
1166 :
1167 4 : if (iKeyField < 0)
1168 0 : return nullptr;
1169 :
1170 4 : CSVTable *const psTable = CSVAccess(pszFilename);
1171 4 : if (psTable == nullptr)
1172 0 : return nullptr;
1173 :
1174 4 : return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
1175 : }
1176 :
1177 : /************************************************************************/
1178 : /* CPLGetFieldId() */
1179 : /* */
1180 : /* Read the first record of a CSV file (rewinding to be sure), */
1181 : /* and find the field with the indicated name. Returns -1 if */
1182 : /* it fails to find the field name. Comparison is case */
1183 : /* insensitive, but otherwise exact. After this function has */
1184 : /* been called the file pointer will be positioned just after */
1185 : /* the first record. */
1186 : /* */
1187 : /* Deprecated. Replaced by CPLGetFieldIdL(). */
1188 : /************************************************************************/
1189 :
1190 0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
1191 :
1192 : {
1193 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1194 :
1195 0 : VSIRewind(fp);
1196 :
1197 0 : char **papszFields = CSVReadParseLine(fp);
1198 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1199 : {
1200 0 : if (EQUAL(papszFields[i], pszFieldName))
1201 : {
1202 0 : CSLDestroy(papszFields);
1203 0 : return i;
1204 : }
1205 : }
1206 :
1207 0 : CSLDestroy(papszFields);
1208 :
1209 0 : return -1;
1210 : }
1211 :
1212 : /************************************************************************/
1213 : /* CPLGetFieldIdL() */
1214 : /* */
1215 : /* Read the first record of a CSV file (rewinding to be sure), */
1216 : /* and find the field with the indicated name. Returns -1 if */
1217 : /* it fails to find the field name. Comparison is case */
1218 : /* insensitive, but otherwise exact. After this function has */
1219 : /* been called the file pointer will be positioned just after */
1220 : /* the first record. */
1221 : /************************************************************************/
1222 :
1223 0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
1224 :
1225 : {
1226 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1227 :
1228 0 : VSIRewindL(fp);
1229 :
1230 0 : char **papszFields = CSVReadParseLineL(fp);
1231 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1232 : {
1233 0 : if (EQUAL(papszFields[i], pszFieldName))
1234 : {
1235 0 : CSLDestroy(papszFields);
1236 0 : return i;
1237 : }
1238 : }
1239 :
1240 0 : CSLDestroy(papszFields);
1241 :
1242 0 : return -1;
1243 : }
1244 :
1245 : /************************************************************************/
1246 : /* CSVGetFileFieldId() */
1247 : /* */
1248 : /* Same as CPLGetFieldId(), except that we get the file based */
1249 : /* on filename, rather than having an existing handle. */
1250 : /************************************************************************/
1251 :
1252 7407 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
1253 :
1254 : {
1255 : /* -------------------------------------------------------------------- */
1256 : /* Find the requested field. */
1257 : /* -------------------------------------------------------------------- */
1258 7407 : const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
1259 18637 : for (int i = 0; psTable->papszFieldNames != nullptr &&
1260 18637 : psTable->papszFieldNames[i] != nullptr;
1261 : i++)
1262 : {
1263 18637 : if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
1264 10401 : EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
1265 : {
1266 7407 : return i;
1267 : }
1268 : }
1269 :
1270 0 : return -1;
1271 : }
1272 :
1273 7129 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
1274 :
1275 : {
1276 : /* -------------------------------------------------------------------- */
1277 : /* Get access to the table. */
1278 : /* -------------------------------------------------------------------- */
1279 7129 : CPLAssert(pszFilename != nullptr);
1280 :
1281 7129 : CSVTable *const psTable = CSVAccess(pszFilename);
1282 7129 : if (psTable == nullptr)
1283 0 : return -1;
1284 7129 : return CSVGetFileFieldId(psTable, pszFieldName);
1285 : }
1286 :
1287 : /************************************************************************/
1288 : /* CSVScanFileByName() */
1289 : /* */
1290 : /* Same as CSVScanFile(), but using a field name instead of a */
1291 : /* field number. */
1292 : /************************************************************************/
1293 :
1294 4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
1295 : const char *pszValue, CSVCompareCriteria eCriteria)
1296 :
1297 : {
1298 4 : const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
1299 4 : if (iKeyField == -1)
1300 0 : return nullptr;
1301 :
1302 4 : return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
1303 : }
1304 :
1305 : /************************************************************************/
1306 : /* CSVGetField() */
1307 : /* */
1308 : /* The all-in-one function to fetch a particular field value */
1309 : /* from a CSV file. Note this function will return an empty */
1310 : /* string, rather than NULL if it fails to find the desired */
1311 : /* value for some reason. The caller can't establish that the */
1312 : /* fetch failed. */
1313 : /************************************************************************/
1314 :
1315 139 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
1316 : const char *pszKeyFieldValue,
1317 : CSVCompareCriteria eCriteria,
1318 : const char *pszTargetField)
1319 :
1320 : {
1321 : /* -------------------------------------------------------------------- */
1322 : /* Find the table. */
1323 : /* -------------------------------------------------------------------- */
1324 139 : CSVTable *const psTable = CSVAccess(pszFilename);
1325 139 : if (psTable == nullptr)
1326 0 : return "";
1327 :
1328 139 : const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
1329 139 : if (iKeyField == -1)
1330 0 : return "";
1331 :
1332 : /* -------------------------------------------------------------------- */
1333 : /* Find the correct record. */
1334 : /* -------------------------------------------------------------------- */
1335 : char **papszRecord =
1336 139 : CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
1337 139 : if (papszRecord == nullptr)
1338 0 : return "";
1339 :
1340 : /* -------------------------------------------------------------------- */
1341 : /* Figure out which field we want out of this. */
1342 : /* -------------------------------------------------------------------- */
1343 139 : const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
1344 139 : if (iTargetField < 0)
1345 0 : return "";
1346 :
1347 386 : for (int i = 0; papszRecord[i] != nullptr; ++i)
1348 : {
1349 386 : if (i == iTargetField)
1350 139 : return papszRecord[iTargetField];
1351 : }
1352 0 : return "";
1353 : }
1354 :
1355 : /************************************************************************/
1356 : /* GDALDefaultCSVFilename() */
1357 : /************************************************************************/
1358 :
1359 : typedef struct
1360 : {
1361 : char szPath[512];
1362 : bool bCSVFinderInitialized;
1363 : } DefaultCSVFileNameTLS;
1364 :
1365 2482 : const char *GDALDefaultCSVFilename(const char *pszBasename)
1366 :
1367 : {
1368 : /* -------------------------------------------------------------------- */
1369 : /* Do we already have this file accessed? If so, just return */
1370 : /* the existing path without any further probing. */
1371 : /* -------------------------------------------------------------------- */
1372 2482 : int bMemoryError = FALSE;
1373 : CSVTable **ppsCSVTableList =
1374 2482 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
1375 2482 : if (ppsCSVTableList != nullptr)
1376 : {
1377 2476 : const size_t nBasenameLen = strlen(pszBasename);
1378 :
1379 23033 : for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
1380 20557 : psTable = psTable->psNext)
1381 : {
1382 22481 : const size_t nFullLen = strlen(psTable->pszFilename);
1383 :
1384 22481 : if (nFullLen > nBasenameLen &&
1385 22481 : strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
1386 1924 : pszBasename) == 0 &&
1387 1924 : strchr("/\\",
1388 1924 : psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
1389 : nullptr)
1390 : {
1391 1924 : return psTable->pszFilename;
1392 : }
1393 : }
1394 : }
1395 :
1396 : /* -------------------------------------------------------------------- */
1397 : /* Otherwise we need to look harder for it. */
1398 : /* -------------------------------------------------------------------- */
1399 : DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1400 558 : CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
1401 558 : if (pTLSData == nullptr && !bMemoryError)
1402 : {
1403 : pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1404 5 : VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
1405 5 : if (pTLSData)
1406 5 : CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
1407 : }
1408 558 : if (pTLSData == nullptr)
1409 0 : return "/not_existing_dir/not_existing_path";
1410 :
1411 558 : const char *pszResult = CPLFindFile("gdal", pszBasename);
1412 :
1413 558 : if (pszResult != nullptr)
1414 43 : return pszResult;
1415 :
1416 515 : if (!pTLSData->bCSVFinderInitialized)
1417 : {
1418 2 : pTLSData->bCSVFinderInitialized = true;
1419 :
1420 2 : if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
1421 2 : CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
1422 :
1423 2 : pszResult = CPLFindFile("gdal", pszBasename);
1424 :
1425 2 : if (pszResult != nullptr)
1426 0 : return pszResult;
1427 : }
1428 :
1429 : // For systems like sandboxes that do not allow other checks.
1430 515 : CPLDebug("CPL_CSV",
1431 : "Failed to find file in GDALDefaultCSVFilename. "
1432 : "Returning original basename: %s",
1433 : pszBasename);
1434 515 : CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
1435 515 : return pTLSData->szPath;
1436 : }
1437 :
1438 : /************************************************************************/
1439 : /* CSVFilename() */
1440 : /* */
1441 : /* Return the full path to a particular CSV file. This will */
1442 : /* eventually be something the application can override. */
1443 : /************************************************************************/
1444 :
1445 : CPL_C_START
1446 : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
1447 : CPL_C_END
1448 :
1449 2482 : const char *CSVFilename(const char *pszBasename)
1450 :
1451 : {
1452 2482 : if (pfnCSVFilenameHook == nullptr)
1453 2482 : return GDALDefaultCSVFilename(pszBasename);
1454 :
1455 0 : return pfnCSVFilenameHook(pszBasename);
1456 : }
1457 :
1458 : /************************************************************************/
1459 : /* SetCSVFilenameHook() */
1460 : /* */
1461 : /* Applications can use this to set a function that will */
1462 : /* massage CSV filenames. */
1463 : /************************************************************************/
1464 :
1465 : /**
1466 : * Override CSV file search method.
1467 : *
1468 : * @param pfnNewHook The pointer to a function which will return the
1469 : * full path for a given filename.
1470 : *
1471 :
1472 : This function allows an application to override how the GTIFGetDefn()
1473 : and related function find the CSV (Comma Separated Value) values
1474 : required. The pfnHook argument should be a pointer to a function that
1475 : will take in a CSV filename and return a full path to the file. The
1476 : returned string should be to an internal static buffer so that the
1477 : caller doesn't have to free the result.
1478 :
1479 : <b>Example:</b><br>
1480 :
1481 : The listgeo utility uses the following override function if the user
1482 : specified a CSV file directory with the -t commandline switch (argument
1483 : put into CSVDirName). <p>
1484 :
1485 : <pre>
1486 :
1487 : ...
1488 : SetCSVFilenameHook( CSVFileOverride );
1489 : ...
1490 :
1491 : static const char *CSVFileOverride( const char * pszInput )
1492 :
1493 : {
1494 : static char szPath[1024] = {};
1495 :
1496 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1497 :
1498 : return szPath;
1499 : }
1500 : </pre>
1501 :
1502 : */
1503 :
1504 : CPL_C_START
1505 0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
1506 :
1507 : {
1508 0 : pfnCSVFilenameHook = pfnNewHook;
1509 0 : }
1510 :
1511 : CPL_C_END
|