Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: CSV (comma separated value) file access.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 1999, Frank Warmerdam
9 : * Copyright (c) 2009-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_port.h"
31 : #include "cpl_csv.h"
32 :
33 : #include <cstddef>
34 : #include <cstdlib>
35 : #include <cstring>
36 : #if HAVE_FCNTL_H
37 : #include <fcntl.h>
38 : #endif
39 :
40 : #include "cpl_conv.h"
41 : #include "cpl_error.h"
42 : #include "cpl_multiproc.h"
43 : #include "gdal_csv.h"
44 :
45 : #include <algorithm>
46 :
47 : /* ==================================================================== */
48 : /* The CSVTable is a persistent set of info about an open CSV */
49 : /* table. While it doesn't currently maintain a record index, */
50 : /* or in-memory copy of the table, it could be changed to do so */
51 : /* in the future. */
52 : /* ==================================================================== */
53 : typedef struct ctb
54 : {
55 : VSILFILE *fp;
56 : struct ctb *psNext;
57 : char *pszFilename;
58 : char **papszFieldNames;
59 : int *panFieldNamesLength;
60 : char **papszRecFields;
61 : int nFields;
62 : int iLastLine;
63 : bool bNonUniqueKey;
64 :
65 : /* Cache for whole file */
66 : int nLineCount;
67 : char **papszLines;
68 : int *panLineIndex;
69 : char *pszRawData;
70 : } CSVTable;
71 :
72 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
73 : const char *pszFilename);
74 :
75 : /************************************************************************/
76 : /* CSVFreeTLS() */
77 : /************************************************************************/
78 2 : static void CSVFreeTLS(void *pData)
79 : {
80 2 : CSVDeaccessInternal(static_cast<CSVTable **>(pData), false, nullptr);
81 2 : CPLFree(pData);
82 2 : }
83 :
84 : /* It would likely be better to share this list between threads, but
85 : that will require some rework. */
86 :
87 : /************************************************************************/
88 : /* CSVAccess() */
89 : /* */
90 : /* This function will fetch a handle to the requested table. */
91 : /* If not found in the ``open table list'' the table will be */
92 : /* opened and added to the list. Eventually this function may */
93 : /* become public with an abstracted return type so that */
94 : /* applications can set options about the table. For now this */
95 : /* isn't done. */
96 : /************************************************************************/
97 :
98 115981 : static CSVTable *CSVAccess(const char *pszFilename)
99 :
100 : {
101 : /* -------------------------------------------------------------------- */
102 : /* Fetch the table, and allocate the thread-local pointer to it */
103 : /* if there isn't already one. */
104 : /* -------------------------------------------------------------------- */
105 115981 : int bMemoryError = FALSE;
106 : CSVTable **ppsCSVTableList =
107 115981 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
108 115981 : if (bMemoryError)
109 0 : return nullptr;
110 115981 : if (ppsCSVTableList == nullptr)
111 : {
112 : ppsCSVTableList =
113 5 : static_cast<CSVTable **>(VSI_CALLOC_VERBOSE(1, sizeof(CSVTable *)));
114 5 : if (ppsCSVTableList == nullptr)
115 0 : return nullptr;
116 5 : CPLSetTLSWithFreeFunc(CTLS_CSVTABLEPTR, ppsCSVTableList, CSVFreeTLS);
117 : }
118 :
119 : /* -------------------------------------------------------------------- */
120 : /* Is the table already in the list. */
121 : /* -------------------------------------------------------------------- */
122 928290 : for (CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
123 812309 : psTable = psTable->psNext)
124 : {
125 928260 : if (EQUAL(psTable->pszFilename, pszFilename))
126 : {
127 : /*
128 : * Eventually we should consider promoting to the front of
129 : * the list to accelerate frequently accessed tables.
130 : */
131 115951 : return psTable;
132 : }
133 : }
134 :
135 : /* -------------------------------------------------------------------- */
136 : /* If not, try to open it. */
137 : /* -------------------------------------------------------------------- */
138 30 : VSILFILE *fp = VSIFOpenL(pszFilename, "rb");
139 30 : if (fp == nullptr)
140 0 : return nullptr;
141 :
142 : /* -------------------------------------------------------------------- */
143 : /* Create an information structure about this table, and add to */
144 : /* the front of the list. */
145 : /* -------------------------------------------------------------------- */
146 : CSVTable *const psTable =
147 30 : static_cast<CSVTable *>(VSI_CALLOC_VERBOSE(sizeof(CSVTable), 1));
148 30 : if (psTable == nullptr)
149 : {
150 0 : VSIFCloseL(fp);
151 0 : return nullptr;
152 : }
153 :
154 30 : psTable->fp = fp;
155 30 : psTable->pszFilename = VSI_STRDUP_VERBOSE(pszFilename);
156 30 : if (psTable->pszFilename == nullptr)
157 : {
158 0 : VSIFree(psTable);
159 0 : VSIFCloseL(fp);
160 0 : return nullptr;
161 : }
162 30 : psTable->bNonUniqueKey = false; // As far as we know now.
163 30 : psTable->psNext = *ppsCSVTableList;
164 :
165 30 : *ppsCSVTableList = psTable;
166 :
167 : /* -------------------------------------------------------------------- */
168 : /* Read the table header record containing the field names. */
169 : /* -------------------------------------------------------------------- */
170 30 : psTable->papszFieldNames = CSVReadParseLineL(fp);
171 30 : psTable->nFields = CSLCount(psTable->papszFieldNames);
172 30 : psTable->panFieldNamesLength =
173 30 : static_cast<int *>(CPLMalloc(sizeof(int) * psTable->nFields));
174 30 : for (int i = 0;
175 185 : i < psTable->nFields &&
176 : /* null-pointer check to avoid a false positive from CLang S.A. */
177 155 : psTable->papszFieldNames != nullptr;
178 : i++)
179 : {
180 155 : psTable->panFieldNamesLength[i] =
181 155 : static_cast<int>(strlen(psTable->papszFieldNames[i]));
182 : }
183 :
184 30 : return psTable;
185 : }
186 :
187 : /************************************************************************/
188 : /* CSVDeaccess() */
189 : /************************************************************************/
190 :
191 863 : static void CSVDeaccessInternal(CSVTable **ppsCSVTableList, bool bCanUseTLS,
192 : const char *pszFilename)
193 :
194 : {
195 863 : if (ppsCSVTableList == nullptr)
196 854 : return;
197 :
198 : /* -------------------------------------------------------------------- */
199 : /* A NULL means deaccess all tables. */
200 : /* -------------------------------------------------------------------- */
201 9 : if (pszFilename == nullptr)
202 : {
203 9 : while (*ppsCSVTableList != nullptr)
204 5 : CSVDeaccessInternal(ppsCSVTableList, bCanUseTLS,
205 5 : (*ppsCSVTableList)->pszFilename);
206 :
207 4 : return;
208 : }
209 :
210 : /* -------------------------------------------------------------------- */
211 : /* Find this table. */
212 : /* -------------------------------------------------------------------- */
213 5 : CSVTable *psLast = nullptr;
214 5 : CSVTable *psTable = *ppsCSVTableList;
215 5 : for (; psTable != nullptr && !EQUAL(psTable->pszFilename, pszFilename);
216 0 : psTable = psTable->psNext)
217 : {
218 0 : psLast = psTable;
219 : }
220 :
221 5 : if (psTable == nullptr)
222 : {
223 0 : if (bCanUseTLS)
224 0 : CPLDebug("CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename);
225 0 : return;
226 : }
227 :
228 : /* -------------------------------------------------------------------- */
229 : /* Remove the link from the list. */
230 : /* -------------------------------------------------------------------- */
231 5 : if (psLast != nullptr)
232 0 : psLast->psNext = psTable->psNext;
233 : else
234 5 : *ppsCSVTableList = psTable->psNext;
235 :
236 : /* -------------------------------------------------------------------- */
237 : /* Free the table. */
238 : /* -------------------------------------------------------------------- */
239 5 : if (psTable->fp != nullptr)
240 0 : VSIFCloseL(psTable->fp);
241 :
242 5 : CSLDestroy(psTable->papszFieldNames);
243 5 : CPLFree(psTable->panFieldNamesLength);
244 5 : CSLDestroy(psTable->papszRecFields);
245 5 : CPLFree(psTable->pszFilename);
246 5 : CPLFree(psTable->panLineIndex);
247 5 : CPLFree(psTable->pszRawData);
248 5 : CPLFree(psTable->papszLines);
249 :
250 5 : CPLFree(psTable);
251 :
252 5 : if (bCanUseTLS)
253 5 : CPLReadLine(nullptr);
254 : }
255 :
256 856 : void CSVDeaccess(const char *pszFilename)
257 : {
258 : /* -------------------------------------------------------------------- */
259 : /* Fetch the table, and allocate the thread-local pointer to it */
260 : /* if there isn't already one. */
261 : /* -------------------------------------------------------------------- */
262 856 : int bMemoryError = FALSE;
263 : CSVTable **ppsCSVTableList =
264 856 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
265 :
266 856 : CSVDeaccessInternal(ppsCSVTableList, true, pszFilename);
267 856 : }
268 :
269 : /************************************************************************/
270 : /* CSVSplitLine() */
271 : /* */
272 : /* Tokenize a CSV line into fields in the form of a string */
273 : /* list. This is used instead of the CPLTokenizeString() */
274 : /* because it provides correct CSV escaping and quoting */
275 : /* semantics. */
276 : /************************************************************************/
277 :
278 108480 : static char **CSVSplitLine(const char *pszString, const char *pszDelimiter,
279 : bool bKeepLeadingAndClosingQuotes,
280 : bool bMergeDelimiter)
281 :
282 : {
283 216960 : CPLStringList aosRetList;
284 108480 : if (pszString == nullptr)
285 0 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
286 :
287 108480 : char *pszToken = static_cast<char *>(CPLCalloc(10, 1));
288 108480 : int nTokenMax = 10;
289 108480 : const size_t nDelimiterLength = strlen(pszDelimiter);
290 :
291 108480 : const char *pszIter = pszString;
292 618741 : while (*pszIter != '\0')
293 : {
294 510261 : bool bInString = false;
295 :
296 510261 : int nTokenLen = 0;
297 :
298 : // Try to find the next delimiter, marking end of token.
299 4332680 : do
300 : {
301 : // End if this is a delimiter skip it and break.
302 4842940 : if (!bInString &&
303 2579290 : strncmp(pszIter, pszDelimiter, nDelimiterLength) == 0)
304 : {
305 402094 : pszIter += nDelimiterLength;
306 402094 : if (bMergeDelimiter)
307 : {
308 9 : while (strncmp(pszIter, pszDelimiter, nDelimiterLength) ==
309 : 0)
310 5 : pszIter += nDelimiterLength;
311 : }
312 402094 : break;
313 : }
314 :
315 4440840 : if (*pszIter == '"')
316 : {
317 350303 : if (!bInString && nTokenLen > 0)
318 : {
319 : // do not treat in a special way double quotes that appear
320 : // in the middle of a field (similarly to OpenOffice)
321 : // Like in records: 1,50°46'06.6"N 116°42'04.4,foo
322 : }
323 350218 : else if (!bInString || pszIter[1] != '"')
324 : {
325 349552 : bInString = !bInString;
326 349552 : if (!bKeepLeadingAndClosingQuotes)
327 349518 : continue;
328 : }
329 : else // Doubled quotes in string resolve to one quote.
330 : {
331 666 : pszIter++;
332 : }
333 : }
334 :
335 4091330 : if (nTokenLen >= nTokenMax - 2)
336 : {
337 116098 : nTokenMax = nTokenMax * 2 + 10;
338 116098 : pszToken = static_cast<char *>(CPLRealloc(pszToken, nTokenMax));
339 : }
340 :
341 4091330 : pszToken[nTokenLen] = *pszIter;
342 4091330 : nTokenLen++;
343 4440840 : } while (*(++pszIter) != '\0');
344 :
345 510261 : pszToken[nTokenLen] = '\0';
346 510261 : aosRetList.AddString(pszToken);
347 :
348 : // If the last token is an empty token, then we have to catch
349 : // it now, otherwise we won't reenter the loop and it will be lost.
350 510261 : if (*pszIter == '\0' &&
351 108440 : pszIter - pszString >= static_cast<int>(nDelimiterLength) &&
352 108440 : strncmp(pszIter - nDelimiterLength, pszDelimiter,
353 : nDelimiterLength) == 0)
354 : {
355 273 : aosRetList.AddString("");
356 : }
357 : }
358 :
359 108480 : CPLFree(pszToken);
360 :
361 108480 : if (aosRetList.Count() == 0)
362 40 : return static_cast<char **>(CPLCalloc(sizeof(char *), 1));
363 : else
364 108440 : return aosRetList.StealList();
365 : }
366 :
/************************************************************************/
/*                          CSVFindNextLine()                           */
/*                                                                      */
/*      Zero terminate the line starting at pszThisLine and return      */
/*      the start of the following one, or nullptr if there is none.    */
/*      A CR or LF only ends the line when it is not inside a quoted    */
/*      string; a double quote preceded by a backslash does not open    */
/*      or close a string. The whole run of CR/LF characters that       */
/*      ends the line is overwritten with NUL bytes.                    */
/************************************************************************/

static char *CSVFindNextLine(char *pszThisLine)

{
    char *pszCursor = pszThisLine;
    bool bInsideQuotes = false;

    // Advance to the first line terminator found outside of quotes.
    while (*pszCursor != '\0')
    {
        const char ch = *pszCursor;

        if (ch == '\"' && (pszCursor == pszThisLine || pszCursor[-1] != '\\'))
            bInsideQuotes = !bInsideQuotes;

        if ((ch == 10 || ch == 13) && !bInsideQuotes)
            break;

        pszCursor++;
    }

    // Wipe the terminator(s); this also terminates the current line.
    while (*pszCursor == 10 || *pszCursor == 13)
        *pszCursor++ = '\0';

    return (*pszCursor == '\0') ? nullptr : pszCursor;
}
399 :
400 : /************************************************************************/
401 : /* CSVIngest() */
402 : /* */
403 : /* Load entire file into memory and setup index if possible. */
404 : /************************************************************************/
405 :
406 : // TODO(schwehr): Clean up all the casting in CSVIngest.
407 53685 : static void CSVIngest(CSVTable *psTable)
408 :
409 : {
410 53685 : if (psTable->pszRawData != nullptr)
411 53655 : return;
412 :
413 : /* -------------------------------------------------------------------- */
414 : /* Ingest whole file. */
415 : /* -------------------------------------------------------------------- */
416 30 : if (VSIFSeekL(psTable->fp, 0, SEEK_END) != 0)
417 : {
418 0 : CPLError(CE_Failure, CPLE_FileIO,
419 : "Failed using seek end and tell to get file length: %s",
420 : psTable->pszFilename);
421 0 : return;
422 : }
423 30 : const vsi_l_offset nFileLen = VSIFTellL(psTable->fp);
424 30 : if (static_cast<long>(nFileLen) == -1)
425 : {
426 0 : CPLError(CE_Failure, CPLE_FileIO,
427 : "Failed using seek end and tell to get file length: %s",
428 : psTable->pszFilename);
429 0 : return;
430 : }
431 30 : VSIRewindL(psTable->fp);
432 :
433 30 : psTable->pszRawData = static_cast<char *>(
434 30 : VSI_MALLOC_VERBOSE(static_cast<size_t>(nFileLen) + 1));
435 30 : if (psTable->pszRawData == nullptr)
436 0 : return;
437 30 : if (VSIFReadL(psTable->pszRawData, 1, static_cast<size_t>(nFileLen),
438 30 : psTable->fp) != static_cast<size_t>(nFileLen))
439 : {
440 0 : CPLFree(psTable->pszRawData);
441 0 : psTable->pszRawData = nullptr;
442 :
443 0 : CPLError(CE_Failure, CPLE_FileIO, "Read of file %s failed.",
444 : psTable->pszFilename);
445 0 : return;
446 : }
447 :
448 30 : psTable->pszRawData[nFileLen] = '\0';
449 :
450 : /* -------------------------------------------------------------------- */
451 : /* Get count of newlines so we can allocate line array. */
452 : /* -------------------------------------------------------------------- */
453 30 : int nMaxLineCount = 0;
454 279158 : for (int i = 0; i < static_cast<int>(nFileLen); i++)
455 : {
456 279128 : if (psTable->pszRawData[i] == 10)
457 6329 : nMaxLineCount++;
458 : }
459 :
460 30 : psTable->papszLines =
461 30 : static_cast<char **>(VSI_CALLOC_VERBOSE(sizeof(char *), nMaxLineCount));
462 30 : if (psTable->papszLines == nullptr)
463 0 : return;
464 :
465 : /* -------------------------------------------------------------------- */
466 : /* Build a list of record pointers into the raw data buffer */
467 : /* based on line terminators. Zero terminate the line */
468 : /* strings. */
469 : /* -------------------------------------------------------------------- */
470 : /* skip header line */
471 30 : char *pszThisLine = CSVFindNextLine(psTable->pszRawData);
472 :
473 30 : int iLine = 0;
474 6329 : while (pszThisLine != nullptr && iLine < nMaxLineCount)
475 : {
476 6299 : if (pszThisLine[0] != '#')
477 6288 : psTable->papszLines[iLine++] = pszThisLine;
478 6299 : pszThisLine = CSVFindNextLine(pszThisLine);
479 : }
480 :
481 30 : psTable->nLineCount = iLine;
482 :
483 : /* -------------------------------------------------------------------- */
484 : /* Allocate and populate index array. Ensure they are in */
485 : /* ascending order so that binary searches can be done on the */
486 : /* array. */
487 : /* -------------------------------------------------------------------- */
488 30 : psTable->panLineIndex = static_cast<int *>(
489 30 : VSI_MALLOC_VERBOSE(sizeof(int) * psTable->nLineCount));
490 30 : if (psTable->panLineIndex == nullptr)
491 0 : return;
492 :
493 6232 : for (int i = 0; i < psTable->nLineCount; i++)
494 : {
495 6204 : psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
496 :
497 6204 : if (i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i - 1])
498 : {
499 2 : CPLFree(psTable->panLineIndex);
500 2 : psTable->panLineIndex = nullptr;
501 2 : break;
502 : }
503 : }
504 :
505 30 : psTable->iLastLine = -1;
506 :
507 : /* -------------------------------------------------------------------- */
508 : /* We should never need the file handle against, so close it. */
509 : /* -------------------------------------------------------------------- */
510 30 : VSIFCloseL(psTable->fp);
511 30 : psTable->fp = nullptr;
512 : }
513 :
514 53685 : static void CSVIngest(const char *pszFilename)
515 :
516 : {
517 53685 : CSVTable *psTable = CSVAccess(pszFilename);
518 53685 : if (psTable == nullptr)
519 : {
520 0 : CPLError(CE_Failure, CPLE_FileIO, "Failed to open file: %s",
521 : pszFilename);
522 0 : return;
523 : }
524 53685 : CSVIngest(psTable);
525 : }
526 :
527 : /************************************************************************/
528 : /* CSVDetectSeperator() */
529 : /************************************************************************/
530 :
531 : /** Detect which field separator is used.
532 : *
533 : * Currently, it can detect comma, semicolon, space, tabulation or pipe.
534 : * In case of ambiguity, starting with GDAL 3.7.1, the separator with the
535 : * most occurrences will be selected (and a warning emitted).
536 : * If no separator found, comma will be considered as the separator.
537 : *
538 : * @return ',', ';', ' ', tabulation character or '|'.
539 : */
540 568 : char CSVDetectSeperator(const char *pszLine)
541 : {
542 568 : bool bInString = false;
543 568 : int nCountComma = 0;
544 568 : int nCountSemicolon = 0;
545 568 : int nCountTab = 0;
546 568 : int nCountPipe = 0;
547 568 : int nCountSpace = 0;
548 :
549 25353 : for (; *pszLine != '\0'; pszLine++)
550 : {
551 24785 : if (!bInString && *pszLine == ',')
552 : {
553 2011 : nCountComma++;
554 : }
555 22774 : else if (!bInString && *pszLine == ';')
556 : {
557 10 : nCountSemicolon++;
558 : }
559 22764 : else if (!bInString && *pszLine == '\t')
560 : {
561 29 : nCountTab++;
562 : }
563 22735 : else if (!bInString && *pszLine == '|')
564 : {
565 9 : nCountPipe++;
566 : }
567 22726 : else if (!bInString && *pszLine == ' ')
568 : {
569 258 : nCountSpace++;
570 : }
571 22468 : else if (*pszLine == '"')
572 : {
573 519 : if (!bInString || pszLine[1] != '"')
574 : {
575 519 : bInString = !bInString;
576 519 : continue;
577 : }
578 : else /* doubled quotes in string resolve to one quote */
579 : {
580 0 : pszLine++;
581 : }
582 : }
583 : }
584 :
585 : const int nMaxCountExceptSpace =
586 : std::max(std::max(nCountComma, nCountSemicolon),
587 568 : std::max(nCountTab, nCountPipe));
588 568 : char chDelimiter = ',';
589 568 : if (nMaxCountExceptSpace == 0)
590 : {
591 33 : if (nCountSpace > 0)
592 8 : chDelimiter = ' ';
593 : }
594 : else
595 : {
596 535 : bool bWarn = false;
597 535 : if (nCountComma == nMaxCountExceptSpace)
598 : {
599 519 : chDelimiter = ',';
600 519 : bWarn = (nCountSemicolon > 0 || nCountTab > 0 || nCountPipe > 0);
601 : }
602 16 : else if (nCountSemicolon == nMaxCountExceptSpace)
603 : {
604 5 : chDelimiter = ';';
605 5 : bWarn = (nCountComma > 0 || nCountTab > 0 || nCountPipe > 0);
606 : }
607 11 : else if (nCountTab == nMaxCountExceptSpace)
608 : {
609 6 : chDelimiter = '\t';
610 6 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountPipe > 0);
611 : }
612 : else /* if( nCountPipe == nMaxCountExceptSpace ) */
613 : {
614 5 : chDelimiter = '|';
615 5 : bWarn = (nCountComma > 0 || nCountSemicolon > 0 || nCountTab > 0);
616 : }
617 535 : if (bWarn)
618 : {
619 6 : CPLError(CE_Warning, CPLE_AppDefined,
620 : "Selecting '%c' as CSV field separator, but "
621 : "other candidate separator(s) have been found.",
622 : chDelimiter);
623 : }
624 : }
625 :
626 568 : return chDelimiter;
627 : }
628 :
629 : /************************************************************************/
630 : /* CSVReadParseLine3L() */
631 : /* */
632 : /* Read one line, and return split into fields. The return */
633 : /* result is a stringlist, in the sense of the CSL functions. */
634 : /************************************************************************/
635 :
636 : static char **
637 56410 : CSVReadParseLineGeneric(void *fp, const char *(*pfnReadLine)(void *, size_t),
638 : size_t nMaxLineSize, const char *pszDelimiter,
639 : bool bHonourStrings, bool bKeepLeadingAndClosingQuotes,
640 : bool bMergeDelimiter, bool bSkipBOM)
641 : {
642 56410 : const char *pszLine = pfnReadLine(fp, nMaxLineSize);
643 56410 : if (pszLine == nullptr)
644 1352 : return nullptr;
645 :
646 55058 : if (bSkipBOM)
647 : {
648 : // Skip BOM.
649 54702 : const GByte *pabyData = reinterpret_cast<const GByte *>(pszLine);
650 54702 : if (pabyData[0] == 0xEF && pabyData[1] == 0xBB && pabyData[2] == 0xBF)
651 4 : pszLine += 3;
652 : }
653 :
654 : // Special fix to read NdfcFacilities.xls with un-balanced double quotes.
655 55058 : if (!bHonourStrings)
656 : {
657 2 : return CSLTokenizeStringComplex(pszLine, pszDelimiter, FALSE, TRUE);
658 : }
659 :
660 : // If there are no quotes, then this is the simple case.
661 : // Parse, and return tokens.
662 55056 : if (strchr(pszLine, '\"') == nullptr)
663 47550 : return CSVSplitLine(pszLine, pszDelimiter, bKeepLeadingAndClosingQuotes,
664 47550 : bMergeDelimiter);
665 :
666 : try
667 : {
668 : // We must now count the quotes in our working string, and as
669 : // long as it is odd, keep adding new lines.
670 7506 : std::string osWorkLine(pszLine);
671 :
672 7506 : size_t i = 0;
673 7506 : int nCount = 0;
674 :
675 : while (true)
676 : {
677 783060 : for (; i < osWorkLine.size(); i++)
678 : {
679 774799 : if (osWorkLine[i] == '\"')
680 58975 : nCount++;
681 : }
682 :
683 8261 : if (nCount % 2 == 0)
684 7505 : break;
685 :
686 756 : pszLine = pfnReadLine(fp, nMaxLineSize);
687 756 : if (pszLine == nullptr)
688 1 : break;
689 :
690 755 : osWorkLine.append("\n");
691 755 : osWorkLine.append(pszLine);
692 : }
693 :
694 : char **papszReturn =
695 7506 : CSVSplitLine(osWorkLine.c_str(), pszDelimiter,
696 : bKeepLeadingAndClosingQuotes, bMergeDelimiter);
697 :
698 7506 : return papszReturn;
699 : }
700 0 : catch (const std::exception &e)
701 : {
702 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "%s", e.what());
703 0 : return nullptr;
704 : }
705 : }
706 :
707 : /************************************************************************/
708 : /* CSVReadParseLine() */
709 : /* */
710 : /* Read one line, and return split into fields. The return */
711 : /* result is a stringlist, in the sense of the CSL functions. */
712 : /* */
713 : /* Deprecated. Replaced by CSVReadParseLineL(). */
714 : /************************************************************************/
715 :
716 0 : char **CSVReadParseLine(FILE *fp)
717 : {
718 0 : return CSVReadParseLine2(fp, ',');
719 : }
720 :
721 0 : static const char *ReadLineClassicalFile(void *fp, size_t /* nMaxLineSize */)
722 : {
723 0 : return CPLReadLine(static_cast<FILE *>(fp));
724 : }
725 :
726 0 : char **CSVReadParseLine2(FILE *fp, char chDelimiter)
727 : {
728 0 : CPLAssert(fp != nullptr);
729 0 : if (fp == nullptr)
730 0 : return nullptr;
731 :
732 0 : char szDelimiter[2] = {chDelimiter, 0};
733 0 : return CSVReadParseLineGeneric(fp, ReadLineClassicalFile,
734 : 0, // nMaxLineSize,
735 : szDelimiter,
736 : true, // bHonourStrings
737 : false, // bKeepLeadingAndClosingQuotes
738 : false, // bMergeDelimiter
739 0 : true /* bSkipBOM */);
740 : }
741 :
742 : /************************************************************************/
743 : /* CSVReadParseLineL() */
744 : /* */
745 : /* Read one line, and return split into fields. The return */
746 : /* result is a stringlist, in the sense of the CSL functions. */
747 : /* */
748 : /* Replaces CSVReadParseLine(). These functions use the VSI */
749 : /* layer to allow reading from other file containers. */
750 : /************************************************************************/
751 :
752 3910 : char **CSVReadParseLineL(VSILFILE *fp)
753 : {
754 3910 : return CSVReadParseLine2L(fp, ',');
755 : }
756 :
757 3910 : char **CSVReadParseLine2L(VSILFILE *fp, char chDelimiter)
758 :
759 : {
760 3910 : CPLAssert(fp != nullptr);
761 3910 : if (fp == nullptr)
762 0 : return nullptr;
763 :
764 3910 : char szDelimiter[2] = {chDelimiter, 0};
765 3910 : return CSVReadParseLine3L(fp,
766 : 0, // nMaxLineSize
767 : szDelimiter,
768 : true, // bHonourStrings
769 : false, // bKeepLeadingAndClosingQuotes
770 : false, // bMergeDelimiter
771 3910 : true /* bSkipBOM */);
772 : }
773 :
774 : /************************************************************************/
775 : /* ReadLineLargeFile() */
776 : /************************************************************************/
777 :
778 57166 : static const char *ReadLineLargeFile(void *fp, size_t nMaxLineSize)
779 : {
780 57166 : int nBufLength = 0;
781 57166 : return CPLReadLine3L(static_cast<VSILFILE *>(fp),
782 : nMaxLineSize == 0 ? -1
783 : : static_cast<int>(nMaxLineSize),
784 114332 : &nBufLength, nullptr);
785 : }
786 :
787 : /************************************************************************/
788 : /* CSVReadParseLine3L() */
789 : /* */
790 : /* Read one line, and return split into fields. The return */
791 : /* result is a stringlist, in the sense of the CSL functions. */
792 : /************************************************************************/
793 :
794 : /** Read one line, and return split into fields.
795 : * The return result is a stringlist, in the sense of the CSL functions.
796 : *
797 : * @param fp File handle. Must not be NULL
798 : * @param nMaxLineSize Maximum line size, or 0 for unlimited.
799 : * @param pszDelimiter Delimiter sequence for readers (can be multiple bytes)
800 : * @param bHonourStrings Should be true, unless double quotes should not be
801 : * considered when separating fields.
802 : * @param bKeepLeadingAndClosingQuotes Whether the leading and closing double
803 : * quote characters should be kept.
804 : * @param bMergeDelimiter Whether consecutive delimiters should be considered
805 : * as a single one. Should generally be set to false.
806 : * @param bSkipBOM Whether leading UTF-8 BOM should be skipped.
807 : */
808 56410 : char **CSVReadParseLine3L(VSILFILE *fp, size_t nMaxLineSize,
809 : const char *pszDelimiter, bool bHonourStrings,
810 : bool bKeepLeadingAndClosingQuotes,
811 : bool bMergeDelimiter, bool bSkipBOM)
812 :
813 : {
814 56410 : return CSVReadParseLineGeneric(
815 : fp, ReadLineLargeFile, nMaxLineSize, pszDelimiter, bHonourStrings,
816 56410 : bKeepLeadingAndClosingQuotes, bMergeDelimiter, bSkipBOM);
817 : }
818 :
819 : /************************************************************************/
820 : /* CSVCompare() */
821 : /* */
822 : /* Compare a field to a search value using a particular */
823 : /* criteria. */
824 : /************************************************************************/
825 :
826 603 : static bool CSVCompare(const char *pszFieldValue, const char *pszTarget,
827 : CSVCompareCriteria eCriteria)
828 :
829 : {
830 603 : if (eCriteria == CC_ExactString)
831 : {
832 0 : return (strcmp(pszFieldValue, pszTarget) == 0);
833 : }
834 603 : else if (eCriteria == CC_ApproxString)
835 : {
836 270 : return EQUAL(pszFieldValue, pszTarget);
837 : }
838 333 : else if (eCriteria == CC_Integer)
839 : {
840 626 : return (CPLGetValueType(pszFieldValue) == CPL_VALUE_INTEGER &&
841 626 : atoi(pszFieldValue) == atoi(pszTarget));
842 : }
843 :
844 0 : return false;
845 : }
846 :
847 : /************************************************************************/
848 : /* CSVScanLines() */
849 : /* */
850 : /* Read the file scanline for lines where the key field equals */
851 : /* the indicated value with the suggested comparison criteria. */
852 : /* Return the first matching line split into fields. */
853 : /* */
854 : /* Deprecated. Replaced by CSVScanLinesL(). */
855 : /************************************************************************/
856 :
857 0 : char **CSVScanLines(FILE *fp, int iKeyField, const char *pszValue,
858 : CSVCompareCriteria eCriteria)
859 :
860 : {
861 0 : CPLAssert(pszValue != nullptr);
862 0 : CPLAssert(iKeyField >= 0);
863 0 : CPLAssert(fp != nullptr);
864 :
865 0 : bool bSelected = false;
866 0 : const int nTestValue = atoi(pszValue);
867 0 : char **papszFields = nullptr;
868 :
869 0 : while (!bSelected)
870 : {
871 0 : papszFields = CSVReadParseLine(fp);
872 0 : if (papszFields == nullptr)
873 0 : return nullptr;
874 :
875 0 : if (CSLCount(papszFields) < iKeyField + 1)
876 : {
877 : /* not selected */
878 : }
879 0 : else if (eCriteria == CC_Integer &&
880 0 : atoi(papszFields[iKeyField]) == nTestValue)
881 : {
882 0 : bSelected = true;
883 : }
884 : else
885 : {
886 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
887 : }
888 :
889 0 : if (!bSelected)
890 : {
891 0 : CSLDestroy(papszFields);
892 0 : papszFields = nullptr;
893 : }
894 : }
895 :
896 0 : return papszFields;
897 : }
898 :
899 : /************************************************************************/
900 : /* CSVScanLinesL() */
901 : /* */
902 : /* Read the file scanline for lines where the key field equals */
903 : /* the indicated value with the suggested comparison criteria. */
904 : /* Return the first matching line split into fields. */
905 : /************************************************************************/
906 :
907 0 : char **CSVScanLinesL(VSILFILE *fp, int iKeyField, const char *pszValue,
908 : CSVCompareCriteria eCriteria)
909 :
910 : {
911 0 : CPLAssert(pszValue != nullptr);
912 0 : CPLAssert(iKeyField >= 0);
913 0 : CPLAssert(fp != nullptr);
914 :
915 0 : bool bSelected = false;
916 0 : const int nTestValue = atoi(pszValue);
917 0 : char **papszFields = nullptr;
918 :
919 0 : while (!bSelected)
920 : {
921 0 : papszFields = CSVReadParseLineL(fp);
922 0 : if (papszFields == nullptr)
923 0 : return nullptr;
924 :
925 0 : if (CSLCount(papszFields) < iKeyField + 1)
926 : {
927 : /* not selected */
928 : }
929 0 : else if (eCriteria == CC_Integer &&
930 0 : atoi(papszFields[iKeyField]) == nTestValue)
931 : {
932 0 : bSelected = true;
933 : }
934 : else
935 : {
936 0 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
937 : }
938 :
939 0 : if (!bSelected)
940 : {
941 0 : CSLDestroy(papszFields);
942 0 : papszFields = nullptr;
943 : }
944 : }
945 :
946 0 : return papszFields;
947 : }
948 :
949 : /************************************************************************/
950 : /* CSVScanLinesIndexed() */
951 : /* */
952 : /* Read the file scanline for lines where the key field equals */
953 : /* the indicated value with the suggested comparison criteria. */
954 : /* Return the first matching line split into fields. */
955 : /************************************************************************/
956 :
957 21 : static char **CSVScanLinesIndexed(CSVTable *psTable, int nKeyValue)
958 :
959 : {
960 21 : CPLAssert(psTable->panLineIndex != nullptr);
961 :
962 : /* -------------------------------------------------------------------- */
963 : /* Find target record with binary search. */
964 : /* -------------------------------------------------------------------- */
965 21 : int iTop = psTable->nLineCount - 1;
966 21 : int iBottom = 0;
967 21 : int iResult = -1;
968 :
969 151 : while (iTop >= iBottom)
970 : {
971 151 : const int iMiddle = (iTop + iBottom) / 2;
972 151 : if (psTable->panLineIndex[iMiddle] > nKeyValue)
973 90 : iTop = iMiddle - 1;
974 61 : else if (psTable->panLineIndex[iMiddle] < nKeyValue)
975 40 : iBottom = iMiddle + 1;
976 : else
977 : {
978 21 : iResult = iMiddle;
979 : // if a key is not unique, select the first instance of it.
980 21 : while (iResult > 0 &&
981 21 : psTable->panLineIndex[iResult - 1] == nKeyValue)
982 : {
983 0 : psTable->bNonUniqueKey = true;
984 0 : iResult--;
985 : }
986 21 : break;
987 : }
988 : }
989 :
990 21 : if (iResult == -1)
991 0 : return nullptr;
992 :
993 : /* -------------------------------------------------------------------- */
994 : /* Parse target line, and update iLastLine indicator. */
995 : /* -------------------------------------------------------------------- */
996 21 : psTable->iLastLine = iResult;
997 :
998 21 : return CSVSplitLine(psTable->papszLines[iResult], ",", false, false);
999 : }
1000 :
1001 : /************************************************************************/
1002 : /* CSVScanLinesIngested() */
1003 : /* */
1004 : /* Read the file scanline for lines where the key field equals */
1005 : /* the indicated value with the suggested comparison criteria. */
1006 : /* Return the first matching line split into fields. */
1007 : /************************************************************************/
1008 :
1009 28 : static char **CSVScanLinesIngested(CSVTable *psTable, int iKeyField,
1010 : const char *pszValue,
1011 : CSVCompareCriteria eCriteria)
1012 :
1013 : {
1014 28 : CPLAssert(pszValue != nullptr);
1015 28 : CPLAssert(iKeyField >= 0);
1016 :
1017 28 : const int nTestValue = atoi(pszValue);
1018 :
1019 : /* -------------------------------------------------------------------- */
1020 : /* Short cut for indexed files. */
1021 : /* -------------------------------------------------------------------- */
1022 28 : if (iKeyField == 0 && eCriteria == CC_Integer &&
1023 21 : psTable->panLineIndex != nullptr)
1024 21 : return CSVScanLinesIndexed(psTable, nTestValue);
1025 :
1026 : /* -------------------------------------------------------------------- */
1027 : /* Scan from in-core lines. */
1028 : /* -------------------------------------------------------------------- */
1029 7 : char **papszFields = nullptr;
1030 7 : bool bSelected = false;
1031 :
1032 484 : while (!bSelected && psTable->iLastLine + 1 < psTable->nLineCount)
1033 : {
1034 477 : psTable->iLastLine++;
1035 477 : papszFields = CSVSplitLine(psTable->papszLines[psTable->iLastLine], ",",
1036 : false, false);
1037 :
1038 477 : if (CSLCount(papszFields) < iKeyField + 1)
1039 : {
1040 : /* not selected */
1041 : }
1042 477 : else if (eCriteria == CC_Integer &&
1043 242 : atoi(papszFields[iKeyField]) == nTestValue)
1044 : {
1045 2 : bSelected = true;
1046 : }
1047 : else
1048 : {
1049 475 : bSelected = CSVCompare(papszFields[iKeyField], pszValue, eCriteria);
1050 : }
1051 :
1052 477 : if (!bSelected)
1053 : {
1054 470 : CSLDestroy(papszFields);
1055 470 : papszFields = nullptr;
1056 : }
1057 : }
1058 :
1059 7 : return papszFields;
1060 : }
1061 :
1062 : /************************************************************************/
1063 : /* CSVRewind() */
1064 : /* */
1065 : /* Rewind a CSV file based on a passed in filename. */
1066 : /* This is aimed at being used with CSVGetNextLine(). */
1067 : /************************************************************************/
1068 :
1069 1774 : void CSVRewind(const char *pszFilename)
1070 :
1071 : {
1072 : /* -------------------------------------------------------------------- */
1073 : /* Get access to the table. */
1074 : /* -------------------------------------------------------------------- */
1075 1774 : CPLAssert(pszFilename != nullptr);
1076 :
1077 1774 : CSVTable *const psTable = CSVAccess(pszFilename);
1078 1774 : if (psTable != nullptr)
1079 1774 : psTable->iLastLine = -1;
1080 1774 : }
1081 :
1082 : /************************************************************************/
1083 : /* CSVGetNextLine() */
1084 : /* */
1085 : /* Fetch the next line of a CSV file based on a passed in */
1086 : /* filename. Returns NULL at end of file, or if file is not */
1087 : /* really established. */
1088 : /* This ingests the whole file into memory if not already done. */
1089 : /* When reaching end of file, CSVRewind() may be used to read */
1090 : /* again from the beginning. */
1091 : /************************************************************************/
1092 :
1093 53548 : char **CSVGetNextLine(const char *pszFilename)
1094 :
1095 : {
1096 :
1097 : /* -------------------------------------------------------------------- */
1098 : /* Get access to the table. */
1099 : /* -------------------------------------------------------------------- */
1100 53548 : CPLAssert(pszFilename != nullptr);
1101 :
1102 53548 : CSVTable *const psTable = CSVAccess(pszFilename);
1103 53548 : if (psTable == nullptr)
1104 0 : return nullptr;
1105 :
1106 53548 : CSVIngest(psTable->pszFilename);
1107 :
1108 : /* -------------------------------------------------------------------- */
1109 : /* If we use CSVGetNextLine() we can pretty much assume we have */
1110 : /* a non-unique key. */
1111 : /* -------------------------------------------------------------------- */
1112 53548 : psTable->bNonUniqueKey = true;
1113 :
1114 : /* -------------------------------------------------------------------- */
1115 : /* Do we have a next line available? This only works for */
1116 : /* ingested tables I believe. */
1117 : /* -------------------------------------------------------------------- */
1118 53548 : if (psTable->iLastLine + 1 >= psTable->nLineCount)
1119 622 : return nullptr;
1120 :
1121 52926 : psTable->iLastLine++;
1122 52926 : CSLDestroy(psTable->papszRecFields);
1123 105852 : psTable->papszRecFields = CSVSplitLine(
1124 52926 : psTable->papszLines[psTable->iLastLine], ",", false, false);
1125 :
1126 52926 : return psTable->papszRecFields;
1127 : }
1128 :
1129 : /************************************************************************/
1130 : /* CSVScanFile() */
1131 : /* */
1132 : /* Scan a whole file using criteria similar to above, but also */
1133 : /* taking care of file opening and closing. */
1134 : /************************************************************************/
1135 :
1136 137 : static char **CSVScanFile(CSVTable *const psTable, int iKeyField,
1137 : const char *pszValue, CSVCompareCriteria eCriteria)
1138 : {
1139 137 : CSVIngest(psTable->pszFilename);
1140 :
1141 : /* -------------------------------------------------------------------- */
1142 : /* Does the current record match the criteria? If so, just */
1143 : /* return it again. */
1144 : /* -------------------------------------------------------------------- */
1145 137 : if (iKeyField >= 0 && iKeyField < CSLCount(psTable->papszRecFields) &&
1146 383 : CSVCompare(psTable->papszRecFields[iKeyField], pszValue, eCriteria) &&
1147 109 : !psTable->bNonUniqueKey)
1148 : {
1149 109 : return psTable->papszRecFields;
1150 : }
1151 :
1152 : /* -------------------------------------------------------------------- */
1153 : /* Scan the file from the beginning, replacing the ``current */
1154 : /* record'' in our structure with the one that is found. */
1155 : /* -------------------------------------------------------------------- */
1156 28 : psTable->iLastLine = -1;
1157 28 : CSLDestroy(psTable->papszRecFields);
1158 :
1159 28 : if (psTable->pszRawData != nullptr)
1160 28 : psTable->papszRecFields =
1161 28 : CSVScanLinesIngested(psTable, iKeyField, pszValue, eCriteria);
1162 : else
1163 : {
1164 0 : VSIRewindL(psTable->fp);
1165 0 : CPLReadLineL(psTable->fp); /* throw away the header line */
1166 :
1167 0 : psTable->papszRecFields =
1168 0 : CSVScanLinesL(psTable->fp, iKeyField, pszValue, eCriteria);
1169 : }
1170 :
1171 28 : return psTable->papszRecFields;
1172 : }
1173 :
1174 4 : char **CSVScanFile(const char *pszFilename, int iKeyField, const char *pszValue,
1175 : CSVCompareCriteria eCriteria)
1176 :
1177 : {
1178 : /* -------------------------------------------------------------------- */
1179 : /* Get access to the table. */
1180 : /* -------------------------------------------------------------------- */
1181 4 : CPLAssert(pszFilename != nullptr);
1182 :
1183 4 : if (iKeyField < 0)
1184 0 : return nullptr;
1185 :
1186 4 : CSVTable *const psTable = CSVAccess(pszFilename);
1187 4 : if (psTable == nullptr)
1188 0 : return nullptr;
1189 :
1190 4 : return CSVScanFile(psTable, iKeyField, pszValue, eCriteria);
1191 : }
1192 :
1193 : /************************************************************************/
1194 : /* CPLGetFieldId() */
1195 : /* */
1196 : /* Read the first record of a CSV file (rewinding to be sure), */
1197 : /* and find the field with the indicated name. Returns -1 if */
1198 : /* it fails to find the field name. Comparison is case */
1199 : /* insensitive, but otherwise exact. After this function has */
1200 : /* been called the file pointer will be positioned just after */
1201 : /* the first record. */
1202 : /* */
1203 : /* Deprecated. Replaced by CPLGetFieldIdL(). */
1204 : /************************************************************************/
1205 :
1206 0 : int CSVGetFieldId(FILE *fp, const char *pszFieldName)
1207 :
1208 : {
1209 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1210 :
1211 0 : VSIRewind(fp);
1212 :
1213 0 : char **papszFields = CSVReadParseLine(fp);
1214 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1215 : {
1216 0 : if (EQUAL(papszFields[i], pszFieldName))
1217 : {
1218 0 : CSLDestroy(papszFields);
1219 0 : return i;
1220 : }
1221 : }
1222 :
1223 0 : CSLDestroy(papszFields);
1224 :
1225 0 : return -1;
1226 : }
1227 :
1228 : /************************************************************************/
1229 : /* CPLGetFieldIdL() */
1230 : /* */
1231 : /* Read the first record of a CSV file (rewinding to be sure), */
1232 : /* and find the field with the indicated name. Returns -1 if */
1233 : /* it fails to find the field name. Comparison is case */
1234 : /* insensitive, but otherwise exact. After this function has */
1235 : /* been called the file pointer will be positioned just after */
1236 : /* the first record. */
1237 : /************************************************************************/
1238 :
1239 0 : int CSVGetFieldIdL(VSILFILE *fp, const char *pszFieldName)
1240 :
1241 : {
1242 0 : CPLAssert(fp != nullptr && pszFieldName != nullptr);
1243 :
1244 0 : VSIRewindL(fp);
1245 :
1246 0 : char **papszFields = CSVReadParseLineL(fp);
1247 0 : for (int i = 0; papszFields != nullptr && papszFields[i] != nullptr; i++)
1248 : {
1249 0 : if (EQUAL(papszFields[i], pszFieldName))
1250 : {
1251 0 : CSLDestroy(papszFields);
1252 0 : return i;
1253 : }
1254 : }
1255 :
1256 0 : CSLDestroy(papszFields);
1257 :
1258 0 : return -1;
1259 : }
1260 :
1261 : /************************************************************************/
1262 : /* CSVGetFileFieldId() */
1263 : /* */
1264 : /* Same as CPLGetFieldId(), except that we get the file based */
1265 : /* on filename, rather than having an existing handle. */
1266 : /************************************************************************/
1267 :
1268 7103 : static int CSVGetFileFieldId(CSVTable *const psTable, const char *pszFieldName)
1269 :
1270 : {
1271 : /* -------------------------------------------------------------------- */
1272 : /* Find the requested field. */
1273 : /* -------------------------------------------------------------------- */
1274 7103 : const int nFieldNameLength = static_cast<int>(strlen(pszFieldName));
1275 17741 : for (int i = 0; psTable->papszFieldNames != nullptr &&
1276 17741 : psTable->papszFieldNames[i] != nullptr;
1277 : i++)
1278 : {
1279 17741 : if (psTable->panFieldNamesLength[i] == nFieldNameLength &&
1280 9988 : EQUALN(psTable->papszFieldNames[i], pszFieldName, nFieldNameLength))
1281 : {
1282 7103 : return i;
1283 : }
1284 : }
1285 :
1286 0 : return -1;
1287 : }
1288 :
1289 6837 : int CSVGetFileFieldId(const char *pszFilename, const char *pszFieldName)
1290 :
1291 : {
1292 : /* -------------------------------------------------------------------- */
1293 : /* Get access to the table. */
1294 : /* -------------------------------------------------------------------- */
1295 6837 : CPLAssert(pszFilename != nullptr);
1296 :
1297 6837 : CSVTable *const psTable = CSVAccess(pszFilename);
1298 6837 : if (psTable == nullptr)
1299 0 : return -1;
1300 6837 : return CSVGetFileFieldId(psTable, pszFieldName);
1301 : }
1302 :
1303 : /************************************************************************/
1304 : /* CSVScanFileByName() */
1305 : /* */
1306 : /* Same as CSVScanFile(), but using a field name instead of a */
1307 : /* field number. */
1308 : /************************************************************************/
1309 :
1310 4 : char **CSVScanFileByName(const char *pszFilename, const char *pszKeyFieldName,
1311 : const char *pszValue, CSVCompareCriteria eCriteria)
1312 :
1313 : {
1314 4 : const int iKeyField = CSVGetFileFieldId(pszFilename, pszKeyFieldName);
1315 4 : if (iKeyField == -1)
1316 0 : return nullptr;
1317 :
1318 4 : return CSVScanFile(pszFilename, iKeyField, pszValue, eCriteria);
1319 : }
1320 :
1321 : /************************************************************************/
1322 : /* CSVGetField() */
1323 : /* */
1324 : /* The all-in-one function to fetch a particular field value */
1325 : /* from a CSV file. Note this function will return an empty */
1326 : /* string, rather than NULL if it fails to find the desired */
1327 : /* value for some reason. The caller can't establish that the */
1328 : /* fetch failed. */
1329 : /************************************************************************/
1330 :
1331 133 : const char *CSVGetField(const char *pszFilename, const char *pszKeyFieldName,
1332 : const char *pszKeyFieldValue,
1333 : CSVCompareCriteria eCriteria,
1334 : const char *pszTargetField)
1335 :
1336 : {
1337 : /* -------------------------------------------------------------------- */
1338 : /* Find the table. */
1339 : /* -------------------------------------------------------------------- */
1340 133 : CSVTable *const psTable = CSVAccess(pszFilename);
1341 133 : if (psTable == nullptr)
1342 0 : return "";
1343 :
1344 133 : const int iKeyField = CSVGetFileFieldId(psTable, pszKeyFieldName);
1345 133 : if (iKeyField == -1)
1346 0 : return "";
1347 :
1348 : /* -------------------------------------------------------------------- */
1349 : /* Find the correct record. */
1350 : /* -------------------------------------------------------------------- */
1351 : char **papszRecord =
1352 133 : CSVScanFile(psTable, iKeyField, pszKeyFieldValue, eCriteria);
1353 133 : if (papszRecord == nullptr)
1354 0 : return "";
1355 :
1356 : /* -------------------------------------------------------------------- */
1357 : /* Figure out which field we want out of this. */
1358 : /* -------------------------------------------------------------------- */
1359 133 : const int iTargetField = CSVGetFileFieldId(psTable, pszTargetField);
1360 133 : if (iTargetField < 0)
1361 0 : return "";
1362 :
1363 374 : for (int i = 0; papszRecord[i] != nullptr; ++i)
1364 : {
1365 374 : if (i == iTargetField)
1366 133 : return papszRecord[iTargetField];
1367 : }
1368 0 : return "";
1369 : }
1370 :
1371 : /************************************************************************/
1372 : /* GDALDefaultCSVFilename() */
1373 : /************************************************************************/
1374 :
// State kept by GDALDefaultCSVFilename() in the CTLS_CSVDEFAULTFILENAME
// thread-local-storage slot.
struct DefaultCSVFileNameTLS
{
    // Buffer holding the last fallback path handed back to a caller.
    char szPath[512];
    // Set once the GDAL_DATA finder location has been pushed.
    bool bCSVFinderInitialized;
};
1380 :
1381 2412 : const char *GDALDefaultCSVFilename(const char *pszBasename)
1382 :
1383 : {
1384 : /* -------------------------------------------------------------------- */
1385 : /* Do we already have this file accessed? If so, just return */
1386 : /* the existing path without any further probing. */
1387 : /* -------------------------------------------------------------------- */
1388 2412 : int bMemoryError = FALSE;
1389 : CSVTable **ppsCSVTableList =
1390 2412 : static_cast<CSVTable **>(CPLGetTLSEx(CTLS_CSVTABLEPTR, &bMemoryError));
1391 2412 : if (ppsCSVTableList != nullptr)
1392 : {
1393 2406 : const size_t nBasenameLen = strlen(pszBasename);
1394 :
1395 22378 : for (const CSVTable *psTable = *ppsCSVTableList; psTable != nullptr;
1396 19972 : psTable = psTable->psNext)
1397 : {
1398 21826 : const size_t nFullLen = strlen(psTable->pszFilename);
1399 :
1400 21826 : if (nFullLen > nBasenameLen &&
1401 21826 : strcmp(psTable->pszFilename + nFullLen - nBasenameLen,
1402 1854 : pszBasename) == 0 &&
1403 1854 : strchr("/\\",
1404 1854 : psTable->pszFilename[+nFullLen - nBasenameLen - 1]) !=
1405 : nullptr)
1406 : {
1407 1854 : return psTable->pszFilename;
1408 : }
1409 : }
1410 : }
1411 :
1412 : /* -------------------------------------------------------------------- */
1413 : /* Otherwise we need to look harder for it. */
1414 : /* -------------------------------------------------------------------- */
1415 : DefaultCSVFileNameTLS *pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1416 558 : CPLGetTLSEx(CTLS_CSVDEFAULTFILENAME, &bMemoryError));
1417 558 : if (pTLSData == nullptr && !bMemoryError)
1418 : {
1419 : pTLSData = static_cast<DefaultCSVFileNameTLS *>(
1420 5 : VSI_CALLOC_VERBOSE(1, sizeof(DefaultCSVFileNameTLS)));
1421 5 : if (pTLSData)
1422 5 : CPLSetTLS(CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE);
1423 : }
1424 558 : if (pTLSData == nullptr)
1425 0 : return "/not_existing_dir/not_existing_path";
1426 :
1427 558 : const char *pszResult = CPLFindFile("gdal", pszBasename);
1428 :
1429 558 : if (pszResult != nullptr)
1430 43 : return pszResult;
1431 :
1432 515 : if (!pTLSData->bCSVFinderInitialized)
1433 : {
1434 2 : pTLSData->bCSVFinderInitialized = true;
1435 :
1436 2 : if (CPLGetConfigOption("GDAL_DATA", nullptr) != nullptr)
1437 2 : CPLPushFinderLocation(CPLGetConfigOption("GDAL_DATA", nullptr));
1438 :
1439 2 : pszResult = CPLFindFile("gdal", pszBasename);
1440 :
1441 2 : if (pszResult != nullptr)
1442 0 : return pszResult;
1443 : }
1444 :
1445 : // For systems like sandboxes that do not allow other checks.
1446 515 : CPLDebug("CPL_CSV",
1447 : "Failed to find file in GDALDefaultCSVFilename. "
1448 : "Returning original basename: %s",
1449 : pszBasename);
1450 515 : CPLStrlcpy(pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath));
1451 515 : return pTLSData->szPath;
1452 : }
1453 :
1454 : /************************************************************************/
1455 : /* CSVFilename() */
1456 : /* */
1457 : /* Return the full path to a particular CSV file. This will */
1458 : /* eventually be something the application can override. */
1459 : /************************************************************************/
1460 :
1461 : CPL_C_START
1462 : static const char *(*pfnCSVFilenameHook)(const char *) = nullptr;
1463 : CPL_C_END
1464 :
1465 2412 : const char *CSVFilename(const char *pszBasename)
1466 :
1467 : {
1468 2412 : if (pfnCSVFilenameHook == nullptr)
1469 2412 : return GDALDefaultCSVFilename(pszBasename);
1470 :
1471 0 : return pfnCSVFilenameHook(pszBasename);
1472 : }
1473 :
1474 : /************************************************************************/
1475 : /* SetCSVFilenameHook() */
1476 : /* */
1477 : /* Applications can use this to set a function that will */
1478 : /* massage CSV filenames. */
1479 : /************************************************************************/
1480 :
1481 : /**
1482 : * Override CSV file search method.
1483 : *
1484 : * @param pfnNewHook The pointer to a function which will return the
1485 : * full path for a given filename.
1486 : *
1487 :
1488 : This function allows an application to override how the GTIFGetDefn()
1489 : and related function find the CSV (Comma Separated Value) values
1490 : required. The pfnHook argument should be a pointer to a function that
1491 : will take in a CSV filename and return a full path to the file. The
1492 : returned string should be to an internal static buffer so that the
1493 : caller doesn't have to free the result.
1494 :
1495 : <b>Example:</b><br>
1496 :
1497 : The listgeo utility uses the following override function if the user
1498 : specified a CSV file directory with the -t commandline switch (argument
1499 : put into CSVDirName). <p>
1500 :
1501 : <pre>
1502 :
1503 : ...
1504 : SetCSVFilenameHook( CSVFileOverride );
1505 : ...
1506 :
1507 : static const char *CSVFileOverride( const char * pszInput )
1508 :
1509 : {
1510 : static char szPath[1024] = {};
1511 :
1512 : sprintf( szPath, "%s/%s", CSVDirName, pszInput );
1513 :
1514 : return szPath;
1515 : }
1516 : </pre>
1517 :
1518 : */
1519 :
1520 : CPL_C_START
1521 0 : void SetCSVFilenameHook(const char *(*pfnNewHook)(const char *))
1522 :
1523 : {
1524 0 : pfnCSVFilenameHook = pfnNewHook;
1525 0 : }
1526 :
1527 : CPL_C_END
|