Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CSV Translator
4 : * Purpose: Implements OGRCSVDriver.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
9 : * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "cpl_port.h"
15 : #include "ogr_csv.h"
16 :
17 : #include <cerrno>
18 : #include <cstring>
19 : #include <map>
20 : #include <string>
21 : #include <utility>
22 :
23 : #include "cpl_conv.h"
24 : #include "cpl_error.h"
25 : #include "cpl_multiproc.h"
26 : #include "cpl_string.h"
27 : #include "cpl_vsi.h"
28 : #include "gdal.h"
29 : #include "gdal_priv.h"
30 :
31 : static CPLMutex *hMutex = nullptr;
32 : static std::map<CPLString, GDALDataset *> *poMap = nullptr;
33 :
34 : /************************************************************************/
35 : /* OGRCSVDriverIdentify() */
36 : /************************************************************************/
37 :
38 60131 : static int OGRCSVDriverIdentify(GDALOpenInfo *poOpenInfo)
39 :
40 : {
41 60131 : if (poOpenInfo->fpL != nullptr)
42 : {
43 6741 : if (poOpenInfo->IsSingleAllowedDriver("CSV"))
44 184 : return TRUE;
45 :
46 : const CPLString osBaseFilename =
47 13114 : CPLGetFilename(poOpenInfo->pszFilename);
48 : const CPLString osExt =
49 13114 : OGRCSVDataSource::GetRealExtension(poOpenInfo->pszFilename);
50 :
51 6557 : if (EQUAL(osBaseFilename, "NfdcFacilities.xls") ||
52 6557 : EQUAL(osBaseFilename, "NfdcRunways.xls") ||
53 19671 : EQUAL(osBaseFilename, "NfdcRemarks.xls") ||
54 6557 : EQUAL(osBaseFilename, "NfdcSchedules.xls"))
55 : {
56 0 : return TRUE;
57 : }
58 6557 : else if ((STARTS_WITH_CI(osBaseFilename, "NationalFile_") ||
59 6557 : STARTS_WITH_CI(osBaseFilename, "POP_PLACES_") ||
60 6557 : STARTS_WITH_CI(osBaseFilename, "HIST_FEATURES_") ||
61 6557 : STARTS_WITH_CI(osBaseFilename, "US_CONCISE_") ||
62 6557 : STARTS_WITH_CI(osBaseFilename, "AllNames_") ||
63 6557 : STARTS_WITH_CI(osBaseFilename,
64 6557 : "Feature_Description_History_") ||
65 6557 : STARTS_WITH_CI(osBaseFilename, "ANTARCTICA_") ||
66 6557 : STARTS_WITH_CI(osBaseFilename, "GOVT_UNITS_") ||
67 6557 : STARTS_WITH_CI(osBaseFilename, "NationalFedCodes_") ||
68 6557 : STARTS_WITH_CI(osBaseFilename, "AllStates_") ||
69 13114 : STARTS_WITH_CI(osBaseFilename, "AllStatesFedCodes_") ||
70 6557 : (osBaseFilename.size() > 2 &&
71 13114 : STARTS_WITH_CI(osBaseFilename + 2, "_Features_")) ||
72 6557 : (osBaseFilename.size() > 2 &&
73 13114 : STARTS_WITH_CI(osBaseFilename + 2, "_FedCodes_"))) &&
74 0 : (EQUAL(osExt, "txt") || EQUAL(osExt, "zip")))
75 : {
76 0 : return TRUE;
77 : }
78 13112 : else if (EQUAL(osBaseFilename, "allCountries.txt") ||
79 6555 : EQUAL(osBaseFilename, "allCountries.zip"))
80 : {
81 2 : return TRUE;
82 : }
83 12253 : else if (EQUAL(osExt, "csv") || EQUAL(osExt, "tsv") ||
84 5698 : EQUAL(osExt, "psv"))
85 : {
86 859 : return TRUE;
87 : }
88 5706 : else if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") &&
89 10 : EQUAL(osExt, "zip"))
90 : {
91 2 : return -1; // Unsure.
92 : }
93 : else
94 : {
95 5694 : return FALSE;
96 : }
97 : }
98 53390 : else if (STARTS_WITH_CI(poOpenInfo->pszFilename, "CSV:"))
99 : {
100 2 : return TRUE;
101 : }
102 53388 : else if (poOpenInfo->bIsDirectory)
103 : {
104 1677 : if (poOpenInfo->IsSingleAllowedDriver("CSV"))
105 2 : return TRUE;
106 :
107 1675 : return -1; // Unsure.
108 : }
109 :
110 51711 : return FALSE;
111 : }
112 :
113 : /************************************************************************/
114 : /* OGRCSVDriverRemoveFromMap() */
115 : /************************************************************************/
116 :
117 272 : void OGRCSVDriverRemoveFromMap(const char *pszName, GDALDataset *poDS)
118 : {
119 272 : if (poMap == nullptr)
120 71 : return;
121 402 : CPLMutexHolderD(&hMutex);
122 201 : std::map<CPLString, GDALDataset *>::iterator oIter = poMap->find(pszName);
123 201 : if (oIter != poMap->end())
124 : {
125 85 : GDALDataset *poOtherDS = oIter->second;
126 85 : if (poDS == poOtherDS)
127 85 : poMap->erase(oIter);
128 : }
129 : }
130 :
131 : /************************************************************************/
132 : /* Open() */
133 : /************************************************************************/
134 :
135 1275 : static GDALDataset *OGRCSVDriverOpen(GDALOpenInfo *poOpenInfo)
136 :
137 : {
138 1275 : if (!OGRCSVDriverIdentify(poOpenInfo))
139 0 : return nullptr;
140 :
141 1275 : if (poMap != nullptr)
142 : {
143 1650 : CPLMutexHolderD(&hMutex);
144 : std::map<CPLString, GDALDataset *>::iterator oIter =
145 825 : poMap->find(poOpenInfo->pszFilename);
146 825 : if (oIter != poMap->end())
147 : {
148 2 : GDALDataset *poOtherDS = oIter->second;
149 2 : poOtherDS->FlushCache(false);
150 : }
151 : }
152 :
153 1275 : auto poDSUniquePtr = std::make_unique<OGRCSVDataSource>();
154 :
155 2550 : if (!poDSUniquePtr->Open(poOpenInfo->pszFilename,
156 1275 : poOpenInfo->eAccess == GA_Update, false,
157 1275 : poOpenInfo->papszOpenOptions,
158 1275 : poOpenInfo->IsSingleAllowedDriver("CSV")))
159 : {
160 672 : poDSUniquePtr.reset();
161 : }
162 :
163 1275 : auto poDS = poDSUniquePtr.release();
164 :
165 1275 : if (poOpenInfo->eAccess == GA_Update && poDS != nullptr)
166 : {
167 170 : CPLMutexHolderD(&hMutex);
168 85 : if (poMap == nullptr)
169 10 : poMap = new std::map<CPLString, GDALDataset *>();
170 85 : if (poMap->find(poOpenInfo->pszFilename) == poMap->end())
171 : {
172 85 : (*poMap)[poOpenInfo->pszFilename] = poDS;
173 : }
174 : }
175 :
176 1275 : return poDS;
177 : }
178 :
179 : /************************************************************************/
180 : /* Create() */
181 : /************************************************************************/
182 :
183 : static GDALDataset *
184 117 : OGRCSVDriverCreate(const char *pszName, CPL_UNUSED int nBands,
185 : CPL_UNUSED int nXSize, CPL_UNUSED int nYSize,
186 : CPL_UNUSED GDALDataType eDT, CSLConstList papszOptions)
187 : {
188 : // First, ensure there isn't any such file yet.
189 : VSIStatBufL sStatBuf;
190 :
191 117 : if (strcmp(pszName, "/dev/stdout") == 0)
192 0 : pszName = "/vsistdout/";
193 :
194 117 : if (VSIStatL(pszName, &sStatBuf) == 0)
195 : {
196 0 : CPLError(CE_Failure, CPLE_AppDefined,
197 : "It seems a file system object called '%s' already exists.",
198 : pszName);
199 :
200 0 : return nullptr;
201 : }
202 :
203 : // If the target is not a simple .csv then create it as a directory.
204 234 : CPLString osDirName;
205 :
206 117 : if (EQUAL(CPLGetExtensionSafe(pszName).c_str(), "csv"))
207 : {
208 78 : osDirName = CPLGetPathSafe(pszName);
209 78 : if (osDirName == "")
210 0 : osDirName = ".";
211 :
212 : // HACK: CPLGetPathSafe("/vsimem/foo.csv") = "/vsimem", but this is not
213 : // recognized afterwards as a valid directory name.
214 78 : if (osDirName == "/vsimem")
215 17 : osDirName = "/vsimem/";
216 : }
217 : else
218 : {
219 39 : if (STARTS_WITH(pszName, "/vsizip/"))
220 : {
221 : // Do nothing.
222 : }
223 39 : else if (!EQUAL(pszName, "/vsistdout/") && VSIMkdir(pszName, 0755) != 0)
224 : {
225 1 : CPLError(CE_Failure, CPLE_AppDefined,
226 : "Failed to create directory %s:\n%s", pszName,
227 1 : VSIStrerror(errno));
228 1 : return nullptr;
229 : }
230 38 : osDirName = pszName;
231 : }
232 :
233 : // Force it to open as a datasource.
234 232 : auto poDS = std::make_unique<OGRCSVDataSource>();
235 :
236 116 : if (EQUAL(CPLGetExtensionSafe(pszName).c_str(), "csv"))
237 : {
238 78 : poDS->CreateForSingleFile(osDirName, pszName);
239 : }
240 38 : else if (!poDS->Open(osDirName, /* bUpdate = */ true,
241 : /* bForceAccept = */ true, nullptr,
242 : /* bSingleDriver = */ true))
243 : {
244 0 : return nullptr;
245 : }
246 :
247 116 : const char *pszGeometry = CSLFetchNameValue(papszOptions, "GEOMETRY");
248 116 : if (pszGeometry != nullptr && EQUAL(pszGeometry, "AS_WKT"))
249 24 : poDS->EnableGeometryFields();
250 :
251 116 : return poDS.release();
252 : }
253 :
254 : /************************************************************************/
255 : /* Delete() */
256 : /************************************************************************/
257 :
258 22 : static CPLErr OGRCSVDriverDelete(const char *pszFilename)
259 :
260 : {
261 22 : return CPLUnlinkTree(pszFilename) == 0 ? CE_None : CE_Failure;
262 : }
263 :
264 : /************************************************************************/
265 : /* OGRCSVDriverUnload() */
266 : /************************************************************************/
267 :
268 1263 : static void OGRCSVDriverUnload(GDALDriver *)
269 : {
270 1263 : if (hMutex != nullptr)
271 4 : CPLDestroyMutex(hMutex);
272 1263 : hMutex = nullptr;
273 1263 : delete poMap;
274 1263 : poMap = nullptr;
275 1263 : }
276 :
277 : /************************************************************************/
278 : /* RegisterOGRCSV() */
279 : /************************************************************************/
280 :
281 : #define XSTRINGIFY(x) #x
282 : #define STRINGIFY(x) XSTRINGIFY(x)
283 :
284 2068 : void RegisterOGRCSV()
285 :
286 : {
287 2068 : if (GDALGetDriverByName("CSV") != nullptr)
288 263 : return;
289 :
290 1805 : GDALDriver *poDriver = new GDALDriver();
291 :
292 1805 : poDriver->SetDescription("CSV");
293 1805 : poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
294 1805 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
295 1805 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES");
296 1805 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
297 1805 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES");
298 1805 : poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES");
299 1805 : poDriver->SetMetadataItem(GDAL_DMD_CREATION_FIELD_DEFN_FLAGS,
300 1805 : "WidthPrecision");
301 1805 : poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS,
302 1805 : "Name Type WidthPrecision");
303 1805 : poDriver->SetMetadataItem(GDAL_DCAP_MULTIPLE_VECTOR_LAYERS_IN_DIRECTORY,
304 1805 : "YES");
305 :
306 1805 : poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES");
307 1805 : poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
308 1805 : poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
309 :
310 1805 : poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
311 1805 : "Comma Separated Value (.csv)");
312 1805 : poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "csv tsv psv");
313 1805 : poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/csv.html");
314 1805 : poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
315 1805 : poDriver->SetMetadataItem(GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_SIGN,
316 1805 : "YES");
317 1805 : poDriver->SetMetadataItem(
318 1805 : GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_DECIMAL_SEPARATOR, "YES");
319 :
320 1805 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST,
321 : "<CreationOptionList>"
322 : " <Option name='GEOMETRY' type='string-select' "
323 : "description='how to encode geometry fields'>"
324 : " <Value>AS_WKT</Value>"
325 : " </Option>"
326 1805 : "</CreationOptionList>");
327 :
328 1805 : poDriver->SetMetadataItem(
329 : GDAL_DS_LAYER_CREATIONOPTIONLIST,
330 : "<LayerCreationOptionList>"
331 : " <Option name='SEPARATOR' type='string-select' description='field "
332 : "separator' default='COMMA'>"
333 : " <Value>COMMA</Value>"
334 : " <Value>SEMICOLON</Value>"
335 : " <Value>TAB</Value>"
336 : " <Value>SPACE</Value>"
337 : " </Option>"
338 : #ifdef _WIN32
339 : " <Option name='LINEFORMAT' type='string-select' "
340 : "description='end-of-line sequence' default='CRLF'>"
341 : #else
342 : " <Option name='LINEFORMAT' type='string-select' "
343 : "description='end-of-line sequence' default='LF'>"
344 : #endif
345 : " <Value>CRLF</Value>"
346 : " <Value>LF</Value>"
347 : " </Option>"
348 : " <Option name='GEOMETRY' type='string-select' description='how to "
349 : "encode geometry fields'>"
350 : " <Value>AS_WKT</Value>"
351 : " <Value>AS_XYZ</Value>"
352 : " <Value>AS_XY</Value>"
353 : " <Value>AS_YX</Value>"
354 : " <Value>NONE</Value>"
355 : " </Option>"
356 : " <Option name='CREATE_CSVT' type='boolean' description='whether to "
357 : "create a .csvt file' default='NO'/>"
358 : " <Option name='HEADER' alias='HEADERS' type='boolean' "
359 : "description='Whether the first line of the file contains column names "
360 : "or not' default='YES'/>"
361 : " <Option name='WRITE_BOM' type='boolean' description='whether to "
362 : "write a UTF-8 BOM prefix' default='NO'/>"
363 : " <Option name='GEOMETRY_NAME' type='string' description='Name of "
364 : "geometry column. Only used if GEOMETRY=AS_WKT' default='WKT'/>"
365 : " <Option name='STRING_QUOTING' type='string-select' "
366 : "description='whether to double-quote strings. IF_AMBIGUOUS means that "
367 : "string values that look like numbers will be quoted (it also implies "
368 : "IF_NEEDED).' default='IF_AMBIGUOUS'>"
369 : " <Value>IF_NEEDED</Value>"
370 : " <Value>IF_AMBIGUOUS</Value>"
371 : " <Value>ALWAYS</Value>"
372 : " </Option>"
373 1805 : "</LayerCreationOptionList>");
374 :
375 1805 : poDriver->SetMetadataItem(
376 : GDAL_DMD_OPENOPTIONLIST,
377 : "<OpenOptionList>"
378 : " <Option name='SEPARATOR' type='string-select' "
379 : "description='field separator' default='AUTO'>"
380 : " <Value>AUTO</Value>"
381 : " <Value>COMMA</Value>"
382 : " <Value>SEMICOLON</Value>"
383 : " <Value>TAB</Value>"
384 : " <Value>SPACE</Value>"
385 : " <Value>PIPE</Value>"
386 : " </Option>"
387 : " <Option name='MERGE_SEPARATOR' type='boolean' description='whether "
388 : "to merge consecutive separators' default='NO'/>"
389 : " <Option name='AUTODETECT_TYPE' type='boolean' description='whether "
390 : "to guess data type from first bytes of the file' default='NO'/>"
391 : " <Option name='KEEP_SOURCE_COLUMNS' type='boolean' "
392 : "description='whether to add original columns whose guessed data type "
393 : "is not String. Only used if AUTODETECT_TYPE=YES' default='NO'/>"
394 : " <Option name='AUTODETECT_WIDTH' type='string-select' "
395 : "description='whether to auto-detect width/precision. Only used if "
396 : "AUTODETECT_TYPE=YES' default='NO'>"
397 : " <Value>YES</Value>"
398 : " <Value>NO</Value>"
399 : " <Value>STRING_ONLY</Value>"
400 : " </Option>"
401 : " <Option name='AUTODETECT_SIZE_LIMIT' type='int' description='number "
402 : "of bytes to inspect for auto-detection of data type. Only used if "
403 : "AUTODETECT_TYPE=YES' default='1000000'/>"
404 : " <Option name='QUOTED_FIELDS_AS_STRING' type='boolean' "
405 : "description='Only used if AUTODETECT_TYPE=YES. Whether to enforce "
406 : "quoted fields as string fields.' default='NO'/>"
407 : " <Option name='X_POSSIBLE_NAMES' type='string' description='Comma "
408 : "separated list of possible names for X/longitude coordinate of a "
409 : "point.'/>"
410 : " <Option name='Y_POSSIBLE_NAMES' type='string' description='Comma "
411 : "separated list of possible names for Y/latitude coordinate of a "
412 : "point.'/>"
413 : " <Option name='Z_POSSIBLE_NAMES' type='string' description='Comma "
414 : "separated list of possible names for Z/elevation coordinate of a "
415 : "point.'/>"
416 : " <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma "
417 : "separated list of possible names for geometry columns.' "
418 : "default='WKT'/>"
419 : " <Option name='KEEP_GEOM_COLUMNS' type='boolean' "
420 : "description='whether to add original x/y/geometry columns as regular "
421 : "fields.' default='YES'/>"
422 : " <Option name='HEADERS' type='string-select' description='Whether "
423 : "the first line of the file contains column names or not' "
424 : "default='AUTO'>"
425 : " <Value>YES</Value>"
426 : " <Value>NO</Value>"
427 : " <Value>AUTO</Value>"
428 : " </Option>"
429 : " <Option name='EMPTY_STRING_AS_NULL' type='boolean' "
430 : "description='Whether to consider empty strings as null fields on "
431 : "reading' default='NO'/>"
432 : " <Option name='MAX_LINE_SIZE' type='int' description='Maximum number "
433 : "of bytes for a line (-1=unlimited)' default='" STRINGIFY(
434 : OGR_CSV_DEFAULT_MAX_LINE_SIZE) "'/>"
435 : " <Option name='OGR_SCHEMA' "
436 : "type='string' description='"
437 : "Partially or totally overrides the "
438 : "auto-detected schema to use for "
439 : "creating the layer. "
440 : "The overrides are defined as a "
441 : "JSON list of field definitions. "
442 : "This can be a filename or a JSON "
443 : "string or a URL.'/>"
444 1805 : "</OpenOptionList>");
445 :
446 1805 : poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
447 1805 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
448 : "Integer Integer64 Real String Date DateTime "
449 : "Time IntegerList Integer64List RealList "
450 1805 : "StringList");
451 1805 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES,
452 1805 : "Boolean Int16 Float32");
453 1805 : poDriver->SetMetadataItem(GDAL_DCAP_HONOR_GEOM_COORDINATE_PRECISION, "YES");
454 :
455 1805 : poDriver->SetMetadataItem(GDAL_DCAP_UPDATE, "YES");
456 1805 : poDriver->SetMetadataItem(GDAL_DMD_UPDATE_ITEMS, "Features");
457 :
458 1805 : poDriver->pfnOpen = OGRCSVDriverOpen;
459 1805 : poDriver->pfnIdentify = OGRCSVDriverIdentify;
460 1805 : poDriver->pfnCreate = OGRCSVDriverCreate;
461 1805 : poDriver->pfnDelete = OGRCSVDriverDelete;
462 1805 : poDriver->pfnUnloadDriver = OGRCSVDriverUnload;
463 :
464 1805 : GetGDALDriverManager()->RegisterDriver(poDriver);
465 : }
|