Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CSV Translator
4 : * Purpose: Implements OGRCSVDriver.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
9 : * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "cpl_port.h"
15 : #include "ogr_csv.h"
16 :
17 : #include <cerrno>
18 : #include <cstring>
19 : #include <map>
20 : #include <string>
21 : #include <utility>
22 :
23 : #include "cpl_conv.h"
24 : #include "cpl_error.h"
25 : #include "cpl_multiproc.h"
26 : #include "cpl_string.h"
27 : #include "cpl_vsi.h"
28 : #include "gdal.h"
29 : #include "gdal_priv.h"
30 :
// Mutex handed to CPLMutexHolderD() around every lookup, insertion and
// removal performed on poMap. Destroyed and reset in OGRCSVDriverUnload().
31 : static CPLMutex *hMutex = nullptr;
// Registry of datasets keyed by the name they were opened with. It is
// allocated on demand by OGRCSVDriverOpen(), which only adds datasets opened
// with GA_Update access; entries are removed by OGRCSVDriverRemoveFromMap()
// and the map itself is deleted in OGRCSVDriverUnload().
32 : static std::map<CPLString, GDALDataset *> *poMap = nullptr;
33 :
34 : /************************************************************************/
35 : /* OGRCSVDriverIdentify() */
36 : /************************************************************************/
37 :
38 50591 : static int OGRCSVDriverIdentify(GDALOpenInfo *poOpenInfo)
39 :
40 : {
41 50591 : if (poOpenInfo->fpL != nullptr)
42 : {
43 5652 : if (poOpenInfo->IsSingleAllowedDriver("CSV"))
44 182 : return TRUE;
45 :
46 : const CPLString osBaseFilename =
47 10940 : CPLGetFilename(poOpenInfo->pszFilename);
48 : const CPLString osExt =
49 10940 : OGRCSVDataSource::GetRealExtension(poOpenInfo->pszFilename);
50 :
51 5470 : if (EQUAL(osBaseFilename, "NfdcFacilities.xls") ||
52 5470 : EQUAL(osBaseFilename, "NfdcRunways.xls") ||
53 16410 : EQUAL(osBaseFilename, "NfdcRemarks.xls") ||
54 5470 : EQUAL(osBaseFilename, "NfdcSchedules.xls"))
55 : {
56 0 : return TRUE;
57 : }
58 5470 : else if ((STARTS_WITH_CI(osBaseFilename, "NationalFile_") ||
59 5470 : STARTS_WITH_CI(osBaseFilename, "POP_PLACES_") ||
60 5470 : STARTS_WITH_CI(osBaseFilename, "HIST_FEATURES_") ||
61 5470 : STARTS_WITH_CI(osBaseFilename, "US_CONCISE_") ||
62 5470 : STARTS_WITH_CI(osBaseFilename, "AllNames_") ||
63 5470 : STARTS_WITH_CI(osBaseFilename,
64 5470 : "Feature_Description_History_") ||
65 5470 : STARTS_WITH_CI(osBaseFilename, "ANTARCTICA_") ||
66 5470 : STARTS_WITH_CI(osBaseFilename, "GOVT_UNITS_") ||
67 5470 : STARTS_WITH_CI(osBaseFilename, "NationalFedCodes_") ||
68 5470 : STARTS_WITH_CI(osBaseFilename, "AllStates_") ||
69 10940 : STARTS_WITH_CI(osBaseFilename, "AllStatesFedCodes_") ||
70 5470 : (osBaseFilename.size() > 2 &&
71 10940 : STARTS_WITH_CI(osBaseFilename + 2, "_Features_")) ||
72 5470 : (osBaseFilename.size() > 2 &&
73 10940 : STARTS_WITH_CI(osBaseFilename + 2, "_FedCodes_"))) &&
74 0 : (EQUAL(osExt, "txt") || EQUAL(osExt, "zip")))
75 : {
76 0 : return TRUE;
77 : }
78 10938 : else if (EQUAL(osBaseFilename, "allCountries.txt") ||
79 5468 : EQUAL(osBaseFilename, "allCountries.zip"))
80 : {
81 2 : return TRUE;
82 : }
83 10178 : else if (EQUAL(osExt, "csv") || EQUAL(osExt, "tsv") ||
84 4710 : EQUAL(osExt, "psv"))
85 : {
86 760 : return TRUE;
87 : }
88 4716 : else if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") &&
89 8 : EQUAL(osExt, "zip"))
90 : {
91 2 : return -1; // Unsure.
92 : }
93 : else
94 : {
95 4706 : return FALSE;
96 : }
97 : }
98 44939 : else if (STARTS_WITH_CI(poOpenInfo->pszFilename, "CSV:"))
99 : {
100 0 : return TRUE;
101 : }
102 44939 : else if (poOpenInfo->bIsDirectory)
103 : {
104 1327 : if (poOpenInfo->IsSingleAllowedDriver("CSV"))
105 0 : return TRUE;
106 :
107 1327 : return -1; // Unsure.
108 : }
109 :
110 43612 : return FALSE;
111 : }
112 :
113 : /************************************************************************/
114 : /* OGRCSVDriverRemoveFromMap() */
115 : /************************************************************************/
116 :
117 249 : void OGRCSVDriverRemoveFromMap(const char *pszName, GDALDataset *poDS)
118 : {
119 249 : if (poMap == nullptr)
120 50 : return;
121 398 : CPLMutexHolderD(&hMutex);
122 199 : std::map<CPLString, GDALDataset *>::iterator oIter = poMap->find(pszName);
123 199 : if (oIter != poMap->end())
124 : {
125 85 : GDALDataset *poOtherDS = oIter->second;
126 85 : if (poDS == poOtherDS)
127 85 : poMap->erase(oIter);
128 : }
129 : }
130 :
131 : /************************************************************************/
132 : /* Open() */
133 : /************************************************************************/
134 :
135 1121 : static GDALDataset *OGRCSVDriverOpen(GDALOpenInfo *poOpenInfo)
136 :
137 : {
138 1121 : if (!OGRCSVDriverIdentify(poOpenInfo))
139 0 : return nullptr;
140 :
141 1121 : if (poMap != nullptr)
142 : {
143 1562 : CPLMutexHolderD(&hMutex);
144 : std::map<CPLString, GDALDataset *>::iterator oIter =
145 781 : poMap->find(poOpenInfo->pszFilename);
146 781 : if (oIter != poMap->end())
147 : {
148 2 : GDALDataset *poOtherDS = oIter->second;
149 2 : poOtherDS->FlushCache(false);
150 : }
151 : }
152 :
153 1121 : auto poDSUniquePtr = std::make_unique<OGRCSVDataSource>();
154 :
155 2242 : if (!poDSUniquePtr->Open(poOpenInfo->pszFilename,
156 1121 : poOpenInfo->eAccess == GA_Update, false,
157 1121 : poOpenInfo->papszOpenOptions,
158 1121 : poOpenInfo->IsSingleAllowedDriver("CSV")))
159 : {
160 573 : poDSUniquePtr.reset();
161 : }
162 :
163 1121 : auto poDS = poDSUniquePtr.release();
164 :
165 1121 : if (poOpenInfo->eAccess == GA_Update && poDS != nullptr)
166 : {
167 170 : CPLMutexHolderD(&hMutex);
168 85 : if (poMap == nullptr)
169 10 : poMap = new std::map<CPLString, GDALDataset *>();
170 85 : if (poMap->find(poOpenInfo->pszFilename) == poMap->end())
171 : {
172 85 : (*poMap)[poOpenInfo->pszFilename] = poDS;
173 : }
174 : }
175 :
176 1121 : return poDS;
177 : }
178 :
179 : /************************************************************************/
180 : /* Create() */
181 : /************************************************************************/
182 :
183 : static GDALDataset *
184 92 : OGRCSVDriverCreate(const char *pszName, CPL_UNUSED int nBands,
185 : CPL_UNUSED int nXSize, CPL_UNUSED int nYSize,
186 : CPL_UNUSED GDALDataType eDT, char **papszOptions)
187 : {
188 : // First, ensure there isn't any such file yet.
189 : VSIStatBufL sStatBuf;
190 :
191 92 : if (strcmp(pszName, "/dev/stdout") == 0)
192 0 : pszName = "/vsistdout/";
193 :
194 92 : if (VSIStatL(pszName, &sStatBuf) == 0)
195 : {
196 0 : CPLError(CE_Failure, CPLE_AppDefined,
197 : "It seems a file system object called '%s' already exists.",
198 : pszName);
199 :
200 0 : return nullptr;
201 : }
202 :
203 : // If the target is not a simple .csv then create it as a directory.
204 184 : CPLString osDirName;
205 :
206 92 : if (EQUAL(CPLGetExtensionSafe(pszName).c_str(), "csv"))
207 : {
208 59 : osDirName = CPLGetPathSafe(pszName);
209 59 : if (osDirName == "")
210 0 : osDirName = ".";
211 :
212 : // HACK: CPLGetPathSafe("/vsimem/foo.csv") = "/vsimem", but this is not
213 : // recognized afterwards as a valid directory name.
214 59 : if (osDirName == "/vsimem")
215 17 : osDirName = "/vsimem/";
216 : }
217 : else
218 : {
219 33 : if (STARTS_WITH(pszName, "/vsizip/"))
220 : {
221 : // Do nothing.
222 : }
223 33 : else if (!EQUAL(pszName, "/vsistdout/") && VSIMkdir(pszName, 0755) != 0)
224 : {
225 1 : CPLError(CE_Failure, CPLE_AppDefined,
226 : "Failed to create directory %s:\n%s", pszName,
227 1 : VSIStrerror(errno));
228 1 : return nullptr;
229 : }
230 32 : osDirName = pszName;
231 : }
232 :
233 : // Force it to open as a datasource.
234 182 : auto poDS = std::make_unique<OGRCSVDataSource>();
235 :
236 91 : if (EQUAL(CPLGetExtensionSafe(pszName).c_str(), "csv"))
237 : {
238 59 : poDS->CreateForSingleFile(osDirName, pszName);
239 : }
240 32 : else if (!poDS->Open(osDirName, /* bUpdate = */ true,
241 : /* bForceAccept = */ true, nullptr,
242 : /* bSingleDriver = */ true))
243 : {
244 0 : return nullptr;
245 : }
246 :
247 91 : const char *pszGeometry = CSLFetchNameValue(papszOptions, "GEOMETRY");
248 91 : if (pszGeometry != nullptr && EQUAL(pszGeometry, "AS_WKT"))
249 10 : poDS->EnableGeometryFields();
250 :
251 91 : return poDS.release();
252 : }
253 :
254 : /************************************************************************/
255 : /* Delete() */
256 : /************************************************************************/
257 :
258 20 : static CPLErr OGRCSVDriverDelete(const char *pszFilename)
259 :
260 : {
261 20 : return CPLUnlinkTree(pszFilename) == 0 ? CE_None : CE_Failure;
262 : }
263 :
264 : /************************************************************************/
265 : /* OGRCSVDriverUnload() */
266 : /************************************************************************/
267 :
268 941 : static void OGRCSVDriverUnload(GDALDriver *)
269 : {
270 941 : if (hMutex != nullptr)
271 4 : CPLDestroyMutex(hMutex);
272 941 : hMutex = nullptr;
273 941 : delete poMap;
274 941 : poMap = nullptr;
275 941 : }
276 :
277 : /************************************************************************/
278 : /* RegisterOGRCSV() */
279 : /************************************************************************/
280 :
281 : #define XSTRINGIFY(x) #x
282 : #define STRINGIFY(x) XSTRINGIFY(x)
283 :
284 1682 : void RegisterOGRCSV()
285 :
286 : {
287 1682 : if (GDALGetDriverByName("CSV") != nullptr)
288 301 : return;
289 :
290 1381 : GDALDriver *poDriver = new GDALDriver();
291 :
292 1381 : poDriver->SetDescription("CSV");
293 1381 : poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
294 1381 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
295 1381 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES");
296 1381 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
297 1381 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES");
298 1381 : poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES");
299 1381 : poDriver->SetMetadataItem(GDAL_DMD_CREATION_FIELD_DEFN_FLAGS,
300 1381 : "WidthPrecision");
301 1381 : poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS,
302 1381 : "Name Type WidthPrecision");
303 :
304 1381 : poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES");
305 1381 : poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
306 1381 : poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
307 :
308 1381 : poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
309 1381 : "Comma Separated Value (.csv)");
310 1381 : poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "csv tsv psv");
311 1381 : poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/csv.html");
312 1381 : poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
313 1381 : poDriver->SetMetadataItem(GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_SIGN,
314 1381 : "YES");
315 1381 : poDriver->SetMetadataItem(
316 1381 : GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_DECIMAL_SEPARATOR, "YES");
317 :
318 1381 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST,
319 : "<CreationOptionList>"
320 : " <Option name='GEOMETRY' type='string-select' "
321 : "description='how to encode geometry fields'>"
322 : " <Value>AS_WKT</Value>"
323 : " </Option>"
324 1381 : "</CreationOptionList>");
325 :
326 1381 : poDriver->SetMetadataItem(
327 : GDAL_DS_LAYER_CREATIONOPTIONLIST,
328 : "<LayerCreationOptionList>"
329 : " <Option name='SEPARATOR' type='string-select' description='field "
330 : "separator' default='COMMA'>"
331 : " <Value>COMMA</Value>"
332 : " <Value>SEMICOLON</Value>"
333 : " <Value>TAB</Value>"
334 : " <Value>SPACE</Value>"
335 : " </Option>"
336 : #ifdef _WIN32
337 : " <Option name='LINEFORMAT' type='string-select' "
338 : "description='end-of-line sequence' default='CRLF'>"
339 : #else
340 : " <Option name='LINEFORMAT' type='string-select' "
341 : "description='end-of-line sequence' default='LF'>"
342 : #endif
343 : " <Value>CRLF</Value>"
344 : " <Value>LF</Value>"
345 : " </Option>"
346 : " <Option name='GEOMETRY' type='string-select' description='how to "
347 : "encode geometry fields'>"
348 : " <Value>AS_WKT</Value>"
349 : " <Value>AS_XYZ</Value>"
350 : " <Value>AS_XY</Value>"
351 : " <Value>AS_YX</Value>"
352 : " </Option>"
353 : " <Option name='CREATE_CSVT' type='boolean' description='whether to "
354 : "create a .csvt file' default='NO'/>"
355 : " <Option name='WRITE_BOM' type='boolean' description='whether to "
356 : "write a UTF-8 BOM prefix' default='NO'/>"
357 : " <Option name='GEOMETRY_NAME' type='string' description='Name of "
358 : "geometry column. Only used if GEOMETRY=AS_WKT' default='WKT'/>"
359 : " <Option name='STRING_QUOTING' type='string-select' "
360 : "description='whether to double-quote strings. IF_AMBIGUOUS means that "
361 : "string values that look like numbers will be quoted (it also implies "
362 : "IF_NEEDED).' default='IF_AMBIGUOUS'>"
363 : " <Value>IF_NEEDED</Value>"
364 : " <Value>IF_AMBIGUOUS</Value>"
365 : " <Value>ALWAYS</Value>"
366 : " </Option>"
367 1381 : "</LayerCreationOptionList>");
368 :
369 1381 : poDriver->SetMetadataItem(
370 : GDAL_DMD_OPENOPTIONLIST,
371 : "<OpenOptionList>"
372 : " <Option name='SEPARATOR' type='string-select' "
373 : "description='field separator' default='AUTO'>"
374 : " <Value>AUTO</Value>"
375 : " <Value>COMMA</Value>"
376 : " <Value>SEMICOLON</Value>"
377 : " <Value>TAB</Value>"
378 : " <Value>SPACE</Value>"
379 : " <Value>PIPE</Value>"
380 : " </Option>"
381 : " <Option name='MERGE_SEPARATOR' type='boolean' description='whether "
382 : "to merge consecutive separators' default='NO'/>"
383 : " <Option name='AUTODETECT_TYPE' type='boolean' description='whether "
384 : "to guess data type from first bytes of the file' default='NO'/>"
385 : " <Option name='KEEP_SOURCE_COLUMNS' type='boolean' "
386 : "description='whether to add original columns whose guessed data type "
387 : "is not String. Only used if AUTODETECT_TYPE=YES' default='NO'/>"
388 : " <Option name='AUTODETECT_WIDTH' type='string-select' "
389 : "description='whether to auto-detect width/precision. Only used if "
390 : "AUTODETECT_TYPE=YES' default='NO'>"
391 : " <Value>YES</Value>"
392 : " <Value>NO</Value>"
393 : " <Value>STRING_ONLY</Value>"
394 : " </Option>"
395 : " <Option name='AUTODETECT_SIZE_LIMIT' type='int' description='number "
396 : "of bytes to inspect for auto-detection of data type. Only used if "
397 : "AUTODETECT_TYPE=YES' default='1000000'/>"
398 : " <Option name='QUOTED_FIELDS_AS_STRING' type='boolean' "
399 : "description='Only used if AUTODETECT_TYPE=YES. Whether to enforce "
400 : "quoted fields as string fields.' default='NO'/>"
401 : " <Option name='X_POSSIBLE_NAMES' type='string' description='Comma "
402 : "separated list of possible names for X/longitude coordinate of a "
403 : "point.'/>"
404 : " <Option name='Y_POSSIBLE_NAMES' type='string' description='Comma "
405 : "separated list of possible names for Y/latitude coordinate of a "
406 : "point.'/>"
407 : " <Option name='Z_POSSIBLE_NAMES' type='string' description='Comma "
408 : "separated list of possible names for Z/elevation coordinate of a "
409 : "point.'/>"
410 : " <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma "
411 : "separated list of possible names for geometry columns.' "
412 : "default='WKT'/>"
413 : " <Option name='KEEP_GEOM_COLUMNS' type='boolean' "
414 : "description='whether to add original x/y/geometry columns as regular "
415 : "fields.' default='YES'/>"
416 : " <Option name='HEADERS' type='string-select' description='Whether "
417 : "the first line of the file contains column names or not' "
418 : "default='AUTO'>"
419 : " <Value>YES</Value>"
420 : " <Value>NO</Value>"
421 : " <Value>AUTO</Value>"
422 : " </Option>"
423 : " <Option name='EMPTY_STRING_AS_NULL' type='boolean' "
424 : "description='Whether to consider empty strings as null fields on "
425 : "reading' default='NO'/>"
426 : " <Option name='MAX_LINE_SIZE' type='int' description='Maximum number "
427 : "of bytes for a line (-1=unlimited)' default='" STRINGIFY(
428 : OGR_CSV_DEFAULT_MAX_LINE_SIZE) "'/>"
429 : " <Option name='OGR_SCHEMA' "
430 : "type='string' description='"
431 : "Partially or totally overrides the "
432 : "auto-detected schema to use for "
433 : "creating the layer. "
434 : "The overrides are defined as a "
435 : "JSON list of field definitions. "
436 : "This can be a filename or a JSON "
437 : "string or a URL.'/>"
438 1381 : "</OpenOptionList>");
439 :
440 1381 : poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
441 1381 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
442 : "Integer Integer64 Real String Date DateTime "
443 : "Time IntegerList Integer64List RealList "
444 1381 : "StringList");
445 1381 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES,
446 1381 : "Boolean Int16 Float32");
447 1381 : poDriver->SetMetadataItem(GDAL_DCAP_HONOR_GEOM_COORDINATE_PRECISION, "YES");
448 :
449 1381 : poDriver->pfnOpen = OGRCSVDriverOpen;
450 1381 : poDriver->pfnIdentify = OGRCSVDriverIdentify;
451 1381 : poDriver->pfnCreate = OGRCSVDriverCreate;
452 1381 : poDriver->pfnDelete = OGRCSVDriverDelete;
453 1381 : poDriver->pfnUnloadDriver = OGRCSVDriverUnload;
454 :
455 1381 : GetGDALDriverManager()->RegisterDriver(poDriver);
456 : }
|