Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CSV Translator
4 : * Purpose: Implements OGRCSVDriver.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
9 : * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_port.h"
31 : #include "ogr_csv.h"
32 :
33 : #include <cerrno>
34 : #include <cstring>
35 : #include <map>
36 : #include <string>
37 : #include <utility>
38 :
39 : #include "cpl_conv.h"
40 : #include "cpl_error.h"
41 : #include "cpl_multiproc.h"
42 : #include "cpl_string.h"
43 : #include "cpl_vsi.h"
44 : #include "gdal.h"
45 : #include "gdal_priv.h"
46 :
// Mutex taken (through CPLMutexHolderD) around lookups, insertions and
// removals performed on poMap by the functions of this file.
static CPLMutex *hMutex = nullptr;

// Registry of datasets opened in update mode, keyed by the filename they were
// opened with. Allocated lazily by OGRCSVDriverOpen() and released by
// OGRCSVDriverUnload(); stays nullptr until the first update-mode open.
static std::map<CPLString, GDALDataset *> *poMap = nullptr;
49 :
50 : /************************************************************************/
51 : /* OGRCSVDriverIdentify() */
52 : /************************************************************************/
53 :
54 46790 : static int OGRCSVDriverIdentify(GDALOpenInfo *poOpenInfo)
55 :
56 : {
57 46790 : if (poOpenInfo->fpL != nullptr)
58 : {
59 : const CPLString osBaseFilename =
60 9922 : CPLGetFilename(poOpenInfo->pszFilename);
61 : const CPLString osExt =
62 9922 : OGRCSVDataSource::GetRealExtension(poOpenInfo->pszFilename);
63 :
64 4961 : if (EQUAL(osBaseFilename, "NfdcFacilities.xls") ||
65 4961 : EQUAL(osBaseFilename, "NfdcRunways.xls") ||
66 14883 : EQUAL(osBaseFilename, "NfdcRemarks.xls") ||
67 4961 : EQUAL(osBaseFilename, "NfdcSchedules.xls"))
68 : {
69 0 : return TRUE;
70 : }
71 4961 : else if ((STARTS_WITH_CI(osBaseFilename, "NationalFile_") ||
72 4961 : STARTS_WITH_CI(osBaseFilename, "POP_PLACES_") ||
73 4961 : STARTS_WITH_CI(osBaseFilename, "HIST_FEATURES_") ||
74 4961 : STARTS_WITH_CI(osBaseFilename, "US_CONCISE_") ||
75 4961 : STARTS_WITH_CI(osBaseFilename, "AllNames_") ||
76 4961 : STARTS_WITH_CI(osBaseFilename,
77 4961 : "Feature_Description_History_") ||
78 4961 : STARTS_WITH_CI(osBaseFilename, "ANTARCTICA_") ||
79 4961 : STARTS_WITH_CI(osBaseFilename, "GOVT_UNITS_") ||
80 4961 : STARTS_WITH_CI(osBaseFilename, "NationalFedCodes_") ||
81 4961 : STARTS_WITH_CI(osBaseFilename, "AllStates_") ||
82 9922 : STARTS_WITH_CI(osBaseFilename, "AllStatesFedCodes_") ||
83 4961 : (osBaseFilename.size() > 2 &&
84 9922 : STARTS_WITH_CI(osBaseFilename + 2, "_Features_")) ||
85 4961 : (osBaseFilename.size() > 2 &&
86 9922 : STARTS_WITH_CI(osBaseFilename + 2, "_FedCodes_"))) &&
87 0 : (EQUAL(osExt, "txt") || EQUAL(osExt, "zip")))
88 : {
89 0 : return TRUE;
90 : }
91 9920 : else if (EQUAL(osBaseFilename, "allCountries.txt") ||
92 4959 : EQUAL(osBaseFilename, "allCountries.zip"))
93 : {
94 2 : return TRUE;
95 : }
96 9193 : else if (EQUAL(osExt, "csv") || EQUAL(osExt, "tsv") ||
97 4234 : EQUAL(osExt, "psv"))
98 : {
99 727 : return TRUE;
100 : }
101 4240 : else if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") &&
102 8 : EQUAL(osExt, "zip"))
103 : {
104 2 : return -1; // Unsure.
105 : }
106 : else
107 : {
108 4230 : return FALSE;
109 : }
110 : }
111 41829 : else if (STARTS_WITH_CI(poOpenInfo->pszFilename, "CSV:"))
112 : {
113 148 : return TRUE;
114 : }
115 41681 : else if (poOpenInfo->bIsDirectory)
116 : {
117 1228 : return -1; // Unsure.
118 : }
119 :
120 40453 : return FALSE;
121 : }
122 :
123 : /************************************************************************/
124 : /* OGRCSVDriverRemoveFromMap() */
125 : /************************************************************************/
126 :
127 245 : void OGRCSVDriverRemoveFromMap(const char *pszName, GDALDataset *poDS)
128 : {
129 245 : if (poMap == nullptr)
130 50 : return;
131 390 : CPLMutexHolderD(&hMutex);
132 195 : std::map<CPLString, GDALDataset *>::iterator oIter = poMap->find(pszName);
133 195 : if (oIter != poMap->end())
134 : {
135 85 : GDALDataset *poOtherDS = oIter->second;
136 85 : if (poDS == poOtherDS)
137 85 : poMap->erase(oIter);
138 : }
139 : }
140 :
141 : /************************************************************************/
142 : /* Open() */
143 : /************************************************************************/
144 :
145 1048 : static GDALDataset *OGRCSVDriverOpen(GDALOpenInfo *poOpenInfo)
146 :
147 : {
148 1048 : if (!OGRCSVDriverIdentify(poOpenInfo))
149 0 : return nullptr;
150 :
151 1048 : if (poMap != nullptr)
152 : {
153 1436 : CPLMutexHolderD(&hMutex);
154 : std::map<CPLString, GDALDataset *>::iterator oIter =
155 718 : poMap->find(poOpenInfo->pszFilename);
156 718 : if (oIter != poMap->end())
157 : {
158 2 : GDALDataset *poOtherDS = oIter->second;
159 2 : poOtherDS->FlushCache(false);
160 : }
161 : }
162 :
163 1048 : OGRCSVDataSource *poDS = new OGRCSVDataSource();
164 :
165 1048 : if (!poDS->Open(poOpenInfo->pszFilename, poOpenInfo->eAccess == GA_Update,
166 : FALSE, poOpenInfo->papszOpenOptions))
167 : {
168 525 : delete poDS;
169 525 : poDS = nullptr;
170 : }
171 :
172 1048 : if (poOpenInfo->eAccess == GA_Update && poDS != nullptr)
173 : {
174 170 : CPLMutexHolderD(&hMutex);
175 85 : if (poMap == nullptr)
176 10 : poMap = new std::map<CPLString, GDALDataset *>();
177 85 : if (poMap->find(poOpenInfo->pszFilename) == poMap->end())
178 : {
179 85 : (*poMap)[poOpenInfo->pszFilename] = poDS;
180 : }
181 : }
182 :
183 1048 : return poDS;
184 : }
185 :
186 : /************************************************************************/
187 : /* Create() */
188 : /************************************************************************/
189 :
190 : static GDALDataset *
191 90 : OGRCSVDriverCreate(const char *pszName, CPL_UNUSED int nBands,
192 : CPL_UNUSED int nXSize, CPL_UNUSED int nYSize,
193 : CPL_UNUSED GDALDataType eDT, char **papszOptions)
194 : {
195 : // First, ensure there isn't any such file yet.
196 : VSIStatBufL sStatBuf;
197 :
198 90 : if (strcmp(pszName, "/dev/stdout") == 0)
199 0 : pszName = "/vsistdout/";
200 :
201 90 : if (VSIStatL(pszName, &sStatBuf) == 0)
202 : {
203 0 : CPLError(CE_Failure, CPLE_AppDefined,
204 : "It seems a file system object called '%s' already exists.",
205 : pszName);
206 :
207 0 : return nullptr;
208 : }
209 :
210 : // If the target is not a simple .csv then create it as a directory.
211 180 : CPLString osDirName;
212 :
213 90 : if (EQUAL(CPLGetExtension(pszName), "csv"))
214 : {
215 57 : osDirName = CPLGetPath(pszName);
216 57 : if (osDirName == "")
217 0 : osDirName = ".";
218 :
219 : // HACK: CPLGetPath("/vsimem/foo.csv") = "/vsimem", but this is not
220 : // recognized afterwards as a valid directory name.
221 57 : if (osDirName == "/vsimem")
222 17 : osDirName = "/vsimem/";
223 : }
224 : else
225 : {
226 33 : if (STARTS_WITH(pszName, "/vsizip/"))
227 : {
228 : // Do nothing.
229 : }
230 33 : else if (!EQUAL(pszName, "/vsistdout/") && VSIMkdir(pszName, 0755) != 0)
231 : {
232 1 : CPLError(CE_Failure, CPLE_AppDefined,
233 : "Failed to create directory %s:\n%s", pszName,
234 1 : VSIStrerror(errno));
235 1 : return nullptr;
236 : }
237 32 : osDirName = pszName;
238 : }
239 :
240 : // Force it to open as a datasource.
241 89 : OGRCSVDataSource *poDS = new OGRCSVDataSource();
242 :
243 89 : if (EQUAL(CPLGetExtension(pszName), "csv"))
244 : {
245 57 : poDS->CreateForSingleFile(osDirName, pszName);
246 : }
247 32 : else if (!poDS->Open(osDirName, TRUE, TRUE))
248 : {
249 0 : delete poDS;
250 0 : return nullptr;
251 : }
252 :
253 89 : const char *pszGeometry = CSLFetchNameValue(papszOptions, "GEOMETRY");
254 89 : if (pszGeometry != nullptr && EQUAL(pszGeometry, "AS_WKT"))
255 10 : poDS->EnableGeometryFields();
256 :
257 89 : return poDS;
258 : }
259 :
260 : /************************************************************************/
261 : /* Delete() */
262 : /************************************************************************/
263 :
264 20 : static CPLErr OGRCSVDriverDelete(const char *pszFilename)
265 :
266 : {
267 20 : return CPLUnlinkTree(pszFilename) == 0 ? CE_None : CE_Failure;
268 : }
269 :
270 : /************************************************************************/
271 : /* OGRCSVDriverUnload() */
272 : /************************************************************************/
273 :
274 852 : static void OGRCSVDriverUnload(GDALDriver *)
275 : {
276 852 : if (hMutex != nullptr)
277 4 : CPLDestroyMutex(hMutex);
278 852 : hMutex = nullptr;
279 852 : delete poMap;
280 852 : poMap = nullptr;
281 852 : }
282 :
283 : /************************************************************************/
284 : /* RegisterOGRCSV() */
285 : /************************************************************************/
286 :
287 : #define XSTRINGIFY(x) #x
288 : #define STRINGIFY(x) XSTRINGIFY(x)
289 :
290 1512 : void RegisterOGRCSV()
291 :
292 : {
293 1512 : if (GDALGetDriverByName("CSV") != nullptr)
294 295 : return;
295 :
296 1217 : GDALDriver *poDriver = new GDALDriver();
297 :
298 1217 : poDriver->SetDescription("CSV");
299 1217 : poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
300 1217 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
301 1217 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES");
302 1217 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
303 1217 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES");
304 1217 : poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES");
305 1217 : poDriver->SetMetadataItem(GDAL_DMD_CREATION_FIELD_DEFN_FLAGS,
306 1217 : "WidthPrecision");
307 1217 : poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS,
308 1217 : "Name Type WidthPrecision");
309 :
310 1217 : poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES");
311 1217 : poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
312 1217 : poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
313 :
314 1217 : poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
315 1217 : "Comma Separated Value (.csv)");
316 1217 : poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "csv tsv psv");
317 1217 : poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/csv.html");
318 1217 : poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
319 1217 : poDriver->SetMetadataItem(GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_SIGN,
320 1217 : "YES");
321 1217 : poDriver->SetMetadataItem(
322 1217 : GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_DECIMAL_SEPARATOR, "YES");
323 :
324 1217 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST,
325 : "<CreationOptionList>"
326 : " <Option name='GEOMETRY' type='string-select' "
327 : "description='how to encode geometry fields'>"
328 : " <Value>AS_WKT</Value>"
329 : " </Option>"
330 1217 : "</CreationOptionList>");
331 :
332 1217 : poDriver->SetMetadataItem(
333 : GDAL_DS_LAYER_CREATIONOPTIONLIST,
334 : "<LayerCreationOptionList>"
335 : " <Option name='SEPARATOR' type='string-select' description='field "
336 : "separator' default='COMMA'>"
337 : " <Value>COMMA</Value>"
338 : " <Value>SEMICOLON</Value>"
339 : " <Value>TAB</Value>"
340 : " <Value>SPACE</Value>"
341 : " </Option>"
342 : #ifdef _WIN32
343 : " <Option name='LINEFORMAT' type='string-select' "
344 : "description='end-of-line sequence' default='CRLF'>"
345 : #else
346 : " <Option name='LINEFORMAT' type='string-select' "
347 : "description='end-of-line sequence' default='LF'>"
348 : #endif
349 : " <Value>CRLF</Value>"
350 : " <Value>LF</Value>"
351 : " </Option>"
352 : " <Option name='GEOMETRY' type='string-select' description='how to "
353 : "encode geometry fields'>"
354 : " <Value>AS_WKT</Value>"
355 : " <Value>AS_XYZ</Value>"
356 : " <Value>AS_XY</Value>"
357 : " <Value>AS_YX</Value>"
358 : " </Option>"
359 : " <Option name='CREATE_CSVT' type='boolean' description='whether to "
360 : "create a .csvt file' default='NO'/>"
361 : " <Option name='WRITE_BOM' type='boolean' description='whether to "
362 : "write a UTF-8 BOM prefix' default='NO'/>"
363 : " <Option name='GEOMETRY_NAME' type='string' description='Name of "
364 : "geometry column. Only used if GEOMETRY=AS_WKT' default='WKT'/>"
365 : " <Option name='STRING_QUOTING' type='string-select' "
366 : "description='whether to double-quote strings. IF_AMBIGUOUS means that "
367 : "string values that look like numbers will be quoted (it also implies "
368 : "IF_NEEDED).' default='IF_AMBIGUOUS'>"
369 : " <Value>IF_NEEDED</Value>"
370 : " <Value>IF_AMBIGUOUS</Value>"
371 : " <Value>ALWAYS</Value>"
372 : " </Option>"
373 1217 : "</LayerCreationOptionList>");
374 :
375 1217 : poDriver->SetMetadataItem(
376 : GDAL_DMD_OPENOPTIONLIST,
377 : "<OpenOptionList>"
378 : " <Option name='SEPARATOR' type='string-select' "
379 : "description='field separator' default='AUTO'>"
380 : " <Value>AUTO</Value>"
381 : " <Value>COMMA</Value>"
382 : " <Value>SEMICOLON</Value>"
383 : " <Value>TAB</Value>"
384 : " <Value>SPACE</Value>"
385 : " <Value>PIPE</Value>"
386 : " </Option>"
387 : " <Option name='MERGE_SEPARATOR' type='boolean' description='whether "
388 : "to merge consecutive separators' default='NO'/>"
389 : " <Option name='AUTODETECT_TYPE' type='boolean' description='whether "
390 : "to guess data type from first bytes of the file' default='NO'/>"
391 : " <Option name='KEEP_SOURCE_COLUMNS' type='boolean' "
392 : "description='whether to add original columns whose guessed data type "
393 : "is not String. Only used if AUTODETECT_TYPE=YES' default='NO'/>"
394 : " <Option name='AUTODETECT_WIDTH' type='string-select' "
395 : "description='whether to auto-detect width/precision. Only used if "
396 : "AUTODETECT_TYPE=YES' default='NO'>"
397 : " <Value>YES</Value>"
398 : " <Value>NO</Value>"
399 : " <Value>STRING_ONLY</Value>"
400 : " </Option>"
401 : " <Option name='AUTODETECT_SIZE_LIMIT' type='int' description='number "
402 : "of bytes to inspect for auto-detection of data type. Only used if "
403 : "AUTODETECT_TYPE=YES' default='1000000'/>"
404 : " <Option name='QUOTED_FIELDS_AS_STRING' type='boolean' "
405 : "description='Only used if AUTODETECT_TYPE=YES. Whether to enforce "
406 : "quoted fields as string fields.' default='NO'/>"
407 : " <Option name='X_POSSIBLE_NAMES' type='string' description='Comma "
408 : "separated list of possible names for X/longitude coordinate of a "
409 : "point.'/>"
410 : " <Option name='Y_POSSIBLE_NAMES' type='string' description='Comma "
411 : "separated list of possible names for Y/latitude coordinate of a "
412 : "point.'/>"
413 : " <Option name='Z_POSSIBLE_NAMES' type='string' description='Comma "
414 : "separated list of possible names for Z/elevation coordinate of a "
415 : "point.'/>"
416 : " <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma "
417 : "separated list of possible names for geometry columns.' "
418 : "default='WKT'/>"
419 : " <Option name='KEEP_GEOM_COLUMNS' type='boolean' "
420 : "description='whether to add original x/y/geometry columns as regular "
421 : "fields.' default='YES'/>"
422 : " <Option name='HEADERS' type='string-select' description='Whether "
423 : "the first line of the file contains column names or not' "
424 : "default='AUTO'>"
425 : " <Value>YES</Value>"
426 : " <Value>NO</Value>"
427 : " <Value>AUTO</Value>"
428 : " </Option>"
429 : " <Option name='EMPTY_STRING_AS_NULL' type='boolean' "
430 : "description='Whether to consider empty strings as null fields on "
431 : "reading' default='NO'/>"
432 : " <Option name='MAX_LINE_SIZE' type='int' description='Maximum number "
433 : "of bytes for a line (-1=unlimited)' default='" STRINGIFY(
434 : OGR_CSV_DEFAULT_MAX_LINE_SIZE) "'/>"
435 1217 : "</OpenOptionList>");
436 :
437 1217 : poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
438 1217 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
439 : "Integer Integer64 Real String Date DateTime "
440 : "Time IntegerList Integer64List RealList "
441 1217 : "StringList");
442 1217 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES,
443 1217 : "Boolean Int16 Float32");
444 1217 : poDriver->SetMetadataItem(GDAL_DCAP_HONOR_GEOM_COORDINATE_PRECISION, "YES");
445 :
446 1217 : poDriver->pfnOpen = OGRCSVDriverOpen;
447 1217 : poDriver->pfnIdentify = OGRCSVDriverIdentify;
448 1217 : poDriver->pfnCreate = OGRCSVDriverCreate;
449 1217 : poDriver->pfnDelete = OGRCSVDriverDelete;
450 1217 : poDriver->pfnUnloadDriver = OGRCSVDriverUnload;
451 :
452 1217 : GetGDALDriverManager()->RegisterDriver(poDriver);
453 : }
|