LCOV - code coverage report
Current view: top level - ogr/ogrsf_frmts/csv - ogrcsvdriver.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 149 160 93.1 %
Date: 2025-01-18 12:42:00 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  CSV Translator
       4             :  * Purpose:  Implements OGRCSVDriver.
       5             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
       9             :  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * SPDX-License-Identifier: MIT
      12             :  ****************************************************************************/
      13             : 
      14             : #include "cpl_port.h"
      15             : #include "ogr_csv.h"
      16             : 
      17             : #include <cerrno>
      18             : #include <cstring>
      19             : #include <map>
      20             : #include <string>
      21             : #include <utility>
      22             : 
      23             : #include "cpl_conv.h"
      24             : #include "cpl_error.h"
      25             : #include "cpl_multiproc.h"
      26             : #include "cpl_string.h"
      27             : #include "cpl_vsi.h"
      28             : #include "gdal.h"
      29             : #include "gdal_priv.h"
      30             : 
      31             : static CPLMutex *hMutex = nullptr;
                       // hMutex is the mutex taken (via CPLMutexHolderD) around the poMap
                       // lookups, insertions and erasures in this file; it is destroyed in
                       // OGRCSVDriverUnload().
      32             : static std::map<CPLString, GDALDataset *> *poMap = nullptr;
                       // poMap maps a dataset filename to the dataset registered for it. It is
                       // allocated on the first successful update-mode open in
                       // OGRCSVDriverOpen() and deleted in OGRCSVDriverUnload(); it stays
                       // nullptr until then.
      33             : 
      34             : /************************************************************************/
      35             : /*                         OGRCSVDriverIdentify()                       */
      36             : /************************************************************************/
      37             : 
                       // Identify callback (installed as poDriver->pfnIdentify in
                       // RegisterOGRCSV()).
                       //
                       // Returns TRUE to claim the dataset, FALSE to reject it, or -1 when
                       // unsure:
                       //  * poOpenInfo->fpL is non-null:
                       //      - TRUE if IsSingleAllowedDriver("CSV") holds;
                       //      - TRUE if the base filename is one of the hard-coded names or
                       //        prefixes tested below (the prefix list additionally requires
                       //        a txt or zip extension);
                       //      - TRUE if the extension is csv, tsv or psv;
                       //      - -1 for a path starting with "/vsizip/" whose extension is zip;
                       //      - FALSE otherwise.
                       //  * fpL is null: TRUE for a "CSV:" prefix; for a directory, TRUE if
                       //    IsSingleAllowedDriver("CSV") holds and -1 otherwise; else FALSE.
                       //
                       // NOTE(review): the special-cased names look like FAA NFDC, USGS GNIS
                       // and GeoNames exports -- confirm before documenting them as such.
      38       50591 : static int OGRCSVDriverIdentify(GDALOpenInfo *poOpenInfo)
      39             : 
      40             : {
      41       50591 :     if (poOpenInfo->fpL != nullptr)
      42             :     {
      43        5652 :         if (poOpenInfo->IsSingleAllowedDriver("CSV"))
      44         182 :             return TRUE;
      45             : 
                               // Base name and extension (as reported by
                               // OGRCSVDataSource::GetRealExtension()) drive every test below.
      46             :         const CPLString osBaseFilename =
      47       10940 :             CPLGetFilename(poOpenInfo->pszFilename);
      48             :         const CPLString osExt =
      49       10940 :             OGRCSVDataSource::GetRealExtension(poOpenInfo->pszFilename);
      50             : 
                               // Whole-name matches: accepted without looking at osExt.
      51        5470 :         if (EQUAL(osBaseFilename, "NfdcFacilities.xls") ||
      52        5470 :             EQUAL(osBaseFilename, "NfdcRunways.xls") ||
      53       16410 :             EQUAL(osBaseFilename, "NfdcRemarks.xls") ||
      54        5470 :             EQUAL(osBaseFilename, "NfdcSchedules.xls"))
      55             :         {
      56           0 :             return TRUE;
      57             :         }
                               // Prefix matches: accepted only together with a txt or zip
                               // extension. The two "+ 2" tests look for the marker after the
                               // first two characters of the base name; size() > 2 keeps that
                               // offset inside the string.
                               // NOTE(review): "osBaseFilename + 2" presumably relies on
                               // CPLString converting to const char * -- confirm.
      58        5470 :         else if ((STARTS_WITH_CI(osBaseFilename, "NationalFile_") ||
      59        5470 :                   STARTS_WITH_CI(osBaseFilename, "POP_PLACES_") ||
      60        5470 :                   STARTS_WITH_CI(osBaseFilename, "HIST_FEATURES_") ||
      61        5470 :                   STARTS_WITH_CI(osBaseFilename, "US_CONCISE_") ||
      62        5470 :                   STARTS_WITH_CI(osBaseFilename, "AllNames_") ||
      63        5470 :                   STARTS_WITH_CI(osBaseFilename,
      64        5470 :                                  "Feature_Description_History_") ||
      65        5470 :                   STARTS_WITH_CI(osBaseFilename, "ANTARCTICA_") ||
      66        5470 :                   STARTS_WITH_CI(osBaseFilename, "GOVT_UNITS_") ||
      67        5470 :                   STARTS_WITH_CI(osBaseFilename, "NationalFedCodes_") ||
      68        5470 :                   STARTS_WITH_CI(osBaseFilename, "AllStates_") ||
      69       10940 :                   STARTS_WITH_CI(osBaseFilename, "AllStatesFedCodes_") ||
      70        5470 :                   (osBaseFilename.size() > 2 &&
      71       10940 :                    STARTS_WITH_CI(osBaseFilename + 2, "_Features_")) ||
      72        5470 :                   (osBaseFilename.size() > 2 &&
      73       10940 :                    STARTS_WITH_CI(osBaseFilename + 2, "_FedCodes_"))) &&
      74           0 :                  (EQUAL(osExt, "txt") || EQUAL(osExt, "zip")))
      75             :         {
      76           0 :             return TRUE;
      77             :         }
      78       10938 :         else if (EQUAL(osBaseFilename, "allCountries.txt") ||
      79        5468 :                  EQUAL(osBaseFilename, "allCountries.zip"))
      80             :         {
      81           2 :             return TRUE;
      82             :         }
                               // Generic case: decide on the extension alone.
      83       10178 :         else if (EQUAL(osExt, "csv") || EQUAL(osExt, "tsv") ||
      84        4710 :                  EQUAL(osExt, "psv"))
      85             :         {
      86         760 :             return TRUE;
      87             :         }
                               // A zip reached through /vsizip/ that matched none of the
                               // names above: neither accepted nor rejected here.
      88        4716 :         else if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") &&
      89           8 :                  EQUAL(osExt, "zip"))
      90             :         {
      91           2 :             return -1;  // Unsure.
      92             :         }
      93             :         else
      94             :         {
      95        4706 :             return FALSE;
      96             :         }
      97             :     }
                           // fpL is null from here on: only the "CSV:" prefix or a directory
                           // can still be accepted.
      98       44939 :     else if (STARTS_WITH_CI(poOpenInfo->pszFilename, "CSV:"))
      99             :     {
     100           0 :         return TRUE;
     101             :     }
     102       44939 :     else if (poOpenInfo->bIsDirectory)
     103             :     {
     104        1327 :         if (poOpenInfo->IsSingleAllowedDriver("CSV"))
     105           0 :             return TRUE;
     106             : 
     107        1327 :         return -1;  // Unsure.
     108             :     }
     109             : 
     110       43612 :     return FALSE;
     111             : }
     112             : 
     113             : /************************************************************************/
     114             : /*                        OGRCSVDriverRemoveFromMap()                   */
     115             : /************************************************************************/
     116             : 
                       // Drop the poMap entry stored under pszName, but only if that entry
                       // still points at poDS. Does nothing when poMap was never allocated,
                       // when pszName has no entry, or when the entry refers to a different
                       // dataset.
                       //
                       // Not declared static, so it has external linkage (presumably called
                       // from the data source code -- confirm against ogr_csv.h).
                       //
                       // NOTE(review): poMap is tested against nullptr before hMutex is
                       // taken, so that first read is not covered by the lock.
     117         249 : void OGRCSVDriverRemoveFromMap(const char *pszName, GDALDataset *poDS)
     118             : {
     119         249 :     if (poMap == nullptr)
     120          50 :         return;
     121         398 :     CPLMutexHolderD(&hMutex);
     122         199 :     std::map<CPLString, GDALDataset *>::iterator oIter = poMap->find(pszName);
     123         199 :     if (oIter != poMap->end())
     124             :     {
     125          85 :         GDALDataset *poOtherDS = oIter->second;
                               // Another dataset may be registered under the same name; leave
                               // its entry alone.
     126          85 :         if (poDS == poOtherDS)
     127          85 :             poMap->erase(oIter);
     128             :     }
     129             : }
     130             : 
     131             : /************************************************************************/
     132             : /*                                Open()                                */
     133             : /************************************************************************/
     134             : 
                       // Open callback (installed as poDriver->pfnOpen in RegisterOGRCSV()).
                       //
                       // Returns a newly allocated OGRCSVDataSource, released from its
                       // unique_ptr so the caller receives a raw pointer, or nullptr when
                       // OGRCSVDriverIdentify() returns FALSE or OGRCSVDataSource::Open()
                       // fails.
                       //
                       // Side effects on the file-level registry:
                       //  - if poMap already has an entry for this filename, FlushCache(false)
                       //    is called on that dataset before the new one is opened
                       //    (presumably so the new reader sees pending writes -- confirm);
                       //  - after a successful open with eAccess == GA_Update, the new
                       //    dataset is stored in poMap unless the filename already has an
                       //    entry.
     135        1121 : static GDALDataset *OGRCSVDriverOpen(GDALOpenInfo *poOpenInfo)
     136             : 
     137             : {
                           // Identify() may also return -1 (unsure); that is non-zero, so only
                           // a definite FALSE stops here.
     138        1121 :     if (!OGRCSVDriverIdentify(poOpenInfo))
     139           0 :         return nullptr;
     140             : 
                           // NOTE(review): poMap is read here before hMutex is taken.
     141        1121 :     if (poMap != nullptr)
     142             :     {
     143        1562 :         CPLMutexHolderD(&hMutex);
     144             :         std::map<CPLString, GDALDataset *>::iterator oIter =
     145         781 :             poMap->find(poOpenInfo->pszFilename);
     146         781 :         if (oIter != poMap->end())
     147             :         {
     148           2 :             GDALDataset *poOtherDS = oIter->second;
     149           2 :             poOtherDS->FlushCache(false);
     150             :         }
     151             :     }
     152             : 
     153        1121 :     auto poDSUniquePtr = std::make_unique<OGRCSVDataSource>();
     154             : 
     155        2242 :     if (!poDSUniquePtr->Open(poOpenInfo->pszFilename,
     156        1121 :                              poOpenInfo->eAccess == GA_Update, false,
     157        1121 :                              poOpenInfo->papszOpenOptions,
     158        1121 :                              poOpenInfo->IsSingleAllowedDriver("CSV")))
     159             :     {
                               // Open failed: destroy the data source so that release() below
                               // yields nullptr.
     160         573 :         poDSUniquePtr.reset();
     161             :     }
     162             : 
     163        1121 :     auto poDS = poDSUniquePtr.release();
     164             : 
                           // Only datasets opened for update are recorded in the registry.
     165        1121 :     if (poOpenInfo->eAccess == GA_Update && poDS != nullptr)
     166             :     {
     167         170 :         CPLMutexHolderD(&hMutex);
     168          85 :         if (poMap == nullptr)
     169          10 :             poMap = new std::map<CPLString, GDALDataset *>();
                               // First registration wins: an existing entry for the same
                               // filename is kept.
     170          85 :         if (poMap->find(poOpenInfo->pszFilename) == poMap->end())
     171             :         {
     172          85 :             (*poMap)[poOpenInfo->pszFilename] = poDS;
     173             :         }
     174             :     }
     175             : 
     176        1121 :     return poDS;
     177             : }
     178             : 
     179             : /************************************************************************/
     180             : /*                               Create()                               */
     181             : /************************************************************************/
     182             : 
                       // Create callback (installed as poDriver->pfnCreate in
                       // RegisterOGRCSV()).
                       //
                       // pszName      target path; "/dev/stdout" is rewritten to "/vsistdout/".
                       // nBands, nXSize, nYSize, eDT   unused (marked CPL_UNUSED).
                       // papszOptions creation options; only GEOMETRY is read here.
                       //
                       // Returns the new data source as a raw pointer released from its
                       // unique_ptr, or nullptr (after CPLError() for the first two cases)
                       // when:
                       //  - VSIStatL() reports that pszName already exists;
                       //  - the target directory cannot be created with VSIMkdir();
                       //  - OGRCSVDataSource::Open() fails on the directory.
                       //
                       // A name with a "csv" extension is created as a single file inside its
                       // parent directory; any other name is treated as a directory.
     183             : static GDALDataset *
     184          92 : OGRCSVDriverCreate(const char *pszName, CPL_UNUSED int nBands,
     185             :                    CPL_UNUSED int nXSize, CPL_UNUSED int nYSize,
     186             :                    CPL_UNUSED GDALDataType eDT, char **papszOptions)
     187             : {
     188             :     // First, ensure there isn't any such file yet.
     189             :     VSIStatBufL sStatBuf;
     190             : 
     191          92 :     if (strcmp(pszName, "/dev/stdout") == 0)
     192           0 :         pszName = "/vsistdout/";
     193             : 
     194          92 :     if (VSIStatL(pszName, &sStatBuf) == 0)
     195             :     {
     196           0 :         CPLError(CE_Failure, CPLE_AppDefined,
     197             :                  "It seems a file system object called '%s' already exists.",
     198             :                  pszName);
     199             : 
     200           0 :         return nullptr;
     201             :     }
     202             : 
     203             :     // If the target is not a simple .csv then create it as a directory.
     204         184 :     CPLString osDirName;
     205             : 
     206          92 :     if (EQUAL(CPLGetExtensionSafe(pszName).c_str(), "csv"))
     207             :     {
                               // Single-file case: the data source directory is the file's
                               // parent, falling back to "." when the path part is empty.
     208          59 :         osDirName = CPLGetPathSafe(pszName);
     209          59 :         if (osDirName == "")
     210           0 :             osDirName = ".";
     211             : 
     212             :         // HACK: CPLGetPathSafe("/vsimem/foo.csv") = "/vsimem", but this is not
     213             :         // recognized afterwards as a valid directory name.
     214          59 :         if (osDirName == "/vsimem")
     215          17 :             osDirName = "/vsimem/";
     216             :     }
     217             :     else
     218             :     {
                               // Directory case: create the directory unless the target is
                               // under /vsizip/ or is /vsistdout/.
     219          33 :         if (STARTS_WITH(pszName, "/vsizip/"))
     220             :         {
     221             :             // Do nothing.
     222             :         }
     223          33 :         else if (!EQUAL(pszName, "/vsistdout/") && VSIMkdir(pszName, 0755) != 0)
     224             :         {
     225           1 :             CPLError(CE_Failure, CPLE_AppDefined,
     226             :                      "Failed to create directory %s:\n%s", pszName,
     227           1 :                      VSIStrerror(errno));
     228           1 :             return nullptr;
     229             :         }
     230          32 :         osDirName = pszName;
     231             :     }
     232             : 
     233             :     // Force it to open as a datasource.
     234         182 :     auto poDS = std::make_unique<OGRCSVDataSource>();
     235             : 
                           // NOTE(review): same "csv" extension test as above, evaluated a
                           // second time; the result could be computed once and reused.
     236          91 :     if (EQUAL(CPLGetExtensionSafe(pszName).c_str(), "csv"))
     237             :     {
     238          59 :         poDS->CreateForSingleFile(osDirName, pszName);
     239             :     }
     240          32 :     else if (!poDS->Open(osDirName, /* bUpdate = */ true,
     241             :                          /* bForceAccept = */ true, nullptr,
     242             :                          /* bSingleDriver = */ true))
     243             :     {
                               // NOTE(review): a directory created by VSIMkdir() above is not
                               // removed on this failure path.
     244           0 :         return nullptr;
     245             :     }
     246             : 
                           // GEOMETRY=AS_WKT is the only creation option acted upon here.
     247          91 :     const char *pszGeometry = CSLFetchNameValue(papszOptions, "GEOMETRY");
     248          91 :     if (pszGeometry != nullptr && EQUAL(pszGeometry, "AS_WKT"))
     249          10 :         poDS->EnableGeometryFields();
     250             : 
     251          91 :     return poDS.release();
     252             : }
     253             : 
     254             : /************************************************************************/
     255             : /*                              Delete()                                */
     256             : /************************************************************************/
     257             : 
                       // Delete callback (installed as poDriver->pfnDelete in
                       // RegisterOGRCSV()).
                       //
                       // Passes pszFilename to CPLUnlinkTree() and maps its result to a
                       // CPLErr: 0 becomes CE_None, anything else CE_Failure. No error
                       // message is emitted here.
     258          20 : static CPLErr OGRCSVDriverDelete(const char *pszFilename)
     259             : 
     260             : {
     261          20 :     return CPLUnlinkTree(pszFilename) == 0 ? CE_None : CE_Failure;
     262             : }
     263             : 
     264             : /************************************************************************/
     265             : /*                           OGRCSVDriverUnload()                       */
     266             : /************************************************************************/
     267             : 
                       // Unload callback (installed as poDriver->pfnUnloadDriver in
                       // RegisterOGRCSV()). The GDALDriver argument is unused.
                       //
                       // Releases the file-level state: destroys hMutex if it exists, deletes
                       // poMap, and resets both pointers to nullptr. Deleting poMap frees the
                       // map only; the GDALDataset pointers stored in it are not deleted
                       // here.
     268         941 : static void OGRCSVDriverUnload(GDALDriver *)
     269             : {
     270         941 :     if (hMutex != nullptr)
     271           4 :         CPLDestroyMutex(hMutex);
     272         941 :     hMutex = nullptr;
                           // delete on a null poMap is a no-op, so no guard is needed.
     273         941 :     delete poMap;
     274         941 :     poMap = nullptr;
     275         941 : }
     276             : 
     277             : /************************************************************************/
     278             : /*                           RegisterOGRCSV()                           */
     279             : /************************************************************************/
     280             : 
                       // Two-step stringification: STRINGIFY(x) lets the preprocessor expand
                       // x first, then XSTRINGIFY turns the expansion into a string literal.
                       // Used below to embed OGR_CSV_DEFAULT_MAX_LINE_SIZE in the open option
                       // list.
     281             : #define XSTRINGIFY(x) #x
     282             : #define STRINGIFY(x) XSTRINGIFY(x)
     283             : 
                       // Build the "CSV" driver object and register it with the driver
                       // manager.
                       //
                       // Does nothing if a driver named "CSV" is already known. Otherwise it
                       // sets the driver description, the capability and descriptive
                       // metadata items, the XML lists of dataset creation, layer creation
                       // and open options, installs the Open/Identify/Create/Delete/Unload
                       // callbacks defined in this file, and calls RegisterDriver().
     284        1682 : void RegisterOGRCSV()
     285             : 
     286             : {
     287        1682 :     if (GDALGetDriverByName("CSV") != nullptr)
     288         301 :         return;
     289             : 
     290        1381 :     GDALDriver *poDriver = new GDALDriver();
     291             : 
                           // Capability flags and field-definition flags.
     292        1381 :     poDriver->SetDescription("CSV");
     293        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
     294        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
     295        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES");
     296        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
     297        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES");
     298        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES");
     299        1381 :     poDriver->SetMetadataItem(GDAL_DMD_CREATION_FIELD_DEFN_FLAGS,
     300        1381 :                               "WidthPrecision");
     301        1381 :     poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS,
     302        1381 :                               "Name Type WidthPrecision");
     303             : 
     304        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES");
     305        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
     306        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
     307             : 
                           // Descriptive metadata: long name, extensions, help page, SQL
                           // dialects and numeric width conventions.
     308        1381 :     poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
     309        1381 :                               "Comma Separated Value (.csv)");
     310        1381 :     poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "csv tsv psv");
     311        1381 :     poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/csv.html");
     312        1381 :     poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
     313        1381 :     poDriver->SetMetadataItem(GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_SIGN,
     314        1381 :                               "YES");
     315        1381 :     poDriver->SetMetadataItem(
     316        1381 :         GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_DECIMAL_SEPARATOR, "YES");
     317             : 
                           // Dataset creation options; GEOMETRY is the option read by
                           // OGRCSVDriverCreate().
     318        1381 :     poDriver->SetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST,
     319             :                               "<CreationOptionList>"
     320             :                               "  <Option name='GEOMETRY' type='string-select' "
     321             :                               "description='how to encode geometry fields'>"
     322             :                               "    <Value>AS_WKT</Value>"
     323             :                               "  </Option>"
     324        1381 :                               "</CreationOptionList>");
     325             : 
                           // Layer creation options. The advertised LINEFORMAT default is
                           // chosen at compile time: CRLF when _WIN32 is defined, LF
                           // otherwise.
     326        1381 :     poDriver->SetMetadataItem(
     327             :         GDAL_DS_LAYER_CREATIONOPTIONLIST,
     328             :         "<LayerCreationOptionList>"
     329             :         "  <Option name='SEPARATOR' type='string-select' description='field "
     330             :         "separator' default='COMMA'>"
     331             :         "    <Value>COMMA</Value>"
     332             :         "    <Value>SEMICOLON</Value>"
     333             :         "    <Value>TAB</Value>"
     334             :         "    <Value>SPACE</Value>"
     335             :         "  </Option>"
     336             : #ifdef _WIN32
     337             :         "  <Option name='LINEFORMAT' type='string-select' "
     338             :         "description='end-of-line sequence' default='CRLF'>"
     339             : #else
     340             :         "  <Option name='LINEFORMAT' type='string-select' "
     341             :         "description='end-of-line sequence' default='LF'>"
     342             : #endif
     343             :         "    <Value>CRLF</Value>"
     344             :         "    <Value>LF</Value>"
     345             :         "  </Option>"
     346             :         "  <Option name='GEOMETRY' type='string-select' description='how to "
     347             :         "encode geometry fields'>"
     348             :         "    <Value>AS_WKT</Value>"
     349             :         "    <Value>AS_XYZ</Value>"
     350             :         "    <Value>AS_XY</Value>"
     351             :         "    <Value>AS_YX</Value>"
     352             :         "  </Option>"
     353             :         "  <Option name='CREATE_CSVT' type='boolean' description='whether to "
     354             :         "create a .csvt file' default='NO'/>"
     355             :         "  <Option name='WRITE_BOM' type='boolean' description='whether to "
     356             :         "write a UTF-8 BOM prefix' default='NO'/>"
     357             :         "  <Option name='GEOMETRY_NAME' type='string' description='Name of "
     358             :         "geometry column. Only used if GEOMETRY=AS_WKT' default='WKT'/>"
     359             :         "  <Option name='STRING_QUOTING' type='string-select' "
     360             :         "description='whether to double-quote strings. IF_AMBIGUOUS means that "
     361             :         "string values that look like numbers will be quoted (it also implies "
     362             :         "IF_NEEDED).' default='IF_AMBIGUOUS'>"
     363             :         "    <Value>IF_NEEDED</Value>"
     364             :         "    <Value>IF_AMBIGUOUS</Value>"
     365             :         "    <Value>ALWAYS</Value>"
     366             :         "  </Option>"
     367        1381 :         "</LayerCreationOptionList>");
     368             : 
                           // Open options. The MAX_LINE_SIZE default is spliced in from
                           // OGR_CSV_DEFAULT_MAX_LINE_SIZE through STRINGIFY().
     369        1381 :     poDriver->SetMetadataItem(
     370             :         GDAL_DMD_OPENOPTIONLIST,
     371             :         "<OpenOptionList>"
     372             :         "  <Option name='SEPARATOR' type='string-select' "
     373             :         "description='field separator' default='AUTO'>"
     374             :         "    <Value>AUTO</Value>"
     375             :         "    <Value>COMMA</Value>"
     376             :         "    <Value>SEMICOLON</Value>"
     377             :         "    <Value>TAB</Value>"
     378             :         "    <Value>SPACE</Value>"
     379             :         "    <Value>PIPE</Value>"
     380             :         "  </Option>"
     381             :         "  <Option name='MERGE_SEPARATOR' type='boolean' description='whether "
     382             :         "to merge consecutive separators' default='NO'/>"
     383             :         "  <Option name='AUTODETECT_TYPE' type='boolean' description='whether "
     384             :         "to guess data type from first bytes of the file' default='NO'/>"
     385             :         "  <Option name='KEEP_SOURCE_COLUMNS' type='boolean' "
     386             :         "description='whether to add original columns whose guessed data type "
     387             :         "is not String. Only used if AUTODETECT_TYPE=YES' default='NO'/>"
     388             :         "  <Option name='AUTODETECT_WIDTH' type='string-select' "
     389             :         "description='whether to auto-detect width/precision. Only used if "
     390             :         "AUTODETECT_TYPE=YES' default='NO'>"
     391             :         "    <Value>YES</Value>"
     392             :         "    <Value>NO</Value>"
     393             :         "    <Value>STRING_ONLY</Value>"
     394             :         "  </Option>"
     395             :         "  <Option name='AUTODETECT_SIZE_LIMIT' type='int' description='number "
     396             :         "of bytes to inspect for auto-detection of data type. Only used if "
     397             :         "AUTODETECT_TYPE=YES' default='1000000'/>"
     398             :         "  <Option name='QUOTED_FIELDS_AS_STRING' type='boolean' "
     399             :         "description='Only used if AUTODETECT_TYPE=YES. Whether to enforce "
     400             :         "quoted fields as string fields.' default='NO'/>"
     401             :         "  <Option name='X_POSSIBLE_NAMES' type='string' description='Comma "
     402             :         "separated list of possible names for X/longitude coordinate of a "
     403             :         "point.'/>"
     404             :         "  <Option name='Y_POSSIBLE_NAMES' type='string' description='Comma "
     405             :         "separated list of possible names for Y/latitude coordinate of a "
     406             :         "point.'/>"
     407             :         "  <Option name='Z_POSSIBLE_NAMES' type='string' description='Comma "
     408             :         "separated list of possible names for Z/elevation coordinate of a "
     409             :         "point.'/>"
     410             :         "  <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma "
     411             :         "separated list of possible names for geometry columns.' "
     412             :         "default='WKT'/>"
     413             :         "  <Option name='KEEP_GEOM_COLUMNS' type='boolean' "
     414             :         "description='whether to add original x/y/geometry columns as regular "
     415             :         "fields.' default='YES'/>"
     416             :         "  <Option name='HEADERS' type='string-select' description='Whether "
     417             :         "the first line of the file contains column names or not' "
     418             :         "default='AUTO'>"
     419             :         "    <Value>YES</Value>"
     420             :         "    <Value>NO</Value>"
     421             :         "    <Value>AUTO</Value>"
     422             :         "  </Option>"
     423             :         "  <Option name='EMPTY_STRING_AS_NULL' type='boolean' "
     424             :         "description='Whether to consider empty strings as null fields on "
     425             :         "reading' default='NO'/>"
     426             :         "  <Option name='MAX_LINE_SIZE' type='int' description='Maximum number "
     427             :         "of bytes for a line (-1=unlimited)' default='" STRINGIFY(
     428             :             OGR_CSV_DEFAULT_MAX_LINE_SIZE) "'/>"
     429             :                                            "  <Option name='OGR_SCHEMA' "
     430             :                                            "type='string' description='"
     431             :                                            "Partially or totally overrides the "
     432             :                                            "auto-detected schema to use for "
     433             :                                            "creating the layer. "
     434             :                                            "The overrides are defined as a "
     435             :                                            "JSON list of field definitions. "
     436             :                                            "This can be a filename or a JSON "
     437             :                                            "string or a URL.'/>"
     438        1381 :                                            "</OpenOptionList>");
     439             : 
     440        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
     441        1381 :     poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
     442             :                               "Integer Integer64 Real String Date DateTime "
     443             :                               "Time IntegerList Integer64List RealList "
     444        1381 :                               "StringList");
     445        1381 :     poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES,
     446        1381 :                               "Boolean Int16 Float32");
     447        1381 :     poDriver->SetMetadataItem(GDAL_DCAP_HONOR_GEOM_COORDINATE_PRECISION, "YES");
     448             : 
                           // Callbacks implemented earlier in this file.
     449        1381 :     poDriver->pfnOpen = OGRCSVDriverOpen;
     450        1381 :     poDriver->pfnIdentify = OGRCSVDriverIdentify;
     451        1381 :     poDriver->pfnCreate = OGRCSVDriverCreate;
     452        1381 :     poDriver->pfnDelete = OGRCSVDriverDelete;
     453        1381 :     poDriver->pfnUnloadDriver = OGRCSVDriverUnload;
     454             : 
     455        1381 :     GetGDALDriverManager()->RegisterDriver(poDriver);
     456             : }

Generated by: LCOV version 1.14