LCOV - code coverage report
Current view: top level - ogr/ogrsf_frmts/csv - ogrcsvdriver.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 144 154 93.5 %
Date: 2024-05-02 22:57:13 Functions: 7 7 100.0 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  CSV Translator
       4             :  * Purpose:  Implements OGRCSVDriver.
       5             :  * Author:   Frank Warmerdam, warmerdam@pobox.com
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
       9             :  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * Permission is hereby granted, free of charge, to any person obtaining a
      12             :  * copy of this software and associated documentation files (the "Software"),
      13             :  * to deal in the Software without restriction, including without limitation
      14             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      15             :  * and/or sell copies of the Software, and to permit persons to whom the
      16             :  * Software is furnished to do so, subject to the following conditions:
      17             :  *
      18             :  * The above copyright notice and this permission notice shall be included
      19             :  * in all copies or substantial portions of the Software.
      20             :  *
      21             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      22             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      23             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
      24             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      25             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      26             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      27             :  * DEALINGS IN THE SOFTWARE.
      28             :  ****************************************************************************/
      29             : 
      30             : #include "cpl_port.h"
      31             : #include "ogr_csv.h"
      32             : 
      33             : #include <cerrno>
      34             : #include <cstring>
      35             : #include <map>
      36             : #include <string>
      37             : #include <utility>
      38             : 
      39             : #include "cpl_conv.h"
      40             : #include "cpl_error.h"
      41             : #include "cpl_multiproc.h"
      42             : #include "cpl_string.h"
      43             : #include "cpl_vsi.h"
      44             : #include "gdal.h"
      45             : #include "gdal_priv.h"
      46             : 
      47             : static CPLMutex *hMutex = nullptr;
                       // hMutex (above) is taken through CPLMutexHolderD around the find,
                       // erase and insert calls made on poMap in this file.
                       // poMap (below) is keyed by the filename given to the open callback and
                       // stores the dataset that was opened on it in update mode. It is
                       // allocated on first use in OGRCSVDriverOpen and deleted in
                       // OGRCSVDriverUnload.
      48             : static std::map<CPLString, GDALDataset *> *poMap = nullptr;
      49             : 
      50             : /************************************************************************/
      51             : /*                         OGRCSVDriverIdentify()                       */
      52             : /************************************************************************/
                       // Decides from the file name alone whether this driver should handle
                       // poOpenInfo. The function uses pszFilename, the extension returned by
                       // OGRCSVDataSource::GetRealExtension, and the fpL and bIsDirectory
                       // members; it does not read from fpL itself.
                       //
                       // Returns TRUE for a recognized name, FALSE for a rejected one, and -1
                       // (unsure) for a zip reached through /vsizip/ or for a directory.
      53             : 
      54       46790 : static int OGRCSVDriverIdentify(GDALOpenInfo *poOpenInfo)
      55             : 
      56             : {
                           // Case 1: a file handle is available, so classify by base name and
                           // extension.
      57       46790 :     if (poOpenInfo->fpL != nullptr)
      58             :     {
      59             :         const CPLString osBaseFilename =
      60        9922 :             CPLGetFilename(poOpenInfo->pszFilename);
      61             :         const CPLString osExt =
      62        9922 :             OGRCSVDataSource::GetRealExtension(poOpenInfo->pszFilename);
      63             : 
                               // Four fixed base names are accepted without any extension test.
      64        4961 :         if (EQUAL(osBaseFilename, "NfdcFacilities.xls") ||
      65        4961 :             EQUAL(osBaseFilename, "NfdcRunways.xls") ||
      66       14883 :             EQUAL(osBaseFilename, "NfdcRemarks.xls") ||
      67        4961 :             EQUAL(osBaseFilename, "NfdcSchedules.xls"))
      68             :         {
      69           0 :             return TRUE;
      70             :         }
                               // A list of known base-name prefixes, accepted only when the
                               // extension is txt or zip. The last two tests skip the first two
                               // characters of the base name; the size() > 2 guard keeps that
                               // offset inside the string.
      71        4961 :         else if ((STARTS_WITH_CI(osBaseFilename, "NationalFile_") ||
      72        4961 :                   STARTS_WITH_CI(osBaseFilename, "POP_PLACES_") ||
      73        4961 :                   STARTS_WITH_CI(osBaseFilename, "HIST_FEATURES_") ||
      74        4961 :                   STARTS_WITH_CI(osBaseFilename, "US_CONCISE_") ||
      75        4961 :                   STARTS_WITH_CI(osBaseFilename, "AllNames_") ||
      76        4961 :                   STARTS_WITH_CI(osBaseFilename,
      77        4961 :                                  "Feature_Description_History_") ||
      78        4961 :                   STARTS_WITH_CI(osBaseFilename, "ANTARCTICA_") ||
      79        4961 :                   STARTS_WITH_CI(osBaseFilename, "GOVT_UNITS_") ||
      80        4961 :                   STARTS_WITH_CI(osBaseFilename, "NationalFedCodes_") ||
      81        4961 :                   STARTS_WITH_CI(osBaseFilename, "AllStates_") ||
      82        9922 :                   STARTS_WITH_CI(osBaseFilename, "AllStatesFedCodes_") ||
      83        4961 :                   (osBaseFilename.size() > 2 &&
      84        9922 :                    STARTS_WITH_CI(osBaseFilename + 2, "_Features_")) ||
      85        4961 :                   (osBaseFilename.size() > 2 &&
      86        9922 :                    STARTS_WITH_CI(osBaseFilename + 2, "_FedCodes_"))) &&
      87           0 :                  (EQUAL(osExt, "txt") || EQUAL(osExt, "zip")))
      88             :         {
      89           0 :             return TRUE;
      90             :         }
                               // Two more fixed base names.
      91        9920 :         else if (EQUAL(osBaseFilename, "allCountries.txt") ||
      92        4959 :                  EQUAL(osBaseFilename, "allCountries.zip"))
      93             :         {
      94           2 :             return TRUE;
      95             :         }
                               // The regular case: one of the three delimited-text extensions.
      96        9193 :         else if (EQUAL(osExt, "csv") || EQUAL(osExt, "tsv") ||
      97        4234 :                  EQUAL(osExt, "psv"))
      98             :         {
      99         727 :             return TRUE;
     100             :         }
                               // A zip reached through /vsizip/ cannot be judged by name alone.
     101        4240 :         else if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") &&
     102           8 :                  EQUAL(osExt, "zip"))
     103             :         {
     104           2 :             return -1;  // Unsure.
     105             :         }
     106             :         else
     107             :         {
     108        4230 :             return FALSE;
     109             :         }
     110             :     }
                           // Case 2: no file handle, but the name carries the CSV: prefix.
     111       41829 :     else if (STARTS_WITH_CI(poOpenInfo->pszFilename, "CSV:"))
     112             :     {
     113         148 :         return TRUE;
     114             :     }
                           // Case 3: a directory is reported as unsure rather than accepted
                           // or rejected.
     115       41681 :     else if (poOpenInfo->bIsDirectory)
     116             :     {
     117        1228 :         return -1;  // Unsure.
     118             :     }
     119             : 
     120       40453 :     return FALSE;
     121             : }
     122             : 
     123             : /************************************************************************/
     124             : /*                        OGRCSVDriverRemoveFromMap()                   */
     125             : /************************************************************************/
                       // Erases the poMap entry stored under pszName, but only when that entry
                       // points to poDS. Nothing happens when poMap has not been allocated,
                       // when pszName is not a key, or when the key maps to another dataset.
                       //
                       // NOTE(review): the poMap null test runs before hMutex is taken; confirm
                       // that callers cannot race with the code that allocates or deletes
                       // poMap.
     126             : 
     127         245 : void OGRCSVDriverRemoveFromMap(const char *pszName, GDALDataset *poDS)
     128             : {
     129         245 :     if (poMap == nullptr)
     130          50 :         return;
     131         390 :     CPLMutexHolderD(&hMutex);
     132         195 :     std::map<CPLString, GDALDataset *>::iterator oIter = poMap->find(pszName);
     133         195 :     if (oIter != poMap->end())
     134             :     {
                               // Another dataset may be registered under the same name; leave
                               // its entry alone in that case.
     135          85 :         GDALDataset *poOtherDS = oIter->second;
     136          85 :         if (poDS == poOtherDS)
     137          85 :             poMap->erase(oIter);
     138             :     }
     139             : }
     140             : 
     141             : /************************************************************************/
     142             : /*                                Open()                                */
     143             : /************************************************************************/
                       // Open callback of the driver (assigned to pfnOpen in RegisterOGRCSV).
                       //
                       // Returns a newly allocated OGRCSVDataSource, or nullptr when
                       // OGRCSVDriverIdentify returns FALSE or when OGRCSVDataSource::Open
                       // fails. A dataset opened with GA_Update is also recorded in poMap
                       // under its filename, unless that filename is already a key.
     144             : 
     145        1048 : static GDALDataset *OGRCSVDriverOpen(GDALOpenInfo *poOpenInfo)
     146             : 
     147             : {
                           // Identify returns -1 when unsure; that value is non-zero, so only
                           // a FALSE result stops here.
     148        1048 :     if (!OGRCSVDriverIdentify(poOpenInfo))
     149           0 :         return nullptr;
     150             : 
                           // If a dataset is already recorded for this filename, flush its
                           // cache before opening the file again (presumably so that pending
                           // writes are visible to the new open; confirm).
     151        1048 :     if (poMap != nullptr)
     152             :     {
     153        1436 :         CPLMutexHolderD(&hMutex);
     154             :         std::map<CPLString, GDALDataset *>::iterator oIter =
     155         718 :             poMap->find(poOpenInfo->pszFilename);
     156         718 :         if (oIter != poMap->end())
     157             :         {
     158           2 :             GDALDataset *poOtherDS = oIter->second;
     159           2 :             poOtherDS->FlushCache(false);
     160             :         }
     161             :     }
     162             : 
     163        1048 :     OGRCSVDataSource *poDS = new OGRCSVDataSource();
     164             : 
     165        1048 :     if (!poDS->Open(poOpenInfo->pszFilename, poOpenInfo->eAccess == GA_Update,
     166             :                     FALSE, poOpenInfo->papszOpenOptions))
     167             :     {
     168         525 :         delete poDS;
     169         525 :         poDS = nullptr;
     170             :     }
     171             : 
                           // Record update-mode datasets. The map is allocated on first use
                           // and an existing entry for the same filename is kept as is.
     172        1048 :     if (poOpenInfo->eAccess == GA_Update && poDS != nullptr)
     173             :     {
     174         170 :         CPLMutexHolderD(&hMutex);
     175          85 :         if (poMap == nullptr)
     176          10 :             poMap = new std::map<CPLString, GDALDataset *>();
     177          85 :         if (poMap->find(poOpenInfo->pszFilename) == poMap->end())
     178             :         {
     179          85 :             (*poMap)[poOpenInfo->pszFilename] = poDS;
     180             :         }
     181             :     }
     182             : 
     183        1048 :     return poDS;
     184             : }
     185             : 
     186             : /************************************************************************/
     187             : /*                               Create()                               */
     188             : /************************************************************************/
                       // Create callback of the driver (assigned to pfnCreate in
                       // RegisterOGRCSV). nBands, nXSize, nYSize and eDT are unused.
                       //
                       // pszName with a csv extension produces a single-file datasource placed
                       // in the directory part of pszName. Any other name is treated as a
                       // directory: it is created with VSIMkdir unless it starts with /vsizip/
                       // or equals /vsistdout/, and is then opened as a datasource.
                       //
                       // papszOptions: only GEOMETRY is read here; the value AS_WKT turns on
                       // geometry fields on the new datasource.
                       //
                       // Returns the new datasource, or nullptr after reporting a CE_Failure
                       // when pszName already exists or the directory cannot be created, and
                       // nullptr when opening the directory as a datasource fails.
     189             : 
     190             : static GDALDataset *
     191          90 : OGRCSVDriverCreate(const char *pszName, CPL_UNUSED int nBands,
     192             :                    CPL_UNUSED int nXSize, CPL_UNUSED int nYSize,
     193             :                    CPL_UNUSED GDALDataType eDT, char **papszOptions)
     194             : {
     195             :     // First, ensure there isn't any such file yet.
     196             :     VSIStatBufL sStatBuf;
     197             : 
                           // Redirect the conventional stdout device name to the /vsistdout/
                           // virtual file name.
     198          90 :     if (strcmp(pszName, "/dev/stdout") == 0)
     199           0 :         pszName = "/vsistdout/";
     200             : 
     201          90 :     if (VSIStatL(pszName, &sStatBuf) == 0)
     202             :     {
     203           0 :         CPLError(CE_Failure, CPLE_AppDefined,
     204             :                  "It seems a file system object called '%s' already exists.",
     205             :                  pszName);
     206             : 
     207           0 :         return nullptr;
     208             :     }
     209             : 
     210             :     // If the target is not a simple .csv then create it as a directory.
     211         180 :     CPLString osDirName;
     212             : 
     213          90 :     if (EQUAL(CPLGetExtension(pszName), "csv"))
     214             :     {
                               // Single file: the datasource directory is the parent of
                               // pszName, with the current directory as fallback.
     215          57 :         osDirName = CPLGetPath(pszName);
     216          57 :         if (osDirName == "")
     217           0 :             osDirName = ".";
     218             : 
     219             :         // HACK: CPLGetPath("/vsimem/foo.csv") = "/vsimem", but this is not
     220             :         // recognized afterwards as a valid directory name.
     221          57 :         if (osDirName == "/vsimem")
     222          17 :             osDirName = "/vsimem/";
     223             :     }
     224             :     else
     225             :     {
     226          33 :         if (STARTS_WITH(pszName, "/vsizip/"))
     227             :         {
     228             :             // Do nothing.
     229             :         }
     230          33 :         else if (!EQUAL(pszName, "/vsistdout/") && VSIMkdir(pszName, 0755) != 0)
     231             :         {
     232           1 :             CPLError(CE_Failure, CPLE_AppDefined,
     233             :                      "Failed to create directory %s:\n%s", pszName,
     234           1 :                      VSIStrerror(errno));
     235           1 :             return nullptr;
     236             :         }
     237          32 :         osDirName = pszName;
     238             :     }
     239             : 
     240             :     // Force it to open as a datasource.
     241          89 :     OGRCSVDataSource *poDS = new OGRCSVDataSource();
     242             : 
                           // NOTE(review): no result of CreateForSingleFile is checked here,
                           // while a failed Open in the other branch deletes poDS; confirm
                           // that the single-file path cannot fail.
     243          89 :     if (EQUAL(CPLGetExtension(pszName), "csv"))
     244             :     {
     245          57 :         poDS->CreateForSingleFile(osDirName, pszName);
     246             :     }
     247          32 :     else if (!poDS->Open(osDirName, TRUE, TRUE))
     248             :     {
     249           0 :         delete poDS;
     250           0 :         return nullptr;
     251             :     }
     252             : 
     253          89 :     const char *pszGeometry = CSLFetchNameValue(papszOptions, "GEOMETRY");
     254          89 :     if (pszGeometry != nullptr && EQUAL(pszGeometry, "AS_WKT"))
     255          10 :         poDS->EnableGeometryFields();
     256             : 
     257          89 :     return poDS;
     258             : }
     259             : 
     260             : /************************************************************************/
     261             : /*                              Delete()                                */
     262             : /************************************************************************/
                       // Delete callback of the driver (assigned to pfnDelete in
                       // RegisterOGRCSV). Passes pszFilename to CPLUnlinkTree and returns
                       // CE_None when that call returns 0, CE_Failure otherwise.
     263             : 
     264          20 : static CPLErr OGRCSVDriverDelete(const char *pszFilename)
     265             : 
     266             : {
     267          20 :     return CPLUnlinkTree(pszFilename) == 0 ? CE_None : CE_Failure;
     268             : }
     269             : 
     270             : /************************************************************************/
     271             : /*                           OGRCSVDriverUnload()                       */
     272             : /************************************************************************/
                       // Unload callback of the driver (assigned to pfnUnloadDriver in
                       // RegisterOGRCSV). Destroys hMutex when it exists, deletes poMap, and
                       // resets both statics to nullptr. The GDALDriver argument is unused.
                       // Only the map object is deleted; the datasets it points to are not.
     273             : 
     274         852 : static void OGRCSVDriverUnload(GDALDriver *)
     275             : {
     276         852 :     if (hMutex != nullptr)
     277           4 :         CPLDestroyMutex(hMutex);
     278         852 :     hMutex = nullptr;
     279         852 :     delete poMap;
     280         852 :     poMap = nullptr;
     281         852 : }
     282             : 
     283             : /************************************************************************/
     284             : /*                           RegisterOGRCSV()                           */
     285             : /************************************************************************/
                       // XSTRINGIFY turns its argument into a string literal. STRINGIFY adds
                       // one level of macro expansion first, so that a macro argument is
                       // replaced by its value before being stringified. It is used below to
                       // embed OGR_CSV_DEFAULT_MAX_LINE_SIZE (presumably a macro from
                       // ogr_csv.h; confirm) in the open option list.
     286             : 
     287             : #define XSTRINGIFY(x) #x
     288             : #define STRINGIFY(x) XSTRINGIFY(x)
     289             : 
                       // Creates the GDALDriver named CSV, fills in its metadata and callbacks,
                       // and registers it with the driver manager. Returns immediately when a
                       // driver named CSV is already registered.
     290        1512 : void RegisterOGRCSV()
     291             : 
     292             : {
     293        1512 :     if (GDALGetDriverByName("CSV") != nullptr)
     294         295 :         return;
     295             : 
     296        1217 :     GDALDriver *poDriver = new GDALDriver();
     297             : 
                           // Capability flags and field definition flags.
     298        1217 :     poDriver->SetDescription("CSV");
     299        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
     300        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
     301        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES");
     302        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
     303        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES");
     304        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES");
     305        1217 :     poDriver->SetMetadataItem(GDAL_DMD_CREATION_FIELD_DEFN_FLAGS,
     306        1217 :                               "WidthPrecision");
     307        1217 :     poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS,
     308        1217 :                               "Name Type WidthPrecision");
     309             : 
     310        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES");
     311        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
     312        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
     313             : 
                           // Descriptive metadata: long name, extensions, help page and SQL
                           // dialects.
     314        1217 :     poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
     315        1217 :                               "Comma Separated Value (.csv)");
     316        1217 :     poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "csv tsv psv");
     317        1217 :     poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/csv.html");
     318        1217 :     poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
     319        1217 :     poDriver->SetMetadataItem(GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_SIGN,
     320        1217 :                               "YES");
     321        1217 :     poDriver->SetMetadataItem(
     322        1217 :         GDAL_DMD_NUMERIC_FIELD_WIDTH_INCLUDES_DECIMAL_SEPARATOR, "YES");
     323             : 
                           // XML list of dataset creation options. GEOMETRY with value AS_WKT
                           // is the option that OGRCSVDriverCreate reads.
     324        1217 :     poDriver->SetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST,
     325             :                               "<CreationOptionList>"
     326             :                               "  <Option name='GEOMETRY' type='string-select' "
     327             :                               "description='how to encode geometry fields'>"
     328             :                               "    <Value>AS_WKT</Value>"
     329             :                               "  </Option>"
     330        1217 :                               "</CreationOptionList>");
     331             : 
                           // XML list of layer creation options. The advertised LINEFORMAT
                           // default is CRLF when _WIN32 is defined and LF otherwise.
     332        1217 :     poDriver->SetMetadataItem(
     333             :         GDAL_DS_LAYER_CREATIONOPTIONLIST,
     334             :         "<LayerCreationOptionList>"
     335             :         "  <Option name='SEPARATOR' type='string-select' description='field "
     336             :         "separator' default='COMMA'>"
     337             :         "    <Value>COMMA</Value>"
     338             :         "    <Value>SEMICOLON</Value>"
     339             :         "    <Value>TAB</Value>"
     340             :         "    <Value>SPACE</Value>"
     341             :         "  </Option>"
     342             : #ifdef _WIN32
     343             :         "  <Option name='LINEFORMAT' type='string-select' "
     344             :         "description='end-of-line sequence' default='CRLF'>"
     345             : #else
     346             :         "  <Option name='LINEFORMAT' type='string-select' "
     347             :         "description='end-of-line sequence' default='LF'>"
     348             : #endif
     349             :         "    <Value>CRLF</Value>"
     350             :         "    <Value>LF</Value>"
     351             :         "  </Option>"
     352             :         "  <Option name='GEOMETRY' type='string-select' description='how to "
     353             :         "encode geometry fields'>"
     354             :         "    <Value>AS_WKT</Value>"
     355             :         "    <Value>AS_XYZ</Value>"
     356             :         "    <Value>AS_XY</Value>"
     357             :         "    <Value>AS_YX</Value>"
     358             :         "  </Option>"
     359             :         "  <Option name='CREATE_CSVT' type='boolean' description='whether to "
     360             :         "create a .csvt file' default='NO'/>"
     361             :         "  <Option name='WRITE_BOM' type='boolean' description='whether to "
     362             :         "write a UTF-8 BOM prefix' default='NO'/>"
     363             :         "  <Option name='GEOMETRY_NAME' type='string' description='Name of "
     364             :         "geometry column. Only used if GEOMETRY=AS_WKT' default='WKT'/>"
     365             :         "  <Option name='STRING_QUOTING' type='string-select' "
     366             :         "description='whether to double-quote strings. IF_AMBIGUOUS means that "
     367             :         "string values that look like numbers will be quoted (it also implies "
     368             :         "IF_NEEDED).' default='IF_AMBIGUOUS'>"
     369             :         "    <Value>IF_NEEDED</Value>"
     370             :         "    <Value>IF_AMBIGUOUS</Value>"
     371             :         "    <Value>ALWAYS</Value>"
     372             :         "  </Option>"
     373        1217 :         "</LayerCreationOptionList>");
     374             : 
                           // XML list of open options. The MAX_LINE_SIZE default is spliced in
                           // at compile time through STRINGIFY.
     375        1217 :     poDriver->SetMetadataItem(
     376             :         GDAL_DMD_OPENOPTIONLIST,
     377             :         "<OpenOptionList>"
     378             :         "  <Option name='SEPARATOR' type='string-select' "
     379             :         "description='field separator' default='AUTO'>"
     380             :         "    <Value>AUTO</Value>"
     381             :         "    <Value>COMMA</Value>"
     382             :         "    <Value>SEMICOLON</Value>"
     383             :         "    <Value>TAB</Value>"
     384             :         "    <Value>SPACE</Value>"
     385             :         "    <Value>PIPE</Value>"
     386             :         "  </Option>"
     387             :         "  <Option name='MERGE_SEPARATOR' type='boolean' description='whether "
     388             :         "to merge consecutive separators' default='NO'/>"
     389             :         "  <Option name='AUTODETECT_TYPE' type='boolean' description='whether "
     390             :         "to guess data type from first bytes of the file' default='NO'/>"
     391             :         "  <Option name='KEEP_SOURCE_COLUMNS' type='boolean' "
     392             :         "description='whether to add original columns whose guessed data type "
     393             :         "is not String. Only used if AUTODETECT_TYPE=YES' default='NO'/>"
     394             :         "  <Option name='AUTODETECT_WIDTH' type='string-select' "
     395             :         "description='whether to auto-detect width/precision. Only used if "
     396             :         "AUTODETECT_TYPE=YES' default='NO'>"
     397             :         "    <Value>YES</Value>"
     398             :         "    <Value>NO</Value>"
     399             :         "    <Value>STRING_ONLY</Value>"
     400             :         "  </Option>"
     401             :         "  <Option name='AUTODETECT_SIZE_LIMIT' type='int' description='number "
     402             :         "of bytes to inspect for auto-detection of data type. Only used if "
     403             :         "AUTODETECT_TYPE=YES' default='1000000'/>"
     404             :         "  <Option name='QUOTED_FIELDS_AS_STRING' type='boolean' "
     405             :         "description='Only used if AUTODETECT_TYPE=YES. Whether to enforce "
     406             :         "quoted fields as string fields.' default='NO'/>"
     407             :         "  <Option name='X_POSSIBLE_NAMES' type='string' description='Comma "
     408             :         "separated list of possible names for X/longitude coordinate of a "
     409             :         "point.'/>"
     410             :         "  <Option name='Y_POSSIBLE_NAMES' type='string' description='Comma "
     411             :         "separated list of possible names for Y/latitude coordinate of a "
     412             :         "point.'/>"
     413             :         "  <Option name='Z_POSSIBLE_NAMES' type='string' description='Comma "
     414             :         "separated list of possible names for Z/elevation coordinate of a "
     415             :         "point.'/>"
     416             :         "  <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma "
     417             :         "separated list of possible names for geometry columns.' "
     418             :         "default='WKT'/>"
     419             :         "  <Option name='KEEP_GEOM_COLUMNS' type='boolean' "
     420             :         "description='whether to add original x/y/geometry columns as regular "
     421             :         "fields.' default='YES'/>"
     422             :         "  <Option name='HEADERS' type='string-select' description='Whether "
     423             :         "the first line of the file contains column names or not' "
     424             :         "default='AUTO'>"
     425             :         "    <Value>YES</Value>"
     426             :         "    <Value>NO</Value>"
     427             :         "    <Value>AUTO</Value>"
     428             :         "  </Option>"
     429             :         "  <Option name='EMPTY_STRING_AS_NULL' type='boolean' "
     430             :         "description='Whether to consider empty strings as null fields on "
     431             :         "reading' default='NO'/>"
     432             :         "  <Option name='MAX_LINE_SIZE' type='int' description='Maximum number "
     433             :         "of bytes for a line (-1=unlimited)' default='" STRINGIFY(
     434             :             OGR_CSV_DEFAULT_MAX_LINE_SIZE) "'/>"
     435        1217 :                                            "</OpenOptionList>");
     436             : 
                           // Virtual I/O support and the field types and subtypes that can be
                           // created.
     437        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
     438        1217 :     poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
     439             :                               "Integer Integer64 Real String Date DateTime "
     440             :                               "Time IntegerList Integer64List RealList "
     441        1217 :                               "StringList");
     442        1217 :     poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES,
     443        1217 :                               "Boolean Int16 Float32");
     444        1217 :     poDriver->SetMetadataItem(GDAL_DCAP_HONOR_GEOM_COORDINATE_PRECISION, "YES");
     445             : 
                           // Install the callbacks defined earlier in this file.
     446        1217 :     poDriver->pfnOpen = OGRCSVDriverOpen;
     447        1217 :     poDriver->pfnIdentify = OGRCSVDriverIdentify;
     448        1217 :     poDriver->pfnCreate = OGRCSVDriverCreate;
     449        1217 :     poDriver->pfnDelete = OGRCSVDriverDelete;
     450        1217 :     poDriver->pfnUnloadDriver = OGRCSVDriverUnload;
     451             : 
     452        1217 :     GetGDALDriverManager()->RegisterDriver(poDriver);
     453             : }

Generated by: LCOV version 1.14