Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: XLSX Translator
4 : * Purpose: Implements OGRXLSXDriver.
5 : * Author: Even Rouault, even dot rouault at spatialys.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2012, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include "ogr_xlsx.h"
14 : #include "cpl_conv.h"
15 :
16 : extern "C" void RegisterOGRXLSX();
17 :
18 : using namespace OGRXLSX;
19 :
20 : // g++ -DHAVE_EXPAT -g -Wall -fPIC ogr/ogrsf_frmts/xlsx/*.cpp -shared -o
21 : // ogr_XLSX.so -Iport -Igcore -Iogr -Iogr/ogrsf_frmts -Iogr/ogrsf_frmts/mem
22 : // -Iogr/ogrsf_frmts/xlsx -L. -lgdal
23 :
// Content type string searched for in the workbook's [Content_Types].xml
// before a file is accepted by the open routine of this driver.
static const char XLSX_MIMETYPE[] =
    "application/vnd.openxmlformats-officedocument."
    "spreadsheetml.worksheet+xml";
26 :
27 : /************************************************************************/
28 : /* Identify() */
29 : /************************************************************************/
30 :
31 45178 : static int OGRXLSXDriverIdentify(GDALOpenInfo *poOpenInfo)
32 : {
33 45178 : if (poOpenInfo->fpL == nullptr &&
34 42678 : STARTS_WITH_CI(poOpenInfo->pszFilename, "XLSX:"))
35 : {
36 2 : return TRUE;
37 : }
38 :
39 45176 : if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") ||
40 45165 : STARTS_WITH(poOpenInfo->pszFilename, "/vsitar/"))
41 : {
42 11 : const char *pszExt = CPLGetExtension(poOpenInfo->pszFilename);
43 11 : return EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM") ||
44 22 : EQUAL(pszExt, "XLSX}") || EQUAL(pszExt, "XLSM}");
45 : }
46 :
47 45165 : if (poOpenInfo->nHeaderBytes > 30 &&
48 2328 : memcmp(poOpenInfo->pabyHeader, "PK\x03\x04", 4) == 0)
49 : {
50 : // Fetch the first filename in the zip
51 112 : const int nFilenameLength =
52 112 : CPL_LSBUINT16PTR(poOpenInfo->pabyHeader + 26);
53 112 : if (30 + nFilenameLength > poOpenInfo->nHeaderBytes)
54 66 : return FALSE;
55 : const std::string osFilename(
56 112 : reinterpret_cast<const char *>(poOpenInfo->pabyHeader) + 30,
57 112 : nFilenameLength);
58 112 : if (STARTS_WITH(osFilename.c_str(), "xl/") ||
59 104 : STARTS_WITH(osFilename.c_str(), "_rels/") ||
60 296 : STARTS_WITH(osFilename.c_str(), "docProps/") ||
61 80 : osFilename == "[Content_Types].xml")
62 : {
63 66 : return TRUE;
64 : }
65 46 : const char *pszExt = CPLGetExtension(poOpenInfo->pszFilename);
66 46 : if (EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM"))
67 : {
68 0 : CPLDebug(
69 : "XLSX",
70 : "Identify() failed to recognize first filename in zip (%s), "
71 : "but fallback to extension matching",
72 : osFilename.c_str());
73 0 : return TRUE;
74 : }
75 : }
76 45099 : return FALSE;
77 : }
78 :
79 : /************************************************************************/
80 : /* Open() */
81 : /************************************************************************/
82 :
83 34 : static GDALDataset *OGRXLSXDriverOpen(GDALOpenInfo *poOpenInfo)
84 :
85 : {
86 34 : if (!OGRXLSXDriverIdentify(poOpenInfo))
87 0 : return nullptr;
88 :
89 34 : const char *pszFilename = poOpenInfo->pszFilename;
90 34 : if (poOpenInfo->fpL == nullptr && STARTS_WITH_CI(pszFilename, "XLSX:"))
91 : {
92 1 : pszFilename += strlen("XLSX:");
93 : }
94 68 : const bool bIsVsiZipOrTarPrefixed = STARTS_WITH(pszFilename, "/vsizip/") ||
95 34 : STARTS_WITH(pszFilename, "/vsitar/");
96 34 : if (bIsVsiZipOrTarPrefixed)
97 : {
98 0 : if (poOpenInfo->eAccess != GA_ReadOnly)
99 0 : return nullptr;
100 : }
101 :
102 68 : std::string osPrefixedFilename;
103 34 : if (!bIsVsiZipOrTarPrefixed)
104 : {
105 34 : osPrefixedFilename = "/vsizip/{";
106 34 : osPrefixedFilename += pszFilename;
107 34 : osPrefixedFilename += "}";
108 : }
109 : else
110 : {
111 0 : osPrefixedFilename = pszFilename;
112 : }
113 :
114 68 : CPLString osTmpFilename;
115 : osTmpFilename =
116 34 : CPLSPrintf("%s/[Content_Types].xml", osPrefixedFilename.c_str());
117 34 : VSILFILE *fpContent = VSIFOpenL(osTmpFilename, "rb");
118 34 : if (fpContent == nullptr)
119 0 : return nullptr;
120 :
121 : char szBuffer[2048];
122 34 : int nRead = (int)VSIFReadL(szBuffer, 1, sizeof(szBuffer) - 1, fpContent);
123 34 : szBuffer[nRead] = 0;
124 :
125 34 : VSIFCloseL(fpContent);
126 :
127 34 : if (strstr(szBuffer, XLSX_MIMETYPE) == nullptr)
128 0 : return nullptr;
129 :
130 : osTmpFilename =
131 34 : CPLSPrintf("%s/xl/workbook.xml", osPrefixedFilename.c_str());
132 34 : VSILFILE *fpWorkbook = VSIFOpenL(osTmpFilename, "rb");
133 34 : if (fpWorkbook == nullptr)
134 0 : return nullptr;
135 :
136 : osTmpFilename =
137 34 : CPLSPrintf("%s/xl/_rels/workbook.xml.rels", osPrefixedFilename.c_str());
138 34 : VSILFILE *fpWorkbookRels = VSIFOpenL(osTmpFilename, "rb");
139 34 : if (fpWorkbookRels == nullptr)
140 : {
141 0 : VSIFCloseL(fpWorkbook);
142 0 : return nullptr;
143 : }
144 :
145 : osTmpFilename =
146 34 : CPLSPrintf("%s/xl/sharedStrings.xml", osPrefixedFilename.c_str());
147 34 : VSILFILE *fpSharedStrings = VSIFOpenL(osTmpFilename, "rb");
148 34 : osTmpFilename = CPLSPrintf("%s/xl/styles.xml", osPrefixedFilename.c_str());
149 34 : VSILFILE *fpStyles = VSIFOpenL(osTmpFilename, "rb");
150 :
151 : OGRXLSXDataSource *poDS =
152 34 : new OGRXLSXDataSource(poOpenInfo->papszOpenOptions);
153 :
154 34 : if (!poDS->Open(pszFilename, osPrefixedFilename.c_str(), fpWorkbook,
155 : fpWorkbookRels, fpSharedStrings, fpStyles,
156 34 : poOpenInfo->eAccess == GA_Update))
157 : {
158 0 : delete poDS;
159 0 : poDS = nullptr;
160 : }
161 : else
162 : {
163 34 : poDS->SetDescription(poOpenInfo->pszFilename);
164 : }
165 :
166 34 : return poDS;
167 : }
168 :
169 : /************************************************************************/
170 : /* OGRXLSXDriverCreate() */
171 : /************************************************************************/
172 :
173 25 : static GDALDataset *OGRXLSXDriverCreate(const char *pszName, int /* nXSize */,
174 : int /* nYSize */, int /* nBands */,
175 : GDALDataType /* eDT */,
176 : char **papszOptions)
177 :
178 : {
179 25 : if (!EQUAL(CPLGetExtension(pszName), "XLSX"))
180 : {
181 17 : CPLError(CE_Failure, CPLE_AppDefined, "File extension should be XLSX");
182 17 : return nullptr;
183 : }
184 :
185 : /* -------------------------------------------------------------------- */
186 : /* First, ensure there isn't any such file yet. */
187 : /* -------------------------------------------------------------------- */
188 : VSIStatBufL sStatBuf;
189 :
190 8 : if (VSIStatL(pszName, &sStatBuf) == 0)
191 : {
192 0 : CPLError(CE_Failure, CPLE_AppDefined,
193 : "It seems a file system object called '%s' already exists.",
194 : pszName);
195 :
196 0 : return nullptr;
197 : }
198 :
199 : /* -------------------------------------------------------------------- */
200 : /* Try to create datasource. */
201 : /* -------------------------------------------------------------------- */
202 8 : OGRXLSXDataSource *poDS = new OGRXLSXDataSource(nullptr);
203 :
204 8 : if (!poDS->Create(pszName, papszOptions))
205 : {
206 0 : delete poDS;
207 0 : return nullptr;
208 : }
209 : else
210 8 : return poDS;
211 : }
212 :
213 : /************************************************************************/
214 : /* RegisterOGRXLSX() */
215 : /************************************************************************/
216 :
217 1595 : void RegisterOGRXLSX()
218 :
219 : {
220 1595 : if (GDALGetDriverByName("XLSX") != nullptr)
221 302 : return;
222 :
223 1293 : GDALDriver *poDriver = new GDALDriver();
224 :
225 1293 : poDriver->SetDescription("XLSX");
226 1293 : poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
227 1293 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
228 1293 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES");
229 1293 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
230 1293 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_FIELD, "YES");
231 1293 : poDriver->SetMetadataItem(GDAL_DCAP_REORDER_FIELDS, "YES");
232 1293 : poDriver->SetMetadataItem(GDAL_DMD_ALTER_FIELD_DEFN_FLAGS, "Name Type");
233 :
234 1293 : poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
235 1293 : "MS Office Open XML spreadsheet");
236 1293 : poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "xlsx xlsm");
237 1293 : poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/xlsx.html");
238 1293 : poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
239 1293 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
240 : "Integer Integer64 Real String Date DateTime "
241 1293 : "Time");
242 1293 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES, "Boolean");
243 1293 : poDriver->SetMetadataItem(GDAL_DCAP_NONSPATIAL, "YES");
244 1293 : poDriver->SetMetadataItem(GDAL_DCAP_MULTIPLE_VECTOR_LAYERS, "YES");
245 1293 : poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
246 1293 : poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES");
247 1293 : poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
248 1293 : poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
249 :
250 1293 : poDriver->SetMetadataItem(
251 : GDAL_DMD_OPENOPTIONLIST,
252 : "<OpenOptionList>"
253 : " <Option name='FIELD_TYPES' type='string-select' "
254 : "description='If set to STRING, all fields will be of type String. "
255 : "Otherwise the driver autodetects the field type from field content.' "
256 : "default='AUTO'>"
257 : " <Value>AUTO</Value>"
258 : " <Value>STRING</Value>"
259 : " </Option>"
260 : " <Option name='HEADERS' type='string-select' "
261 : "description='Defines if the first line should be considered as "
262 : "containing the name of the fields.' "
263 : "default='AUTO'>"
264 : " <Value>AUTO</Value>"
265 : " <Value>FORCE</Value>"
266 : " <Value>DISABLE</Value>"
267 : " </Option>"
268 1293 : "</OpenOptionList>");
269 :
270 1293 : poDriver->pfnIdentify = OGRXLSXDriverIdentify;
271 1293 : poDriver->pfnOpen = OGRXLSXDriverOpen;
272 1293 : poDriver->pfnCreate = OGRXLSXDriverCreate;
273 :
274 1293 : GetGDALDriverManager()->RegisterDriver(poDriver);
275 : }
|