Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: XLSX Translator
4 : * Purpose: Implements OGRXLSXDriver.
5 : * Author: Even Rouault, even dot rouault at spatialys.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2012, Even Rouault <even dot rouault at spatialys.com>
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : #include "ogr_xlsx.h"
30 : #include "cpl_conv.h"
31 :
32 : extern "C" void RegisterOGRXLSX();
33 :
34 : using namespace OGRXLSX;
35 :
36 : // g++ -DHAVE_EXPAT -g -Wall -fPIC ogr/ogrsf_frmts/xlsx/*.cpp -shared -o
37 : // ogr_XLSX.so -Iport -Igcore -Iogr -Iogr/ogrsf_frmts -Iogr/ogrsf_frmts/mem
38 : // -Iogr/ogrsf_frmts/xlsx -L. -lgdal
39 :
// Content type string searched for in the archive's [Content_Types].xml
// to confirm that a candidate file is an XLSX workbook.
static constexpr char XLSX_MIMETYPE[] =
    "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
42 :
43 : /************************************************************************/
44 : /* Identify() */
45 : /************************************************************************/
46 :
47 42141 : static int OGRXLSXDriverIdentify(GDALOpenInfo *poOpenInfo)
48 : {
49 42141 : if (poOpenInfo->fpL == nullptr &&
50 39980 : STARTS_WITH_CI(poOpenInfo->pszFilename, "XLSX:"))
51 : {
52 2 : return TRUE;
53 : }
54 :
55 42139 : if (STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") ||
56 42128 : STARTS_WITH(poOpenInfo->pszFilename, "/vsitar/"))
57 : {
58 11 : const char *pszExt = CPLGetExtension(poOpenInfo->pszFilename);
59 11 : return EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM") ||
60 22 : EQUAL(pszExt, "XLSX}") || EQUAL(pszExt, "XLSM}");
61 : }
62 :
63 42128 : if (poOpenInfo->nHeaderBytes > 30 &&
64 2033 : memcmp(poOpenInfo->pabyHeader, "PK\x03\x04", 4) == 0)
65 : {
66 : // Fetch the first filename in the zip
67 99 : const int nFilenameLength =
68 99 : CPL_LSBUINT16PTR(poOpenInfo->pabyHeader + 26);
69 99 : if (30 + nFilenameLength > poOpenInfo->nHeaderBytes)
70 58 : return FALSE;
71 : const std::string osFilename(
72 99 : reinterpret_cast<const char *>(poOpenInfo->pabyHeader) + 30,
73 99 : nFilenameLength);
74 99 : if (STARTS_WITH(osFilename.c_str(), "xl/") ||
75 93 : STARTS_WITH(osFilename.c_str(), "_rels/") ||
76 261 : STARTS_WITH(osFilename.c_str(), "docProps/") ||
77 69 : osFilename == "[Content_Types].xml")
78 : {
79 58 : return TRUE;
80 : }
81 41 : const char *pszExt = CPLGetExtension(poOpenInfo->pszFilename);
82 41 : if (EQUAL(pszExt, "XLSX") || EQUAL(pszExt, "XLSM"))
83 : {
84 0 : CPLDebug(
85 : "XLSX",
86 : "Identify() failed to recognize first filename in zip (%s), "
87 : "but fallback to extension matching",
88 : osFilename.c_str());
89 0 : return TRUE;
90 : }
91 : }
92 42070 : return FALSE;
93 : }
94 :
95 : /************************************************************************/
96 : /* Open() */
97 : /************************************************************************/
98 :
99 30 : static GDALDataset *OGRXLSXDriverOpen(GDALOpenInfo *poOpenInfo)
100 :
101 : {
102 30 : if (!OGRXLSXDriverIdentify(poOpenInfo))
103 0 : return nullptr;
104 :
105 30 : const char *pszFilename = poOpenInfo->pszFilename;
106 30 : if (poOpenInfo->fpL == nullptr && STARTS_WITH_CI(pszFilename, "XLSX:"))
107 : {
108 1 : pszFilename += strlen("XLSX:");
109 : }
110 60 : const bool bIsVsiZipOrTarPrefixed = STARTS_WITH(pszFilename, "/vsizip/") ||
111 30 : STARTS_WITH(pszFilename, "/vsitar/");
112 30 : if (bIsVsiZipOrTarPrefixed)
113 : {
114 0 : if (poOpenInfo->eAccess != GA_ReadOnly)
115 0 : return nullptr;
116 : }
117 :
118 60 : std::string osPrefixedFilename;
119 30 : if (!bIsVsiZipOrTarPrefixed)
120 : {
121 30 : osPrefixedFilename = "/vsizip/{";
122 30 : osPrefixedFilename += pszFilename;
123 30 : osPrefixedFilename += "}";
124 : }
125 : else
126 : {
127 0 : osPrefixedFilename = pszFilename;
128 : }
129 :
130 60 : CPLString osTmpFilename;
131 : osTmpFilename =
132 30 : CPLSPrintf("%s/[Content_Types].xml", osPrefixedFilename.c_str());
133 30 : VSILFILE *fpContent = VSIFOpenL(osTmpFilename, "rb");
134 30 : if (fpContent == nullptr)
135 0 : return nullptr;
136 :
137 : char szBuffer[2048];
138 30 : int nRead = (int)VSIFReadL(szBuffer, 1, sizeof(szBuffer) - 1, fpContent);
139 30 : szBuffer[nRead] = 0;
140 :
141 30 : VSIFCloseL(fpContent);
142 :
143 30 : if (strstr(szBuffer, XLSX_MIMETYPE) == nullptr)
144 0 : return nullptr;
145 :
146 : osTmpFilename =
147 30 : CPLSPrintf("%s/xl/workbook.xml", osPrefixedFilename.c_str());
148 30 : VSILFILE *fpWorkbook = VSIFOpenL(osTmpFilename, "rb");
149 30 : if (fpWorkbook == nullptr)
150 0 : return nullptr;
151 :
152 : osTmpFilename =
153 30 : CPLSPrintf("%s/xl/_rels/workbook.xml.rels", osPrefixedFilename.c_str());
154 30 : VSILFILE *fpWorkbookRels = VSIFOpenL(osTmpFilename, "rb");
155 30 : if (fpWorkbookRels == nullptr)
156 : {
157 0 : VSIFCloseL(fpWorkbook);
158 0 : return nullptr;
159 : }
160 :
161 : osTmpFilename =
162 30 : CPLSPrintf("%s/xl/sharedStrings.xml", osPrefixedFilename.c_str());
163 30 : VSILFILE *fpSharedStrings = VSIFOpenL(osTmpFilename, "rb");
164 30 : osTmpFilename = CPLSPrintf("%s/xl/styles.xml", osPrefixedFilename.c_str());
165 30 : VSILFILE *fpStyles = VSIFOpenL(osTmpFilename, "rb");
166 :
167 : OGRXLSXDataSource *poDS =
168 30 : new OGRXLSXDataSource(poOpenInfo->papszOpenOptions);
169 :
170 30 : if (!poDS->Open(pszFilename, osPrefixedFilename.c_str(), fpWorkbook,
171 : fpWorkbookRels, fpSharedStrings, fpStyles,
172 30 : poOpenInfo->eAccess == GA_Update))
173 : {
174 0 : delete poDS;
175 0 : poDS = nullptr;
176 : }
177 : else
178 : {
179 30 : poDS->SetDescription(poOpenInfo->pszFilename);
180 : }
181 :
182 30 : return poDS;
183 : }
184 :
185 : /************************************************************************/
186 : /* OGRXLSXDriverCreate() */
187 : /************************************************************************/
188 :
189 24 : static GDALDataset *OGRXLSXDriverCreate(const char *pszName, int /* nXSize */,
190 : int /* nYSize */, int /* nBands */,
191 : GDALDataType /* eDT */,
192 : char **papszOptions)
193 :
194 : {
195 24 : if (!EQUAL(CPLGetExtension(pszName), "XLSX"))
196 : {
197 17 : CPLError(CE_Failure, CPLE_AppDefined, "File extension should be XLSX");
198 17 : return nullptr;
199 : }
200 :
201 : /* -------------------------------------------------------------------- */
202 : /* First, ensure there isn't any such file yet. */
203 : /* -------------------------------------------------------------------- */
204 : VSIStatBufL sStatBuf;
205 :
206 7 : if (VSIStatL(pszName, &sStatBuf) == 0)
207 : {
208 0 : CPLError(CE_Failure, CPLE_AppDefined,
209 : "It seems a file system object called '%s' already exists.",
210 : pszName);
211 :
212 0 : return nullptr;
213 : }
214 :
215 : /* -------------------------------------------------------------------- */
216 : /* Try to create datasource. */
217 : /* -------------------------------------------------------------------- */
218 7 : OGRXLSXDataSource *poDS = new OGRXLSXDataSource(nullptr);
219 :
220 7 : if (!poDS->Create(pszName, papszOptions))
221 : {
222 0 : delete poDS;
223 0 : return nullptr;
224 : }
225 : else
226 7 : return poDS;
227 : }
228 :
229 : /************************************************************************/
230 : /* RegisterOGRXLSX() */
231 : /************************************************************************/
232 :
233 1512 : void RegisterOGRXLSX()
234 :
235 : {
236 1512 : if (GDALGetDriverByName("XLSX") != nullptr)
237 295 : return;
238 :
239 1217 : GDALDriver *poDriver = new GDALDriver();
240 :
241 1217 : poDriver->SetDescription("XLSX");
242 1217 : poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
243 1217 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_LAYER, "YES");
244 1217 : poDriver->SetMetadataItem(GDAL_DCAP_DELETE_LAYER, "YES");
245 1217 : poDriver->SetMetadataItem(GDAL_DCAP_CREATE_FIELD, "YES");
246 :
247 1217 : poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
248 1217 : "MS Office Open XML spreadsheet");
249 1217 : poDriver->SetMetadataItem(GDAL_DMD_EXTENSIONS, "xlsx xlsm");
250 1217 : poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/xlsx.html");
251 1217 : poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
252 1217 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
253 : "Integer Integer64 Real String Date DateTime "
254 1217 : "Time");
255 1217 : poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATASUBTYPES, "Boolean");
256 1217 : poDriver->SetMetadataItem(GDAL_DCAP_NONSPATIAL, "YES");
257 1217 : poDriver->SetMetadataItem(GDAL_DCAP_MULTIPLE_VECTOR_LAYERS, "YES");
258 1217 : poDriver->SetMetadataItem(GDAL_DCAP_MEASURED_GEOMETRIES, "YES");
259 1217 : poDriver->SetMetadataItem(GDAL_DCAP_CURVE_GEOMETRIES, "YES");
260 1217 : poDriver->SetMetadataItem(GDAL_DCAP_Z_GEOMETRIES, "YES");
261 1217 : poDriver->SetMetadataItem(GDAL_DMD_SUPPORTED_SQL_DIALECTS, "OGRSQL SQLITE");
262 :
263 1217 : poDriver->SetMetadataItem(
264 : GDAL_DMD_OPENOPTIONLIST,
265 : "<OpenOptionList>"
266 : " <Option name='FIELD_TYPES' type='string-select' "
267 : "description='If set to STRING, all fields will be of type String. "
268 : "Otherwise the driver autodetects the field type from field content.' "
269 : "default='AUTO'>"
270 : " <Value>AUTO</Value>"
271 : " <Value>STRING</Value>"
272 : " </Option>"
273 : " <Option name='HEADERS' type='string-select' "
274 : "description='Defines if the first line should be considered as "
275 : "containing the name of the fields.' "
276 : "default='AUTO'>"
277 : " <Value>AUTO</Value>"
278 : " <Value>FORCE</Value>"
279 : " <Value>DISABLE</Value>"
280 : " </Option>"
281 1217 : "</OpenOptionList>");
282 :
283 1217 : poDriver->pfnIdentify = OGRXLSXDriverIdentify;
284 1217 : poDriver->pfnOpen = OGRXLSXDriverOpen;
285 1217 : poDriver->pfnCreate = OGRXLSXDriverCreate;
286 :
287 1217 : GetGDALDriverManager()->RegisterDriver(poDriver);
288 : }
|