Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Implementation of GDALOpenInfo class.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2002, Frank Warmerdam
9 : * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "gdal_priv.h" // Must be included first for mingw VSIStatBufL.
31 : #include "cpl_port.h"
32 :
33 : #include <cstdlib>
34 : #include <cstring>
35 : #ifdef HAVE_UNISTD_H
36 : #include <unistd.h>
37 : #endif
38 :
39 : #include <algorithm>
40 : #include <map>
41 : #include <mutex>
42 : #include <vector>
43 :
44 : #include "cpl_config.h"
45 : #include "cpl_conv.h"
46 : #include "cpl_error.h"
47 : #include "cpl_string.h"
48 : #include "cpl_vsi.h"
49 : #include "gdal.h"
50 :
51 : // Keep the prototypes of these 2 functions in sync between gdalopeninfo.cpp,
52 : // ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp.
53 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
54 : const GByte *pabyHeader,
55 : int nHeaderBytes);
56 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename);
57 :
58 : /************************************************************************/
59 :
60 : /* This whole section helps for SQLite/GPKG, especially with write-ahead
61 : * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
62 : * properly work and decide when to create/delete the wal related files.
63 : * One issue with POSIX advisory locks is that if within the same process
64 : * you do
65 : * f1 = open('somefile')
66 : * set locks on f1
67 : * f2 = open('somefile')
68 : * close(f2)
69 : * The close(f2) will cancel the locks set on f1. The work on f1 is done by
70 : * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
71 : * So as soon as sqlite3 has opened a file we should make sure not to re-open
72 : * it (actually close it) ourselves.
73 : */
74 :
75 : namespace
76 : {
77 : struct FileNotToOpen
78 : {
79 : CPLString osFilename{};
80 : int nRefCount{};
81 : GByte *pabyHeader{nullptr};
82 : int nHeaderBytes{0};
83 : };
84 : } // namespace
85 :
86 : static std::mutex sFNTOMutex;
87 : static std::map<CPLString, FileNotToOpen> *pMapFNTO = nullptr;
88 :
89 1544 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
90 : const GByte *pabyHeader, int nHeaderBytes)
91 : {
92 3088 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
93 1544 : if (pMapFNTO == nullptr)
94 1346 : pMapFNTO = new std::map<CPLString, FileNotToOpen>();
95 1544 : auto oIter = pMapFNTO->find(pszFilename);
96 1544 : if (oIter != pMapFNTO->end())
97 : {
98 121 : oIter->second.nRefCount++;
99 : }
100 : else
101 : {
102 1423 : FileNotToOpen fnto;
103 1423 : fnto.osFilename = pszFilename;
104 1423 : fnto.nRefCount = 1;
105 1423 : fnto.pabyHeader = static_cast<GByte *>(CPLMalloc(nHeaderBytes + 1));
106 1423 : memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
107 1423 : fnto.pabyHeader[nHeaderBytes] = 0;
108 1423 : fnto.nHeaderBytes = nHeaderBytes;
109 1423 : (*pMapFNTO)[pszFilename] = std::move(fnto);
110 : }
111 1544 : }
112 :
113 1544 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename)
114 : {
115 3088 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
116 1544 : CPLAssert(pMapFNTO);
117 1544 : auto oIter = pMapFNTO->find(pszFilename);
118 1544 : CPLAssert(oIter != pMapFNTO->end());
119 1544 : oIter->second.nRefCount--;
120 1544 : if (oIter->second.nRefCount == 0)
121 : {
122 1423 : CPLFree(oIter->second.pabyHeader);
123 1423 : pMapFNTO->erase(oIter);
124 : }
125 1544 : if (pMapFNTO->empty())
126 : {
127 1346 : delete pMapFNTO;
128 1346 : pMapFNTO = nullptr;
129 : }
130 1544 : }
131 :
132 89517 : static GByte *GDALOpenInfoGetFileNotToOpen(const char *pszFilename,
133 : int *pnHeaderBytes)
134 : {
135 179034 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
136 89517 : *pnHeaderBytes = 0;
137 89517 : if (pMapFNTO == nullptr)
138 : {
139 85112 : return nullptr;
140 : }
141 4405 : auto oIter = pMapFNTO->find(pszFilename);
142 4405 : if (oIter == pMapFNTO->end())
143 : {
144 3081 : return nullptr;
145 : }
146 1324 : *pnHeaderBytes = oIter->second.nHeaderBytes;
147 1324 : GByte *pabyHeader = static_cast<GByte *>(CPLMalloc(*pnHeaderBytes + 1));
148 1324 : memcpy(pabyHeader, oIter->second.pabyHeader, *pnHeaderBytes);
149 1324 : pabyHeader[*pnHeaderBytes] = 0;
150 1324 : return pabyHeader;
151 : }
152 :
153 : /************************************************************************/
154 : /* ==================================================================== */
155 : /* GDALOpenInfo */
156 : /* ==================================================================== */
157 : /************************************************************************/
158 :
159 : /************************************************************************/
160 : /* GDALOpenInfo() */
161 : /************************************************************************/
162 :
163 : /** Constructor.
 *
 * Stores a copy of the filename, derives the access mode from the
 * GDAL_OF_UPDATE flag, then collects information about the file: whether it
 * can be stat'ed, whether it is a directory, an open file handle (fpL) and
 * the first bytes of the file (pabyHeader / nHeaderBytes). Finally the
 * sibling file list is set up, either from papszSiblingsIn (duplicated) or
 * left to be loaded lazily by GetSiblingFiles().
 *
 * Filenames starting with "MVT:/vsi" are only recorded: nothing is stat'ed
 * or opened for them.
 *
164 : * @param pszFilenameIn filename
165 : * @param nOpenFlagsIn open flags
166 : * @param papszSiblingsIn list of sibling files, or NULL.
167 : */
168 90357 : GDALOpenInfo::GDALOpenInfo(const char *pszFilenameIn, int nOpenFlagsIn,
169 90357 : const char *const *papszSiblingsIn)
170 : : bHasGotSiblingFiles(false), papszSiblingFiles(nullptr),
171 180715 : nHeaderBytesTried(0), pszFilename(CPLStrdup(pszFilenameIn)),
172 : papszOpenOptions(nullptr),
173 90358 : eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
174 : nOpenFlags(nOpenFlagsIn), bStatOK(FALSE), bIsDirectory(FALSE),
175 : fpL(nullptr), nHeaderBytes(0), pabyHeader(nullptr),
176 90357 : papszAllowedDrivers(nullptr)
177 : {
    // Early exit: every member keeps the value set by the initializer list
    // above (no stat, no file handle, no header, sibling list not fetched).
178 90358 : if (STARTS_WITH(pszFilename, "MVT:/vsi"))
179 843 : return;
180 :
181 : /* -------------------------------------------------------------------- */
182 : /* Ensure that C: is treated as C:\ so we can stat it on */
183 : /* Windows. Similar to what is done in CPLStat(). */
184 : /* -------------------------------------------------------------------- */
185 : #ifdef _WIN32
186 : if (strlen(pszFilenameIn) == 2 && pszFilenameIn[1] == ':')
187 : {
    // 10 bytes is enough: the input is exactly 2 characters long here.
188 : char szAltPath[10];
189 :
190 : strcpy(szAltPath, pszFilenameIn);
191 : strcat(szAltPath, "\\");
192 : CPLFree(pszFilename);
193 : pszFilename = CPLStrdup(szAltPath);
194 : }
195 : #endif // _WIN32
196 :
197 : /* -------------------------------------------------------------------- */
198 : /* Collect information about the file. */
199 : /* -------------------------------------------------------------------- */
200 :
201 : #ifdef HAVE_READLINK
    // Set once a symbolic link has been followed, so that the "retry" label
    // below is jumped to at most one time.
202 89515 : bool bHasRetried = false;
203 :
204 89517 : retry: // TODO(schwehr): Stop using goto.
205 :
206 : #endif // HAVE_READLINK
207 :
208 : #if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) || \
209 : (defined(__MACH__) && defined(__APPLE__)))
210 : /* On BSDs, fread() on a directory returns non zero, so we have to */
211 : /* do a stat() before to check the nature of pszFilename. */
212 : bool bPotentialDirectory = (eAccess == GA_ReadOnly);
213 : #else
214 89517 : bool bPotentialDirectory = false;
215 : #endif
216 :
217 : /* Check if the filename might be a directory of a special virtual file
218 : * system */
219 89517 : if (STARTS_WITH(pszFilename, "/vsizip/") ||
220 89439 : STARTS_WITH(pszFilename, "/vsitar/") ||
221 89425 : STARTS_WITH(pszFilename, "/vsi7z/") ||
222 89425 : STARTS_WITH(pszFilename, "/vsirar/"))
223 : {
    // Archive paths are flagged as potential directories when they end with
    // an archive extension or with '}'.
    // NOTE(review): '}' presumably closes the "/vsizip/{archive}" syntax —
    // confirm against the VSI archive documentation.
224 92 : const char *pszExt = CPLGetExtension(pszFilename);
225 71 : if (EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") ||
226 69 : EQUAL(pszExt, "gz") || EQUAL(pszExt, "7z") ||
227 68 : EQUAL(pszExt, "rar") ||
228 68 : pszFilename[strlen(pszFilename) - 1] == '}'
229 : #ifdef DEBUG
230 : // For AFL, so that .cur_input is detected as the archive filename.
231 163 : || EQUAL(CPLGetFilename(pszFilename), ".cur_input")
232 : #endif // DEBUG
233 : )
234 : {
235 35 : bPotentialDirectory = true;
236 92 : }
237 : }
238 89425 : else if (STARTS_WITH(pszFilename, "/vsicurl/"))
239 : {
240 27 : bPotentialDirectory = true;
241 : }
242 :
243 89517 : if (bPotentialDirectory)
244 : {
245 62 : int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
    // Only ask the stat call to set errors when the caller requested
    // verbose errors.
246 62 : if (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR)
247 32 : nStatFlags |= VSI_STAT_SET_ERROR_FLAG;
248 :
249 : // For those special files, opening them with VSIFOpenL() might result
250 : // in content, even if they should be considered as directories, so
251 : // use stat.
252 : VSIStatBufL sStat;
253 :
254 62 : if (VSIStatExL(pszFilename, &sStat, nStatFlags) == 0)
255 : {
256 54 : bStatOK = TRUE;
257 54 : if (VSI_ISDIR(sStat.st_mode))
258 20 : bIsDirectory = TRUE;
259 : }
260 : }
261 :
    // If the file has been declared through
    // GDALOpenInfoDeclareFileNotToOpen(), get a private copy of the header
    // recorded at that time; the file is then not opened below.
262 89517 : pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);
263 :
264 89517 : if (!bIsDirectory && pabyHeader == nullptr)
265 : {
    // Update access opens the file read/write ("r+b"), otherwise read-only.
266 88173 : fpL = VSIFOpenExL(pszFilename, (eAccess == GA_Update) ? "r+b" : "rb",
267 88173 : (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR) > 0);
268 : }
269 89516 : if (pabyHeader)
270 : {
    // Header served from the "not to open" registry: fpL stays null.
271 1324 : bStatOK = TRUE;
272 1324 : nHeaderBytesTried = nHeaderBytes;
273 : }
274 88192 : else if (fpL != nullptr)
275 : {
276 47194 : bStatOK = TRUE;
    // Number of bytes to read up front: configurable, default 1024, clamped
    // to the range [1024, 10 * 1024 * 1024].
277 : int nBufSize =
278 47194 : atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
279 47194 : if (nBufSize < 1024)
280 0 : nBufSize = 1024;
281 47194 : else if (nBufSize > 10 * 1024 * 1024)
282 0 : nBufSize = 10 * 1024 * 1024;
    // One byte more than is read is allocated, so the buffer ends with a
    // zero byte (CPLCalloc() is expected to zero-fill, like calloc()).
283 47194 : pabyHeader = static_cast<GByte *>(CPLCalloc(nBufSize + 1, 1));
284 47194 : nHeaderBytesTried = nBufSize;
285 47193 : nHeaderBytes =
286 47194 : static_cast<int>(VSIFReadL(pabyHeader, 1, nHeaderBytesTried, fpL));
    // Leave the handle positioned at the start of the file.
287 47193 : VSIRewindL(fpL);
288 :
289 : /* If we cannot read anything, check if it is not a directory instead */
290 : VSIStatBufL sStat;
291 95627 : if (nHeaderBytes == 0 &&
292 1245 : VSIStatExL(pszFilename, &sStat,
293 48436 : VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0 &&
294 1245 : VSI_ISDIR(sStat.st_mode))
295 : {
    // It is a directory: drop both the handle and the (empty) header.
296 1122 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
297 1122 : fpL = nullptr;
298 1122 : CPLFree(pabyHeader);
299 1122 : pabyHeader = nullptr;
300 1122 : bIsDirectory = TRUE;
301 : }
302 : }
303 40998 : else if (!bStatOK)
304 : {
    // No header and no file handle, and no earlier successful stat.
    // If bPotentialDirectory is set, the stat was already attempted above
    // and is not repeated.
305 : VSIStatBufL sStat;
306 81950 : if (!bPotentialDirectory &&
307 40971 : VSIStatExL(pszFilename, &sStat,
308 : VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0)
309 : {
310 1231 : bStatOK = TRUE;
311 1231 : if (VSI_ISDIR(sStat.st_mode))
312 1226 : bIsDirectory = TRUE;
313 : }
314 : #ifdef HAVE_READLINK
315 39748 : else if (!bHasRetried && !STARTS_WITH(pszFilename, "/vsi"))
316 : {
317 : // If someone creates a file with "ln -sf
318 : // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
319 : // my_remote_utm.tif" we will be able to open it by passing
320 : // my_remote_utm.tif. This helps a lot for GDAL based readers that
321 : // only provide file explorers to open datasets.
322 21624 : const int nBufSize = 2048;
323 21624 : std::vector<char> oFilename(nBufSize);
324 21624 : char *szPointerFilename = &oFilename[0];
325 : int nBytes = static_cast<int>(
326 21624 : readlink(pszFilename, szPointerFilename, nBufSize));
327 21624 : if (nBytes != -1)
328 : {
    // readlink() does not nul-terminate: terminate explicitly, truncating
    // the target to nBufSize - 1 characters if it filled the buffer.
329 2 : szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
    // Replace the stored filename by the link target and redo the whole
    // collection once.
330 2 : CPLFree(pszFilename);
331 2 : pszFilename = CPLStrdup(szPointerFilename);
    // NOTE(review): the caller's sibling list is discarded, presumably
    // because it describes the link's directory rather than the target's —
    // confirm.
332 2 : papszSiblingsIn = nullptr;
333 2 : bHasRetried = true;
334 2 : goto retry;
335 : }
336 : }
337 : #endif // HAVE_READLINK
338 : }
339 :
340 : /* -------------------------------------------------------------------- */
341 : /* Capture sibling list either from passed in values, or by */
342 : /* scanning for them only if requested through GetSiblingFiles(). */
343 : /* -------------------------------------------------------------------- */
344 89511 : if (papszSiblingsIn != nullptr)
345 : {
    // The object keeps its own copy of the caller-provided list.
346 104 : papszSiblingFiles = CSLDuplicate(papszSiblingsIn);
347 104 : bHasGotSiblingFiles = true;
348 : }
349 89407 : else if (bStatOK && !bIsDirectory)
350 : {
    // First see whether VSISiblingFiles() can provide the list directly.
351 47294 : papszSiblingFiles = VSISiblingFiles(pszFilename);
352 47297 : if (papszSiblingFiles != nullptr)
353 : {
354 6 : bHasGotSiblingFiles = true;
355 : }
356 : else
357 : {
    // GDAL_DISABLE_READDIR_ON_OPEN: "EMPTY_DIR" makes the file its own and
    // only sibling, a true value disables the directory scan (NULL list),
    // anything else defers the scan to GetSiblingFiles().
358 94582 : const char *pszOptionVal = VSIGetPathSpecificOption(
359 47291 : pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
360 47291 : if (EQUAL(pszOptionVal, "EMPTY_DIR"))
361 : {
362 58 : papszSiblingFiles =
363 58 : CSLAddString(nullptr, CPLGetFilename(pszFilename));
364 58 : bHasGotSiblingFiles = true;
365 : }
366 47233 : else if (CPLTestBool(pszOptionVal))
367 : {
368 : /* skip reading the directory */
369 15 : papszSiblingFiles = nullptr;
370 15 : bHasGotSiblingFiles = true;
371 : }
372 : else
373 : {
374 : /* will be lazy loaded */
375 47218 : papszSiblingFiles = nullptr;
376 47218 : bHasGotSiblingFiles = false;
377 : }
378 47297 : }
379 : }
380 : else
381 : {
    // The file could not be stat'ed, or it is a directory: no sibling list,
    // and flag it as fetched so that GetSiblingFiles() does not scan.
382 42113 : papszSiblingFiles = nullptr;
383 42113 : bHasGotSiblingFiles = true;
384 : }
385 : }
386 :
387 : /************************************************************************/
388 : /* ~GDALOpenInfo() */
389 : /************************************************************************/
390 :
391 180688 : GDALOpenInfo::~GDALOpenInfo()
392 :
393 : {
394 90355 : VSIFree(pabyHeader);
395 90325 : CPLFree(pszFilename);
396 :
397 90346 : if (fpL != nullptr)
398 11417 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
399 90346 : CSLDestroy(papszSiblingFiles);
400 90333 : }
401 :
402 : /************************************************************************/
403 : /* GetSiblingFiles() */
404 : /************************************************************************/
405 :
406 : /** Return sibling files.
407 : * @return sibling files. Ownership below to the object.
408 : */
409 36430 : char **GDALOpenInfo::GetSiblingFiles()
410 : {
411 36430 : if (bHasGotSiblingFiles)
412 20313 : return papszSiblingFiles;
413 16117 : bHasGotSiblingFiles = true;
414 :
415 16117 : papszSiblingFiles = VSISiblingFiles(pszFilename);
416 16117 : if (papszSiblingFiles != nullptr)
417 : {
418 0 : return papszSiblingFiles;
419 : }
420 :
421 16117 : CPLString osDir = CPLGetDirname(pszFilename);
422 32234 : const int nMaxFiles = atoi(VSIGetPathSpecificOption(
423 16117 : pszFilename, "GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
424 16117 : papszSiblingFiles = VSIReadDirEx(osDir, nMaxFiles);
425 16117 : if (nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles)
426 : {
427 1 : CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
428 : osDir.c_str());
429 1 : CSLDestroy(papszSiblingFiles);
430 1 : papszSiblingFiles = nullptr;
431 : }
432 :
433 16117 : return papszSiblingFiles;
434 : }
435 :
436 : /************************************************************************/
437 : /* StealSiblingFiles() */
438 : /* */
439 : /* Same as GetSiblingFiles() except that the list is stolen */
440 : /* (ie ownership transferred to the caller) and the associated */
441 : /* member variable is set to NULL. */
442 : /************************************************************************/
443 :
444 : /** Return sibling files and steal reference
445 : * @return sibling files. Ownership below to the caller (must be freed with
446 : * CSLDestroy)
447 : */
448 164 : char **GDALOpenInfo::StealSiblingFiles()
449 : {
450 164 : char **papszRet = GetSiblingFiles();
451 164 : papszSiblingFiles = nullptr;
452 164 : return papszRet;
453 : }
454 :
455 : /************************************************************************/
456 : /* AreSiblingFilesLoaded() */
457 : /************************************************************************/
458 :
459 : /** Return whether sibling files have been loaded.
460 : * @return true or false.
461 : */
462 32578 : bool GDALOpenInfo::AreSiblingFilesLoaded() const
463 : {
464 32578 : return bHasGotSiblingFiles;
465 : }
466 :
467 : /************************************************************************/
468 : /* TryToIngest() */
469 : /************************************************************************/
470 :
471 : /** Ingest bytes from the file.
472 : * @param nBytes number of bytes to ingest.
473 : * @return TRUE if successful
474 : */
475 20083 : int GDALOpenInfo::TryToIngest(int nBytes)
476 : {
477 20083 : if (fpL == nullptr)
478 63 : return FALSE;
479 20020 : if (nHeaderBytes < nHeaderBytesTried)
480 12123 : return TRUE;
481 7897 : pabyHeader = static_cast<GByte *>(CPLRealloc(pabyHeader, nBytes + 1));
482 7897 : memset(pabyHeader, 0, nBytes + 1);
483 7897 : VSIRewindL(fpL);
484 7897 : nHeaderBytesTried = nBytes;
485 7897 : nHeaderBytes = static_cast<int>(VSIFReadL(pabyHeader, 1, nBytes, fpL));
486 7897 : VSIRewindL(fpL);
487 :
488 7897 : return TRUE;
489 : }
|