Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Implementation of GDALOpenInfo class.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2002, Frank Warmerdam
9 : * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "gdal_priv.h" // Must be included first for mingw VSIStatBufL.
15 : #include "cpl_port.h"
16 :
17 : #include <cstdlib>
18 : #include <cstring>
19 : #ifdef HAVE_UNISTD_H
20 : #include <unistd.h>
21 : #endif
22 :
23 : #include <algorithm>
24 : #include <map>
25 : #include <mutex>
26 : #include <vector>
27 :
28 : #include "cpl_config.h"
29 : #include "cpl_conv.h"
30 : #include "cpl_error.h"
31 : #include "cpl_string.h"
32 : #include "cpl_vsi.h"
33 : #include "gdal.h"
34 :
// Keep the prototypes of these 2 functions in sync between gdalopeninfo.cpp,
// ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp

// Registers pszFilename (or adds one reference if it is already registered)
// together with a private copy of its first nHeaderBytes header bytes.
// While a file is registered, the GDALOpenInfo constructor uses that header
// copy instead of opening the file itself.
void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
                                      const GByte *pabyHeader,
                                      int nHeaderBytes);

// Drops one reference on pszFilename; the registration and its header copy
// are released when the last reference goes away.
void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename);
41 :
42 : /************************************************************************/
43 :
44 : /* This whole section helps for SQLite/GPKG, especially with write-ahead
45 : * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
46 : * properly work and decide when to create/delete the wal related files.
47 : * One issue with POSIX advisory locks is that if within the same process
48 : * you do
49 : * f1 = open('somefile')
50 : * set locks on f1
51 : * f2 = open('somefile')
52 : * close(f2)
53 : * The close(f2) will cancel the locks set on f1. The work on f1 is done by
54 : * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
55 : * So as soon as sqlite3 has opened a file we should make sure not to re-open
56 : * it (actually close it) ourselves.
57 : */
58 :
59 : namespace
60 : {
61 : struct FileNotToOpen
62 : {
63 : CPLString osFilename{};
64 : int nRefCount{};
65 : GByte *pabyHeader{nullptr};
66 : int nHeaderBytes{0};
67 : };
68 : } // namespace
69 :
// Guards every access to pMapFNTO: each of the three FileNotToOpen functions
// below takes it before touching the registry.
static std::mutex sFNTOMutex;
// Registry of declared files, keyed by filename. It is allocated by the first
// declaration and deleted again as soon as it becomes empty, so it is nullptr
// whenever no file is declared.
static std::map<CPLString, FileNotToOpen> *pMapFNTO = nullptr;
72 :
73 1650 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
74 : const GByte *pabyHeader, int nHeaderBytes)
75 : {
76 3300 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
77 1650 : if (pMapFNTO == nullptr)
78 1381 : pMapFNTO = new std::map<CPLString, FileNotToOpen>();
79 1650 : auto oIter = pMapFNTO->find(pszFilename);
80 1650 : if (oIter != pMapFNTO->end())
81 : {
82 122 : oIter->second.nRefCount++;
83 : }
84 : else
85 : {
86 1528 : FileNotToOpen fnto;
87 1528 : fnto.osFilename = pszFilename;
88 1528 : fnto.nRefCount = 1;
89 1528 : fnto.pabyHeader = static_cast<GByte *>(CPLMalloc(nHeaderBytes + 1));
90 1528 : memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
91 1528 : fnto.pabyHeader[nHeaderBytes] = 0;
92 1528 : fnto.nHeaderBytes = nHeaderBytes;
93 1528 : (*pMapFNTO)[pszFilename] = std::move(fnto);
94 : }
95 1650 : }
96 :
97 1649 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename)
98 : {
99 3298 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
100 1649 : CPLAssert(pMapFNTO);
101 1649 : auto oIter = pMapFNTO->find(pszFilename);
102 1649 : CPLAssert(oIter != pMapFNTO->end());
103 1649 : oIter->second.nRefCount--;
104 1649 : if (oIter->second.nRefCount == 0)
105 : {
106 1527 : CPLFree(oIter->second.pabyHeader);
107 1527 : pMapFNTO->erase(oIter);
108 : }
109 1649 : if (pMapFNTO->empty())
110 : {
111 1380 : delete pMapFNTO;
112 1380 : pMapFNTO = nullptr;
113 : }
114 1649 : }
115 :
116 97903 : static GByte *GDALOpenInfoGetFileNotToOpen(const char *pszFilename,
117 : int *pnHeaderBytes)
118 : {
119 195806 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
120 97903 : *pnHeaderBytes = 0;
121 97903 : if (pMapFNTO == nullptr)
122 : {
123 91923 : return nullptr;
124 : }
125 5980 : auto oIter = pMapFNTO->find(pszFilename);
126 5980 : if (oIter == pMapFNTO->end())
127 : {
128 4514 : return nullptr;
129 : }
130 1466 : *pnHeaderBytes = oIter->second.nHeaderBytes;
131 1466 : GByte *pabyHeader = static_cast<GByte *>(CPLMalloc(*pnHeaderBytes + 1));
132 1466 : memcpy(pabyHeader, oIter->second.pabyHeader, *pnHeaderBytes);
133 1466 : pabyHeader[*pnHeaderBytes] = 0;
134 1466 : return pabyHeader;
135 : }
136 :
137 : /************************************************************************/
138 : /* ==================================================================== */
139 : /* GDALOpenInfo */
140 : /* ==================================================================== */
141 : /************************************************************************/
142 :
143 : /************************************************************************/
144 : /* GDALOpenInfo() */
145 : /************************************************************************/
146 :
/** Constructor.
 *
 * Collects the information stored in this object: whether the name could be
 * stat'ed or opened (bStatOK), whether it is a directory (bIsDirectory), an
 * open file handle (fpL), the first bytes of the file (pabyHeader /
 * nHeaderBytes) and, possibly, the list of sibling files.
 *
 * Filenames starting with "MVT:/vsi" are not probed at all: the constructor
 * returns right after the member initializer list has run.
 *
 * @param pszFilenameIn filename
 * @param nOpenFlagsIn open flags
 * @param papszSiblingsIn list of sibling files, or NULL.
 */
GDALOpenInfo::GDALOpenInfo(const char *pszFilenameIn, int nOpenFlagsIn,
                           const char *const *papszSiblingsIn)
    : bHasGotSiblingFiles(false), papszSiblingFiles(nullptr),
      nHeaderBytesTried(0), pszFilename(CPLStrdup(pszFilenameIn)),
      papszOpenOptions(nullptr),
      eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
      nOpenFlags(nOpenFlagsIn), bStatOK(FALSE), bIsDirectory(FALSE),
      fpL(nullptr), nHeaderBytes(0), pabyHeader(nullptr),
      papszAllowedDrivers(nullptr)
{
    // No stat, open or sibling handling for this prefix: every member keeps
    // the value given by the initializer list above.
    if (STARTS_WITH(pszFilename, "MVT:/vsi"))
        return;

/* -------------------------------------------------------------------- */
/*      Ensure that C: is treated as C:\ so we can stat it on           */
/*      Windows.  Similar to what is done in CPLStat().                 */
/* -------------------------------------------------------------------- */
#ifdef _WIN32
    if (strlen(pszFilenameIn) == 2 && pszFilenameIn[1] == ':')
    {
        // 2 characters + backslash + terminator fit easily in 10 bytes.
        char szAltPath[10];

        strcpy(szAltPath, pszFilenameIn);
        strcat(szAltPath, "\\");
        CPLFree(pszFilename);
        pszFilename = CPLStrdup(szAltPath);
    }
#endif  // WIN32

    /* -------------------------------------------------------------------- */
    /*      Collect information about the file.                             */
    /* -------------------------------------------------------------------- */

#ifdef HAVE_READLINK
    // Set once a symbolic link has been followed, so that the probing below
    // is restarted at most one time.
    bool bHasRetried = false;

retry:  // TODO(schwehr): Stop using goto.

#endif  // HAVE_READLINK

#if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) ||        \
      (defined(__MACH__) && defined(__APPLE__)))
    /* On BSDs, fread() on a directory returns non zero, so we have to */
    /* do a stat() before to check the nature of pszFilename. */
    bool bPotentialDirectory = (eAccess == GA_ReadOnly);
#else
    bool bPotentialDirectory = false;
#endif

    /* Check if the filename might be a directory of a special virtual file
     * system */
    if (STARTS_WITH(pszFilename, "/vsizip/") ||
        STARTS_WITH(pszFilename, "/vsitar/") ||
        STARTS_WITH(pszFilename, "/vsi7z/") ||
        STARTS_WITH(pszFilename, "/vsirar/"))
    {
        // A name ending with an archive extension, or with '}', may designate
        // the archive itself rather than a file inside it.
        const char *pszExt = CPLGetExtension(pszFilename);
        if (EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") ||
            EQUAL(pszExt, "gz") || EQUAL(pszExt, "7z") ||
            EQUAL(pszExt, "rar") ||
            pszFilename[strlen(pszFilename) - 1] == '}'
#ifdef DEBUG
            // For AFL, so that .cur_input is detected as the archive filename.
            || EQUAL(CPLGetFilename(pszFilename), ".cur_input")
#endif  // DEBUG
        )
        {
            bPotentialDirectory = true;
        }
    }
    else if (STARTS_WITH(pszFilename, "/vsicurl/"))
    {
        bPotentialDirectory = true;
    }

    if (bPotentialDirectory)
    {
        int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
        if (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR)
            nStatFlags |= VSI_STAT_SET_ERROR_FLAG;

        // For those special files, opening them with VSIFOpenL() might result
        // in content, even if they should be considered as directories, so
        // use stat.
        VSIStatBufL sStat;

        if (VSIStatExL(pszFilename, &sStat, nStatFlags) == 0)
        {
            bStatOK = TRUE;
            if (VSI_ISDIR(sStat.st_mode))
                bIsDirectory = TRUE;
        }
    }

    // If the file has been declared with GDALOpenInfoDeclareFileNotToOpen(),
    // this returns a copy of the header recorded at that time, and the file
    // is then not opened below.
    pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);

    if (!bIsDirectory && pabyHeader == nullptr)
    {
        fpL = VSIFOpenExL(pszFilename, (eAccess == GA_Update) ? "r+b" : "rb",
                          (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR) > 0);
    }
    if (pabyHeader)
    {
        // Header served from the registry: fpL stays null.
        bStatOK = TRUE;
        nHeaderBytesTried = nHeaderBytes;
    }
    else if (fpL != nullptr)
    {
        bStatOK = TRUE;
        // Number of bytes to read up front, clamped to [1024, 10 MiB].
        int nBufSize =
            atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
        if (nBufSize < 1024)
            nBufSize = 1024;
        else if (nBufSize > 10 * 1024 * 1024)
            nBufSize = 10 * 1024 * 1024;
        // One extra zero-initialized byte keeps the header nul-terminated.
        pabyHeader = static_cast<GByte *>(CPLCalloc(nBufSize + 1, 1));
        nHeaderBytesTried = nBufSize;
        nHeaderBytes =
            static_cast<int>(VSIFReadL(pabyHeader, 1, nHeaderBytesTried, fpL));
        VSIRewindL(fpL);

        /* If we cannot read anything, check if it is not a directory instead */
        VSIStatBufL sStat;
        if (nHeaderBytes == 0 &&
            VSIStatExL(pszFilename, &sStat,
                       VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0 &&
            VSI_ISDIR(sStat.st_mode))
        {
            CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
            fpL = nullptr;
            CPLFree(pabyHeader);
            pabyHeader = nullptr;
            bIsDirectory = TRUE;
        }
    }
    else if (!bStatOK)
    {
        // Nothing could be opened and no stat has succeeded so far.
        VSIStatBufL sStat;
        if (!bPotentialDirectory &&
            VSIStatExL(pszFilename, &sStat,
                       VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0)
        {
            bStatOK = TRUE;
            if (VSI_ISDIR(sStat.st_mode))
                bIsDirectory = TRUE;
        }
#ifdef HAVE_READLINK
        else if (!bHasRetried && !STARTS_WITH(pszFilename, "/vsi"))
        {
            // If someone creates a file with "ln -sf
            // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
            // my_remote_utm.tif" we will be able to open it by passing
            // my_remote_utm.tif. This helps a lot for GDAL based readers that
            // only provide file explorers to open datasets.
            const int nBufSize = 2048;
            std::vector<char> oFilename(nBufSize);
            char *szPointerFilename = &oFilename[0];
            int nBytes = static_cast<int>(
                readlink(pszFilename, szPointerFilename, nBufSize));
            if (nBytes != -1)
            {
                // readlink() does not nul-terminate: do it here, truncating
                // the target if it fills the whole buffer.
                szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
                CPLFree(pszFilename);
                pszFilename = CPLStrdup(szPointerFilename);
                // The provided sibling list described the link, not its
                // target, so it is dropped before probing again.
                papszSiblingsIn = nullptr;
                bHasRetried = true;
                goto retry;
            }
        }
#endif  // HAVE_READLINK
    }

    /* -------------------------------------------------------------------- */
    /*      Capture sibling list either from passed in values, or by        */
    /*      scanning for them only if requested through GetSiblingFiles().  */
    /* -------------------------------------------------------------------- */
    if (papszSiblingsIn != nullptr)
    {
        papszSiblingFiles = CSLDuplicate(papszSiblingsIn);
        bHasGotSiblingFiles = true;
    }
    else if (bStatOK && !bIsDirectory)
    {
        papszSiblingFiles = VSISiblingFiles(pszFilename);
        if (papszSiblingFiles != nullptr)
        {
            bHasGotSiblingFiles = true;
        }
        else
        {
            const char *pszOptionVal = VSIGetPathSpecificOption(
                pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
            if (EQUAL(pszOptionVal, "EMPTY_DIR"))
            {
                // Pretend the file is alone in its directory.
                papszSiblingFiles =
                    CSLAddString(nullptr, CPLGetFilename(pszFilename));
                bHasGotSiblingFiles = true;
            }
            else if (CPLTestBool(pszOptionVal))
            {
                /* skip reading the directory */
                papszSiblingFiles = nullptr;
                bHasGotSiblingFiles = true;
            }
            else
            {
                /* will be lazy loaded */
                papszSiblingFiles = nullptr;
                bHasGotSiblingFiles = false;
            }
        }
    }
    else
    {
        // Not a regular, reachable file: no sibling list, and
        // GetSiblingFiles() will not try to build one later.
        papszSiblingFiles = nullptr;
        bHasGotSiblingFiles = true;
    }
}
370 :
371 : /************************************************************************/
372 : /* ~GDALOpenInfo() */
373 : /************************************************************************/
374 :
375 197429 : GDALOpenInfo::~GDALOpenInfo()
376 :
377 : {
378 98743 : VSIFree(pabyHeader);
379 98647 : CPLFree(pszFilename);
380 :
381 98713 : if (fpL != nullptr)
382 12123 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
383 98713 : CSLDestroy(papszSiblingFiles);
384 98686 : }
385 :
386 : /************************************************************************/
387 : /* GetSiblingFiles() */
388 : /************************************************************************/
389 :
390 : /** Return sibling files.
391 : *
392 : * If the list of sibling files has not already been established, it will be,
393 : * unless the GDAL_DISABLE_READDIR_ON_OPEN configuration option has been set to
394 : * YES or EMPTY_DIR when this instance was constructed.
395 : *
396 : * @return sibling files. Ownership belongs to "this".
397 : */
398 40099 : char **GDALOpenInfo::GetSiblingFiles()
399 : {
400 40099 : if (bHasGotSiblingFiles)
401 23431 : return papszSiblingFiles;
402 16668 : bHasGotSiblingFiles = true;
403 :
404 16668 : papszSiblingFiles = VSISiblingFiles(pszFilename);
405 16668 : if (papszSiblingFiles != nullptr)
406 : {
407 0 : return papszSiblingFiles;
408 : }
409 :
410 16668 : CPLString osDir = CPLGetDirname(pszFilename);
411 33336 : const int nMaxFiles = atoi(VSIGetPathSpecificOption(
412 16668 : pszFilename, "GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
413 16668 : papszSiblingFiles = VSIReadDirEx(osDir, nMaxFiles);
414 16668 : if (nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles)
415 : {
416 1 : CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
417 : osDir.c_str());
418 1 : CSLDestroy(papszSiblingFiles);
419 1 : papszSiblingFiles = nullptr;
420 : }
421 :
422 16668 : return papszSiblingFiles;
423 : }
424 :
425 : /************************************************************************/
426 : /* StealSiblingFiles() */
427 : /* */
428 : /* Same as GetSiblingFiles() except that the list is stolen */
429 : /* (ie ownership transferred to the caller) and the associated */
430 : /* member variable is set to NULL. */
431 : /************************************************************************/
432 :
433 : /** Return sibling files and steal reference
434 : * @return sibling files. Ownership below to the caller (must be freed with
435 : * CSLDestroy)
436 : */
437 8436 : char **GDALOpenInfo::StealSiblingFiles()
438 : {
439 8436 : char **papszRet = GetSiblingFiles();
440 8436 : papszSiblingFiles = nullptr;
441 8436 : return papszRet;
442 : }
443 :
444 : /************************************************************************/
445 : /* AreSiblingFilesLoaded() */
446 : /************************************************************************/
447 :
448 : /** Return whether sibling files have been loaded.
449 : * @return true or false.
450 : */
451 48341 : bool GDALOpenInfo::AreSiblingFilesLoaded() const
452 : {
453 48341 : return bHasGotSiblingFiles;
454 : }
455 :
456 : /************************************************************************/
457 : /* TryToIngest() */
458 : /************************************************************************/
459 :
460 : /** Ingest bytes from the file.
461 : * @param nBytes number of bytes to ingest.
462 : * @return TRUE if successful
463 : */
464 22088 : int GDALOpenInfo::TryToIngest(int nBytes)
465 : {
466 22088 : if (fpL == nullptr)
467 63 : return FALSE;
468 22025 : if (nHeaderBytes < nHeaderBytesTried)
469 13257 : return TRUE;
470 8768 : pabyHeader = static_cast<GByte *>(CPLRealloc(pabyHeader, nBytes + 1));
471 8768 : memset(pabyHeader, 0, nBytes + 1);
472 8768 : VSIRewindL(fpL);
473 8768 : nHeaderBytesTried = nBytes;
474 8768 : nHeaderBytes = static_cast<int>(VSIFReadL(pabyHeader, 1, nBytes, fpL));
475 8768 : VSIRewindL(fpL);
476 :
477 8768 : return TRUE;
478 : }
479 :
480 : /************************************************************************/
481 : /* IsSingleAllowedDriver() */
482 : /************************************************************************/
483 :
484 : /** Returns true if the driver name is the single in the list of allowed
485 : * drivers.
486 : *
487 : * @param pszDriverName Driver name to test.
488 : * @return true if the driver name is the single in the list of allowed
489 : * drivers.
490 : * @since GDAL 3.10
491 : */
492 605414 : bool GDALOpenInfo::IsSingleAllowedDriver(const char *pszDriverName) const
493 : {
494 2805 : return papszAllowedDrivers && papszAllowedDrivers[0] &&
495 608806 : !papszAllowedDrivers[1] &&
496 606001 : EQUAL(papszAllowedDrivers[0], pszDriverName);
497 : }
|