Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Implementation of GDALOpenInfo class.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2002, Frank Warmerdam
9 : * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "gdal_priv.h" // Must be included first for mingw VSIStatBufL.
15 : #include "cpl_port.h"
16 :
17 : #include <cstdlib>
18 : #include <cstring>
19 : #ifdef HAVE_UNISTD_H
20 : #include <unistd.h>
21 : #endif
22 :
23 : #include <algorithm>
24 : #include <map>
25 : #include <mutex>
26 : #include <vector>
27 :
28 : #include "cpl_config.h"
29 : #include "cpl_conv.h"
30 : #include "cpl_error.h"
31 : #include "cpl_string.h"
32 : #include "cpl_vsi.h"
33 : #include "gdal.h"
34 :
35 : // Keep the prototypes of these 2 functions in sync between gdalopeninfo.cpp,
36 : // ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp
37 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
38 : const GByte *pabyHeader,
39 : int nHeaderBytes);
40 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename);
41 :
42 : /************************************************************************/
43 :
44 : /* This whole section helps for SQLite/GPKG, especially with write-ahead
45 : * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
46 : * properly work and decide when to create/delete the wal related files.
47 : * One issue with POSIX advisory locks is that if within the same process
48 : * you do
49 : * f1 = open('somefile')
50 : * set locks on f1
51 : * f2 = open('somefile')
52 : * close(f2)
53 : * The close(f2) will cancel the locks set on f1. The work on f1 is done by
54 : * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
55 : * So as soon as sqlite3 has opened a file we should make sure not to re-open
56 : * it (actually close it) ourselves.
57 : */
58 :
59 : namespace
60 : {
61 : struct FileNotToOpen
62 : {
63 : CPLString osFilename{};
64 : int nRefCount{};
65 : GByte *pabyHeader{nullptr};
66 : int nHeaderBytes{0};
67 : };
68 : } // namespace
69 :
70 : static std::mutex sFNTOMutex;
71 : static std::map<CPLString, FileNotToOpen> *pMapFNTO = nullptr;
72 :
73 1707 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
74 : const GByte *pabyHeader, int nHeaderBytes)
75 : {
76 3414 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
77 1707 : if (pMapFNTO == nullptr)
78 1427 : pMapFNTO = new std::map<CPLString, FileNotToOpen>();
79 1707 : auto oIter = pMapFNTO->find(pszFilename);
80 1707 : if (oIter != pMapFNTO->end())
81 : {
82 122 : oIter->second.nRefCount++;
83 : }
84 : else
85 : {
86 1585 : FileNotToOpen fnto;
87 1585 : fnto.osFilename = pszFilename;
88 1585 : fnto.nRefCount = 1;
89 1585 : fnto.pabyHeader = static_cast<GByte *>(CPLMalloc(nHeaderBytes + 1));
90 1585 : memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
91 1585 : fnto.pabyHeader[nHeaderBytes] = 0;
92 1585 : fnto.nHeaderBytes = nHeaderBytes;
93 1585 : (*pMapFNTO)[pszFilename] = std::move(fnto);
94 : }
95 1707 : }
96 :
97 1706 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename)
98 : {
99 3412 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
100 1706 : CPLAssert(pMapFNTO);
101 1706 : auto oIter = pMapFNTO->find(pszFilename);
102 1706 : CPLAssert(oIter != pMapFNTO->end());
103 1706 : oIter->second.nRefCount--;
104 1706 : if (oIter->second.nRefCount == 0)
105 : {
106 1584 : CPLFree(oIter->second.pabyHeader);
107 1584 : pMapFNTO->erase(oIter);
108 : }
109 1706 : if (pMapFNTO->empty())
110 : {
111 1426 : delete pMapFNTO;
112 1426 : pMapFNTO = nullptr;
113 : }
114 1706 : }
115 :
116 100654 : static GByte *GDALOpenInfoGetFileNotToOpen(const char *pszFilename,
117 : int *pnHeaderBytes)
118 : {
119 201308 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
120 100654 : *pnHeaderBytes = 0;
121 100654 : if (pMapFNTO == nullptr)
122 : {
123 94617 : return nullptr;
124 : }
125 6037 : auto oIter = pMapFNTO->find(pszFilename);
126 6037 : if (oIter == pMapFNTO->end())
127 : {
128 4561 : return nullptr;
129 : }
130 1476 : *pnHeaderBytes = oIter->second.nHeaderBytes;
131 1476 : GByte *pabyHeader = static_cast<GByte *>(CPLMalloc(*pnHeaderBytes + 1));
132 1476 : memcpy(pabyHeader, oIter->second.pabyHeader, *pnHeaderBytes);
133 1476 : pabyHeader[*pnHeaderBytes] = 0;
134 1476 : return pabyHeader;
135 : }
136 :
137 : /************************************************************************/
138 : /* ==================================================================== */
139 : /* GDALOpenInfo */
140 : /* ==================================================================== */
141 : /************************************************************************/
142 :
143 : /************************************************************************/
144 : /* GDALOpenInfo() */
145 : /************************************************************************/
146 :
147 : /** Constructor/
148 : * @param pszFilenameIn filename
149 : * @param nOpenFlagsIn open flags
150 : * @param papszSiblingsIn list of sibling files, or NULL.
151 : */
152 101498 : GDALOpenInfo::GDALOpenInfo(const char *pszFilenameIn, int nOpenFlagsIn,
153 101498 : const char *const *papszSiblingsIn)
154 202995 : : pszFilename(CPLStrdup(pszFilenameIn)),
155 : osExtension(CPLGetExtensionSafe(pszFilenameIn)),
156 101496 : eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
157 101498 : nOpenFlags(nOpenFlagsIn)
158 : {
159 101496 : if (STARTS_WITH(pszFilename, "MVT:/vsi"))
160 846 : return;
161 :
162 : /* -------------------------------------------------------------------- */
163 : /* Ensure that C: is treated as C:\ so we can stat it on */
164 : /* Windows. Similar to what is done in CPLStat(). */
165 : /* -------------------------------------------------------------------- */
166 : #ifdef _WIN32
167 : if (strlen(pszFilenameIn) == 2 && pszFilenameIn[1] == ':')
168 : {
169 : char szAltPath[10];
170 :
171 : strcpy(szAltPath, pszFilenameIn);
172 : strcat(szAltPath, "\\");
173 : CPLFree(pszFilename);
174 : pszFilename = CPLStrdup(szAltPath);
175 : }
176 : #endif // WIN32
177 :
178 : /* -------------------------------------------------------------------- */
179 : /* Collect information about the file. */
180 : /* -------------------------------------------------------------------- */
181 :
182 : #ifdef HAVE_READLINK
183 100650 : bool bHasRetried = false;
184 :
185 100651 : retry: // TODO(schwehr): Stop using goto.
186 :
187 : #endif // HAVE_READLINK
188 :
189 : #if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) || \
190 : (defined(__MACH__) && defined(__APPLE__)))
191 : /* On BSDs, fread() on a directory returns non zero, so we have to */
192 : /* do a stat() before to check the nature of pszFilename. */
193 : bool bPotentialDirectory = (eAccess == GA_ReadOnly);
194 : #else
195 100651 : bool bPotentialDirectory = false;
196 : #endif
197 :
198 : /* Check if the filename might be a directory of a special virtual file
199 : * system */
200 100651 : if (STARTS_WITH(pszFilename, "/vsizip/") ||
201 100482 : STARTS_WITH(pszFilename, "/vsitar/") ||
202 100462 : STARTS_WITH(pszFilename, "/vsi7z/") ||
203 100464 : STARTS_WITH(pszFilename, "/vsirar/"))
204 : {
205 188 : const char *pszExt = osExtension.c_str();
206 165 : if (EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") ||
207 163 : EQUAL(pszExt, "gz") || EQUAL(pszExt, "7z") ||
208 162 : EQUAL(pszExt, "rar") ||
209 162 : pszFilename[strlen(pszFilename) - 1] == '}'
210 : #ifdef DEBUG
211 : // For AFL, so that .cur_input is detected as the archive filename.
212 355 : || EQUAL(CPLGetFilename(pszFilename), ".cur_input")
213 : #endif // DEBUG
214 : )
215 : {
216 39 : bPotentialDirectory = true;
217 190 : }
218 : }
219 100463 : else if (STARTS_WITH(pszFilename, "/vsicurl/"))
220 : {
221 41 : bPotentialDirectory = true;
222 : }
223 :
224 100653 : if (bPotentialDirectory)
225 : {
226 80 : int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
227 80 : if (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR)
228 48 : nStatFlags |= VSI_STAT_SET_ERROR_FLAG;
229 :
230 : // For those special files, opening them with VSIFOpenL() might result
231 : // in content, even if they should be considered as directories, so
232 : // use stat.
233 : VSIStatBufL sStat;
234 :
235 80 : if (VSIStatExL(pszFilename, &sStat, nStatFlags) == 0)
236 : {
237 72 : bStatOK = TRUE;
238 72 : if (VSI_ISDIR(sStat.st_mode))
239 21 : bIsDirectory = TRUE;
240 : }
241 : }
242 :
243 100653 : pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);
244 :
245 100654 : if (!bIsDirectory && pabyHeader == nullptr)
246 : {
247 99157 : fpL = VSIFOpenExL(pszFilename, (eAccess == GA_Update) ? "r+b" : "rb",
248 99157 : (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR) > 0);
249 : }
250 100654 : if (pabyHeader)
251 : {
252 1476 : bStatOK = TRUE;
253 1476 : nHeaderBytesTried = nHeaderBytes;
254 : }
255 99178 : else if (fpL != nullptr)
256 : {
257 54691 : bStatOK = TRUE;
258 : int nBufSize =
259 54691 : atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
260 54691 : if (nBufSize < 1024)
261 0 : nBufSize = 1024;
262 54691 : else if (nBufSize > 10 * 1024 * 1024)
263 0 : nBufSize = 10 * 1024 * 1024;
264 54691 : pabyHeader = static_cast<GByte *>(CPLCalloc(nBufSize + 1, 1));
265 54682 : nHeaderBytesTried = nBufSize;
266 54678 : nHeaderBytes =
267 54682 : static_cast<int>(VSIFReadL(pabyHeader, 1, nHeaderBytesTried, fpL));
268 54678 : VSIRewindL(fpL);
269 :
270 : /* If we cannot read anything, check if it is not a directory instead */
271 : VSIStatBufL sStat;
272 110690 : if (nHeaderBytes == 0 &&
273 1360 : VSIStatExL(pszFilename, &sStat,
274 56025 : VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0 &&
275 1360 : VSI_ISDIR(sStat.st_mode))
276 : {
277 1233 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
278 1233 : fpL = nullptr;
279 1233 : CPLFree(pabyHeader);
280 1233 : pabyHeader = nullptr;
281 1233 : bIsDirectory = TRUE;
282 : }
283 : }
284 44487 : else if (!bStatOK)
285 : {
286 : VSIStatBufL sStat;
287 88920 : if (!bPotentialDirectory &&
288 44457 : VSIStatExL(pszFilename, &sStat,
289 : VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0)
290 : {
291 1250 : bStatOK = TRUE;
292 1250 : if (VSI_ISDIR(sStat.st_mode))
293 1245 : bIsDirectory = TRUE;
294 : }
295 : #ifdef HAVE_READLINK
296 43213 : else if (!bHasRetried && !STARTS_WITH(pszFilename, "/vsi"))
297 : {
298 : // If someone creates a file with "ln -sf
299 : // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
300 : // my_remote_utm.tif" we will be able to open it by passing
301 : // my_remote_utm.tif. This helps a lot for GDAL based readers that
302 : // only provide file explorers to open datasets.
303 22914 : const int nBufSize = 2048;
304 22914 : std::vector<char> oFilename(nBufSize);
305 22914 : char *szPointerFilename = &oFilename[0];
306 : int nBytes = static_cast<int>(
307 22914 : readlink(pszFilename, szPointerFilename, nBufSize));
308 22913 : if (nBytes != -1)
309 : {
310 2 : szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
311 2 : CPLFree(pszFilename);
312 2 : pszFilename = CPLStrdup(szPointerFilename);
313 2 : osExtension = CPLGetExtensionSafe(pszFilename);
314 2 : papszSiblingsIn = nullptr;
315 2 : bHasRetried = true;
316 2 : goto retry;
317 : }
318 : }
319 : #endif // HAVE_READLINK
320 : }
321 :
322 : /* -------------------------------------------------------------------- */
323 : /* Capture sibling list either from passed in values, or by */
324 : /* scanning for them only if requested through GetSiblingFiles(). */
325 : /* -------------------------------------------------------------------- */
326 100625 : if (papszSiblingsIn != nullptr)
327 : {
328 252 : papszSiblingFiles = CSLDuplicate(papszSiblingsIn);
329 252 : bHasGotSiblingFiles = true;
330 : }
331 100373 : else if (bStatOK && !bIsDirectory)
332 : {
333 54657 : papszSiblingFiles = VSISiblingFiles(pszFilename);
334 54685 : if (papszSiblingFiles != nullptr)
335 : {
336 6 : bHasGotSiblingFiles = true;
337 : }
338 : else
339 : {
340 109360 : const char *pszOptionVal = VSIGetPathSpecificOption(
341 54679 : pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
342 54681 : if (EQUAL(pszOptionVal, "EMPTY_DIR"))
343 : {
344 65 : papszSiblingFiles =
345 65 : CSLAddString(nullptr, CPLGetFilename(pszFilename));
346 65 : bHasGotSiblingFiles = true;
347 : }
348 54616 : else if (CPLTestBool(pszOptionVal))
349 : {
350 : /* skip reading the directory */
351 18 : papszSiblingFiles = nullptr;
352 18 : bHasGotSiblingFiles = true;
353 : }
354 : else
355 : {
356 : /* will be lazy loaded */
357 54598 : papszSiblingFiles = nullptr;
358 54598 : bHasGotSiblingFiles = false;
359 : }
360 54687 : }
361 : }
362 : else
363 : {
364 45716 : papszSiblingFiles = nullptr;
365 45716 : bHasGotSiblingFiles = true;
366 : }
367 : }
368 :
369 : /************************************************************************/
370 : /* ~GDALOpenInfo() */
371 : /************************************************************************/
372 :
373 101402 : GDALOpenInfo::~GDALOpenInfo()
374 :
375 : {
376 101454 : VSIFree(pabyHeader);
377 101396 : CPLFree(pszFilename);
378 :
379 101435 : if (fpL != nullptr)
380 12556 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
381 101435 : CSLDestroy(papszSiblingFiles);
382 101349 : }
383 :
384 : /************************************************************************/
385 : /* GetSiblingFiles() */
386 : /************************************************************************/
387 :
388 : /** Return sibling files.
389 : *
390 : * If the list of sibling files has not already been established, it will be,
391 : * unless the GDAL_DISABLE_READDIR_ON_OPEN configuration option has been set to
392 : * YES or EMPTY_DIR when this instance was constructed.
393 : *
394 : * @return sibling files. Ownership belongs to "this".
395 : */
396 41463 : char **GDALOpenInfo::GetSiblingFiles()
397 : {
398 41463 : if (bHasGotSiblingFiles)
399 24187 : return papszSiblingFiles;
400 17276 : bHasGotSiblingFiles = true;
401 :
402 17276 : papszSiblingFiles = VSISiblingFiles(pszFilename);
403 17276 : if (papszSiblingFiles != nullptr)
404 : {
405 0 : return papszSiblingFiles;
406 : }
407 :
408 17276 : const CPLString osDir = CPLGetDirnameSafe(pszFilename);
409 34552 : const int nMaxFiles = atoi(VSIGetPathSpecificOption(
410 17276 : pszFilename, "GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
411 17276 : papszSiblingFiles = VSIReadDirEx(osDir, nMaxFiles);
412 17276 : if (nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles)
413 : {
414 1 : CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
415 : osDir.c_str());
416 1 : CSLDestroy(papszSiblingFiles);
417 1 : papszSiblingFiles = nullptr;
418 : }
419 :
420 17276 : return papszSiblingFiles;
421 : }
422 :
423 : /************************************************************************/
424 : /* StealSiblingFiles() */
425 : /* */
426 : /* Same as GetSiblingFiles() except that the list is stolen */
427 : /* (i.e. ownership is transferred to the caller) and the associated */
428 : /* member variable is set to NULL. */
429 : /************************************************************************/
430 :
431 : /** Return sibling files and steal reference
432 : * @return sibling files. Ownership below to the caller (must be freed with
433 : * CSLDestroy)
434 : */
435 8454 : char **GDALOpenInfo::StealSiblingFiles()
436 : {
437 8454 : char **papszRet = GetSiblingFiles();
438 8454 : papszSiblingFiles = nullptr;
439 8454 : return papszRet;
440 : }
441 :
442 : /************************************************************************/
443 : /* AreSiblingFilesLoaded() */
444 : /************************************************************************/
445 :
446 : /** Return whether sibling files have been loaded.
447 : * @return true or false.
448 : */
449 49015 : bool GDALOpenInfo::AreSiblingFilesLoaded() const
450 : {
451 49015 : return bHasGotSiblingFiles;
452 : }
453 :
454 : /************************************************************************/
455 : /* TryToIngest() */
456 : /************************************************************************/
457 :
458 : /** Ingest bytes from the file.
459 : * @param nBytes number of bytes to ingest.
460 : * @return TRUE if successful
461 : */
462 22634 : int GDALOpenInfo::TryToIngest(int nBytes)
463 : {
464 22634 : if (fpL == nullptr)
465 63 : return FALSE;
466 22571 : if (nHeaderBytes < nHeaderBytesTried)
467 13439 : return TRUE;
468 9132 : pabyHeader = static_cast<GByte *>(CPLRealloc(pabyHeader, nBytes + 1));
469 9132 : memset(pabyHeader, 0, nBytes + 1);
470 9132 : VSIRewindL(fpL);
471 9132 : nHeaderBytesTried = nBytes;
472 9132 : nHeaderBytes = static_cast<int>(VSIFReadL(pabyHeader, 1, nBytes, fpL));
473 9132 : VSIRewindL(fpL);
474 :
475 9132 : return TRUE;
476 : }
477 :
478 : /************************************************************************/
479 : /* IsSingleAllowedDriver() */
480 : /************************************************************************/
481 :
482 : /** Returns true if the driver name is the single in the list of allowed
483 : * drivers.
484 : *
485 : * @param pszDriverName Driver name to test.
486 : * @return true if the driver name is the single in the list of allowed
487 : * drivers.
488 : * @since GDAL 3.10
489 : */
490 612202 : bool GDALOpenInfo::IsSingleAllowedDriver(const char *pszDriverName) const
491 : {
492 2918 : return papszAllowedDrivers && papszAllowedDrivers[0] &&
493 615812 : !papszAllowedDrivers[1] &&
494 612894 : EQUAL(papszAllowedDrivers[0], pszDriverName);
495 : }
|