Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Implementation of GDALOpenInfo class.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2002, Frank Warmerdam
9 : * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "gdal_priv.h" // Must be included first for mingw VSIStatBufL.
15 : #include "cpl_port.h"
16 :
17 : #include <cstdlib>
18 : #include <cstring>
19 : #ifdef HAVE_UNISTD_H
20 : #include <unistd.h>
21 : #endif
22 :
23 : #include <algorithm>
24 : #include <map>
25 : #include <mutex>
26 : #include <vector>
27 :
28 : #include "cpl_config.h"
29 : #include "cpl_conv.h"
30 : #include "cpl_error.h"
31 : #include "cpl_string.h"
32 : #include "cpl_vsi.h"
33 : #include "gdal.h"
34 :
// Keep the prototypes of these 2 functions in sync between gdalopeninfo.cpp,
// ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp.
void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
                                      const GByte *pabyHeader,
                                      int nHeaderBytes);
void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename);
41 :
42 : /************************************************************************/
43 :
44 : /* This whole section helps for SQLite/GPKG, especially with write-ahead
45 : * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
46 : * properly work and decide when to create/delete the wal related files.
47 : * One issue with POSIX advisory locks is that if within the same process
48 : * you do
49 : * f1 = open('somefile')
50 : * set locks on f1
51 : * f2 = open('somefile')
52 : * close(f2)
53 : * The close(f2) will cancel the locks set on f1. The work on f1 is done by
54 : * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
55 : * So as soon as sqlite3 has opened a file we should make sure not to re-open
56 : * it (actually close it) ourselves.
57 : */
58 :
59 : namespace
60 : {
61 : struct FileNotToOpen
62 : {
63 : CPLString osFilename{};
64 : int nRefCount{};
65 : GByte *pabyHeader{nullptr};
66 : int nHeaderBytes{0};
67 : };
68 : } // namespace
69 :
70 : static std::mutex sFNTOMutex;
71 : static std::map<CPLString, FileNotToOpen> *pMapFNTO = nullptr;
72 :
73 1759 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
74 : const GByte *pabyHeader, int nHeaderBytes)
75 : {
76 3518 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
77 1759 : if (pMapFNTO == nullptr)
78 1449 : pMapFNTO = new std::map<CPLString, FileNotToOpen>();
79 1759 : auto oIter = pMapFNTO->find(pszFilename);
80 1759 : if (oIter != pMapFNTO->end())
81 : {
82 120 : oIter->second.nRefCount++;
83 : }
84 : else
85 : {
86 1639 : FileNotToOpen fnto;
87 1639 : fnto.osFilename = pszFilename;
88 1639 : fnto.nRefCount = 1;
89 1639 : fnto.pabyHeader = static_cast<GByte *>(CPLMalloc(nHeaderBytes + 1));
90 1639 : memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
91 1639 : fnto.pabyHeader[nHeaderBytes] = 0;
92 1639 : fnto.nHeaderBytes = nHeaderBytes;
93 1639 : (*pMapFNTO)[pszFilename] = std::move(fnto);
94 : }
95 1759 : }
96 :
97 1758 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename)
98 : {
99 3516 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
100 1758 : CPLAssert(pMapFNTO);
101 1758 : auto oIter = pMapFNTO->find(pszFilename);
102 1758 : CPLAssert(oIter != pMapFNTO->end());
103 1758 : oIter->second.nRefCount--;
104 1758 : if (oIter->second.nRefCount == 0)
105 : {
106 1638 : CPLFree(oIter->second.pabyHeader);
107 1638 : pMapFNTO->erase(oIter);
108 : }
109 1758 : if (pMapFNTO->empty())
110 : {
111 1448 : delete pMapFNTO;
112 1448 : pMapFNTO = nullptr;
113 : }
114 1758 : }
115 :
116 108084 : static GByte *GDALOpenInfoGetFileNotToOpen(const char *pszFilename,
117 : int *pnHeaderBytes)
118 : {
119 216168 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
120 108084 : *pnHeaderBytes = 0;
121 108084 : if (pMapFNTO == nullptr)
122 : {
123 102072 : return nullptr;
124 : }
125 6012 : auto oIter = pMapFNTO->find(pszFilename);
126 6012 : if (oIter == pMapFNTO->end())
127 : {
128 4566 : return nullptr;
129 : }
130 1446 : *pnHeaderBytes = oIter->second.nHeaderBytes;
131 1446 : GByte *pabyHeader = static_cast<GByte *>(CPLMalloc(*pnHeaderBytes + 1));
132 1446 : memcpy(pabyHeader, oIter->second.pabyHeader, *pnHeaderBytes);
133 1446 : pabyHeader[*pnHeaderBytes] = 0;
134 1446 : return pabyHeader;
135 : }
136 :
137 : /************************************************************************/
138 : /* ==================================================================== */
139 : /* GDALOpenInfo */
140 : /* ==================================================================== */
141 : /************************************************************************/
142 :
143 : /************************************************************************/
144 : /* GDALOpenInfo() */
145 : /************************************************************************/
146 :
/** Constructor.
 *
 * Duplicates the filename, derives the access mode from the open flags and,
 * unless the name starts with "MVT:/vsi", probes the file: it determines
 * whether it exists / is a directory, opens it and ingests its first bytes
 * into pabyHeader, and sets up the sibling file list.
 *
 * @param pszFilenameIn filename
 * @param nOpenFlagsIn open flags
 * @param papszSiblingsIn list of sibling files, or NULL.
 */
GDALOpenInfo::GDALOpenInfo(const char *pszFilenameIn, int nOpenFlagsIn,
                           const char *const *papszSiblingsIn)
    : pszFilename(CPLStrdup(pszFilenameIn)),
      osExtension(CPLGetExtensionSafe(pszFilenameIn)),
      eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
      nOpenFlags(nOpenFlagsIn)
{
    // Names with this prefix are returned as-is: no stat, open or sibling
    // handling is attempted on them.
    if (STARTS_WITH(pszFilename, "MVT:/vsi"))
        return;

/* -------------------------------------------------------------------- */
/*      Ensure that C: is treated as C:\ so we can stat it on           */
/*      Windows.  Similar to what is done in CPLStat().                 */
/* -------------------------------------------------------------------- */
#ifdef _WIN32
    if (strlen(pszFilenameIn) == 2 && pszFilenameIn[1] == ':')
    {
        // 2 characters + backslash + nul terminator fit in the buffer.
        char szAltPath[10];

        strcpy(szAltPath, pszFilenameIn);
        strcat(szAltPath, "\\");
        CPLFree(pszFilename);
        pszFilename = CPLStrdup(szAltPath);
    }
#endif  // _WIN32

    /* -------------------------------------------------------------------- */
    /*      Collect information about the file.                             */
    /* -------------------------------------------------------------------- */

#ifdef HAVE_READLINK
    // Set once a symbolic link has been followed, so that the probing below
    // is restarted at most one time.
    bool bHasRetried = false;

retry:  // TODO(schwehr): Stop using goto.

#endif  // HAVE_READLINK

#if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) ||         \
      (defined(__MACH__) && defined(__APPLE__)))
    /* On BSDs, fread() on a directory returns non zero, so we have to */
    /* do a stat() before to check the nature of pszFilename. */
    bool bPotentialDirectory = (eAccess == GA_ReadOnly);
#else
    bool bPotentialDirectory = false;
#endif

    /* Check if the filename might be a directory of a special virtual file
     * system */
    if (STARTS_WITH(pszFilename, "/vsizip/") ||
        STARTS_WITH(pszFilename, "/vsitar/") ||
        STARTS_WITH(pszFilename, "/vsi7z/") ||
        STARTS_WITH(pszFilename, "/vsirar/"))
    {
        // Inside an archive file system, only names whose extension is an
        // archive extension, or that end with '}', are stat'ed first.
        const char *pszExt = osExtension.c_str();
        if (EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") ||
            EQUAL(pszExt, "gz") || EQUAL(pszExt, "7z") ||
            EQUAL(pszExt, "rar") ||
            pszFilename[strlen(pszFilename) - 1] == '}'
#ifdef DEBUG
            // For AFL, so that .cur_input is detected as the archive filename.
            || EQUAL(CPLGetFilename(pszFilename), ".cur_input")
#endif  // DEBUG
        )
        {
            bPotentialDirectory = true;
        }
    }
    else if (STARTS_WITH(pszFilename, "/vsicurl/"))
    {
        bPotentialDirectory = true;
    }

    if (bPotentialDirectory)
    {
        int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
        if (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR)
            nStatFlags |= VSI_STAT_SET_ERROR_FLAG;

        // For those special files, opening them with VSIFOpenL() might result
        // in content, even if they should be considered as directories, so
        // use stat.
        VSIStatBufL sStat;

        if (VSIStatExL(pszFilename, &sStat, nStatFlags) == 0)
        {
            bStatOK = TRUE;
            if (VSI_ISDIR(sStat.st_mode))
                bIsDirectory = TRUE;
        }
    }

    // If the file has been declared through
    // GDALOpenInfoDeclareFileNotToOpen(), this returns a copy of the header
    // registered for it, and the file is then not opened below.
    pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);

    if (!bIsDirectory && pabyHeader == nullptr)
    {
        fpL = VSIFOpenExL(pszFilename, (eAccess == GA_Update) ? "r+b" : "rb",
                          (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR) > 0);
    }
    if (pabyHeader)
    {
        // Header comes from the "not to open" registry: fpL stays null.
        bStatOK = TRUE;
        nHeaderBytesTried = nHeaderBytes;
    }
    else if (fpL != nullptr)
    {
        bStatOK = TRUE;
        // Number of bytes to ingest, clamped to [1024, 10 * 1024 * 1024].
        int nBufSize =
            atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
        if (nBufSize < 1024)
            nBufSize = 1024;
        else if (nBufSize > 10 * 1024 * 1024)
            nBufSize = 10 * 1024 * 1024;
        // Zero-initialized, with one extra byte so that the header is always
        // nul-terminated.
        pabyHeader = static_cast<GByte *>(CPLCalloc(nBufSize + 1, 1));
        nHeaderBytesTried = nBufSize;
        nHeaderBytes =
            static_cast<int>(VSIFReadL(pabyHeader, 1, nHeaderBytesTried, fpL));
        VSIRewindL(fpL);

        /* If we cannot read anything, check if it is not a directory instead */
        VSIStatBufL sStat;
        if (nHeaderBytes == 0 &&
            VSIStatExL(pszFilename, &sStat,
                       VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0 &&
            VSI_ISDIR(sStat.st_mode))
        {
            CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
            fpL = nullptr;
            CPLFree(pabyHeader);
            pabyHeader = nullptr;
            bIsDirectory = TRUE;
        }
    }
    else if (!bStatOK)
    {
        // The file could not be opened and has not been stat'ed successfully.
        // Stat it now, unless that was already attempted above
        // (bPotentialDirectory).
        VSIStatBufL sStat;
        if (!bPotentialDirectory &&
            VSIStatExL(pszFilename, &sStat,
                       VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0)
        {
            bStatOK = TRUE;
            if (VSI_ISDIR(sStat.st_mode))
                bIsDirectory = TRUE;
        }
#ifdef HAVE_READLINK
        else if (!bHasRetried && !STARTS_WITH(pszFilename, "/vsi"))
        {
            // If someone creates a file with "ln -sf
            // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
            // my_remote_utm.tif" we will be able to open it by passing
            // my_remote_utm.tif. This helps a lot for GDAL based readers that
            // only provide file explorers to open datasets.
            const int nBufSize = 2048;
            std::vector<char> oFilename(nBufSize);
            char *szPointerFilename = &oFilename[0];
            int nBytes = static_cast<int>(
                readlink(pszFilename, szPointerFilename, nBufSize));
            if (nBytes != -1)
            {
                // readlink() does not nul-terminate; the min() keeps the
                // terminator inside the buffer when the target fills it.
                szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
                CPLFree(pszFilename);
                pszFilename = CPLStrdup(szPointerFilename);
                osExtension = CPLGetExtensionSafe(pszFilename);
                // The sibling list passed by the caller is dropped
                // (presumably because it relates to the original name, not
                // to the link target).
                papszSiblingsIn = nullptr;
                bHasRetried = true;
                goto retry;
            }
        }
#endif  // HAVE_READLINK
    }

    /* -------------------------------------------------------------------- */
    /*      Capture sibling list either from passed in values, or by        */
    /*      scanning for them only if requested through GetSiblingFiles().  */
    /* -------------------------------------------------------------------- */
    if (papszSiblingsIn != nullptr)
    {
        papszSiblingFiles = CSLDuplicate(papszSiblingsIn);
        bHasGotSiblingFiles = true;
    }
    else if (bStatOK && !bIsDirectory)
    {
        papszSiblingFiles = VSISiblingFiles(pszFilename);
        if (papszSiblingFiles != nullptr)
        {
            bHasGotSiblingFiles = true;
        }
        else
        {
            const char *pszOptionVal = VSIGetPathSpecificOption(
                pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
            if (EQUAL(pszOptionVal, "EMPTY_DIR"))
            {
                // The list only contains the file itself.
                papszSiblingFiles =
                    CSLAddString(nullptr, CPLGetFilename(pszFilename));
                bHasGotSiblingFiles = true;
            }
            else if (CPLTestBool(pszOptionVal))
            {
                /* skip reading the directory */
                papszSiblingFiles = nullptr;
                bHasGotSiblingFiles = true;
            }
            else
            {
                /* will be lazy loaded */
                papszSiblingFiles = nullptr;
                bHasGotSiblingFiles = false;
            }
        }
    }
    else
    {
        // Stat failed or the name is a directory: no sibling list, and
        // GetSiblingFiles() will not try to build one.
        papszSiblingFiles = nullptr;
        bHasGotSiblingFiles = true;
    }
}
368 :
369 : /************************************************************************/
370 : /* ~GDALOpenInfo() */
371 : /************************************************************************/
372 :
373 108925 : GDALOpenInfo::~GDALOpenInfo()
374 :
375 : {
376 108929 : VSIFree(pabyHeader);
377 108924 : CPLFree(pszFilename);
378 :
379 108928 : if (fpL != nullptr)
380 12953 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
381 108928 : CSLDestroy(papszSiblingFiles);
382 108917 : }
383 :
384 : /************************************************************************/
385 : /* GetSiblingFiles() */
386 : /************************************************************************/
387 :
388 : /** Return sibling files.
389 : *
390 : * If the list of sibling files has not already been established, it will be,
391 : * unless the GDAL_DISABLE_READDIR_ON_OPEN configuration option has been set to
392 : * YES or EMPTY_DIR when this instance was constructed.
393 : *
394 : * @return sibling files. Ownership belongs to "this".
395 : */
396 41955 : char **GDALOpenInfo::GetSiblingFiles()
397 : {
398 41955 : if (bHasGotSiblingFiles)
399 24048 : return papszSiblingFiles;
400 17907 : bHasGotSiblingFiles = true;
401 :
402 17907 : papszSiblingFiles = VSISiblingFiles(pszFilename);
403 17907 : if (papszSiblingFiles != nullptr)
404 : {
405 0 : return papszSiblingFiles;
406 : }
407 :
408 17907 : const CPLString osDir = CPLGetDirnameSafe(pszFilename);
409 35816 : const int nMaxFiles = atoi(VSIGetPathSpecificOption(
410 17908 : pszFilename, "GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
411 17908 : papszSiblingFiles = VSIReadDirEx(osDir, nMaxFiles);
412 17908 : if (nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles)
413 : {
414 1 : CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
415 : osDir.c_str());
416 1 : CSLDestroy(papszSiblingFiles);
417 1 : papszSiblingFiles = nullptr;
418 : }
419 :
420 17908 : return papszSiblingFiles;
421 : }
422 :
423 : /************************************************************************/
424 : /* StealSiblingFiles() */
425 : /* */
426 : /* Same as GetSiblingFiles() except that the list is stealed */
427 : /* (ie ownership transferred to the caller) and the associated */
428 : /* member variable is set to NULL. */
429 : /************************************************************************/
430 :
431 : /** Return sibling files and steal reference
432 : * @return sibling files. Ownership below to the caller (must be freed with
433 : * CSLDestroy)
434 : */
435 9155 : char **GDALOpenInfo::StealSiblingFiles()
436 : {
437 9155 : char **papszRet = GetSiblingFiles();
438 9155 : papszSiblingFiles = nullptr;
439 9155 : return papszRet;
440 : }
441 :
442 : /************************************************************************/
443 : /* AreSiblingFilesLoaded() */
444 : /************************************************************************/
445 :
/** Return whether the sibling file list has already been established.
 *
 * When this returns false, the next call to GetSiblingFiles() will try to
 * build the list.
 *
 * @return true or false.
 */
bool GDALOpenInfo::AreSiblingFilesLoaded() const
{
    return bHasGotSiblingFiles;
}
453 :
454 : /************************************************************************/
455 : /* TryToIngest() */
456 : /************************************************************************/
457 :
458 : /** Ingest bytes from the file.
459 : * @param nBytes number of bytes to ingest.
460 : * @return TRUE if successful
461 : */
462 24887 : int GDALOpenInfo::TryToIngest(int nBytes)
463 : {
464 24887 : if (fpL == nullptr)
465 54 : return FALSE;
466 24833 : if (nHeaderBytes < nHeaderBytesTried)
467 14995 : return TRUE;
468 9838 : pabyHeader = static_cast<GByte *>(CPLRealloc(pabyHeader, nBytes + 1));
469 9838 : memset(pabyHeader, 0, nBytes + 1);
470 9838 : VSIRewindL(fpL);
471 9838 : nHeaderBytesTried = nBytes;
472 9838 : nHeaderBytes = static_cast<int>(VSIFReadL(pabyHeader, 1, nBytes, fpL));
473 9838 : VSIRewindL(fpL);
474 :
475 9838 : return TRUE;
476 : }
477 :
478 : /************************************************************************/
479 : /* IsSingleAllowedDriver() */
480 : /************************************************************************/
481 :
482 : /** Returns true if the driver name is the single in the list of allowed
483 : * drivers.
484 : *
485 : * @param pszDriverName Driver name to test.
486 : * @return true if the driver name is the single in the list of allowed
487 : * drivers.
488 : * @since GDAL 3.10
489 : */
490 737727 : bool GDALOpenInfo::IsSingleAllowedDriver(const char *pszDriverName) const
491 : {
492 3139 : return papszAllowedDrivers && papszAllowedDrivers[0] &&
493 741565 : !papszAllowedDrivers[1] &&
494 738426 : EQUAL(papszAllowedDrivers[0], pszDriverName);
495 : }
|