Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL Core
4 : * Purpose: Implementation of GDALOpenInfo class.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2002, Frank Warmerdam
9 : * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : ****************************************************************************/
13 :
14 : #include "gdal_priv.h" // Must be included first for mingw VSIStatBufL.
15 : #include "cpl_port.h"
16 : #include "cpl_vsi_virtual.h"
17 :
18 : #include <cstdlib>
19 : #include <cstring>
20 : #ifdef HAVE_UNISTD_H
21 : #include <unistd.h>
22 : #endif
23 :
24 : #include <algorithm>
25 : #include <map>
26 : #include <mutex>
27 : #include <vector>
28 :
29 : #include "cpl_config.h"
30 : #include "cpl_conv.h"
31 : #include "cpl_error.h"
32 : #include "cpl_string.h"
33 : #include "cpl_vsi.h"
34 : #include "gdal.h"
35 :
36 : // Keep in sync prototype of those 2 functions between gdalopeninfo.cpp,
37 : // ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp
38 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
39 : const GByte *pabyHeader,
40 : int nHeaderBytes);
41 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename);
42 :
43 : /************************************************************************/
44 :
45 : /* This whole section helps for SQLite/GPKG, especially with write-ahead
46 : * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
47 : * properly work and decide when to create/delete the wal related files.
48 : * One issue with POSIX advisory locks is that if within the same process
49 : * you do
50 : * f1 = open('somefile')
51 : * set locks on f1
52 : * f2 = open('somefile')
53 : * close(f2)
54 : * The close(f2) will cancel the locks set on f1. The work on f1 is done by
55 : * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
56 : * So as soon as sqlite3 has opened a file we should make sure not to re-open
57 : * it (actually close it) ourselves.
58 : */
59 :
60 : namespace
61 : {
/* One entry of the "files not to open" registry: a reference-counted record
 * holding a private copy of the header bytes declared for a filename. */
62 : struct FileNotToOpen
63 : {
// Filename the entry was declared for (also used as the map key).
64 : CPLString osFilename{};
// Number of outstanding declarations; the entry is erased when it drops to 0.
65 : int nRefCount{};
// CPLMalloc'ed copy of the declared header (nHeaderBytes bytes followed by a
// NUL byte); released with CPLFree() when the entry is erased.
66 : GByte *pabyHeader{nullptr};
// Number of meaningful bytes in pabyHeader.
67 : int nHeaderBytes{0};
68 : };
69 : } // namespace
70 :
// Protects every access to pMapFNTO.
71 : static std::mutex sFNTOMutex;
// Registry keyed by filename. Allocated on the first declaration and deleted
// again as soon as it becomes empty, so nullptr means "no file declared".
72 : static std::map<CPLString, FileNotToOpen> *pMapFNTO = nullptr;
73 :
74 1871 : void GDALOpenInfoDeclareFileNotToOpen(const char *pszFilename,
75 : const GByte *pabyHeader, int nHeaderBytes)
76 : {
77 3742 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
78 1871 : if (pMapFNTO == nullptr)
79 1612 : pMapFNTO = new std::map<CPLString, FileNotToOpen>();
80 1871 : auto oIter = pMapFNTO->find(pszFilename);
81 1871 : if (oIter != pMapFNTO->end())
82 : {
83 124 : oIter->second.nRefCount++;
84 : }
85 : else
86 : {
87 1747 : FileNotToOpen fnto;
88 1747 : fnto.osFilename = pszFilename;
89 1747 : fnto.nRefCount = 1;
90 1747 : fnto.pabyHeader = static_cast<GByte *>(CPLMalloc(nHeaderBytes + 1));
91 1747 : memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
92 1747 : fnto.pabyHeader[nHeaderBytes] = 0;
93 1747 : fnto.nHeaderBytes = nHeaderBytes;
94 1747 : (*pMapFNTO)[pszFilename] = std::move(fnto);
95 : }
96 1871 : }
97 :
98 1870 : void GDALOpenInfoUnDeclareFileNotToOpen(const char *pszFilename)
99 : {
100 3740 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
101 1870 : CPLAssert(pMapFNTO);
102 1870 : auto oIter = pMapFNTO->find(pszFilename);
103 1870 : CPLAssert(oIter != pMapFNTO->end());
104 1870 : oIter->second.nRefCount--;
105 1870 : if (oIter->second.nRefCount == 0)
106 : {
107 1746 : CPLFree(oIter->second.pabyHeader);
108 1746 : pMapFNTO->erase(oIter);
109 : }
110 1870 : if (pMapFNTO->empty())
111 : {
112 1611 : delete pMapFNTO;
113 1611 : pMapFNTO = nullptr;
114 : }
115 1870 : }
116 :
117 114328 : static GByte *GDALOpenInfoGetFileNotToOpen(const char *pszFilename,
118 : int *pnHeaderBytes)
119 : {
120 228656 : std::lock_guard<std::mutex> oLock(sFNTOMutex);
121 114328 : *pnHeaderBytes = 0;
122 114328 : if (pMapFNTO == nullptr)
123 : {
124 109527 : return nullptr;
125 : }
126 4801 : auto oIter = pMapFNTO->find(pszFilename);
127 4801 : if (oIter == pMapFNTO->end())
128 : {
129 3335 : return nullptr;
130 : }
131 1466 : *pnHeaderBytes = oIter->second.nHeaderBytes;
132 1466 : GByte *pabyHeader = static_cast<GByte *>(CPLMalloc(*pnHeaderBytes + 1));
133 1466 : memcpy(pabyHeader, oIter->second.pabyHeader, *pnHeaderBytes);
134 1466 : pabyHeader[*pnHeaderBytes] = 0;
135 1466 : return pabyHeader;
136 : }
137 :
138 : /************************************************************************/
139 : /* ==================================================================== */
140 : /* GDALOpenInfo */
141 : /* ==================================================================== */
142 : /************************************************************************/
143 :
144 : /************************************************************************/
145 : /* GDALOpenInfo() */
146 : /************************************************************************/
147 :
148 : /** Constructor/
149 : * @param pszFilenameIn filename
150 : * @param nOpenFlagsIn open flags
151 : * @param papszSiblingFilesIn list of sibling files, or NULL.
152 : */
153 115198 : GDALOpenInfo::GDALOpenInfo(const char *pszFilenameIn, int nOpenFlagsIn,
154 115198 : const char *const *papszSiblingFilesIn)
155 230396 : : pszFilename(CPLStrdup(pszFilenameIn)),
156 : osExtension(CPLGetExtensionSafe(pszFilenameIn)),
157 115198 : eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
158 115198 : nOpenFlags(nOpenFlagsIn)
159 : {
160 : /* -------------------------------------------------------------------- */
161 : /* Ensure that C: is treated as C:\ so we can stat it on */
162 : /* Windows. Similar to what is done in CPLStat(). */
163 : /* -------------------------------------------------------------------- */
164 : #ifdef _WIN32
165 : if (strlen(pszFilename) == 2 && pszFilename[1] == ':')
166 : {
167 : char szAltPath[10];
168 :
169 : strcpy(szAltPath, pszFilename);
170 : strcat(szAltPath, "\\");
171 : CPLFree(pszFilename);
172 : pszFilename = CPLStrdup(szAltPath);
173 : }
174 : #endif // WIN32
175 :
176 115198 : Init(papszSiblingFilesIn, nullptr);
177 115198 : }
178 :
179 : /************************************************************************/
180 : /* GDALOpenInfo() */
181 : /************************************************************************/
182 :
183 : /** Constructor/
184 : * @param pszFilenameIn filename
185 : * @param nOpenFlagsIn open flags
186 : * @param poFile already opened file
187 : * @since 3.13
188 : */
189 1995 : GDALOpenInfo::GDALOpenInfo(const char *pszFilenameIn, int nOpenFlagsIn,
190 1995 : std::unique_ptr<VSIVirtualHandle> poFile)
191 3990 : : pszFilename(CPLStrdup(pszFilenameIn)),
192 : osExtension(CPLGetExtensionSafe(pszFilenameIn)),
193 1995 : eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
194 1995 : nOpenFlags(nOpenFlagsIn)
195 : {
196 1995 : Init(nullptr, std::move(poFile));
197 1995 : }
198 :
199 : /************************************************************************/
200 : /* Init() */
201 : /************************************************************************/
202 :
/** Probe the dataset name and fill in the stat / header / sibling-file members.
 *
 * Names starting with "MVT:/vsi" are left untouched. Otherwise the method:
 * - stat()s names that may designate a directory of a virtual file system
 *   (archive prefixes, /vsicurl/) when no handle was passed in;
 * - takes over poFile when it is set, else reuses the header registered
 *   through GDALOpenInfoDeclareFileNotToOpen() or opens the file itself;
 * - ingests the first GDAL_INGESTED_BYTES_AT_OPEN bytes (clamped to
 *   [1024, 10 * 1024 * 1024]) into pabyHeader;
 * - on non-Windows builds, follows a symbolic link at most once when the
 *   name could be neither opened nor stat'ed and does not start with "/vsi";
 * - sets up papszSiblingFiles / bHasGotSiblingFiles.
 *
 * @param papszSiblingFilesIn list of sibling files to duplicate, or NULL.
 * @param poFile already opened file whose ownership is taken, or null.
 */
203 117193 : void GDALOpenInfo::Init(const char *const *papszSiblingFilesIn,
204 : std::unique_ptr<VSIVirtualHandle> poFile)
205 : {
206 117193 : if (STARTS_WITH(pszFilename, "MVT:/vsi"))
207 872 : return;
208 :
209 : /* -------------------------------------------------------------------- */
210 : /* Collect information about the file. */
211 : /* -------------------------------------------------------------------- */
212 :
213 : #if !defined(_WIN32)
214 116321 : bool bHasRetried = false;
215 :
216 116323 : retry: // TODO(schwehr): Stop using goto.
217 :
218 : #endif // !defined(_WIN32)
219 :
220 : #if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) || \
221 : (defined(__MACH__) && defined(__APPLE__)))
222 : /* On BSDs, fread() on a directory returns non zero, so we have to */
223 : /* do a stat() before to check the nature of pszFilename. */
224 : bool bPotentialDirectory = (eAccess == GA_ReadOnly);
225 : #else
226 116323 : bool bPotentialDirectory = false;
227 : #endif
228 :
229 : /* Check if the filename might be a directory of a special virtual file
230 : * system */
231 116323 : if (STARTS_WITH(pszFilename, "/vsizip/") ||
232 116138 : STARTS_WITH(pszFilename, "/vsitar/") ||
233 116119 : STARTS_WITH(pszFilename, "/vsi7z/") ||
234 116119 : STARTS_WITH(pszFilename, "/vsirar/"))
235 : {
236 204 : const char *pszExt = osExtension.c_str();
237 178 : if (EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") ||
238 175 : EQUAL(pszExt, "gz") || EQUAL(pszExt, "7z") ||
239 174 : EQUAL(pszExt, "rar") ||
240 174 : pszFilename[strlen(pszFilename) - 1] == '}'
241 : #ifdef DEBUG
242 : // For AFL, so that .cur_input is detected as the archive filename.
243 382 : || EQUAL(CPLGetFilename(pszFilename), ".cur_input")
244 : #endif // DEBUG
245 : )
246 : {
247 43 : bPotentialDirectory = true;
248 204 : }
249 : }
250 116119 : else if (STARTS_WITH(pszFilename, "/vsicurl/"))
251 : {
252 43 : bPotentialDirectory = true;
253 : }
254 :
255 116323 : if (bPotentialDirectory && !poFile)
256 : {
257 86 : int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
258 86 : if (nOpenFlags & GDAL_OF_VERBOSE_ERROR)
259 52 : nStatFlags |= VSI_STAT_SET_ERROR_FLAG;
260 :
261 : // For those special files, opening them with VSIFOpenL() might result
262 : // in content, even if they should be considered as directories, so
263 : // use stat.
264 : VSIStatBufL sStat;
265 :
266 86 : if (VSIStatExL(pszFilename, &sStat, nStatFlags) == 0)
267 : {
268 78 : bStatOK = TRUE;
269 78 : if (VSI_ISDIR(sStat.st_mode))
270 25 : bIsDirectory = TRUE;
271 : }
272 : }
273 :
274 116323 : if (poFile)
275 : {
// Ownership of the caller-supplied handle is transferred to fpL.
276 1995 : fpL = poFile.release();
277 : }
278 : else
279 : {
// A non-null result is a private copy of the header registered for this
// filename; in that case the file is deliberately not opened again here.
280 114328 : pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);
281 :
282 114328 : if (!bIsDirectory && pabyHeader == nullptr)
283 : {
284 112837 : fpL =
285 112837 : VSIFOpenExL(pszFilename, (eAccess == GA_Update) ? "r+b" : "rb",
286 112837 : (nOpenFlags & GDAL_OF_VERBOSE_ERROR) > 0);
287 : }
288 : }
289 :
290 116323 : if (pabyHeader)
291 : {
292 1466 : bStatOK = TRUE;
293 1466 : nHeaderBytesTried = nHeaderBytes;
294 : }
295 114857 : else if (fpL != nullptr)
296 : {
297 64046 : bStatOK = TRUE;
298 : int nBufSize =
299 64046 : atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
// Clamp the requested ingestion size to [1024, 10 * 1024 * 1024] bytes.
300 64046 : if (nBufSize < 1024)
301 0 : nBufSize = 1024;
302 64046 : else if (nBufSize > 10 * 1024 * 1024)
303 0 : nBufSize = 10 * 1024 * 1024;
// One extra zero-initialized byte keeps the header NUL-terminated.
304 64046 : pabyHeader = static_cast<GByte *>(CPLCalloc(nBufSize + 1, 1));
305 64046 : nHeaderBytesTried = nBufSize;
306 64046 : nHeaderBytes =
307 64046 : static_cast<int>(VSIFReadL(pabyHeader, 1, nHeaderBytesTried, fpL));
308 64046 : VSIRewindL(fpL);
309 :
310 : /* If we cannot read anything, check if it is not a directory instead */
311 : VSIStatBufL sStat;
312 129270 : if (nHeaderBytes == 0 &&
313 1178 : VSIStatExL(pszFilename, &sStat,
314 65224 : VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0 &&
315 1178 : VSI_ISDIR(sStat.st_mode))
316 : {
317 1050 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
318 1050 : fpL = nullptr;
319 1050 : CPLFree(pabyHeader);
320 1050 : pabyHeader = nullptr;
321 1050 : bIsDirectory = TRUE;
322 : }
323 : }
324 50811 : else if (!bStatOK)
325 : {
326 : VSIStatBufL sStat;
327 101564 : if (!bPotentialDirectory &&
328 50778 : VSIStatExL(pszFilename, &sStat,
329 : VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG) == 0)
330 : {
331 1272 : bStatOK = TRUE;
332 1272 : if (VSI_ISDIR(sStat.st_mode))
333 1267 : bIsDirectory = TRUE;
334 : }
335 : #if !defined(_WIN32)
336 49514 : else if (!bHasRetried && !STARTS_WITH(pszFilename, "/vsi"))
337 : {
338 : // If someone creates a file with "ln -sf
339 : // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
340 : // my_remote_utm.tif" we will be able to open it by passing
341 : // my_remote_utm.tif. This helps a lot for GDAL based readers that
342 : // only provide file explorers to open datasets.
343 25124 : const int nBufSize = 2048;
344 25124 : std::vector<char> oFilename(nBufSize);
345 25124 : char *szPointerFilename = &oFilename[0];
346 : int nBytes = static_cast<int>(
347 25124 : readlink(pszFilename, szPointerFilename, nBufSize));
348 25124 : if (nBytes != -1)
349 : {
// The link target replaces the filename (NUL-terminated here, cut to at most
// nBufSize - 1 characters). The caller-supplied sibling list is dropped, and
// bHasRetried limits this to a single retry.
350 2 : szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
351 2 : CPLFree(pszFilename);
352 2 : pszFilename = CPLStrdup(szPointerFilename);
353 2 : osExtension = CPLGetExtensionSafe(pszFilename);
354 2 : papszSiblingFilesIn = nullptr;
355 2 : bHasRetried = true;
356 2 : goto retry;
357 : }
358 : }
359 : #endif // !defined(_WIN32)
360 : }
361 :
362 : /* -------------------------------------------------------------------- */
363 : /* Capture sibling list either from passed in values, or by */
364 : /* scanning for them only if requested through GetSiblingFiles(). */
365 : /* -------------------------------------------------------------------- */
366 116321 : if (papszSiblingFilesIn != nullptr)
367 : {
368 362 : papszSiblingFiles = CSLDuplicate(papszSiblingFilesIn);
369 362 : bHasGotSiblingFiles = true;
370 : }
371 115959 : else if (bStatOK && !bIsDirectory)
372 : {
373 64105 : papszSiblingFiles = VSISiblingFiles(pszFilename);
374 64105 : if (papszSiblingFiles != nullptr)
375 : {
376 6 : bHasGotSiblingFiles = true;
377 : }
378 : else
379 : {
380 128198 : const char *pszOptionVal = VSIGetPathSpecificOption(
381 64099 : pszFilename, "GDAL_DISABLE_READDIR_ON_OPEN", "NO");
382 64099 : if (EQUAL(pszOptionVal, "EMPTY_DIR"))
383 : {
// EMPTY_DIR: the sibling list contains only the file itself.
384 67 : papszSiblingFiles =
385 67 : CSLAddString(nullptr, CPLGetFilename(pszFilename));
386 67 : bHasGotSiblingFiles = true;
387 : }
388 64032 : else if (CPLTestBool(pszOptionVal))
389 : {
390 : /* skip reading the directory */
391 1408 : papszSiblingFiles = nullptr;
392 1408 : bHasGotSiblingFiles = true;
393 : }
394 : else
395 : {
396 : /* will be lazy loaded */
397 62624 : papszSiblingFiles = nullptr;
398 62624 : bHasGotSiblingFiles = false;
399 : }
400 64105 : }
401 : }
402 : else
403 : {
// Stat failed or the name is a directory: no sibling list, and none will be
// looked up later.
404 51854 : papszSiblingFiles = nullptr;
405 51854 : bHasGotSiblingFiles = true;
406 : }
407 : }
408 :
409 : /************************************************************************/
410 : /* ~GDALOpenInfo() */
411 : /************************************************************************/
412 :
413 117193 : GDALOpenInfo::~GDALOpenInfo()
414 :
415 : {
416 117193 : VSIFree(pabyHeader);
417 117193 : CPLFree(pszFilename);
418 :
419 117193 : if (fpL != nullptr)
420 13312 : CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
421 117193 : CSLDestroy(papszSiblingFiles);
422 117193 : }
423 :
424 : /************************************************************************/
425 : /* GetSiblingFiles() */
426 : /************************************************************************/
427 :
428 : /** Return sibling files.
429 : *
430 : * If the list of sibling files has not already been established, it will be,
431 : * unless the GDAL_DISABLE_READDIR_ON_OPEN configuration option has been set to
432 : * YES or EMPTY_DIR when this instance was constructed.
433 : *
434 : * @return sibling files. Ownership belongs to "this".
435 : */
436 44032 : char **GDALOpenInfo::GetSiblingFiles()
437 : {
438 44032 : if (bHasGotSiblingFiles)
439 26365 : return papszSiblingFiles;
440 17667 : bHasGotSiblingFiles = true;
441 :
442 17667 : papszSiblingFiles = VSISiblingFiles(pszFilename);
443 17667 : if (papszSiblingFiles != nullptr)
444 : {
445 0 : return papszSiblingFiles;
446 : }
447 :
448 17667 : const CPLString osDir = CPLGetDirnameSafe(pszFilename);
449 35334 : const int nMaxFiles = atoi(VSIGetPathSpecificOption(
450 17667 : pszFilename, "GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
451 17667 : papszSiblingFiles = VSIReadDirEx(osDir, nMaxFiles);
452 17667 : if (nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles)
453 : {
454 1 : CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
455 : osDir.c_str());
456 1 : CSLDestroy(papszSiblingFiles);
457 1 : papszSiblingFiles = nullptr;
458 : }
459 :
460 17667 : return papszSiblingFiles;
461 : }
462 :
463 : /************************************************************************/
464 : /* StealSiblingFiles() */
465 : /* */
466 : /* Same as GetSiblingFiles() except that the list is stolen */
467 : /* (ie ownership transferred to the caller) and the associated */
468 : /* member variable is set to NULL. */
469 : /************************************************************************/
470 :
471 : /** Return sibling files and steal reference
472 : * @return sibling files. Ownership below to the caller (must be freed with
473 : * CSLDestroy)
474 : */
475 10920 : char **GDALOpenInfo::StealSiblingFiles()
476 : {
477 10920 : char **papszRet = GetSiblingFiles();
478 10920 : papszSiblingFiles = nullptr;
479 10920 : return papszRet;
480 : }
481 :
482 : /************************************************************************/
483 : /* AreSiblingFilesLoaded() */
484 : /************************************************************************/
485 :
486 : /** Return whether sibling files have been loaded.
487 : * @return true or false.
488 : */
489 59514 : bool GDALOpenInfo::AreSiblingFilesLoaded() const
490 : {
491 59514 : return bHasGotSiblingFiles;
492 : }
493 :
494 : /************************************************************************/
495 : /* TryToIngest() */
496 : /************************************************************************/
497 :
498 : /** Ingest bytes from the file.
499 : * @param nBytes number of bytes to ingest.
500 : * @return TRUE if successful
501 : */
502 27875 : int GDALOpenInfo::TryToIngest(int nBytes)
503 : {
504 27875 : if (fpL == nullptr)
505 54 : return FALSE;
506 27821 : if (nHeaderBytes < nHeaderBytesTried)
507 16088 : return TRUE;
508 11733 : pabyHeader = static_cast<GByte *>(CPLRealloc(pabyHeader, nBytes + 1));
509 11733 : memset(pabyHeader, 0, nBytes + 1);
510 11733 : VSIRewindL(fpL);
511 11733 : nHeaderBytesTried = nBytes;
512 11733 : nHeaderBytes = static_cast<int>(VSIFReadL(pabyHeader, 1, nBytes, fpL));
513 11733 : VSIRewindL(fpL);
514 :
515 11733 : return TRUE;
516 : }
517 :
518 : /************************************************************************/
519 : /* IsSingleAllowedDriver() */
520 : /************************************************************************/
521 :
522 : /** Returns true if the driver name is the single in the list of allowed
523 : * drivers.
524 : *
525 : * @param pszDriverName Driver name to test.
526 : * @return true if the driver name is the single in the list of allowed
527 : * drivers.
528 : * @since GDAL 3.10
529 : */
530 831341 : bool GDALOpenInfo::IsSingleAllowedDriver(const char *pszDriverName) const
531 : {
532 4578 : return papszAllowedDrivers && papszAllowedDrivers[0] &&
533 836694 : !papszAllowedDrivers[1] &&
534 832116 : EQUAL(papszAllowedDrivers[0], pszDriverName);
535 : }
|