Line data Source code
1 : /******************************************************************************
2 : * Project: OGR
3 : * Purpose: OGRGMLASDriver implementation
4 : * Author: Even Rouault, <even dot rouault at spatialys dot com>
5 : *
6 : * Initial development funded by the European Earth observation programme
7 : * Copernicus
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2016, Even Rouault, <even dot rouault at spatialys dot com>
11 : *
12 : * Permission is hereby granted, free of charge, to any person obtaining a
13 : * copy of this software and associated documentation files (the "Software"),
14 : * to deal in the Software without restriction, including without limitation
15 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
16 : * and/or sell copies of the Software, and to permit persons to whom the
17 : * Software is furnished to do so, subject to the following conditions:
18 : *
19 : * The above copyright notice and this permission notice shall be included
20 : * in all copies or substantial portions of the Software.
21 : *
22 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
23 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28 : * DEALINGS IN THE SOFTWARE.
29 : ****************************************************************************/
30 :
31 : #include "ogr_gmlas.h"
32 :
33 : #include "cpl_http.h"
34 : #include "cpl_sha256.h"
35 :
36 : /************************************************************************/
37 : /* SetCacheDirectory() */
38 : /************************************************************************/
39 :
40 356 : void GMLASResourceCache::SetCacheDirectory(const std::string &osCacheDirectory)
41 : {
42 356 : m_osCacheDirectory = osCacheDirectory;
43 356 : }
44 :
45 : /************************************************************************/
46 : /* RecursivelyCreateDirectoryIfNeeded() */
47 : /************************************************************************/
48 :
49 68 : bool GMLASResourceCache::RecursivelyCreateDirectoryIfNeeded(
50 : const std::string &osDirname)
51 : {
52 : VSIStatBufL sStat;
53 68 : if (VSIStatL(osDirname.c_str(), &sStat) == 0)
54 : {
55 60 : return true;
56 : }
57 :
58 16 : std::string osParent = CPLGetDirname(osDirname.c_str());
59 8 : if (!osParent.empty() && osParent != ".")
60 : {
61 8 : if (!RecursivelyCreateDirectoryIfNeeded(osParent.c_str()))
62 2 : return false;
63 : }
64 6 : return VSIMkdir(osDirname.c_str(), 0755) == 0;
65 : }
66 :
67 752 : bool GMLASResourceCache::RecursivelyCreateDirectoryIfNeeded()
68 : {
69 752 : if (!m_bHasCheckedCacheDirectory)
70 : {
71 60 : m_bHasCheckedCacheDirectory = true;
72 60 : if (!RecursivelyCreateDirectoryIfNeeded(m_osCacheDirectory))
73 : {
74 1 : CPLError(CE_Warning, CPLE_AppDefined, "Cannot create %s",
75 : m_osCacheDirectory.c_str());
76 1 : m_osCacheDirectory.clear();
77 1 : return false;
78 : }
79 : }
80 751 : return true;
81 : }
82 :
83 : /************************************************************************/
84 : /* GetCachedFilename() */
85 : /************************************************************************/
86 :
87 850 : std::string GMLASResourceCache::GetCachedFilename(const std::string &osResource)
88 : {
89 850 : std::string osLaunderedName(osResource);
90 850 : if (STARTS_WITH(osLaunderedName.c_str(), "http://"))
91 301 : osLaunderedName = osLaunderedName.substr(strlen("http://"));
92 549 : else if (STARTS_WITH(osLaunderedName.c_str(), "https://"))
93 549 : osLaunderedName = osLaunderedName.substr(strlen("https://"));
94 40726 : for (size_t i = 0; i < osLaunderedName.size(); i++)
95 : {
96 47438 : if (!isalnum(static_cast<unsigned char>(osLaunderedName[i])) &&
97 7562 : osLaunderedName[i] != '.')
98 4630 : osLaunderedName[i] = '_';
99 : }
100 :
101 : // If filename is too long, then truncate it and put a hash at the end
102 : // We try to make sure that the whole filename (including the cache path)
103 : // fits into 255 characters, for windows compat
104 :
105 850 : const size_t nWindowsMaxFilenameSize = 255;
106 : // 60 is arbitrary but should be sufficient for most people. We could
107 : // always take into account m_osCacheDirectory.size(), but if we want to
108 : // to be able to share caches between computers, then this would be
109 : // impractical.
110 850 : const size_t nTypicalMaxSizeForDirName = 60;
111 : const size_t nSizeForDirName =
112 1079 : (m_osCacheDirectory.size() > nTypicalMaxSizeForDirName &&
113 229 : m_osCacheDirectory.size() < nWindowsMaxFilenameSize - strlen(".tmp") -
114 : 2 * CPL_SHA256_HASH_SIZE)
115 1079 : ? m_osCacheDirectory.size()
116 850 : : nTypicalMaxSizeForDirName;
117 850 : CPLAssert(nWindowsMaxFilenameSize >= nSizeForDirName);
118 850 : const size_t nMaxFilenameSize = nWindowsMaxFilenameSize - nSizeForDirName;
119 :
120 850 : CPLAssert(nMaxFilenameSize >= strlen(".tmp"));
121 850 : if (osLaunderedName.size() >= nMaxFilenameSize - strlen(".tmp"))
122 : {
123 : GByte abyHash[CPL_SHA256_HASH_SIZE];
124 3 : CPL_SHA256(osResource.c_str(), osResource.size(), abyHash);
125 3 : char *pszHash = CPLBinaryToHex(CPL_SHA256_HASH_SIZE, abyHash);
126 3 : osLaunderedName.resize(nMaxFilenameSize - strlen(".tmp") -
127 : 2 * CPL_SHA256_HASH_SIZE);
128 3 : osLaunderedName += pszHash;
129 3 : CPLFree(pszHash);
130 3 : CPLDebug("GMLAS", "Cached filename truncated to %s",
131 : osLaunderedName.c_str());
132 : }
133 :
134 : return CPLFormFilename(m_osCacheDirectory.c_str(), osLaunderedName.c_str(),
135 1700 : nullptr);
136 : }
137 :
138 : /************************************************************************/
139 : /* CacheAllGML321() */
140 : /************************************************************************/
141 :
142 2 : bool GMLASXSDCache::CacheAllGML321()
143 : {
144 : // As of today (2024-01-02), the schemas in https://schemas.opengis.net/gml/3.2.1
145 : // are actually the same as the ones in the https://schemas.opengis.net/gml/gml-3_2_2.zip archive.
146 : // Download the later and unzip it for faster fetching of GML schemas.
147 :
148 2 : bool bSuccess = false;
149 2 : CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler);
150 :
151 2 : const char *pszHTTPZIP = "https://schemas.opengis.net/gml/gml-3_2_2.zip";
152 2 : CPLHTTPResult *psResult = CPLHTTPFetch(pszHTTPZIP, nullptr);
153 2 : if (psResult && psResult->nDataLen)
154 : {
155 4 : const std::string osZIPFilename(CPLSPrintf("/vsimem/%p.zip", this));
156 : auto fpZIP =
157 2 : VSIFileFromMemBuffer(osZIPFilename.c_str(), psResult->pabyData,
158 2 : psResult->nDataLen, FALSE);
159 2 : if (fpZIP)
160 : {
161 2 : VSIFCloseL(fpZIP);
162 :
163 4 : const std::string osVSIZIPFilename("/vsizip/" + osZIPFilename);
164 : const CPLStringList aosFiles(
165 4 : VSIReadDirRecursive(osVSIZIPFilename.c_str()));
166 70 : for (int i = 0; i < aosFiles.size(); ++i)
167 : {
168 68 : if (strstr(aosFiles[i], ".xsd"))
169 : {
170 : const std::string osFilename(
171 116 : std::string("https://schemas.opengis.net/gml/3.2.1/") +
172 174 : CPLGetFilename(aosFiles[i]));
173 : const std::string osCachedFileName(
174 174 : GetCachedFilename(osFilename.c_str()));
175 :
176 116 : std::string osTmpfilename(osCachedFileName + ".tmp");
177 58 : if (CPLCopyFile(
178 : osTmpfilename.c_str(),
179 116 : (osVSIZIPFilename + "/" + aosFiles[i]).c_str()) ==
180 : 0)
181 : {
182 58 : VSIRename(osTmpfilename.c_str(),
183 : osCachedFileName.c_str());
184 58 : bSuccess = true;
185 : }
186 : }
187 : }
188 : }
189 2 : VSIUnlink(osZIPFilename.c_str());
190 : }
191 2 : CPLHTTPDestroyResult(psResult);
192 2 : if (!bSuccess)
193 : {
194 : static bool bHasWarned = false;
195 0 : if (!bHasWarned)
196 : {
197 0 : bHasWarned = true;
198 0 : CPLDebug("GMLAS", "Cannot get GML schemas from %s", pszHTTPZIP);
199 : }
200 : }
201 4 : return bSuccess;
202 : }
203 :
204 : /************************************************************************/
205 : /* CacheAllISO20070417() */
206 : /************************************************************************/
207 :
208 1 : bool GMLASXSDCache::CacheAllISO20070417()
209 : {
210 : // As of today (2024-01-02), the schemas in https://schemas.opengis.net/iso/19139/20070417/
211 : // are actually the same as the ones in the iso19139-20070417_5-v20220526.zip archive
212 : // in https://schemas.opengis.net/iso/19139/iso19139-20070417.zip archive.
213 : // Download the later and unzip it for faster fetching of ISO schemas.
214 :
215 1 : bool bSuccess = false;
216 1 : CPLErrorStateBackuper oErrorStateBackuper(CPLQuietErrorHandler);
217 :
218 1 : const char *pszHTTPZIP =
219 : "https://schemas.opengis.net/iso/19139/iso19139-20070417.zip";
220 1 : CPLHTTPResult *psResult = CPLHTTPFetch(pszHTTPZIP, nullptr);
221 1 : if (psResult && psResult->nDataLen)
222 : {
223 2 : const std::string osZIPFilename(CPLSPrintf("/vsimem/%p.zip", this));
224 : auto fpZIP =
225 1 : VSIFileFromMemBuffer(osZIPFilename.c_str(), psResult->pabyData,
226 1 : psResult->nDataLen, FALSE);
227 1 : if (fpZIP)
228 : {
229 1 : VSIFCloseL(fpZIP);
230 :
231 : const std::string osVSIZIPFilename(
232 1 : "/vsizip//vsizip/" + osZIPFilename +
233 2 : "/iso19139-20070417_5-v20220526.zip");
234 : const CPLStringList aosFiles(
235 2 : VSIReadDirRecursive(osVSIZIPFilename.c_str()));
236 71 : for (int i = 0; i < aosFiles.size(); ++i)
237 : {
238 137 : if (STARTS_WITH(aosFiles[i], "iso/19139/20070417/") &&
239 67 : strstr(aosFiles[i], ".xsd"))
240 : {
241 : const std::string osFilename(
242 72 : std::string("https://schemas.opengis.net/") +
243 108 : aosFiles[i]);
244 : const std::string osCachedFileName(
245 108 : GetCachedFilename(osFilename.c_str()));
246 :
247 72 : std::string osTmpfilename(osCachedFileName + ".tmp");
248 36 : if (CPLCopyFile(
249 : osTmpfilename.c_str(),
250 72 : (osVSIZIPFilename + "/" + aosFiles[i]).c_str()) ==
251 : 0)
252 : {
253 36 : VSIRename(osTmpfilename.c_str(),
254 : osCachedFileName.c_str());
255 36 : bSuccess = true;
256 : }
257 : }
258 : }
259 : }
260 1 : VSIUnlink(osZIPFilename.c_str());
261 : }
262 1 : CPLHTTPDestroyResult(psResult);
263 1 : if (!bSuccess)
264 : {
265 : static bool bHasWarned = false;
266 0 : if (!bHasWarned)
267 : {
268 0 : bHasWarned = true;
269 0 : CPLDebug("GMLAS", "Cannot get ISO schemas from %s", pszHTTPZIP);
270 : }
271 : }
272 2 : return bSuccess;
273 : }
274 :
275 : /************************************************************************/
276 : /* Open() */
277 : /************************************************************************/
278 :
279 1020 : VSILFILE *GMLASXSDCache::Open(const std::string &osResource,
280 : const std::string &osBasePath,
281 : std::string &osOutFilename)
282 : {
283 1020 : osOutFilename = osResource;
284 1020 : if (!STARTS_WITH(osResource.c_str(), "http://") &&
285 948 : !STARTS_WITH(osResource.c_str(), "https://") &&
286 1968 : CPLIsFilenameRelative(osResource.c_str()) && !osResource.empty())
287 : {
288 : /* Transform a/b + ../c --> a/c */
289 828 : std::string osResourceModified(osResource);
290 828 : std::string osBasePathModified(osBasePath);
291 414 : while ((STARTS_WITH(osResourceModified.c_str(), "../") ||
292 415 : STARTS_WITH(osResourceModified.c_str(), "..\\")) &&
293 1 : !osBasePathModified.empty())
294 : {
295 0 : osBasePathModified = CPLGetDirname(osBasePathModified.c_str());
296 0 : osResourceModified = osResourceModified.substr(3);
297 : }
298 :
299 : osOutFilename = CPLFormFilename(osBasePathModified.c_str(),
300 414 : osResourceModified.c_str(), nullptr);
301 : }
302 :
303 1020 : CPLDebug("GMLAS", "Resolving %s (%s) to %s", osResource.c_str(),
304 : osBasePath.c_str(), osOutFilename.c_str());
305 :
306 1020 : VSILFILE *fp = nullptr;
307 1020 : bool bHasTriedZIPArchive = false;
308 1023 : retry:
309 1023 : if (!m_osCacheDirectory.empty() &&
310 1022 : (STARTS_WITH(osOutFilename.c_str(), "http://") ||
311 2500 : STARTS_WITH(osOutFilename.c_str(), "https://")) &&
312 735 : RecursivelyCreateDirectoryIfNeeded())
313 : {
314 : const std::string osCachedFileName(
315 1468 : GetCachedFilename(osOutFilename.c_str()));
316 735 : if (!m_bRefresh || m_aoSetRefreshedFiles.find(osCachedFileName) !=
317 735 : m_aoSetRefreshedFiles.end())
318 : {
319 733 : fp = VSIFOpenL(osCachedFileName.c_str(), "rb");
320 : }
321 734 : if (fp != nullptr)
322 : {
323 686 : CPLDebug("GMLAS", "Use cached %s", osCachedFileName.c_str());
324 : }
325 48 : else if (m_bAllowDownload)
326 : {
327 47 : if (m_bRefresh)
328 1 : m_aoSetRefreshedFiles.insert(osCachedFileName);
329 :
330 138 : else if (!bHasTriedZIPArchive &&
331 46 : strstr(osOutFilename.c_str(),
332 92 : "://schemas.opengis.net/gml/3.2.1/") &&
333 2 : CPLTestBool(CPLGetConfigOption(
334 : "OGR_GMLAS_USE_SCHEMAS_FROM_OGC_ZIP", "YES")))
335 : {
336 2 : bHasTriedZIPArchive = true;
337 2 : if (CacheAllGML321())
338 3 : goto retry;
339 : }
340 :
341 132 : else if (!bHasTriedZIPArchive &&
342 44 : strstr(osOutFilename.c_str(),
343 88 : "://schemas.opengis.net/iso/19139/20070417/") &&
344 1 : CPLTestBool(CPLGetConfigOption(
345 : "OGR_GMLAS_USE_SCHEMAS_FROM_OGC_ZIP", "YES")))
346 : {
347 1 : bHasTriedZIPArchive = true;
348 1 : if (CacheAllISO20070417())
349 1 : goto retry;
350 : }
351 :
352 : CPLHTTPResult *psResult =
353 44 : CPLHTTPFetch(osOutFilename.c_str(), nullptr);
354 44 : if (psResult == nullptr || psResult->nDataLen == 0)
355 : {
356 2 : CPLError(CE_Failure, CPLE_FileIO, "Cannot resolve %s",
357 : osResource.c_str());
358 2 : CPLHTTPDestroyResult(psResult);
359 2 : return nullptr;
360 : }
361 :
362 84 : std::string osTmpfilename(osCachedFileName + ".tmp");
363 42 : VSILFILE *fpTmp = VSIFOpenL(osTmpfilename.c_str(), "wb");
364 42 : if (fpTmp)
365 : {
366 84 : const auto nRet = VSIFWriteL(psResult->pabyData,
367 42 : psResult->nDataLen, 1, fpTmp);
368 42 : VSIFCloseL(fpTmp);
369 42 : if (nRet == 1)
370 : {
371 42 : VSIRename(osTmpfilename.c_str(), osCachedFileName.c_str());
372 42 : fp = VSIFOpenL(osCachedFileName.c_str(), "rb");
373 : }
374 : }
375 :
376 42 : CPLHTTPDestroyResult(psResult);
377 : }
378 : }
379 : else
380 : {
381 576 : if (STARTS_WITH(osOutFilename.c_str(), "http://") ||
382 287 : STARTS_WITH(osOutFilename.c_str(), "https://"))
383 : {
384 2 : if (m_bAllowDownload)
385 : {
386 : CPLHTTPResult *psResult =
387 2 : CPLHTTPFetch(osOutFilename.c_str(), nullptr);
388 2 : if (psResult == nullptr || psResult->nDataLen == 0)
389 : {
390 0 : CPLError(CE_Failure, CPLE_FileIO, "Cannot resolve %s",
391 : osResource.c_str());
392 0 : CPLHTTPDestroyResult(psResult);
393 0 : return nullptr;
394 : }
395 :
396 4 : fp = VSIFileFromMemBuffer(nullptr, psResult->pabyData,
397 2 : psResult->nDataLen, TRUE);
398 2 : if (fp)
399 : {
400 : // Steal the memory buffer from HTTP result
401 2 : psResult->pabyData = nullptr;
402 2 : psResult->nDataLen = 0;
403 2 : psResult->nDataAlloc = 0;
404 : }
405 2 : CPLHTTPDestroyResult(psResult);
406 : }
407 : }
408 : else
409 : {
410 287 : fp = VSIFOpenL(osOutFilename.c_str(), "rb");
411 : }
412 : }
413 :
414 1018 : if (fp == nullptr)
415 : {
416 3 : CPLError(CE_Failure, CPLE_FileIO, "Cannot resolve %s",
417 : osResource.c_str());
418 : }
419 :
420 1018 : return fp;
421 : }
|