Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: Implement VSI large file api for WebHDFS REST API
5 : * Author: Even Rouault, even.rouault at spatialys.com
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2018, Even Rouault <even.rouault at spatialys.com>
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : #include "cpl_port.h"
30 : #include "cpl_http.h"
31 : #include "cpl_json.h"
32 : #include "cpl_vsil_curl_priv.h"
33 : #include "cpl_vsil_curl_class.h"
34 :
35 : #include <errno.h>
36 :
37 : #include <algorithm>
38 : #include <set>
39 : #include <map>
40 : #include <memory>
41 :
42 : #include "cpl_alibaba_oss.h"
43 :
44 : #ifndef HAVE_CURL
45 :
/** Stub compiled when HAVE_CURL is not defined: installs nothing. */
void VSIInstallWebHdfsHandler(void)
{
    // Intentionally empty: /vsiwebhdfs/ is not supported in this build.
}
50 :
51 : #else
52 :
53 : //! @cond Doxygen_Suppress
54 : #ifndef DOXYGEN_SKIP
55 :
56 : #define ENABLE_DEBUG 0
57 :
58 : #define unchecked_curl_easy_setopt(handle, opt, param) \
59 : CPL_IGNORE_RET_VAL(curl_easy_setopt(handle, opt, param))
60 :
61 : namespace cpl
62 : {
63 :
64 : /************************************************************************/
65 : /* VSIWebHDFSFSHandler */
66 : /************************************************************************/
67 :
68 : class VSIWebHDFSFSHandler final : public VSICurlFilesystemHandlerBaseWritable
69 : {
70 : const std::string m_osPrefix;
71 : CPL_DISALLOW_COPY_ASSIGN(VSIWebHDFSFSHandler)
72 :
73 : protected:
74 : VSICurlHandle *CreateFileHandle(const char *pszFilename) override;
75 :
76 0 : int HasOptimizedReadMultiRange(const char * /* pszPath */) override
77 : {
78 0 : return false;
79 : }
80 :
81 : char **GetFileList(const char *pszFilename, int nMaxFiles,
82 : bool *pbGotFileList) override;
83 :
84 : std::string GetURLFromFilename(const std::string &osFilename) override;
85 :
86 : VSIVirtualHandleUniquePtr
87 : CreateWriteHandle(const char *pszFilename,
88 : CSLConstList papszOptions) override;
89 :
90 : public:
91 1228 : explicit VSIWebHDFSFSHandler(const char *pszPrefix) : m_osPrefix(pszPrefix)
92 : {
93 1228 : }
94 :
95 1704 : ~VSIWebHDFSFSHandler() override = default;
96 :
97 : int Unlink(const char *pszFilename) override;
98 : int Rmdir(const char *pszFilename) override;
99 : int Mkdir(const char *pszDirname, long nMode) override;
100 :
101 1 : const char *GetDebugKey() const override
102 : {
103 1 : return "VSIWEBHDFS";
104 : }
105 :
106 148 : std::string GetFSPrefix() const override
107 : {
108 148 : return m_osPrefix;
109 : }
110 :
111 : const char *GetOptions() override;
112 :
113 : std::string
114 0 : GetStreamingFilename(const std::string &osFilename) const override
115 : {
116 0 : return osFilename;
117 : }
118 :
119 0 : VSIFilesystemHandler *Duplicate(const char *pszPrefix) override
120 : {
121 0 : return new VSIWebHDFSFSHandler(pszPrefix);
122 : }
123 : };
124 :
125 : /************************************************************************/
126 : /* VSIWebHDFSHandle */
127 : /************************************************************************/
128 :
129 : class VSIWebHDFSHandle final : public VSICurlHandle
130 : {
131 : CPL_DISALLOW_COPY_ASSIGN(VSIWebHDFSHandle)
132 :
133 : std::string m_osDataNodeHost{};
134 : std::string m_osUsernameParam{};
135 : std::string m_osDelegationParam{};
136 :
137 : std::string DownloadRegion(vsi_l_offset startOffset, int nBlocks) override;
138 :
139 : public:
140 : VSIWebHDFSHandle(VSIWebHDFSFSHandler *poFS, const char *pszFilename,
141 : const char *pszURL);
142 14 : ~VSIWebHDFSHandle() override = default;
143 :
144 0 : int ReadMultiRange(int nRanges, void **ppData,
145 : const vsi_l_offset *panOffsets,
146 : const size_t *panSizes) override
147 : {
148 0 : return VSIVirtualHandle::ReadMultiRange(nRanges, ppData, panOffsets,
149 0 : panSizes);
150 : }
151 :
152 : vsi_l_offset GetFileSize(bool bSetError) override;
153 : };
154 :
155 : /************************************************************************/
156 : /* PatchWebHDFSUrl() */
157 : /************************************************************************/
158 :
/**
 * Replace the host name of an http:// or https:// URL.
 *
 * The host is the text between the scheme and the first of ':' (port),
 * '/', '?' or '#'. A bracketed IPv6 literal ("[::1]") is treated as a
 * single host token. Everything after the host (port, path, query) is
 * preserved. URLs with another scheme are returned unchanged.
 *
 * Searching only for ':' is not sufficient: a URL without an explicit
 * port would either be left unpatched or be cut at a ':' located later in
 * the path or query string.
 *
 * @param osURLIn   URL to patch.
 * @param osNewHost Host name to substitute.
 * @return the patched URL, or a copy of osURLIn if the scheme is neither
 *         http nor https.
 */
static std::string PatchWebHDFSUrl(const std::string &osURLIn,
                                   const std::string &osNewHost)
{
    size_t nHostStart = 0;
    if (osURLIn.compare(0, strlen("http://"), "http://") == 0)
        nHostStart = strlen("http://");
    else if (osURLIn.compare(0, strlen("https://"), "https://") == 0)
        nHostStart = strlen("https://");
    if (nHostStart == 0)
        return osURLIn;

    // Skip over an IPv6 literal so that its inner ':' are not mistaken
    // for the port separator.
    size_t nSearchFrom = nHostStart;
    if (nHostStart < osURLIn.size() && osURLIn[nHostStart] == '[')
    {
        const size_t nBracketEnd = osURLIn.find(']', nHostStart);
        if (nBracketEnd != std::string::npos)
            nSearchFrom = nBracketEnd + 1;
    }

    size_t nHostEnd = osURLIn.find_first_of(":/?#", nSearchFrom);
    if (nHostEnd == std::string::npos)
        nHostEnd = osURLIn.size();  // URL is just "scheme://host"

    return osURLIn.substr(0, nHostStart) + osNewHost +
           osURLIn.substr(nHostEnd);
}
179 :
180 : /************************************************************************/
181 : /* GetWebHDFSDataNodeHost() */
182 : /************************************************************************/
183 :
184 13 : static std::string GetWebHDFSDataNodeHost(const char *pszFilename)
185 : {
186 : return std::string(
187 13 : VSIGetPathSpecificOption(pszFilename, "WEBHDFS_DATANODE_HOST", ""));
188 : }
189 :
190 : /************************************************************************/
191 : /* VSIWebHDFSWriteHandle */
192 : /************************************************************************/
193 :
194 : class VSIWebHDFSWriteHandle final : public VSIAppendWriteHandle
195 : {
196 : CPL_DISALLOW_COPY_ASSIGN(VSIWebHDFSWriteHandle)
197 :
198 : std::string m_osURL{};
199 : std::string m_osDataNodeHost{};
200 : std::string m_osUsernameParam{};
201 : std::string m_osDelegationParam{};
202 : CPLStringList m_aosHTTPOptions{};
203 :
204 : bool Send(bool bIsLastBlock) override;
205 : bool CreateFile();
206 : bool Append();
207 :
208 : void InvalidateParentDirectory();
209 :
210 : public:
211 : VSIWebHDFSWriteHandle(VSIWebHDFSFSHandler *poFS, const char *pszFilename);
212 : virtual ~VSIWebHDFSWriteHandle();
213 : };
214 :
215 : /************************************************************************/
216 : /* GetWebHDFSBufferSize() */
217 : /************************************************************************/
218 :
219 6 : static int GetWebHDFSBufferSize()
220 : {
221 : int nBufferSize;
222 6 : int nChunkSizeMB = atoi(CPLGetConfigOption("VSIWEBHDFS_SIZE", "4"));
223 6 : if (nChunkSizeMB <= 0 || nChunkSizeMB > 1000)
224 0 : nBufferSize = 4 * 1024 * 1024;
225 : else
226 6 : nBufferSize = nChunkSizeMB * 1024 * 1024;
227 :
228 : // For testing only !
229 : const char *pszChunkSizeBytes =
230 6 : CPLGetConfigOption("VSIWEBHDFS_SIZE_BYTES", nullptr);
231 6 : if (pszChunkSizeBytes)
232 0 : nBufferSize = atoi(pszChunkSizeBytes);
233 6 : if (nBufferSize <= 0 || nBufferSize > 1000 * 1024 * 1024)
234 0 : nBufferSize = 4 * 1024 * 1024;
235 6 : return nBufferSize;
236 : }
237 :
238 : /************************************************************************/
239 : /* VSIWebHDFSWriteHandle() */
240 : /************************************************************************/
241 :
242 6 : VSIWebHDFSWriteHandle::VSIWebHDFSWriteHandle(VSIWebHDFSFSHandler *poFS,
243 6 : const char *pszFilename)
244 6 : : VSIAppendWriteHandle(poFS, poFS->GetFSPrefix().c_str(), pszFilename,
245 : GetWebHDFSBufferSize()),
246 12 : m_osURL(pszFilename + poFS->GetFSPrefix().size()),
247 : m_osDataNodeHost(GetWebHDFSDataNodeHost(pszFilename)),
248 24 : m_aosHTTPOptions(CPLHTTPGetOptionsFromEnv(pszFilename))
249 : {
250 : // cppcheck-suppress useInitializationList
251 : m_osUsernameParam =
252 6 : VSIGetPathSpecificOption(pszFilename, "WEBHDFS_USERNAME", "");
253 6 : if (!m_osUsernameParam.empty())
254 5 : m_osUsernameParam = "&user.name=" + m_osUsernameParam;
255 : m_osDelegationParam =
256 6 : VSIGetPathSpecificOption(pszFilename, "WEBHDFS_DELEGATION", "");
257 6 : if (!m_osDelegationParam.empty())
258 0 : m_osDelegationParam = "&delegation=" + m_osDelegationParam;
259 :
260 6 : if (m_pabyBuffer != nullptr && !CreateFile())
261 : {
262 3 : CPLFree(m_pabyBuffer);
263 3 : m_pabyBuffer = nullptr;
264 : }
265 6 : }
266 :
267 : /************************************************************************/
268 : /* ~VSIWebHDFSWriteHandle() */
269 : /************************************************************************/
270 :
271 12 : VSIWebHDFSWriteHandle::~VSIWebHDFSWriteHandle()
272 : {
273 6 : Close();
274 12 : }
275 :
276 : /************************************************************************/
277 : /* InvalidateParentDirectory() */
278 : /************************************************************************/
279 :
280 3 : void VSIWebHDFSWriteHandle::InvalidateParentDirectory()
281 : {
282 3 : m_poFS->InvalidateCachedData(m_osURL.c_str());
283 :
284 6 : std::string osFilenameWithoutSlash(m_osFilename);
285 3 : if (!osFilenameWithoutSlash.empty() && osFilenameWithoutSlash.back() == '/')
286 0 : osFilenameWithoutSlash.resize(osFilenameWithoutSlash.size() - 1);
287 3 : m_poFS->InvalidateDirContent(CPLGetDirname(osFilenameWithoutSlash.c_str()));
288 3 : }
289 :
290 : /************************************************************************/
291 : /* Send() */
292 : /************************************************************************/
293 :
294 6 : bool VSIWebHDFSWriteHandle::Send(bool /* bIsLastBlock */)
295 : {
296 6 : if (m_nCurOffset > 0)
297 2 : return Append();
298 4 : return true;
299 : }
300 :
301 : /************************************************************************/
302 : /* CreateFile() */
303 : /************************************************************************/
304 :
305 6 : bool VSIWebHDFSWriteHandle::CreateFile()
306 : {
307 6 : if (m_osUsernameParam.empty() && m_osDelegationParam.empty())
308 : {
309 1 : CPLError(CE_Failure, CPLE_AppDefined,
310 : "Configuration option WEBHDFS_USERNAME or WEBHDFS_DELEGATION "
311 : "should be defined");
312 1 : return false;
313 : }
314 :
315 10 : NetworkStatisticsFileSystem oContextFS(m_poFS->GetFSPrefix().c_str());
316 10 : NetworkStatisticsFile oContextFile(m_osFilename.c_str());
317 10 : NetworkStatisticsAction oContextAction("Write");
318 :
319 10 : std::string osURL = m_osURL + "?op=CREATE&overwrite=true" +
320 15 : m_osUsernameParam + m_osDelegationParam;
321 :
322 : std::string osPermission = VSIGetPathSpecificOption(
323 10 : m_osFilename.c_str(), "WEBHDFS_PERMISSION", "");
324 5 : if (!osPermission.empty())
325 0 : osURL += "&permission=" + osPermission;
326 :
327 : std::string osReplication = VSIGetPathSpecificOption(
328 5 : m_osFilename.c_str(), "WEBHDFS_REPLICATION", "");
329 5 : if (!osReplication.empty())
330 0 : osURL += "&replication=" + osReplication;
331 :
332 5 : bool bInRedirect = false;
333 :
334 8 : retry:
335 8 : CURL *hCurlHandle = curl_easy_init();
336 :
337 : struct curl_slist *headers = static_cast<struct curl_slist *>(
338 8 : CPLHTTPSetOptions(hCurlHandle, osURL.c_str(), m_aosHTTPOptions.List()));
339 :
340 8 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_CUSTOMREQUEST, "PUT");
341 8 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_INFILESIZE, 0);
342 :
343 8 : if (!m_osDataNodeHost.empty())
344 : {
345 7 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
346 : }
347 :
348 8 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
349 :
350 8 : WriteFuncStruct sWriteFuncData;
351 8 : VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
352 8 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
353 8 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
354 : VSICurlHandleWriteFunc);
355 :
356 8 : VSICURLMultiPerform(m_poFS->GetCurlMultiHandleFor(m_osURL), hCurlHandle);
357 :
358 8 : curl_slist_free_all(headers);
359 :
360 8 : NetworkStatisticsLogger::LogPUT(0);
361 :
362 8 : long response_code = 0;
363 8 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
364 :
365 8 : if (!bInRedirect)
366 : {
367 5 : char *pszRedirectURL = nullptr;
368 5 : curl_easy_getinfo(hCurlHandle, CURLINFO_REDIRECT_URL, &pszRedirectURL);
369 5 : if (pszRedirectURL && strstr(pszRedirectURL, osURL.c_str()) == nullptr)
370 : {
371 3 : CPLDebug("WEBHDFS", "Redirect URL: %s", pszRedirectURL);
372 :
373 3 : bInRedirect = true;
374 3 : osURL = pszRedirectURL;
375 3 : if (!m_osDataNodeHost.empty())
376 : {
377 3 : osURL = PatchWebHDFSUrl(osURL, m_osDataNodeHost);
378 : }
379 :
380 3 : curl_easy_cleanup(hCurlHandle);
381 3 : CPLFree(sWriteFuncData.pBuffer);
382 :
383 3 : goto retry;
384 : }
385 : }
386 :
387 5 : curl_easy_cleanup(hCurlHandle);
388 :
389 5 : if (response_code == 201)
390 : {
391 3 : InvalidateParentDirectory();
392 : }
393 : else
394 : {
395 2 : CPLDebug("WEBHDFS", "%s",
396 2 : sWriteFuncData.pBuffer ? sWriteFuncData.pBuffer : "(null)");
397 2 : CPLError(CE_Failure, CPLE_AppDefined, "PUT of %s failed",
398 : m_osURL.c_str());
399 : }
400 5 : CPLFree(sWriteFuncData.pBuffer);
401 :
402 5 : return response_code == 201;
403 : }
404 :
405 : /************************************************************************/
406 : /* Append() */
407 : /************************************************************************/
408 :
409 2 : bool VSIWebHDFSWriteHandle::Append()
410 : {
411 4 : NetworkStatisticsFileSystem oContextFS(m_poFS->GetFSPrefix().c_str());
412 4 : NetworkStatisticsFile oContextFile(m_osFilename.c_str());
413 4 : NetworkStatisticsAction oContextAction("Write");
414 :
415 : std::string osURL =
416 6 : m_osURL + "?op=APPEND" + m_osUsernameParam + m_osDelegationParam;
417 :
418 2 : CURL *hCurlHandle = curl_easy_init();
419 :
420 : struct curl_slist *headers = static_cast<struct curl_slist *>(
421 2 : CPLHTTPSetOptions(hCurlHandle, osURL.c_str(), m_aosHTTPOptions.List()));
422 :
423 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_CUSTOMREQUEST, "POST");
424 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
425 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
426 :
427 2 : WriteFuncStruct sWriteFuncData;
428 2 : VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
429 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
430 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
431 : VSICurlHandleWriteFunc);
432 :
433 2 : VSICURLMultiPerform(m_poFS->GetCurlMultiHandleFor(m_osURL), hCurlHandle);
434 :
435 2 : curl_slist_free_all(headers);
436 :
437 2 : NetworkStatisticsLogger::LogPOST(0, 0);
438 :
439 2 : long response_code = 0;
440 2 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
441 :
442 2 : if (response_code != 307)
443 : {
444 0 : CPLDebug("WEBHDFS", "%s",
445 0 : sWriteFuncData.pBuffer ? sWriteFuncData.pBuffer : "(null)");
446 0 : CPLError(CE_Failure, CPLE_AppDefined, "POST of %s failed",
447 : m_osURL.c_str());
448 0 : curl_easy_cleanup(hCurlHandle);
449 0 : CPLFree(sWriteFuncData.pBuffer);
450 0 : return false;
451 : }
452 :
453 2 : char *pszRedirectURL = nullptr;
454 2 : curl_easy_getinfo(hCurlHandle, CURLINFO_REDIRECT_URL, &pszRedirectURL);
455 2 : if (pszRedirectURL == nullptr)
456 : {
457 0 : curl_easy_cleanup(hCurlHandle);
458 0 : CPLFree(sWriteFuncData.pBuffer);
459 0 : return false;
460 : }
461 2 : CPLDebug("WEBHDFS", "Redirect URL: %s", pszRedirectURL);
462 :
463 2 : osURL = pszRedirectURL;
464 2 : if (!m_osDataNodeHost.empty())
465 : {
466 2 : osURL = PatchWebHDFSUrl(osURL, m_osDataNodeHost);
467 : }
468 :
469 2 : curl_easy_cleanup(hCurlHandle);
470 2 : CPLFree(sWriteFuncData.pBuffer);
471 :
472 : // After redirection
473 :
474 2 : hCurlHandle = curl_easy_init();
475 :
476 : headers = static_cast<struct curl_slist *>(
477 2 : CPLHTTPSetOptions(hCurlHandle, osURL.c_str(), m_aosHTTPOptions.List()));
478 : headers =
479 2 : curl_slist_append(headers, "Content-Type: application/octet-stream");
480 :
481 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_POSTFIELDS, m_pabyBuffer);
482 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_POSTFIELDSIZE,
483 : m_nBufferOff);
484 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
485 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
486 :
487 2 : VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
488 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
489 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
490 : VSICurlHandleWriteFunc);
491 :
492 2 : VSICURLMultiPerform(m_poFS->GetCurlMultiHandleFor(m_osURL), hCurlHandle);
493 :
494 2 : curl_slist_free_all(headers);
495 :
496 2 : NetworkStatisticsLogger::LogPOST(m_nBufferOff, 0);
497 :
498 2 : response_code = 0;
499 2 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
500 :
501 2 : curl_easy_cleanup(hCurlHandle);
502 :
503 2 : if (response_code != 200)
504 : {
505 1 : CPLDebug("WEBHDFS", "%s",
506 1 : sWriteFuncData.pBuffer ? sWriteFuncData.pBuffer : "(null)");
507 1 : CPLError(CE_Failure, CPLE_AppDefined, "POST of %s failed",
508 : m_osURL.c_str());
509 : }
510 2 : CPLFree(sWriteFuncData.pBuffer);
511 :
512 2 : return response_code == 200;
513 : }
514 :
515 : /************************************************************************/
516 : /* CreateWriteHandle() */
517 : /************************************************************************/
518 :
519 : VSIVirtualHandleUniquePtr
520 6 : VSIWebHDFSFSHandler::CreateWriteHandle(const char *pszFilename,
521 : CSLConstList /*papszOptions*/)
522 : {
523 12 : auto poHandle = std::make_unique<VSIWebHDFSWriteHandle>(this, pszFilename);
524 6 : if (!poHandle->IsOK())
525 : {
526 3 : return nullptr;
527 : }
528 3 : return VSIVirtualHandleUniquePtr(poHandle.release());
529 : }
530 :
531 : /************************************************************************/
532 : /* GetOptions() */
533 : /************************************************************************/
534 :
535 1 : const char *VSIWebHDFSFSHandler::GetOptions()
536 : {
537 : static std::string osOptions(
538 2 : std::string("<Options>") +
539 : " <Option name='WEBHDFS_USERNAME' type='string' "
540 : "description='username (when security is off)'/>"
541 : " <Option name='WEBHDFS_DELEGATION' type='string' "
542 : "description='Hadoop delegation token (when security is on)'/>"
543 : " <Option name='WEBHDFS_DATANODE_HOST' type='string' "
544 : "description='For APIs using redirect, substitute the redirection "
545 : "hostname with the one provided by this option (normally resolvable "
546 : "hostname should be rewritten by a proxy)'/>"
547 : " <Option name='WEBHDFS_REPLICATION' type='integer' "
548 : "description='Replication value used when creating a file'/>"
549 : " <Option name='WEBHDFS_PERMISSION' type='integer' "
550 : "description='Permission mask (to provide as decimal number) when "
551 3 : "creating a file or directory'/>" +
552 2 : VSICurlFilesystemHandlerBase::GetOptionsStatic() + "</Options>");
553 1 : return osOptions.c_str();
554 : }
555 :
556 : /************************************************************************/
557 : /* CreateFileHandle() */
558 : /************************************************************************/
559 :
560 7 : VSICurlHandle *VSIWebHDFSFSHandler::CreateFileHandle(const char *pszFilename)
561 : {
562 : return new VSIWebHDFSHandle(this, pszFilename,
563 7 : pszFilename + GetFSPrefix().size());
564 : }
565 :
566 : /************************************************************************/
567 : /* GetURLFromFilename() */
568 : /************************************************************************/
569 :
570 : std::string
571 14 : VSIWebHDFSFSHandler::GetURLFromFilename(const std::string &osFilename)
572 : {
573 28 : return osFilename.substr(GetFSPrefix().size());
574 : }
575 :
576 : /************************************************************************/
577 : /* GetFileList() */
578 : /************************************************************************/
579 :
580 2 : char **VSIWebHDFSFSHandler::GetFileList(const char *pszDirname,
581 : int /*nMaxFiles*/, bool *pbGotFileList)
582 : {
583 : if (ENABLE_DEBUG)
584 : CPLDebug("WEBHDFS", "GetFileList(%s)", pszDirname);
585 2 : *pbGotFileList = false;
586 :
587 4 : NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
588 4 : NetworkStatisticsAction oContextAction("ListBucket");
589 :
590 2 : CPLAssert(strlen(pszDirname) >= GetFSPrefix().size());
591 :
592 6 : std::string osBaseURL = pszDirname + GetFSPrefix().size();
593 2 : if (!osBaseURL.empty() && osBaseURL.back() != '/')
594 2 : osBaseURL += '/';
595 :
596 2 : CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osBaseURL);
597 :
598 : std::string osUsernameParam =
599 4 : VSIGetPathSpecificOption(pszDirname, "WEBHDFS_USERNAME", "");
600 2 : if (!osUsernameParam.empty())
601 0 : osUsernameParam = "&user.name=" + osUsernameParam;
602 : std::string osDelegationParam =
603 4 : VSIGetPathSpecificOption(pszDirname, "WEBHDFS_DELEGATION", "");
604 2 : if (!osDelegationParam.empty())
605 0 : osDelegationParam = "&delegation=" + osDelegationParam;
606 : std::string osURL =
607 6 : osBaseURL + "?op=LISTSTATUS" + osUsernameParam + osDelegationParam;
608 :
609 2 : CURL *hCurlHandle = curl_easy_init();
610 :
611 : struct curl_slist *headers =
612 2 : VSICurlSetOptions(hCurlHandle, osURL.c_str(), nullptr);
613 :
614 2 : WriteFuncStruct sWriteFuncData;
615 2 : VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
616 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
617 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
618 : VSICurlHandleWriteFunc);
619 :
620 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
621 :
622 2 : VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
623 :
624 2 : VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
625 :
626 2 : curl_slist_free_all(headers);
627 :
628 2 : NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
629 :
630 2 : long response_code = 0;
631 2 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
632 :
633 4 : CPLStringList aosList;
634 2 : bool bOK = false;
635 2 : if (response_code == 200 && sWriteFuncData.pBuffer)
636 : {
637 2 : CPLJSONDocument oDoc;
638 1 : if (oDoc.LoadMemory(
639 1 : reinterpret_cast<const GByte *>(sWriteFuncData.pBuffer)))
640 : {
641 : CPLJSONArray oFileStatus =
642 3 : oDoc.GetRoot().GetArray("FileStatuses/FileStatus");
643 1 : bOK = oFileStatus.IsValid();
644 3 : for (int i = 0; i < oFileStatus.Size(); i++)
645 : {
646 4 : CPLJSONObject oItem = oFileStatus[i];
647 2 : vsi_l_offset fileSize = oItem.GetLong("length");
648 : size_t mTime = static_cast<size_t>(
649 2 : oItem.GetLong("modificationTime") / 1000);
650 2 : bool bIsDirectory = oItem.GetString("type") == "DIRECTORY";
651 6 : std::string osName = oItem.GetString("pathSuffix");
652 : // can be empty if we for example ask to list a file: in that
653 : // case the file entry is reported but with an empty pathSuffix
654 2 : if (!osName.empty())
655 : {
656 2 : aosList.AddString(osName.c_str());
657 :
658 4 : FileProp prop;
659 2 : prop.eExists = EXIST_YES;
660 2 : prop.bIsDirectory = bIsDirectory;
661 2 : prop.bHasComputedFileSize = true;
662 2 : prop.fileSize = fileSize;
663 2 : prop.mTime = mTime;
664 4 : std::string osCachedFilename(osBaseURL + osName);
665 : #if DEBUG_VERBOSE
666 : CPLDebug("WEBHDFS", "Cache %s", osCachedFilename.c_str());
667 : #endif
668 2 : SetCachedFileProp(osCachedFilename.c_str(), prop);
669 : }
670 : }
671 : }
672 : }
673 :
674 2 : *pbGotFileList = bOK;
675 :
676 2 : CPLFree(sWriteFuncData.pBuffer);
677 2 : curl_easy_cleanup(hCurlHandle);
678 :
679 2 : if (bOK)
680 1 : return aosList.StealList();
681 : else
682 1 : return nullptr;
683 : }
684 :
685 : /************************************************************************/
686 : /* Unlink() */
687 : /************************************************************************/
688 :
689 7 : int VSIWebHDFSFSHandler::Unlink(const char *pszFilename)
690 : {
691 7 : if (!STARTS_WITH_CI(pszFilename, GetFSPrefix().c_str()))
692 1 : return -1;
693 :
694 12 : NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
695 12 : NetworkStatisticsAction oContextAction("Unlink");
696 :
697 18 : std::string osBaseURL = GetURLFromFilename(pszFilename);
698 :
699 6 : CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osBaseURL);
700 :
701 : std::string osUsernameParam =
702 12 : VSIGetPathSpecificOption(pszFilename, "WEBHDFS_USERNAME", "");
703 6 : if (!osUsernameParam.empty())
704 1 : osUsernameParam = "&user.name=" + osUsernameParam;
705 : std::string osDelegationParam =
706 12 : VSIGetPathSpecificOption(pszFilename, "WEBHDFS_DELEGATION", "");
707 6 : if (!osDelegationParam.empty())
708 1 : osDelegationParam = "&delegation=" + osDelegationParam;
709 : std::string osURL =
710 18 : osBaseURL + "?op=DELETE" + osUsernameParam + osDelegationParam;
711 :
712 6 : CURL *hCurlHandle = curl_easy_init();
713 :
714 6 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_CUSTOMREQUEST, "DELETE");
715 :
716 : struct curl_slist *headers =
717 6 : VSICurlSetOptions(hCurlHandle, osURL.c_str(), nullptr);
718 :
719 6 : WriteFuncStruct sWriteFuncData;
720 6 : VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
721 6 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
722 6 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
723 : VSICurlHandleWriteFunc);
724 :
725 6 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
726 :
727 6 : VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
728 :
729 6 : VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
730 :
731 6 : curl_slist_free_all(headers);
732 :
733 6 : NetworkStatisticsLogger::LogDELETE();
734 :
735 6 : long response_code = 0;
736 6 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
737 :
738 6 : CPLStringList aosList;
739 6 : bool bOK = false;
740 6 : if (response_code == 200 && sWriteFuncData.pBuffer)
741 : {
742 8 : CPLJSONDocument oDoc;
743 4 : if (oDoc.LoadMemory(
744 4 : reinterpret_cast<const GByte *>(sWriteFuncData.pBuffer)))
745 : {
746 4 : bOK = oDoc.GetRoot().GetBool("boolean");
747 : }
748 : }
749 6 : if (bOK)
750 : {
751 3 : InvalidateCachedData(osBaseURL.c_str());
752 :
753 6 : std::string osFilenameWithoutSlash(pszFilename);
754 6 : if (!osFilenameWithoutSlash.empty() &&
755 3 : osFilenameWithoutSlash.back() == '/')
756 0 : osFilenameWithoutSlash.resize(osFilenameWithoutSlash.size() - 1);
757 :
758 3 : InvalidateDirContent(CPLGetDirname(osFilenameWithoutSlash.c_str()));
759 : }
760 : else
761 : {
762 3 : CPLDebug("WEBHDFS", "%s",
763 3 : sWriteFuncData.pBuffer ? sWriteFuncData.pBuffer : "(null)");
764 : }
765 :
766 6 : CPLFree(sWriteFuncData.pBuffer);
767 6 : curl_easy_cleanup(hCurlHandle);
768 :
769 6 : return bOK ? 0 : -1;
770 : }
771 :
772 : /************************************************************************/
773 : /* Rmdir() */
774 : /************************************************************************/
775 :
776 3 : int VSIWebHDFSFSHandler::Rmdir(const char *pszFilename)
777 : {
778 6 : NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
779 6 : NetworkStatisticsAction oContextAction("Rmdir");
780 :
781 6 : return Unlink(pszFilename);
782 : }
783 :
784 : /************************************************************************/
785 : /* Mkdir() */
786 : /************************************************************************/
787 :
788 5 : int VSIWebHDFSFSHandler::Mkdir(const char *pszDirname, long nMode)
789 : {
790 5 : if (!STARTS_WITH_CI(pszDirname, GetFSPrefix().c_str()))
791 1 : return -1;
792 :
793 8 : std::string osDirnameWithoutEndSlash(pszDirname);
794 8 : if (!osDirnameWithoutEndSlash.empty() &&
795 4 : osDirnameWithoutEndSlash.back() == '/')
796 : {
797 2 : osDirnameWithoutEndSlash.resize(osDirnameWithoutEndSlash.size() - 1);
798 : }
799 :
800 4 : if (osDirnameWithoutEndSlash.find("/webhdfs/v1") ==
801 5 : osDirnameWithoutEndSlash.size() - strlen("/webhdfs/v1") &&
802 1 : std::count(osDirnameWithoutEndSlash.begin(),
803 5 : osDirnameWithoutEndSlash.end(), '/') == 6)
804 : {
805 : // The server does weird things (creating a webhdfs/v1 subfolder)
806 : // if we provide the root directory like
807 : // /vsiwebhdfs/http://localhost:50070/webhdfs/v1
808 1 : return -1;
809 : }
810 :
811 6 : NetworkStatisticsFileSystem oContextFS(GetFSPrefix().c_str());
812 6 : NetworkStatisticsAction oContextAction("Mkdir");
813 :
814 : std::string osBaseURL =
815 9 : GetURLFromFilename(osDirnameWithoutEndSlash.c_str());
816 :
817 3 : CURLM *hCurlMultiHandle = GetCurlMultiHandleFor(osBaseURL);
818 :
819 : std::string osUsernameParam =
820 6 : VSIGetPathSpecificOption(pszDirname, "WEBHDFS_USERNAME", "");
821 3 : if (!osUsernameParam.empty())
822 1 : osUsernameParam = "&user.name=" + osUsernameParam;
823 : std::string osDelegationParam =
824 6 : VSIGetPathSpecificOption(pszDirname, "WEBHDFS_DELEGATION", "");
825 3 : if (!osDelegationParam.empty())
826 1 : osDelegationParam = "&delegation=" + osDelegationParam;
827 : std::string osURL =
828 9 : osBaseURL + "?op=MKDIRS" + osUsernameParam + osDelegationParam;
829 3 : if (nMode)
830 : {
831 1 : osURL += "&permission=";
832 1 : osURL += CPLSPrintf("%o", static_cast<int>(nMode));
833 : }
834 :
835 3 : CURL *hCurlHandle = curl_easy_init();
836 :
837 3 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_CUSTOMREQUEST, "PUT");
838 :
839 : struct curl_slist *headers =
840 3 : VSICurlSetOptions(hCurlHandle, osURL.c_str(), nullptr);
841 :
842 3 : WriteFuncStruct sWriteFuncData;
843 3 : VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
844 3 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
845 3 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
846 : VSICurlHandleWriteFunc);
847 :
848 3 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
849 :
850 3 : VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
851 :
852 3 : VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
853 :
854 3 : curl_slist_free_all(headers);
855 :
856 3 : NetworkStatisticsLogger::LogPUT(0);
857 :
858 3 : long response_code = 0;
859 3 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
860 :
861 3 : CPLStringList aosList;
862 3 : bool bOK = false;
863 3 : if (response_code == 200 && sWriteFuncData.pBuffer)
864 : {
865 4 : CPLJSONDocument oDoc;
866 2 : if (oDoc.LoadMemory(
867 2 : reinterpret_cast<const GByte *>(sWriteFuncData.pBuffer)))
868 : {
869 2 : bOK = oDoc.GetRoot().GetBool("boolean");
870 : }
871 : }
872 3 : if (bOK)
873 : {
874 2 : InvalidateDirContent(CPLGetDirname(osDirnameWithoutEndSlash.c_str()));
875 :
876 4 : FileProp cachedFileProp;
877 2 : cachedFileProp.eExists = EXIST_YES;
878 2 : cachedFileProp.bIsDirectory = true;
879 2 : cachedFileProp.bHasComputedFileSize = true;
880 2 : SetCachedFileProp(
881 4 : GetURLFromFilename(osDirnameWithoutEndSlash.c_str()).c_str(),
882 : cachedFileProp);
883 :
884 2 : RegisterEmptyDir(osDirnameWithoutEndSlash);
885 : }
886 : else
887 : {
888 1 : CPLDebug("WEBHDFS", "%s",
889 1 : sWriteFuncData.pBuffer ? sWriteFuncData.pBuffer : "(null)");
890 : }
891 :
892 3 : CPLFree(sWriteFuncData.pBuffer);
893 3 : curl_easy_cleanup(hCurlHandle);
894 :
895 3 : return bOK ? 0 : -1;
896 : }
897 :
898 : /************************************************************************/
899 : /* VSIWebHDFSHandle() */
900 : /************************************************************************/
901 :
902 7 : VSIWebHDFSHandle::VSIWebHDFSHandle(VSIWebHDFSFSHandler *poFSIn,
903 7 : const char *pszFilename, const char *pszURL)
904 : : VSICurlHandle(poFSIn, pszFilename, pszURL),
905 7 : m_osDataNodeHost(GetWebHDFSDataNodeHost(pszFilename))
906 : {
907 : // cppcheck-suppress useInitializationList
908 : m_osUsernameParam =
909 7 : VSIGetPathSpecificOption(pszFilename, "WEBHDFS_USERNAME", "");
910 7 : if (!m_osUsernameParam.empty())
911 1 : m_osUsernameParam = "&user.name=" + m_osUsernameParam;
912 : m_osDelegationParam =
913 7 : VSIGetPathSpecificOption(pszFilename, "WEBHDFS_DELEGATION", "");
914 7 : if (!m_osDelegationParam.empty())
915 1 : m_osDelegationParam = "&delegation=" + m_osDelegationParam;
916 7 : }
917 :
918 : /************************************************************************/
919 : /* GetFileSize() */
920 : /************************************************************************/
921 :
922 4 : vsi_l_offset VSIWebHDFSHandle::GetFileSize(bool bSetError)
923 : {
924 4 : if (oFileProp.bHasComputedFileSize)
925 2 : return oFileProp.fileSize;
926 :
927 4 : NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
928 4 : NetworkStatisticsFile oContextFile(m_osFilename.c_str());
929 4 : NetworkStatisticsAction oContextAction("GetFileSize");
930 :
931 2 : oFileProp.bHasComputedFileSize = true;
932 :
933 2 : CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
934 :
935 2 : std::string osURL(m_pszURL);
936 :
937 2 : if (osURL.size() > strlen("/webhdfs/v1") &&
938 2 : osURL.find("/webhdfs/v1") == osURL.size() - strlen("/webhdfs/v1") &&
939 2 : std::count(osURL.begin(), osURL.end(), '/') == 4)
940 : {
941 : // If this is the root directory, add a trailing slash
942 0 : osURL += "/";
943 : }
944 :
945 2 : osURL += "?op=GETFILESTATUS" + m_osUsernameParam + m_osDelegationParam;
946 :
947 2 : CURL *hCurlHandle = curl_easy_init();
948 :
949 : struct curl_slist *headers =
950 2 : VSICurlSetOptions(hCurlHandle, osURL.c_str(), m_papszHTTPOptions);
951 :
952 2 : WriteFuncStruct sWriteFuncData;
953 2 : VSICURLInitWriteFuncStruct(&sWriteFuncData, nullptr, nullptr, nullptr);
954 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
955 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
956 : VSICurlHandleWriteFunc);
957 :
958 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
959 :
960 2 : char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
961 2 : szCurlErrBuf[0] = '\0';
962 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
963 :
964 2 : VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
965 :
966 2 : VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
967 :
968 2 : curl_slist_free_all(headers);
969 :
970 2 : NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
971 :
972 2 : long response_code = 0;
973 2 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
974 :
975 2 : oFileProp.eExists = EXIST_NO;
976 2 : if (response_code == 200 && sWriteFuncData.pBuffer)
977 : {
978 2 : CPLJSONDocument oDoc;
979 1 : if (oDoc.LoadMemory(
980 1 : reinterpret_cast<const GByte *>(sWriteFuncData.pBuffer)))
981 : {
982 2 : CPLJSONObject oFileStatus = oDoc.GetRoot().GetObj("FileStatus");
983 1 : oFileProp.fileSize = oFileStatus.GetLong("length");
984 1 : oFileProp.mTime = static_cast<size_t>(
985 1 : oFileStatus.GetLong("modificationTime") / 1000);
986 1 : oFileProp.bIsDirectory =
987 1 : oFileStatus.GetString("type") == "DIRECTORY";
988 1 : oFileProp.eExists = EXIST_YES;
989 : }
990 : }
991 :
992 : // If there was no VSI error thrown in the process,
993 : // fail by reporting the HTTP response code.
994 2 : if (response_code != 200 && bSetError && VSIGetLastErrorNo() == 0)
995 : {
996 0 : if (strlen(szCurlErrBuf) > 0)
997 : {
998 0 : if (response_code == 0)
999 : {
1000 0 : VSIError(VSIE_HttpError, "CURL error: %s", szCurlErrBuf);
1001 : }
1002 : else
1003 : {
1004 0 : VSIError(VSIE_HttpError, "HTTP response code: %d - %s",
1005 : static_cast<int>(response_code), szCurlErrBuf);
1006 : }
1007 : }
1008 : else
1009 : {
1010 0 : VSIError(VSIE_HttpError, "HTTP response code: %d",
1011 : static_cast<int>(response_code));
1012 : }
1013 : }
1014 :
1015 : if (ENABLE_DEBUG)
1016 : CPLDebug(
1017 : "WEBHDFS", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
1018 : osURL.c_str(), oFileProp.fileSize, static_cast<int>(response_code));
1019 :
1020 2 : CPLFree(sWriteFuncData.pBuffer);
1021 2 : curl_easy_cleanup(hCurlHandle);
1022 :
1023 2 : oFileProp.bHasComputedFileSize = true;
1024 2 : poFS->SetCachedFileProp(m_pszURL, oFileProp);
1025 :
1026 2 : return oFileProp.fileSize;
1027 : }
1028 :
1029 : /************************************************************************/
1030 : /* DownloadRegion() */
1031 : /************************************************************************/
1032 :
1033 3 : std::string VSIWebHDFSHandle::DownloadRegion(const vsi_l_offset startOffset,
1034 : const int nBlocks)
1035 : {
1036 3 : if (bInterrupted && bStopOnInterruptUntilUninstall)
1037 0 : return std::string();
1038 :
1039 3 : poFS->GetCachedFileProp(m_pszURL, oFileProp);
1040 3 : if (oFileProp.eExists == EXIST_NO)
1041 0 : return std::string();
1042 :
1043 6 : NetworkStatisticsFileSystem oContextFS(poFS->GetFSPrefix().c_str());
1044 6 : NetworkStatisticsFile oContextFile(m_osFilename.c_str());
1045 6 : NetworkStatisticsAction oContextAction("Read");
1046 :
1047 3 : CURLM *hCurlMultiHandle = poFS->GetCurlMultiHandleFor(m_pszURL);
1048 :
1049 6 : std::string osURL(m_pszURL);
1050 :
1051 3 : WriteFuncStruct sWriteFuncData;
1052 3 : int nRetryCount = 0;
1053 3 : double dfRetryDelay = m_dfRetryDelay;
1054 3 : bool bInRedirect = false;
1055 : const vsi_l_offset nEndOffset =
1056 3 : startOffset +
1057 3 : static_cast<vsi_l_offset>(nBlocks) * VSICURLGetDownloadChunkSize() - 1;
1058 :
1059 4 : retry:
1060 4 : CURL *hCurlHandle = curl_easy_init();
1061 :
1062 4 : VSICURLInitWriteFuncStruct(&sWriteFuncData, this, pfnReadCbk,
1063 : pReadCbkUserData);
1064 4 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1065 4 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION,
1066 : VSICurlHandleWriteFunc);
1067 :
1068 4 : if (!bInRedirect)
1069 : {
1070 3 : osURL += "?op=OPEN&offset=";
1071 3 : osURL += CPLSPrintf(CPL_FRMT_GUIB, startOffset);
1072 3 : osURL += "&length=";
1073 3 : osURL += CPLSPrintf(CPL_FRMT_GUIB, nEndOffset - startOffset + 1);
1074 3 : osURL += m_osUsernameParam + m_osDelegationParam;
1075 : }
1076 :
1077 : struct curl_slist *headers =
1078 4 : VSICurlSetOptions(hCurlHandle, osURL.c_str(), m_papszHTTPOptions);
1079 :
1080 4 : if (!m_osDataNodeHost.empty())
1081 : {
1082 2 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_FOLLOWLOCATION, 0);
1083 : }
1084 :
1085 : if (ENABLE_DEBUG)
1086 : CPLDebug("WEBHDFS", "Downloading %s...", osURL.c_str());
1087 :
1088 4 : char szCurlErrBuf[CURL_ERROR_SIZE + 1] = {};
1089 4 : szCurlErrBuf[0] = '\0';
1090 4 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf);
1091 :
1092 4 : unchecked_curl_easy_setopt(hCurlHandle, CURLOPT_HTTPHEADER, headers);
1093 :
1094 4 : VSICURLMultiPerform(hCurlMultiHandle, hCurlHandle);
1095 :
1096 4 : VSICURLResetHeaderAndWriterFunctions(hCurlHandle);
1097 :
1098 4 : curl_slist_free_all(headers);
1099 :
1100 4 : NetworkStatisticsLogger::LogGET(sWriteFuncData.nSize);
1101 :
1102 4 : if (sWriteFuncData.bInterrupted)
1103 : {
1104 0 : bInterrupted = true;
1105 :
1106 0 : CPLFree(sWriteFuncData.pBuffer);
1107 0 : curl_easy_cleanup(hCurlHandle);
1108 :
1109 0 : return std::string();
1110 : }
1111 :
1112 4 : long response_code = 0;
1113 4 : curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1114 :
1115 : if (ENABLE_DEBUG)
1116 : CPLDebug("WEBHDFS", "Got response_code=%ld", response_code);
1117 :
1118 4 : if (!bInRedirect)
1119 : {
1120 3 : char *pszRedirectURL = nullptr;
1121 3 : curl_easy_getinfo(hCurlHandle, CURLINFO_REDIRECT_URL, &pszRedirectURL);
1122 3 : if (pszRedirectURL && strstr(pszRedirectURL, m_pszURL) == nullptr)
1123 : {
1124 1 : CPLDebug("WEBHDFS", "Redirect URL: %s", pszRedirectURL);
1125 :
1126 1 : bInRedirect = true;
1127 1 : osURL = pszRedirectURL;
1128 1 : if (!m_osDataNodeHost.empty())
1129 : {
1130 1 : osURL = PatchWebHDFSUrl(osURL, m_osDataNodeHost);
1131 : }
1132 :
1133 1 : CPLFree(sWriteFuncData.pBuffer);
1134 1 : curl_easy_cleanup(hCurlHandle);
1135 :
1136 1 : goto retry;
1137 : }
1138 : }
1139 :
1140 3 : if (response_code != 200)
1141 : {
1142 : // If HTTP 429, 500, 502, 503, 504 error retry after a
1143 : // pause.
1144 : const double dfNewRetryDelay =
1145 1 : CPLHTTPGetNewRetryDelay(static_cast<int>(response_code),
1146 : dfRetryDelay, nullptr, szCurlErrBuf);
1147 1 : if (dfNewRetryDelay > 0 && nRetryCount < m_nMaxRetry)
1148 : {
1149 0 : CPLError(CE_Warning, CPLE_AppDefined,
1150 : "HTTP error code: %d - %s. "
1151 : "Retrying again in %.1f secs",
1152 : static_cast<int>(response_code), m_pszURL, dfRetryDelay);
1153 0 : CPLSleep(dfRetryDelay);
1154 0 : dfRetryDelay = dfNewRetryDelay;
1155 0 : nRetryCount++;
1156 0 : CPLFree(sWriteFuncData.pBuffer);
1157 0 : curl_easy_cleanup(hCurlHandle);
1158 0 : goto retry;
1159 : }
1160 :
1161 1 : if (response_code >= 400 && szCurlErrBuf[0] != '\0')
1162 : {
1163 0 : CPLError(CE_Failure, CPLE_AppDefined, "%d: %s",
1164 : static_cast<int>(response_code), szCurlErrBuf);
1165 : }
1166 1 : if (!oFileProp.bHasComputedFileSize && startOffset == 0)
1167 : {
1168 1 : oFileProp.bHasComputedFileSize = true;
1169 1 : oFileProp.fileSize = 0;
1170 1 : oFileProp.eExists = EXIST_NO;
1171 1 : poFS->SetCachedFileProp(m_pszURL, oFileProp);
1172 : }
1173 1 : CPLFree(sWriteFuncData.pBuffer);
1174 1 : curl_easy_cleanup(hCurlHandle);
1175 1 : return std::string();
1176 : }
1177 :
1178 2 : oFileProp.eExists = EXIST_YES;
1179 2 : poFS->SetCachedFileProp(m_pszURL, oFileProp);
1180 :
1181 2 : DownloadRegionPostProcess(startOffset, nBlocks, sWriteFuncData.pBuffer,
1182 : sWriteFuncData.nSize);
1183 :
1184 4 : std::string osRet;
1185 2 : osRet.assign(sWriteFuncData.pBuffer, sWriteFuncData.nSize);
1186 :
1187 2 : CPLFree(sWriteFuncData.pBuffer);
1188 2 : curl_easy_cleanup(hCurlHandle);
1189 :
1190 2 : return osRet;
1191 : }
1192 :
1193 : } /* end of namespace cpl */
1194 :
1195 : #endif // DOXYGEN_SKIP
1196 : //! @endcond
1197 :
1198 : /************************************************************************/
1199 : /* VSIInstallWebHdfsHandler() */
1200 : /************************************************************************/
1201 :
1202 : /*!
1203 : \brief Install /vsiwebhdfs/ WebHDFS (Hadoop File System) REST API file
1204 : system handler (requires libcurl)
1205 :
1206 : \verbatim embed:rst
1207 : See :ref:`/vsiwebhdfs/ documentation <vsiwebhdfs>`
1208 : \endverbatim
1209 :
1210 : @since GDAL 2.4
1211 : */
1212 1228 : void VSIInstallWebHdfsHandler(void)
1213 : {
1214 1228 : VSIFileManager::InstallHandler(
1215 1228 : "/vsiwebhdfs/", new cpl::VSIWebHDFSFSHandler("/vsiwebhdfs/"));
1216 1228 : }
1217 :
1218 : #endif /* HAVE_CURL */
|