Line data Source code
1 : /**********************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: Implement VSI large file api for HDFS
5 : * Author: James McClain, <jmcclain@azavea.com>
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2010-2015, Even Rouault <even dot rouault at spatialys.com>
9 : * Copyright (c) 2018, Azavea
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : //! @cond Doxygen_Suppress
31 :
32 : #include <string>
33 :
34 : #include <fcntl.h>
35 : #include <sys/types.h>
36 : #include <sys/stat.h>
37 :
38 : #if !defined(_MSC_VER)
39 : #include <unistd.h>
40 : #endif
41 :
42 : #include <cstring>
43 : #include <climits>
44 :
45 : #include "cpl_port.h"
46 : #include "cpl_vsi.h"
47 :
48 : #include "cpl_conv.h"
49 : #include "cpl_error.h"
50 : #include "cpl_vsi_virtual.h"
51 :
52 : #ifdef HDFS_ENABLED
53 :
54 : #include "hdfs.h"
55 :
56 : /************************************************************************/
57 : /* ==================================================================== */
58 : /* VSIHdfsHandle */
59 : /* ==================================================================== */
60 : /************************************************************************/
61 :
/* Evaluate `expr` exactly once while file descriptor 2 (stderr) is
 * temporarily redirected to /dev/null, then restore the previous stderr.
 *
 * If stderr cannot be duplicated, /dev/null cannot be opened, or the
 * redirection itself fails, `expr` is still evaluated, just without being
 * silenced.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement (safe inside an unbraced if/else), and the helper
 * variables carry a "Silence" infix to avoid shadowing names used by `expr`.
 * Invoke it with a trailing semicolon: SILENCE(foo());
 */
#define SILENCE(expr)                                                          \
    do                                                                         \
    {                                                                          \
        const int hSilenceOldStderr = dup(2);                                  \
        const int hSilenceDevNull = open("/dev/null", O_WRONLY);               \
        const bool bSilenceRedirected = (hSilenceOldStderr != -1) &&           \
                                        (hSilenceDevNull != -1) &&             \
                                        (dup2(hSilenceDevNull, 2) != -1);      \
        if (hSilenceDevNull != -1)                                             \
            close(hSilenceDevNull);                                            \
        expr;                                                                  \
        if (bSilenceRedirected)                                                \
            dup2(hSilenceOldStderr, 2);                                        \
        if (hSilenceOldStderr != -1)                                           \
            close(hSilenceOldStderr);                                          \
    } while (0)
84 :
85 : class VSIHdfsHandle final : public VSIVirtualHandle
86 : {
87 : private:
88 : CPL_DISALLOW_COPY_ASSIGN(VSIHdfsHandle)
89 :
90 : hdfsFile poFile = nullptr;
91 : hdfsFS poFilesystem = nullptr;
92 : std::string oFilename;
93 : bool bEOF = false;
94 :
95 : public:
96 : static constexpr const char *VSIHDFS = "/vsihdfs/";
97 :
98 : VSIHdfsHandle(hdfsFile poFile, hdfsFS poFilesystem, const char *pszFilename,
99 : bool bReadOnly);
100 : ~VSIHdfsHandle() override;
101 :
102 : int Seek(vsi_l_offset nOffset, int nWhence) override;
103 : vsi_l_offset Tell() override;
104 : size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override;
105 : size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override;
106 : vsi_l_offset Length();
107 : int Eof() override;
108 : int Flush() override;
109 : int Close() override;
110 : };
111 :
112 : VSIHdfsHandle::VSIHdfsHandle(hdfsFile _poFile, hdfsFS _poFilesystem,
113 : const char *pszFilename, bool /*_bReadOnly*/)
114 : : poFile(_poFile), poFilesystem(_poFilesystem), oFilename(pszFilename)
115 : {
116 : }
117 :
118 : VSIHdfsHandle::~VSIHdfsHandle()
119 : {
120 : Close();
121 : }
122 :
123 : int VSIHdfsHandle::Seek(vsi_l_offset nOffset, int nWhence)
124 : {
125 : bEOF = false;
126 : switch (nWhence)
127 : {
128 : case SEEK_SET:
129 : return hdfsSeek(poFilesystem, poFile, nOffset);
130 : case SEEK_CUR:
131 : return hdfsSeek(poFilesystem, poFile, nOffset + Tell());
132 : case SEEK_END:
133 : return hdfsSeek(poFilesystem, poFile,
134 : static_cast<tOffset>(Length()) - nOffset);
135 : default:
136 : return -1;
137 : }
138 : }
139 :
140 : vsi_l_offset VSIHdfsHandle::Tell()
141 : {
142 : return hdfsTell(poFilesystem, poFile);
143 : }
144 :
145 : size_t VSIHdfsHandle::Read(void *pBuffer, size_t nSize, size_t nMemb)
146 : {
147 : if (nSize == 0 || nMemb == 0)
148 : return 0;
149 :
150 : size_t bytes_wanted = nSize * nMemb;
151 : size_t bytes_read = 0;
152 :
153 : while (bytes_read < bytes_wanted)
154 : {
155 : tSize bytes = 0;
156 : size_t bytes_to_request = bytes_wanted - bytes_read;
157 :
158 : // The `Read` function can take 64-bit arguments for its
159 : // read-request size, whereas `hdfsRead` may only take a 32-bit
160 : // argument. If the former requests an amount larger than can
161 : // be encoded in a signed 32-bit number, break the request into
162 : // 2GB batches.
163 : bytes = hdfsRead(
164 : poFilesystem, poFile, static_cast<char *>(pBuffer) + bytes_read,
165 : bytes_to_request > INT_MAX ? INT_MAX : bytes_to_request);
166 :
167 : if (bytes > 0)
168 : {
169 : if (static_cast<size_t>(bytes) < bytes_to_request)
170 : bEOF = true;
171 : bytes_read += bytes;
172 : }
173 : if (bytes == 0)
174 : {
175 : bEOF = true;
176 : return bytes_read / nSize;
177 : }
178 : else if (bytes < 0)
179 : {
180 : bEOF = false;
181 : return 0;
182 : }
183 : }
184 :
185 : return bytes_read / nSize;
186 : }
187 :
188 : size_t VSIHdfsHandle::Write(const void *, size_t, size_t)
189 : {
190 : CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
191 : return -1;
192 : }
193 :
194 : vsi_l_offset VSIHdfsHandle::Length()
195 : {
196 : hdfsFileInfo *poInfo = hdfsGetPathInfo(poFilesystem, oFilename.c_str());
197 : if (poInfo != nullptr)
198 : {
199 : tOffset nSize = poInfo->mSize;
200 : hdfsFreeFileInfo(poInfo, 1);
201 : return static_cast<vsi_l_offset>(nSize);
202 : }
203 : return -1;
204 : }
205 :
206 : int VSIHdfsHandle::Eof()
207 : {
208 : return bEOF;
209 : }
210 :
211 : int VSIHdfsHandle::Flush()
212 : {
213 : return hdfsFlush(poFilesystem, poFile);
214 : }
215 :
216 : int VSIHdfsHandle::Close()
217 : {
218 : int retval = 0;
219 :
220 : if (poFilesystem != nullptr && poFile != nullptr)
221 : retval = hdfsCloseFile(poFilesystem, poFile);
222 : poFile = nullptr;
223 : poFilesystem = nullptr;
224 :
225 : return retval;
226 : }
227 :
228 : class VSIHdfsFilesystemHandler final : public VSIFilesystemHandler
229 : {
230 : private:
231 : CPL_DISALLOW_COPY_ASSIGN(VSIHdfsFilesystemHandler)
232 :
233 : hdfsFS poFilesystem = nullptr;
234 : CPLMutex *hMutex = nullptr;
235 :
236 : public:
237 : VSIHdfsFilesystemHandler();
238 : ~VSIHdfsFilesystemHandler() override;
239 :
240 : void EnsureFilesystem();
241 : VSIVirtualHandle *Open(const char *pszFilename, const char *pszAccess,
242 : bool bSetError,
243 : CSLConstList /* papszOptions */) override;
244 : int Stat(const char *pszFilename, VSIStatBufL *pStatBuf,
245 : int nFlags) override;
246 : int Unlink(const char *pszFilename) override;
247 : int Mkdir(const char *pszDirname, long nMode) override;
248 : int Rmdir(const char *pszDirname) override;
249 : char **ReadDirEx(const char *pszDirname, int nMaxFiles) override;
250 : int Rename(const char *oldpath, const char *newpath) override;
251 : };
252 :
253 : VSIHdfsFilesystemHandler::VSIHdfsFilesystemHandler()
254 : {
255 : }
256 :
257 : VSIHdfsFilesystemHandler::~VSIHdfsFilesystemHandler()
258 : {
259 : if (hMutex != nullptr)
260 : {
261 : CPLDestroyMutex(hMutex);
262 : hMutex = nullptr;
263 : }
264 :
265 : if (poFilesystem != nullptr)
266 : hdfsDisconnect(poFilesystem);
267 : poFilesystem = nullptr;
268 : }
269 :
270 : void VSIHdfsFilesystemHandler::EnsureFilesystem()
271 : {
272 : CPLMutexHolder oHolder(&hMutex);
273 : if (poFilesystem == nullptr)
274 : poFilesystem = hdfsConnect("default", 0);
275 : }
276 :
277 : VSIVirtualHandle *
278 : VSIHdfsFilesystemHandler::Open(const char *pszFilename, const char *pszAccess,
279 : bool, CSLConstList /* papszOptions */)
280 : {
281 : EnsureFilesystem();
282 :
283 : if (strchr(pszAccess, 'w') != nullptr || strchr(pszAccess, 'a') != nullptr)
284 : {
285 : CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
286 : return nullptr;
287 : }
288 :
289 : if (strncmp(pszFilename, VSIHdfsHandle::VSIHDFS,
290 : strlen(VSIHdfsHandle::VSIHDFS)) != 0)
291 : {
292 : return nullptr;
293 : }
294 : else
295 : {
296 : const char *pszPath = pszFilename + strlen(VSIHdfsHandle::VSIHDFS);
297 :
298 : // Open HDFS file, sending Java stack traces to /dev/null.
299 : hdfsFile poFile = nullptr;
300 : SILENCE(poFile =
301 : hdfsOpenFile(poFilesystem, pszPath, O_RDONLY, 0, 0, 0));
302 :
303 : if (poFile != nullptr)
304 : {
305 : VSIHdfsHandle *poHandle =
306 : new VSIHdfsHandle(poFile, poFilesystem, pszPath, true);
307 : return poHandle;
308 : }
309 : }
310 : return nullptr;
311 : }
312 :
313 : int VSIHdfsFilesystemHandler::Stat(const char *pszFilename,
314 : VSIStatBufL *pStatBuf, int)
315 : {
316 : memset(pStatBuf, 0, sizeof(VSIStatBufL));
317 :
318 : if (strncmp(pszFilename, VSIHdfsHandle::VSIHDFS,
319 : strlen(VSIHdfsHandle::VSIHDFS)) != 0)
320 : {
321 : return -1;
322 : }
323 :
324 : EnsureFilesystem();
325 :
326 : // CPLDebug("VSIHDFS", "Stat(%s)", pszFilename);
327 :
328 : hdfsFileInfo *poInfo = hdfsGetPathInfo(
329 : poFilesystem, pszFilename + strlen(VSIHdfsHandle::VSIHDFS));
330 :
331 : if (poInfo != nullptr)
332 : {
333 : pStatBuf->st_dev =
334 : static_cast<dev_t>(0); /* ID of device containing file */
335 : pStatBuf->st_ino = static_cast<ino_t>(0); /* inode number */
336 : switch (poInfo->mKind)
337 : { /* protection */
338 : case tObjectKind::kObjectKindFile:
339 : pStatBuf->st_mode = S_IFREG;
340 : break;
341 : case tObjectKind::kObjectKindDirectory:
342 : pStatBuf->st_mode = S_IFDIR;
343 : break;
344 : default:
345 : CPLError(CE_Failure, CPLE_AppDefined,
346 : "Unrecognized object kind");
347 : }
348 : pStatBuf->st_nlink = static_cast<nlink_t>(0); /* number of hard links */
349 : pStatBuf->st_uid = getuid(); /* user ID of owner */
350 : pStatBuf->st_gid = getgid(); /* group ID of owner */
351 : pStatBuf->st_rdev =
352 : static_cast<dev_t>(0); /* device ID (if special file) */
353 : pStatBuf->st_size =
354 : static_cast<off_t>(poInfo->mSize); /* total size, in bytes */
355 : pStatBuf->st_blksize = static_cast<blksize_t>(
356 : poInfo->mBlockSize); /* blocksize for filesystem I/O */
357 : pStatBuf->st_blocks =
358 : static_cast<blkcnt_t>((poInfo->mBlockSize >> 9) +
359 : 1); /* number of 512B blocks allocated */
360 : pStatBuf->st_atime =
361 : static_cast<time_t>(poInfo->mLastAccess); /* time of last access */
362 : pStatBuf->st_mtime = static_cast<time_t>(
363 : poInfo->mLastMod); /* time of last modification */
364 : pStatBuf->st_ctime = static_cast<time_t>(
365 : poInfo->mLastMod); /* time of last status change */
366 : hdfsFreeFileInfo(poInfo, 1);
367 : return 0;
368 : }
369 :
370 : return -1;
371 : }
372 :
373 : int VSIHdfsFilesystemHandler::Unlink(const char *)
374 : {
375 : CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
376 : return -1;
377 : }
378 :
379 : int VSIHdfsFilesystemHandler::Mkdir(const char *, long)
380 : {
381 : CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
382 : return -1;
383 : }
384 :
385 : int VSIHdfsFilesystemHandler::Rmdir(const char *)
386 : {
387 : CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
388 : return -1;
389 : }
390 :
391 : char **VSIHdfsFilesystemHandler::ReadDirEx(const char *pszDirname,
392 : int /* nMaxFiles */)
393 : {
394 : if (strncmp(pszDirname, VSIHdfsHandle::VSIHDFS,
395 : strlen(VSIHdfsHandle::VSIHDFS)) != 0)
396 : {
397 : return nullptr;
398 : }
399 :
400 : EnsureFilesystem();
401 :
402 : std::string osDirName(pszDirname);
403 : if (osDirName.back() != '/')
404 : osDirName += '/';
405 :
406 : VSIStatBufL sStat;
407 : if (Stat(osDirName.c_str(), &sStat, 0) != 0 || sStat.st_mode != S_IFDIR)
408 : return nullptr;
409 :
410 : int nEntries = 0;
411 : std::string osDirNameWithoutPrefix(
412 : osDirName.substr(strlen(VSIHdfsHandle::VSIHDFS)));
413 :
414 : // file:///home/user/... is accepted, but if this is used, files returned
415 : // by hdfsListDirectory() use file:/home/user/...
416 : if (osDirNameWithoutPrefix.compare(0, strlen("file:///"), "file:///") == 0)
417 : {
418 : osDirNameWithoutPrefix =
419 : "file:/" + osDirNameWithoutPrefix.substr(strlen("file:///"));
420 : }
421 :
422 : hdfsFileInfo *paoInfo = hdfsListDirectory(
423 : poFilesystem, osDirNameWithoutPrefix.c_str(), &nEntries);
424 :
425 : if (paoInfo != nullptr)
426 : {
427 : CPLStringList aosNames;
428 : for (int i = 0; i < nEntries; ++i)
429 : {
430 : // CPLDebug("VSIHDFS", "[%d]: %s", i, paoInfo[i].mName);
431 : if (STARTS_WITH(paoInfo[i].mName, osDirNameWithoutPrefix.c_str()))
432 : {
433 : aosNames.AddString(paoInfo[i].mName +
434 : osDirNameWithoutPrefix.size());
435 : }
436 : else
437 : {
438 : CPLDebug("VSIHDFS",
439 : "hdfsListDirectory() returned %s, but this is not "
440 : "starting with %s",
441 : paoInfo[i].mName, osDirNameWithoutPrefix.c_str());
442 : }
443 : }
444 : hdfsFreeFileInfo(paoInfo, nEntries);
445 : return aosNames.StealList();
446 : }
447 : return nullptr;
448 : }
449 :
450 : int VSIHdfsFilesystemHandler::Rename(const char *, const char *)
451 : {
452 : CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
453 : return -1;
454 : }
455 :
456 : #endif
457 :
458 : //! @endcond
459 :
460 : #ifdef HDFS_ENABLED
461 :
462 : /************************************************************************/
463 : /* VSIInstallHdfsHandler() */
464 : /************************************************************************/
465 :
466 : /**
467 : * \brief Install /vsihdfs/ file system handler (requires JVM and HDFS support)
468 : *
469 : * @since GDAL 2.4.0
470 : */
471 : void VSIInstallHdfsHandler()
472 : {
473 : VSIFileManager::InstallHandler(VSIHdfsHandle::VSIHDFS,
474 : new VSIHdfsFilesystemHandler);
475 : }
476 :
477 : #else
478 :
479 : /************************************************************************/
480 : /* VSIInstallHdfsHandler() */
481 : /************************************************************************/
482 :
483 : /**
484 : * \brief Install /vsihdfs/ file system handler (non-functional stub)
485 : *
486 : * @since GDAL 2.4.0
487 : */
void VSIInstallHdfsHandler()
{
    // Built without HDFS support: there is no handler to register.
}
492 :
493 : #endif
|