LCOV - code coverage report
Current view: top level - port - cpl_vsil_hdfs.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 2 2 100.0 %
Date: 2024-04-29 01:40:10 Functions: 1 1 100.0 %

          Line data    Source code
       1             : /**********************************************************************
       2             :  *
       3             :  * Project:  CPL - Common Portability Library
       4             :  * Purpose:  Implement VSI large file api for HDFS
       5             :  * Author:   James McClain, <jmcclain@azavea.com>
       6             :  *
       7             :  **********************************************************************
       8             :  * Copyright (c) 2010-2015, Even Rouault <even dot rouault at spatialys.com>
       9             :  * Copyright (c) 2018, Azavea
      10             :  *
      11             :  * Permission is hereby granted, free of charge, to any person obtaining a
      12             :  * copy of this software and associated documentation files (the "Software"),
      13             :  * to deal in the Software without restriction, including without limitation
      14             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      15             :  * and/or sell copies of the Software, and to permit persons to whom the
      16             :  * Software is furnished to do so, subject to the following conditions:
      17             :  *
      18             :  * The above copyright notice and this permission notice shall be included
      19             :  * in all copies or substantial portions of the Software.
      20             :  *
      21             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      22             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      23             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      24             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      25             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      26             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      27             :  * DEALINGS IN THE SOFTWARE.
      28             :  ****************************************************************************/
      29             : 
      30             : //! @cond Doxygen_Suppress
      31             : 
      32             : #include <string>
      33             : 
      34             : #include <fcntl.h>
      35             : #include <sys/types.h>
      36             : #include <sys/stat.h>
      37             : 
      38             : #if !defined(_MSC_VER)
      39             : #include <unistd.h>
      40             : #endif
      41             : 
      42             : #include <cstring>
      43             : #include <climits>
      44             : 
      45             : #include "cpl_port.h"
      46             : #include "cpl_vsi.h"
      47             : 
      48             : #include "cpl_conv.h"
      49             : #include "cpl_error.h"
      50             : #include "cpl_vsi_virtual.h"
      51             : 
      52             : #ifdef HDFS_ENABLED
      53             : 
      54             : #include "hdfs.h"
      55             : 
      56             : /************************************************************************/
      57             : /* ==================================================================== */
      58             : /*                        VSIHdfsHandle                               */
      59             : /* ==================================================================== */
      60             : /************************************************************************/
      61             : 
      62             : #define SILENCE(expr)                                                          \
      63             :     {                                                                          \
      64             :         int hOldStderr = dup(2);                                               \
      65             :         int hNewStderr = open("/dev/null", O_WRONLY);                          \
      66             :                                                                                \
      67             :         if ((hOldStderr != -1) && (hNewStderr != -1) &&                        \
      68             :             (dup2(hNewStderr, 2) != -1))                                       \
      69             :         {                                                                      \
      70             :             close(hNewStderr);                                                 \
      71             :             expr;                                                              \
      72             :             dup2(hOldStderr, 2);                                               \
      73             :             close(hOldStderr);                                                 \
      74             :         }                                                                      \
      75             :         else                                                                   \
      76             :         {                                                                      \
      77             :             if (hOldStderr != -1)                                              \
      78             :                 close(hOldStderr);                                             \
      79             :             if (hNewStderr != -1)                                              \
      80             :                 close(hNewStderr);                                             \
      81             :             expr;                                                              \
      82             :         }                                                                      \
      83             :     }
      84             : 
      85             : class VSIHdfsHandle final : public VSIVirtualHandle
      86             : {
      87             :   private:
      88             :     CPL_DISALLOW_COPY_ASSIGN(VSIHdfsHandle)
      89             : 
      90             :     hdfsFile poFile = nullptr;
      91             :     hdfsFS poFilesystem = nullptr;
      92             :     std::string oFilename;
      93             :     bool bEOF = false;
      94             : 
      95             :   public:
      96             :     static constexpr const char *VSIHDFS = "/vsihdfs/";
      97             : 
      98             :     VSIHdfsHandle(hdfsFile poFile, hdfsFS poFilesystem, const char *pszFilename,
      99             :                   bool bReadOnly);
     100             :     ~VSIHdfsHandle() override;
     101             : 
     102             :     int Seek(vsi_l_offset nOffset, int nWhence) override;
     103             :     vsi_l_offset Tell() override;
     104             :     size_t Read(void *pBuffer, size_t nSize, size_t nMemb) override;
     105             :     size_t Write(const void *pBuffer, size_t nSize, size_t nMemb) override;
     106             :     vsi_l_offset Length();
     107             :     int Eof() override;
     108             :     int Flush() override;
     109             :     int Close() override;
     110             : };
     111             : 
     112             : VSIHdfsHandle::VSIHdfsHandle(hdfsFile _poFile, hdfsFS _poFilesystem,
     113             :                              const char *pszFilename, bool /*_bReadOnly*/)
     114             :     : poFile(_poFile), poFilesystem(_poFilesystem), oFilename(pszFilename)
     115             : {
     116             : }
     117             : 
     118             : VSIHdfsHandle::~VSIHdfsHandle()
     119             : {
     120             :     Close();
     121             : }
     122             : 
     123             : int VSIHdfsHandle::Seek(vsi_l_offset nOffset, int nWhence)
     124             : {
     125             :     bEOF = false;
     126             :     switch (nWhence)
     127             :     {
     128             :         case SEEK_SET:
     129             :             return hdfsSeek(poFilesystem, poFile, nOffset);
     130             :         case SEEK_CUR:
     131             :             return hdfsSeek(poFilesystem, poFile, nOffset + Tell());
     132             :         case SEEK_END:
     133             :             return hdfsSeek(poFilesystem, poFile,
     134             :                             static_cast<tOffset>(Length()) - nOffset);
     135             :         default:
     136             :             return -1;
     137             :     }
     138             : }
     139             : 
     140             : vsi_l_offset VSIHdfsHandle::Tell()
     141             : {
     142             :     return hdfsTell(poFilesystem, poFile);
     143             : }
     144             : 
     145             : size_t VSIHdfsHandle::Read(void *pBuffer, size_t nSize, size_t nMemb)
     146             : {
     147             :     if (nSize == 0 || nMemb == 0)
     148             :         return 0;
     149             : 
     150             :     size_t bytes_wanted = nSize * nMemb;
     151             :     size_t bytes_read = 0;
     152             : 
     153             :     while (bytes_read < bytes_wanted)
     154             :     {
     155             :         tSize bytes = 0;
     156             :         size_t bytes_to_request = bytes_wanted - bytes_read;
     157             : 
     158             :         // The `Read` function can take 64-bit arguments for its
     159             :         // read-request size, whereas `hdfsRead` may only take a 32-bit
     160             :         // argument.  If the former requests an amount larger than can
     161             :         // be encoded in a signed 32-bit number, break the request into
     162             :         // 2GB batches.
     163             :         bytes = hdfsRead(
     164             :             poFilesystem, poFile, static_cast<char *>(pBuffer) + bytes_read,
     165             :             bytes_to_request > INT_MAX ? INT_MAX : bytes_to_request);
     166             : 
     167             :         if (bytes > 0)
     168             :         {
     169             :             if (static_cast<size_t>(bytes) < bytes_to_request)
     170             :                 bEOF = true;
     171             :             bytes_read += bytes;
     172             :         }
     173             :         if (bytes == 0)
     174             :         {
     175             :             bEOF = true;
     176             :             return bytes_read / nSize;
     177             :         }
     178             :         else if (bytes < 0)
     179             :         {
     180             :             bEOF = false;
     181             :             return 0;
     182             :         }
     183             :     }
     184             : 
     185             :     return bytes_read / nSize;
     186             : }
     187             : 
     188             : size_t VSIHdfsHandle::Write(const void *, size_t, size_t)
     189             : {
     190             :     CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
     191             :     return -1;
     192             : }
     193             : 
     194             : vsi_l_offset VSIHdfsHandle::Length()
     195             : {
     196             :     hdfsFileInfo *poInfo = hdfsGetPathInfo(poFilesystem, oFilename.c_str());
     197             :     if (poInfo != nullptr)
     198             :     {
     199             :         tOffset nSize = poInfo->mSize;
     200             :         hdfsFreeFileInfo(poInfo, 1);
     201             :         return static_cast<vsi_l_offset>(nSize);
     202             :     }
     203             :     return -1;
     204             : }
     205             : 
     206             : int VSIHdfsHandle::Eof()
     207             : {
     208             :     return bEOF;
     209             : }
     210             : 
     211             : int VSIHdfsHandle::Flush()
     212             : {
     213             :     return hdfsFlush(poFilesystem, poFile);
     214             : }
     215             : 
     216             : int VSIHdfsHandle::Close()
     217             : {
     218             :     int retval = 0;
     219             : 
     220             :     if (poFilesystem != nullptr && poFile != nullptr)
     221             :         retval = hdfsCloseFile(poFilesystem, poFile);
     222             :     poFile = nullptr;
     223             :     poFilesystem = nullptr;
     224             : 
     225             :     return retval;
     226             : }
     227             : 
     228             : class VSIHdfsFilesystemHandler final : public VSIFilesystemHandler
     229             : {
     230             :   private:
     231             :     CPL_DISALLOW_COPY_ASSIGN(VSIHdfsFilesystemHandler)
     232             : 
     233             :     hdfsFS poFilesystem = nullptr;
     234             :     CPLMutex *hMutex = nullptr;
     235             : 
     236             :   public:
     237             :     VSIHdfsFilesystemHandler();
     238             :     ~VSIHdfsFilesystemHandler() override;
     239             : 
     240             :     void EnsureFilesystem();
     241             :     VSIVirtualHandle *Open(const char *pszFilename, const char *pszAccess,
     242             :                            bool bSetError,
     243             :                            CSLConstList /* papszOptions */) override;
     244             :     int Stat(const char *pszFilename, VSIStatBufL *pStatBuf,
     245             :              int nFlags) override;
     246             :     int Unlink(const char *pszFilename) override;
     247             :     int Mkdir(const char *pszDirname, long nMode) override;
     248             :     int Rmdir(const char *pszDirname) override;
     249             :     char **ReadDirEx(const char *pszDirname, int nMaxFiles) override;
     250             :     int Rename(const char *oldpath, const char *newpath) override;
     251             : };
     252             : 
     253             : VSIHdfsFilesystemHandler::VSIHdfsFilesystemHandler()
     254             : {
     255             : }
     256             : 
     257             : VSIHdfsFilesystemHandler::~VSIHdfsFilesystemHandler()
     258             : {
     259             :     if (hMutex != nullptr)
     260             :     {
     261             :         CPLDestroyMutex(hMutex);
     262             :         hMutex = nullptr;
     263             :     }
     264             : 
     265             :     if (poFilesystem != nullptr)
     266             :         hdfsDisconnect(poFilesystem);
     267             :     poFilesystem = nullptr;
     268             : }
     269             : 
     270             : void VSIHdfsFilesystemHandler::EnsureFilesystem()
     271             : {
     272             :     CPLMutexHolder oHolder(&hMutex);
     273             :     if (poFilesystem == nullptr)
     274             :         poFilesystem = hdfsConnect("default", 0);
     275             : }
     276             : 
     277             : VSIVirtualHandle *
     278             : VSIHdfsFilesystemHandler::Open(const char *pszFilename, const char *pszAccess,
     279             :                                bool, CSLConstList /* papszOptions */)
     280             : {
     281             :     EnsureFilesystem();
     282             : 
     283             :     if (strchr(pszAccess, 'w') != nullptr || strchr(pszAccess, 'a') != nullptr)
     284             :     {
     285             :         CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
     286             :         return nullptr;
     287             :     }
     288             : 
     289             :     if (strncmp(pszFilename, VSIHdfsHandle::VSIHDFS,
     290             :                 strlen(VSIHdfsHandle::VSIHDFS)) != 0)
     291             :     {
     292             :         return nullptr;
     293             :     }
     294             :     else
     295             :     {
     296             :         const char *pszPath = pszFilename + strlen(VSIHdfsHandle::VSIHDFS);
     297             : 
     298             :         // Open HDFS file, sending Java stack traces to /dev/null.
     299             :         hdfsFile poFile = nullptr;
     300             :         SILENCE(poFile =
     301             :                     hdfsOpenFile(poFilesystem, pszPath, O_RDONLY, 0, 0, 0));
     302             : 
     303             :         if (poFile != nullptr)
     304             :         {
     305             :             VSIHdfsHandle *poHandle =
     306             :                 new VSIHdfsHandle(poFile, poFilesystem, pszPath, true);
     307             :             return poHandle;
     308             :         }
     309             :     }
     310             :     return nullptr;
     311             : }
     312             : 
     313             : int VSIHdfsFilesystemHandler::Stat(const char *pszFilename,
     314             :                                    VSIStatBufL *pStatBuf, int)
     315             : {
     316             :     memset(pStatBuf, 0, sizeof(VSIStatBufL));
     317             : 
     318             :     if (strncmp(pszFilename, VSIHdfsHandle::VSIHDFS,
     319             :                 strlen(VSIHdfsHandle::VSIHDFS)) != 0)
     320             :     {
     321             :         return -1;
     322             :     }
     323             : 
     324             :     EnsureFilesystem();
     325             : 
     326             :     // CPLDebug("VSIHDFS", "Stat(%s)", pszFilename);
     327             : 
     328             :     hdfsFileInfo *poInfo = hdfsGetPathInfo(
     329             :         poFilesystem, pszFilename + strlen(VSIHdfsHandle::VSIHDFS));
     330             : 
     331             :     if (poInfo != nullptr)
     332             :     {
     333             :         pStatBuf->st_dev =
     334             :             static_cast<dev_t>(0); /* ID of device containing file */
     335             :         pStatBuf->st_ino = static_cast<ino_t>(0); /* inode number */
     336             :         switch (poInfo->mKind)
     337             :         { /* protection */
     338             :             case tObjectKind::kObjectKindFile:
     339             :                 pStatBuf->st_mode = S_IFREG;
     340             :                 break;
     341             :             case tObjectKind::kObjectKindDirectory:
     342             :                 pStatBuf->st_mode = S_IFDIR;
     343             :                 break;
     344             :             default:
     345             :                 CPLError(CE_Failure, CPLE_AppDefined,
     346             :                          "Unrecognized object kind");
     347             :         }
     348             :         pStatBuf->st_nlink = static_cast<nlink_t>(0); /* number of hard links */
     349             :         pStatBuf->st_uid = getuid();                  /* user ID of owner */
     350             :         pStatBuf->st_gid = getgid();                  /* group ID of owner */
     351             :         pStatBuf->st_rdev =
     352             :             static_cast<dev_t>(0); /* device ID (if special file) */
     353             :         pStatBuf->st_size =
     354             :             static_cast<off_t>(poInfo->mSize); /* total size, in bytes */
     355             :         pStatBuf->st_blksize = static_cast<blksize_t>(
     356             :             poInfo->mBlockSize); /* blocksize for filesystem I/O */
     357             :         pStatBuf->st_blocks =
     358             :             static_cast<blkcnt_t>((poInfo->mBlockSize >> 9) +
     359             :                                   1); /* number of 512B blocks allocated */
     360             :         pStatBuf->st_atime =
     361             :             static_cast<time_t>(poInfo->mLastAccess); /* time of last access */
     362             :         pStatBuf->st_mtime = static_cast<time_t>(
     363             :             poInfo->mLastMod); /* time of last modification */
     364             :         pStatBuf->st_ctime = static_cast<time_t>(
     365             :             poInfo->mLastMod); /* time of last status change */
     366             :         hdfsFreeFileInfo(poInfo, 1);
     367             :         return 0;
     368             :     }
     369             : 
     370             :     return -1;
     371             : }
     372             : 
     373             : int VSIHdfsFilesystemHandler::Unlink(const char *)
     374             : {
     375             :     CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
     376             :     return -1;
     377             : }
     378             : 
     379             : int VSIHdfsFilesystemHandler::Mkdir(const char *, long)
     380             : {
     381             :     CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
     382             :     return -1;
     383             : }
     384             : 
     385             : int VSIHdfsFilesystemHandler::Rmdir(const char *)
     386             : {
     387             :     CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
     388             :     return -1;
     389             : }
     390             : 
     391             : char **VSIHdfsFilesystemHandler::ReadDirEx(const char *pszDirname,
     392             :                                            int /* nMaxFiles */)
     393             : {
     394             :     if (strncmp(pszDirname, VSIHdfsHandle::VSIHDFS,
     395             :                 strlen(VSIHdfsHandle::VSIHDFS)) != 0)
     396             :     {
     397             :         return nullptr;
     398             :     }
     399             : 
     400             :     EnsureFilesystem();
     401             : 
     402             :     std::string osDirName(pszDirname);
     403             :     if (osDirName.back() != '/')
     404             :         osDirName += '/';
     405             : 
     406             :     VSIStatBufL sStat;
     407             :     if (Stat(osDirName.c_str(), &sStat, 0) != 0 || sStat.st_mode != S_IFDIR)
     408             :         return nullptr;
     409             : 
     410             :     int nEntries = 0;
     411             :     std::string osDirNameWithoutPrefix(
     412             :         osDirName.substr(strlen(VSIHdfsHandle::VSIHDFS)));
     413             : 
     414             :     // file:///home/user/... is accepted, but if this is used, files returned
     415             :     // by hdfsListDirectory() use file:/home/user/...
     416             :     if (osDirNameWithoutPrefix.compare(0, strlen("file:///"), "file:///") == 0)
     417             :     {
     418             :         osDirNameWithoutPrefix =
     419             :             "file:/" + osDirNameWithoutPrefix.substr(strlen("file:///"));
     420             :     }
     421             : 
     422             :     hdfsFileInfo *paoInfo = hdfsListDirectory(
     423             :         poFilesystem, osDirNameWithoutPrefix.c_str(), &nEntries);
     424             : 
     425             :     if (paoInfo != nullptr)
     426             :     {
     427             :         CPLStringList aosNames;
     428             :         for (int i = 0; i < nEntries; ++i)
     429             :         {
     430             :             // CPLDebug("VSIHDFS", "[%d]: %s", i, paoInfo[i].mName);
     431             :             if (STARTS_WITH(paoInfo[i].mName, osDirNameWithoutPrefix.c_str()))
     432             :             {
     433             :                 aosNames.AddString(paoInfo[i].mName +
     434             :                                    osDirNameWithoutPrefix.size());
     435             :             }
     436             :             else
     437             :             {
     438             :                 CPLDebug("VSIHDFS",
     439             :                          "hdfsListDirectory() returned %s, but this is not "
     440             :                          "starting with %s",
     441             :                          paoInfo[i].mName, osDirNameWithoutPrefix.c_str());
     442             :             }
     443             :         }
     444             :         hdfsFreeFileInfo(paoInfo, nEntries);
     445             :         return aosNames.StealList();
     446             :     }
     447             :     return nullptr;
     448             : }
     449             : 
     450             : int VSIHdfsFilesystemHandler::Rename(const char *, const char *)
     451             : {
     452             :     CPLError(CE_Failure, CPLE_AppDefined, "HDFS driver is read-only");
     453             :     return -1;
     454             : }
     455             : 
     456             : #endif
     457             : 
     458             : //! @endcond
     459             : 
     460             : #ifdef HDFS_ENABLED
     461             : 
     462             : /************************************************************************/
     463             : /*                       VSIInstallHdfsHandler()                        */
     464             : /************************************************************************/
     465             : 
     466             : /**
     467             :  * \brief Install /vsihdfs/ file system handler (requires JVM and HDFS support)
     468             :  *
     469             :  * @since GDAL 2.4.0
     470             :  */
     471             : void VSIInstallHdfsHandler()
     472             : {
     473             :     VSIFileManager::InstallHandler(VSIHdfsHandle::VSIHDFS,
     474             :                                    new VSIHdfsFilesystemHandler);
     475             : }
     476             : 
     477             : #else
     478             : 
     479             : /************************************************************************/
     480             : /*                       VSIInstallHdfsHandler()                        */
     481             : /************************************************************************/
     482             : 
     483             : /**
     484             :  * \brief Install /vsihdfs/ file system handler (non-functional stub)
     485             :  *
     486             :  * @since GDAL 2.4.0
     487             :  */
     488        1228 : void VSIInstallHdfsHandler(void)
     489             : {
     490             :     // Not supported.
     491        1228 : }
     492             : 
     493             : #endif

Generated by: LCOV version 1.14