LCOV - code coverage report
Current view: top level - port - cpl_userfaultfd.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 115 226 50.9 %
Date: 2024-04-29 01:40:10 Functions: 6 7 85.7 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Name:     cpl_userfaultfd.cpp
       4             :  * Project:  CPL - Common Portability Library
       5             :  * Purpose:  Use userfaultfd and VSIL to service page faults
       6             :  * Author:   James McClain, <james.mcclain@gmail.com>
       7             :  *
       8             :  ******************************************************************************
       9             :  * Copyright (c) 2018, Dr. James McClain <james.mcclain@gmail.com>
      10             :  *
      11             :  * Permission is hereby granted, free of charge, to any person obtaining a
      12             :  * copy of this software and associated documentation files (the "Software"),
      13             :  * to deal in the Software without restriction, including without limitation
      14             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      15             :  * and/or sell copies of the Software, and to permit persons to whom the
      16             :  * Software is furnished to do so, subject to the following conditions:
      17             :  *
      18             :  * The above copyright notice and this permission notice shall be included
      19             :  * in all copies or substantial portions of the Software.
      20             :  *
      21             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
      22             :  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      23             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
      24             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      25             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      26             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      27             :  * DEALINGS IN THE SOFTWARE.
      28             :  ****************************************************************************/
      29             : 
      30             : #ifdef ENABLE_UFFD
      31             : 
      32             : #include <cstdlib>
      33             : #include <cinttypes>
      34             : #include <cstring>
      35             : #include <string>
      36             : 
      37             : #include <errno.h>
      38             : #include <fcntl.h>
      39             : #include <poll.h>
      40             : #include <pthread.h>
      41             : #include <sched.h>
      42             : #include <signal.h>
      43             : #include <unistd.h>
      44             : 
      45             : #include <sys/ioctl.h>
      46             : #include <sys/mman.h>
      47             : #include <sys/stat.h>
      48             : #include <sys/syscall.h>
      49             : #include <sys/types.h>
      50             : #include <sys/utsname.h>
      51             : #include <linux/userfaultfd.h>
      52             : 
      53             : #include "cpl_conv.h"
      54             : #include "cpl_error.h"
      55             : #include "cpl_userfaultfd.h"
      56             : #include "cpl_string.h"
      57             : #include "cpl_vsi.h"
      58             : #include "cpl_multiproc.h"
      59             : 
      60             : #ifndef UFFD_USER_MODE_ONLY
      61             : // The UFFD_USER_MODE_ONLY flag got added in kernel 5.11 which is the one
      62             : // used by Ubuntu 20.04, but the linux-libc-dev package corresponds to 5.4
      63             : #define UFFD_USER_MODE_ONLY 1
      64             : #endif
      65             : 
      66             : #define BAD_MMAP (reinterpret_cast<void *>(-1))
      67             : #define MAX_MESSAGES (0x100)
      68             : 
      69             : static int64_t get_page_limit();
      70             : static void cpl_uffd_fault_handler(void *ptr);
      71             : static void signal_handler(int signal);
      72             : static void uffd_cleanup(void *ptr);
      73             : 
      74             : struct cpl_uffd_context
      75             : {
      76             :     bool keep_going = false;
      77             : 
      78             :     int uffd = -1;
      79             :     struct uffdio_register uffdio_register = {};
      80             :     struct uffd_msg uffd_msgs[MAX_MESSAGES];
      81             : 
      82             :     std::string filename = std::string("");
      83             : 
      84             :     int64_t page_limit = -1;
      85             :     int64_t pages_used = 0;
      86             : 
      87             :     size_t file_size = 0;
      88             :     size_t page_size = 0;
      89             :     void *page_ptr = nullptr;
      90             :     size_t vma_size = 0;
      91             :     void *vma_ptr = nullptr;
      92             :     CPLJoinableThread *thread = nullptr;
      93             : };
      94             : 
      95           2 : static void uffd_cleanup(void *ptr)
      96             : {
      97           2 :     struct cpl_uffd_context *ctx = static_cast<struct cpl_uffd_context *>(ptr);
      98             : 
      99           2 :     if (!ctx)
     100           0 :         return;
     101             : 
     102             :     // Signal shutdown
     103           2 :     ctx->keep_going = false;
     104           2 :     if (ctx->thread)
     105             :     {
     106           2 :         CPLJoinThread(ctx->thread);
     107           2 :         ctx->thread = nullptr;
     108             :     }
     109             : 
     110           2 :     if (ctx->uffd != -1)
     111             :     {
     112           2 :         ioctl(ctx->uffd, UFFDIO_UNREGISTER, &ctx->uffdio_register);
     113           2 :         close(ctx->uffd);
     114           2 :         ctx->uffd = -1;
     115             :     }
     116           2 :     if (ctx->page_ptr && ctx->page_size)
     117           2 :         munmap(ctx->page_ptr, ctx->page_size);
     118           2 :     if (ctx->vma_ptr && ctx->vma_size)
     119           2 :         munmap(ctx->vma_ptr, ctx->vma_size);
     120           2 :     ctx->page_ptr = nullptr;
     121           2 :     ctx->vma_ptr = nullptr;
     122           2 :     ctx->page_size = 0;
     123           2 :     ctx->vma_size = 0;
     124           2 :     ctx->pages_used = 0;
     125           2 :     ctx->page_limit = 0;
     126             : 
     127           2 :     delete ctx;
     128             : 
     129           2 :     return;
     130             : }
     131             : 
     132             : #ifdef HAVE_GCC_WARNING_ZERO_AS_NULL_POINTER_CONSTANT
     133             : #pragma GCC diagnostic push
     134             : #pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
     135             : #endif
     136             : static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
     137             : #ifdef HAVE_GCC_WARNING_ZERO_AS_NULL_POINTER_CONSTANT
     138             : #pragma GCC diagnostic pop
     139             : #endif
     140             : 
     141           2 : static int64_t get_page_limit()
     142             : {
     143             :     int64_t retval;
     144           2 :     const char *variable = CPLGetConfigOption(GDAL_UFFD_LIMIT, nullptr);
     145             : 
     146           2 :     if (variable && sscanf(variable, "%" PRId64, &retval) == 1)
     147           0 :         return retval;
     148             :     else
     149           2 :         return -1;
     150             : }
     151             : 
     152           2 : static void cpl_uffd_fault_handler(void *ptr)
     153             : {
     154           2 :     struct cpl_uffd_context *ctx = static_cast<struct cpl_uffd_context *>(ptr);
     155             :     struct uffdio_copy uffdio_copy;
     156             :     struct pollfd pollfd;
     157             : 
     158             :     // Setup pollfd structure
     159           2 :     pollfd.fd = ctx->uffd;
     160           2 :     pollfd.events = POLLIN;
     161             : 
     162             :     // Open asset for reading
     163           2 :     VSILFILE *file = VSIFOpenL(ctx->filename.c_str(), "rb");
     164             : 
     165           2 :     if (!file)
     166           0 :         return;
     167             : 
     168             :     // Loop until told to stop
     169           4 :     while (ctx->keep_going)
     170             :     {
     171             :         // Poll for event
     172           2 :         if (poll(&pollfd, 1, 16) == -1)
     173           0 :             break;  // 60Hz when no demand
     174           2 :         if ((pollfd.revents & POLLERR) || (pollfd.revents & POLLNVAL))
     175             :             break;
     176           2 :         if (!(pollfd.revents & POLLIN))
     177           0 :             continue;
     178             : 
     179             :         // Read page fault events
     180             :         ssize_t bytes_read = static_cast<ssize_t>(
     181           2 :             read(ctx->uffd, ctx->uffd_msgs, MAX_MESSAGES * sizeof(uffd_msg)));
     182           2 :         if (bytes_read < 1)
     183             :         {
     184           0 :             if (errno == EWOULDBLOCK)
     185           0 :                 continue;
     186             :             else
     187           0 :                 break;
     188             :         }
     189             : 
     190             :         // If too many pages are in use, evict all pages (evict them from
     191             :         // RAM and swap, not just to swap).  It is impossible to control
     192             :         // which/when threads access the VMA, so access to the VMA has to
     193             :         // be forbidden while the activity is in progress.
     194             :         //
     195             :         // That is done by (1) installing special handlers for SIGSEGV and
     196             :         // SIGBUS, (2) mprotecting the VMA so that any threads accessing
     197             :         // it receive either SIGSEGV or SIGBUS (which one is apparently a
     198             :         // function of the C library, at least on one non-Linux GNU
     199             :         // system[1]), (3) unregistering the VMA from userfaultfd,
     200             :         // remapping the VMA to evict the pages, registering the VMA
     201             :         // again, (4) making the VMA accessible again, and finally (5)
     202             :         // restoring the previous signal-handling behavior.
     203             :         //
     204             :         // [1] https://lists.debian.org/debian-bsd/2011/05/msg00032.html
     205           2 :         if (ctx->page_limit > 0)
     206             :         {
     207           0 :             pthread_mutex_lock(&mutex);
     208           0 :             if (ctx->pages_used > ctx->page_limit)
     209             :             {
     210             :                 struct sigaction segv;
     211             :                 struct sigaction old_segv;
     212             :                 struct sigaction bus;
     213             :                 struct sigaction old_bus;
     214             : 
     215           0 :                 memset(&segv, 0, sizeof(segv));
     216           0 :                 memset(&old_segv, 0, sizeof(old_segv));
     217           0 :                 memset(&bus, 0, sizeof(bus));
     218           0 :                 memset(&old_bus, 0, sizeof(old_bus));
     219             : 
     220             :                 // Step 1 from the block comment above
     221           0 :                 segv.sa_handler = signal_handler;
     222           0 :                 bus.sa_handler = signal_handler;
     223           0 :                 if (sigaction(SIGSEGV, &segv, &old_segv) == -1)
     224             :                 {
     225           0 :                     CPLError(
     226             :                         CE_Failure, CPLE_AppDefined,
     227             :                         "cpl_uffd_fault_handler: sigaction(SIGSEGV) failed");
     228           0 :                     pthread_mutex_unlock(&mutex);
     229           0 :                     break;
     230             :                 }
     231           0 :                 if (sigaction(SIGBUS, &bus, &old_bus) == -1)
     232             :                 {
     233           0 :                     CPLError(
     234             :                         CE_Failure, CPLE_AppDefined,
     235             :                         "cpl_uffd_fault_handler: sigaction(SIGBUS) failed");
     236           0 :                     pthread_mutex_unlock(&mutex);
     237           0 :                     break;
     238             :                 }
     239             : 
     240             :                 // WARNING: LACK OF THREAD-SAFETY.
     241             :                 //
     242             :                 // For example, if a user program (or another part of the
     243             :                 // library) installs a SIGSEGV or SIGBUS handler from another
     244             :                 // thread after this one has installed its handlers but before
     245             :                 // this one uninstalls its handlers, the intervening handler
     246             :                 // will be eliminated.  There are other examples, as well, but
     247             :                 // there can only be problems with other threads because the
     248             :                 // faulting thread is blocked here.
     249             :                 //
     250             :                 // This implies that one should not use cpl_virtualmem.h API
     251             :                 // while other threads are actively generating faults that use
     252             :                 // this mechanism.
     253             :                 //
     254             :                 // Having multiple active threads that use this mechanism but
     255             :                 // with no changes to signal-handling in other threads is NOT a
     256             :                 // problem.
     257             : 
     258             :                 // Step 2
     259           0 :                 if (mprotect(ctx->vma_ptr, ctx->vma_size, PROT_NONE) == -1)
     260             :                 {
     261           0 :                     CPLError(CE_Failure, CPLE_AppDefined,
     262             :                              "cpl_uffd_fault_handler: mprotect() failed");
     263           0 :                     pthread_mutex_unlock(&mutex);
     264           0 :                     break;
     265             :                 }
     266             : 
     267             :                 // Step 3
     268           0 :                 if (ioctl(ctx->uffd, UFFDIO_UNREGISTER, &ctx->uffdio_register))
     269             :                 {
     270           0 :                     CPLError(CE_Failure, CPLE_AppDefined,
     271             :                              "cpl_uffd_fault_handler: ioctl(UFFDIO_UNREGISTER) "
     272             :                              "failed");
     273           0 :                     pthread_mutex_unlock(&mutex);
     274           0 :                     break;
     275             :                 }
     276           0 :                 ctx->vma_ptr =
     277           0 :                     mmap(ctx->vma_ptr, ctx->vma_size, PROT_NONE,
     278             :                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
     279           0 :                 if (ctx->vma_ptr == BAD_MMAP)
     280             :                 {
     281           0 :                     CPLError(CE_Failure, CPLE_AppDefined,
     282             :                              "cpl_uffd_fault_handler: mmap() failed");
     283           0 :                     ctx->vma_ptr = nullptr;
     284           0 :                     pthread_mutex_unlock(&mutex);
     285           0 :                     break;
     286             :                 }
     287           0 :                 ctx->pages_used = 0;
     288           0 :                 if (ioctl(ctx->uffd, UFFDIO_REGISTER, &ctx->uffdio_register))
     289             :                 {
     290           0 :                     CPLError(CE_Failure, CPLE_AppDefined,
     291             :                              "cpl_uffd_fault_handler: ioctl(UFFDIO_REGISTER) "
     292             :                              "failed");
     293           0 :                     pthread_mutex_unlock(&mutex);
     294           0 :                     break;
     295             :                 }
     296             : 
     297             :                 // Step 4.  Problem: A thread might attempt to read here (before
     298             :                 // the mprotect) and receive a SIGSEGV or SIGBUS.
     299           0 :                 if (mprotect(ctx->vma_ptr, ctx->vma_size, PROT_READ) == -1)
     300             :                 {
     301           0 :                     CPLError(CE_Failure, CPLE_AppDefined,
     302             :                              "cpl_uffd_fault_handler: mprotect() failed");
     303           0 :                     pthread_mutex_unlock(&mutex);
     304           0 :                     break;
     305             :                 }
     306             : 
     307             :                 // Step 5.  Solution: Cannot unregister special handlers before
     308             :                 // any such threads have been handled by them, so sleep for
     309             :                 // 1/100th of a second.
     310             :                 // Coverity complains about sleeping under a mutex
     311             :                 // coverity[sleep]
     312           0 :                 usleep(10000);
     313           0 :                 if (sigaction(SIGSEGV, &old_segv, nullptr) == -1)
     314             :                 {
     315           0 :                     CPLError(
     316             :                         CE_Failure, CPLE_AppDefined,
     317             :                         "cpl_uffd_fault_handler: sigaction(SIGSEGV) failed");
     318           0 :                     pthread_mutex_unlock(&mutex);
     319           0 :                     break;
     320             :                 }
     321           0 :                 if (sigaction(SIGBUS, &old_bus, nullptr) == -1)
     322             :                 {
     323           0 :                     CPLError(
     324             :                         CE_Failure, CPLE_AppDefined,
     325             :                         "cpl_uffd_fault_handler: sigaction(SIGBUS) failed");
     326           0 :                     pthread_mutex_unlock(&mutex);
     327           0 :                     break;
     328             :                 }
     329             :             }
     330           0 :             pthread_mutex_unlock(&mutex);
     331             :         }
     332             : 
     333             :         // Handle page fault events
     334           4 :         for (int i = 0; i < static_cast<int>(bytes_read / sizeof(uffd_msg));
     335             :              ++i)
     336             :         {
     337           2 :             const uintptr_t fault_addr =
     338           2 :                 ctx->uffd_msgs[i].arg.pagefault.address & ~(ctx->page_size - 1);
     339           2 :             const uintptr_t offset =
     340           2 :                 fault_addr - reinterpret_cast<uintptr_t>(ctx->vma_ptr);
     341           2 :             size_t bytes_needed = static_cast<size_t>(ctx->file_size - offset);
     342           2 :             if (bytes_needed > ctx->page_size)
     343           0 :                 bytes_needed = ctx->page_size;
     344             : 
     345             :             // Copy data into page
     346           4 :             if (VSIFSeekL(file, offset, SEEK_SET) != 0 ||
     347           2 :                 VSIFReadL(ctx->page_ptr, bytes_needed, 1, file) != 1)
     348             :             {
     349           0 :                 CPLError(CE_Failure, CPLE_FileIO,
     350             :                          "Cannot get %d bytes at offset " CPL_FRMT_GUIB " of "
     351             :                          "file %s",
     352             :                          static_cast<int>(bytes_needed),
     353             :                          static_cast<GUIntBig>(offset), ctx->filename.c_str());
     354           0 :                 memset(ctx->page_ptr, 0, bytes_needed);
     355             :             }
     356           2 :             ctx->pages_used++;
     357             : 
     358             :             // Use the page to fulfill the page fault
     359           2 :             uffdio_copy.src = reinterpret_cast<uintptr_t>(ctx->page_ptr);
     360           2 :             uffdio_copy.dst = fault_addr;
     361           2 :             uffdio_copy.len = static_cast<uintptr_t>(ctx->page_size);
     362           2 :             uffdio_copy.mode = 0;
     363           2 :             uffdio_copy.copy = 0;
     364           2 :             if (ioctl(ctx->uffd, UFFDIO_COPY, &uffdio_copy) == -1)
     365             :             {
     366           0 :                 CPLError(CE_Failure, CPLE_AppDefined,
     367             :                          "ioctl(UFFDIO_COPY) failed");
     368           0 :                 break;
     369             :             }
     370             :         }
     371             :     }  // end of while loop
     372             : 
     373             :     // Return resources
     374           2 :     VSIFCloseL(file);
     375             : }
     376             : 
     377           0 : static void signal_handler(int signal)
     378             : {
     379           0 :     if (signal == SIGSEGV || signal == SIGBUS)
     380           0 :         sched_yield();
     381           0 :     return;
     382             : }
     383             : 
     384          13 : bool CPLIsUserFaultMappingSupported()
     385             : {
     386             :     // Check the Linux kernel version.  Linux 4.3 or newer is needed for
     387             :     // userfaultfd.
     388          13 :     int major = 0, minor = 0;
     389             :     struct utsname utsname;
     390             : 
     391          13 :     if (uname(&utsname))
     392           0 :         return false;
     393          13 :     sscanf(utsname.release, "%d.%d", &major, &minor);
     394          13 :     if (major < 4)
     395           0 :         return false;
     396          13 :     if (major == 4 && minor < 3)
     397           0 :         return false;
     398             : 
     399             :     static int nEnableUserFaultFD = -1;
     400          13 :     if (nEnableUserFaultFD < 0)
     401             :     {
     402           9 :         nEnableUserFaultFD =
     403           9 :             CPLTestBool(CPLGetConfigOption("CPL_ENABLE_USERFAULTFD", "YES"));
     404             :     }
     405          13 :     if (!nEnableUserFaultFD)
     406           0 :         return false;
     407             : 
     408             :     // Since kernel 5.2, raw userfaultfd is disabled since if the fault
     409             :     // originates from the kernel, that could lead to easier exploitation of
     410             :     // kernel bugs. Since kernel 5.11, UFFD_USER_MODE_ONLY can be used to
     411             :     // restrict the mechanism to faults occurring only from user space, which is
     412             :     // likely to be our use case.
     413          13 :     int uffd = static_cast<int>(syscall(
     414          13 :         __NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY));
     415          13 :     if (uffd == -1 && errno == EINVAL)
     416           0 :         uffd =
     417           0 :             static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK));
     418          13 :     if (uffd == -1)
     419             :     {
     420           0 :         const int l_errno = errno;
     421           0 :         if (l_errno == EPERM)
     422             :         {
     423             :             // Since kernel 5.2
     424           0 :             CPLDebug(
     425             :                 "GDAL",
     426             :                 "CPLIsUserFaultMappingSupported(): syscall(__NR_userfaultfd) "
     427             :                 "failed: "
     428             :                 "insufficient permission. add CAP_SYS_PTRACE capability, or "
     429             :                 "set /proc/sys/vm/unprivileged_userfaultfd to 1");
     430             :         }
     431             :         else
     432             :         {
     433           0 :             CPLDebug(
     434             :                 "GDAL",
     435             :                 "CPLIsUserFaultMappingSupported(): syscall(__NR_userfaultfd) "
     436             :                 "failed: "
     437             :                 "error = %d",
     438             :                 l_errno);
     439             :         }
     440           0 :         nEnableUserFaultFD = false;
     441           0 :         return false;
     442             :     }
     443          13 :     close(uffd);
     444          13 :     nEnableUserFaultFD = true;
     445          13 :     return true;
     446             : }
     447             : 
     448             : /*
     449             :  * Returns nullptr on failure, a valid pointer on success.
     450             :  */
     451           2 : cpl_uffd_context *CPLCreateUserFaultMapping(const char *pszFilename,
     452             :                                             void **ppVma, uint64_t *pnVmaSize)
     453             : {
     454             :     VSIStatBufL statbuf;
     455           2 :     struct cpl_uffd_context *ctx = nullptr;
     456             : 
     457           2 :     if (!CPLIsUserFaultMappingSupported())
     458             :     {
     459           0 :         CPLError(
     460             :             CE_Failure, CPLE_NotSupported,
     461             :             "CPLCreateUserFaultMapping(): Linux kernel 4.3 or newer needed");
     462           0 :         return nullptr;
     463             :     }
     464             : 
     465             :     // Get the size of the asset
     466           2 :     if (VSIStatL(pszFilename, &statbuf))
     467           0 :         return nullptr;
     468             : 
     469             :     // Setup the `cpl_uffd_context` struct
     470           2 :     ctx = new cpl_uffd_context();
     471           2 :     ctx->keep_going = true;
     472           2 :     ctx->filename = std::string(pszFilename);
     473           2 :     ctx->page_limit = get_page_limit();
     474           2 :     ctx->pages_used = 0;
     475           2 :     ctx->file_size = static_cast<size_t>(statbuf.st_size);
     476           2 :     ctx->page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
     477           2 :     ctx->vma_size = static_cast<size_t>(
     478           2 :         ((static_cast<vsi_l_offset>(statbuf.st_size) / ctx->page_size) + 1) *
     479           2 :         ctx->page_size);
     480           2 :     if (ctx->vma_size < static_cast<vsi_l_offset>(statbuf.st_size))
     481             :     {  // Check for overflow
     482           0 :         uffd_cleanup(ctx);
     483           0 :         CPLError(
     484             :             CE_Failure, CPLE_AppDefined,
     485             :             "CPLCreateUserFaultMapping(): File too large for architecture");
     486           0 :         return nullptr;
     487             :     }
     488             : 
     489             :     // If the mmap failed, free resources and return
     490           2 :     ctx->vma_ptr = mmap(nullptr, ctx->vma_size, PROT_READ,
     491             :                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     492           2 :     if (ctx->vma_ptr == BAD_MMAP)
     493             :     {
     494           0 :         ctx->vma_ptr = nullptr;
     495           0 :         uffd_cleanup(ctx);
     496           0 :         CPLError(CE_Failure, CPLE_AppDefined,
     497             :                  "CPLCreateUserFaultMapping(): mmap() failed");
     498           0 :         return nullptr;
     499             :     }
     500             : 
     501             :     // Attempt to acquire a scratch page to use to fulfill requests.
     502           2 :     ctx->page_ptr =
     503           2 :         mmap(nullptr, static_cast<size_t>(ctx->page_size),
     504             :              PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     505           2 :     if (ctx->page_ptr == BAD_MMAP)
     506             :     {
     507           0 :         ctx->page_ptr = nullptr;
     508           0 :         uffd_cleanup(ctx);
     509           0 :         CPLError(CE_Failure, CPLE_AppDefined,
     510             :                  "CPLCreateUserFaultMapping(): mmap() failed");
     511           0 :         return nullptr;
     512             :     }
     513             : 
     514             :     // Get userfaultfd
     515             : 
     516             :     // Since kernel 5.2, raw userfaultfd is disabled since if the fault
     517             :     // originates from the kernel, that could lead to easier exploitation of
     518             :     // kernel bugs. Since kernel 5.11, UFFD_USER_MODE_ONLY can be used to
     519             :     // restrict the mechanism to faults occurring only from user space, which is
     520             :     // likely to be our use case.
     521           2 :     ctx->uffd = static_cast<int>(syscall(
     522             :         __NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY));
     523           2 :     if (ctx->uffd == -1 && errno == EINVAL)
     524           0 :         ctx->uffd =
     525           0 :             static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK));
     526           2 :     if (ctx->uffd == -1)
     527             :     {
     528           0 :         const int l_errno = errno;
     529           0 :         ctx->uffd = -1;
     530           0 :         uffd_cleanup(ctx);
     531           0 :         if (l_errno == EPERM)
     532             :         {
     533             :             // Since kernel 5.2
     534           0 :             CPLError(
     535             :                 CE_Failure, CPLE_AppDefined,
     536             :                 "CPLCreateUserFaultMapping(): syscall(__NR_userfaultfd) "
     537             :                 "failed: "
     538             :                 "insufficient permission. add CAP_SYS_PTRACE capability, or "
     539             :                 "set /proc/sys/vm/unprivileged_userfaultfd to 1");
     540             :         }
     541             :         else
     542             :         {
     543           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     544             :                      "CPLCreateUserFaultMapping(): syscall(__NR_userfaultfd) "
     545             :                      "failed: "
     546             :                      "error = %d",
     547             :                      l_errno);
     548             :         }
     549           0 :         return nullptr;
     550             :     }
     551             : 
     552             :     // Query API
     553             :     {
     554           2 :         struct uffdio_api uffdio_api = {};
     555             : 
     556           2 :         uffdio_api.api = UFFD_API;
     557           2 :         uffdio_api.features = 0;
     558             : 
     559           2 :         if (ioctl(ctx->uffd, UFFDIO_API, &uffdio_api) == -1)
     560             :         {
     561           0 :             uffd_cleanup(ctx);
     562           0 :             CPLError(CE_Failure, CPLE_AppDefined,
     563             :                      "CPLCreateUserFaultMapping(): ioctl(UFFDIO_API) failed");
     564           0 :             return nullptr;
     565             :         }
     566             :     }
     567             : 
     568             :     // Register memory range
     569           2 :     ctx->uffdio_register.range.start =
     570           2 :         reinterpret_cast<uintptr_t>(ctx->vma_ptr);
     571           2 :     ctx->uffdio_register.range.len = ctx->vma_size;
     572           2 :     ctx->uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
     573             : 
     574           2 :     if (ioctl(ctx->uffd, UFFDIO_REGISTER, &ctx->uffdio_register) == -1)
     575             :     {
     576           0 :         uffd_cleanup(ctx);
     577           0 :         CPLError(CE_Failure, CPLE_AppDefined,
     578             :                  "CPLCreateUserFaultMapping(): ioctl(UFFDIO_REGISTER) failed");
     579           0 :         return nullptr;
     580             :     }
     581             : 
     582             :     // Start handler thread
     583           2 :     ctx->thread = CPLCreateJoinableThread(cpl_uffd_fault_handler, ctx);
     584           2 :     if (ctx->thread == nullptr)
     585             :     {
     586           0 :         CPLError(
     587             :             CE_Failure, CPLE_AppDefined,
     588             :             "CPLCreateUserFaultMapping(): CPLCreateJoinableThread() failed");
     589           0 :         uffd_cleanup(ctx);
     590           0 :         return nullptr;
     591             :     }
     592             : 
     593           2 :     *ppVma = ctx->vma_ptr;
     594           2 :     *pnVmaSize = ctx->vma_size;
     595           2 :     return ctx;
     596             : }
     597             : 
     598         642 : void CPLDeleteUserFaultMapping(cpl_uffd_context *ctx)
     599             : {
     600         642 :     if (ctx)
     601             :     {
     602           2 :         uffd_cleanup(ctx);
     603             :     }
     604         642 : }
     605             : 
     606             : #endif  // ENABLE_UFFD

Generated by: LCOV version 1.14