LCOV - code coverage report
Current view: top level - frmts/pdf - pdfio.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 115 137 83.9 %
Date: 2026-01-09 20:32:01 Functions: 18 22 81.8 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  PDF driver
       4             :  * Purpose:  GDALDataset driver for PDF dataset.
       5             :  * Author:   Even Rouault, <even dot rouault at spatialys.com>
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
       9             :  *
      10             :  * SPDX-License-Identifier: MIT
      11             :  ****************************************************************************/
      12             : 
      13             : #include "gdal_pdf.h"
      14             : 
      15             : #ifdef HAVE_POPPLER
      16             : 
      17             : #include "pdfio.h"
      18             : 
      19             : #include "cpl_vsi.h"
      20             : 
      21         233 : static vsi_l_offset VSIPDFFileStreamGetSize(VSILFILE *f)
      22             : {
      23         233 :     VSIFSeekL(f, 0, SEEK_END);
      24         233 :     vsi_l_offset nSize = VSIFTellL(f);
      25         233 :     VSIFSeekL(f, 0, SEEK_SET);
      26         233 :     return nSize;
      27             : }
      28             : 
      29             : /************************************************************************/
      30             : /*                         VSIPDFFileStream()                           */
      31             : /************************************************************************/
      32             : 
      33         233 : VSIPDFFileStream::VSIPDFFileStream(VSILFILE *fIn, const char *pszFilename,
      34         233 :                                    Object &&dictA)
      35         233 :     : BaseStream(std::move(dictA),
      36         466 :                  static_cast<Goffset>(VSIPDFFileStreamGetSize(fIn))),
      37         233 :       poParent(nullptr), poFilename(new GooString(pszFilename)), f(fIn)
      38             : {
      39         233 : }
      40             : 
      41             : /************************************************************************/
      42             : /*                         VSIPDFFileStream()                           */
      43             : /************************************************************************/
      44             : 
      45       10438 : VSIPDFFileStream::VSIPDFFileStream(VSIPDFFileStream *poParentIn,
      46             :                                    vsi_l_offset startA, bool limitedA,
      47       10438 :                                    vsi_l_offset lengthA, Object &&dictA)
      48       10438 :     : BaseStream(std::move(dictA), static_cast<Goffset>(lengthA)),
      49       10438 :       poParent(poParentIn), poFilename(poParentIn->poFilename),
      50       10438 :       f(poParentIn->f), nStart(startA), bLimited(limitedA), nLength(lengthA)
      51             : {
      52       10438 : }
      53             : 
      54             : /************************************************************************/
      55             : /*                        ~VSIPDFFileStream()                           */
      56             : /************************************************************************/
      57             : 
      58       21342 : VSIPDFFileStream::~VSIPDFFileStream()
      59             : {
      60       10671 :     close();
      61       10671 :     if (poParent == nullptr)
      62             :     {
      63         233 :         delete poFilename;
      64             :     }
      65       21342 : }
      66             : 
      67             : /************************************************************************/
      68             : /*                                  copy()                              */
      69             : /************************************************************************/
      70             : 
      71           0 : BaseStream *VSIPDFFileStream::copy()
      72             : {
      73           0 :     return new VSIPDFFileStream(poParent, nStart, bLimited, nLength,
      74           0 :                                 dict.copy());
      75             : }
      76             : 
      77             : /************************************************************************/
      78             : /*                             makeSubStream()                          */
      79             : /************************************************************************/
      80             : 
      81             : #if POPPLER_MAJOR_VERSION > 25 ||                                              \
      82             :     (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 5)
      83             : std::unique_ptr<Stream> VSIPDFFileStream::makeSubStream(Goffset startA,
      84             :                                                         bool limitedA,
      85             :                                                         Goffset lengthA,
      86             :                                                         Object &&dictA)
      87             : {
      88             :     return std::make_unique<VSIPDFFileStream>(this, startA, limitedA, lengthA,
      89             :                                               std::move(dictA));
      90             : }
      91             : #else
      92       10438 : Stream *VSIPDFFileStream::makeSubStream(Goffset startA, bool limitedA,
      93             :                                         Goffset lengthA, Object &&dictA)
      94             : {
      95             :     return new VSIPDFFileStream(this, startA, limitedA, lengthA,
      96       10438 :                                 std::move(dictA));
      97             : }
      98             : #endif
      99             : 
     100             : /************************************************************************/
     101             : /*                                 getPos()                             */
     102             : /************************************************************************/
     103             : 
     104        2546 : Goffset VSIPDFFileStream::getPos()
     105             : {
     106        2546 :     return static_cast<Goffset>(nCurrentPos);
     107             : }
     108             : 
     109             : /************************************************************************/
     110             : /*                                getStart()                            */
     111             : /************************************************************************/
     112             : 
     113         474 : Goffset VSIPDFFileStream::getStart()
     114             : {
     115         474 :     return static_cast<Goffset>(nStart);
     116             : }
     117             : 
     118             : /************************************************************************/
     119             : /*                             getKind()                                */
     120             : /************************************************************************/
     121             : 
     122           0 : StreamKind VSIPDFFileStream::getKind() const
     123             : {
     124           0 :     return strFile;
     125             : }
     126             : 
     127             : /************************************************************************/
     128             : /*                           getFileName()                               */
     129             : /************************************************************************/
     130             : 
     131         466 : GooString *VSIPDFFileStream::getFileName()
     132             : {
     133         466 :     return poFilename;
     134             : }
     135             : 
     136             : /************************************************************************/
     137             : /*                             FillBuffer()                             */
     138             : /************************************************************************/
     139             : 
     140       42407 : int VSIPDFFileStream::FillBuffer()
     141             : {
     142       42407 :     if (nBufferLength == 0)
     143           0 :         return FALSE;
     144       42407 :     if (nBufferLength != -1 && nBufferLength < BUFFER_SIZE)
     145         455 :         return FALSE;
     146             : 
     147       41952 :     nPosInBuffer = 0;
     148             :     int nToRead;
     149       41952 :     if (!bLimited)
     150       10981 :         nToRead = BUFFER_SIZE;
     151       30971 :     else if (nCurrentPos + BUFFER_SIZE > nStart + nLength)
     152        1296 :         nToRead = static_cast<int>(nStart + nLength - nCurrentPos);
     153             :     else
     154       29675 :         nToRead = BUFFER_SIZE;
     155       41952 :     if (nToRead < 0)
     156           0 :         return FALSE;
     157       41952 :     nBufferLength = static_cast<int>(VSIFReadL(abyBuffer, 1, nToRead, f));
     158       41952 :     if (nBufferLength == 0)
     159          11 :         return FALSE;
     160             : 
     161             :     // Since we now report a non-zero length (as BaseStream::length member),
     162             :     // PDFDoc::getPage() can go to the linearized mode if the file is
     163             :     // linearized, and thus create a pageCache. If so, in PDFDoc::~PDFDoc(), if
     164             :     // pageCache is not null, it would try to access the stream (str) through
     165             :     // getPageCount(), but we have just freed and nullify str before in
     166             :     // PDFFreeDoc(). So make as if the file is not linearized to avoid those
     167             :     // issues... All this is due to our attempt of avoiding cross-heap issues
     168             :     // with allocation and liberation of VSIPDFFileStream as PDFDoc::str member.
     169       41941 :     if (nCurrentPos == 0 || nCurrentPos == VSI_L_OFFSET_MAX)
     170             :     {
     171      506966 :         for (int i = 0;
     172      506966 :              i < nBufferLength - static_cast<int>(strlen("/Linearized ")); i++)
     173             :         {
     174      506446 :             if (memcmp(abyBuffer + i, "/Linearized ", strlen("/Linearized ")) ==
     175             :                 0)
     176             :             {
     177           0 :                 bFoundLinearizedHint = true;
     178           0 :                 memcpy(abyBuffer + i, "/XXXXXXXXXX ", strlen("/Linearized "));
     179           0 :                 break;
     180             :             }
     181             :         }
     182             :     }
     183             : 
     184       41941 :     return TRUE;
     185             : }
     186             : 
     187             : /************************************************************************/
     188             : /*                                getChar()                             */
     189             : /************************************************************************/
     190             : 
     191             : /* The unoptimized version performs a bit less since we must go through */
     192             : /* the whole virtual I/O chain for each character reading. We save a few */
     193             : /* percent with this extra internal caching */
     194             : 
     195    31424600 : int VSIPDFFileStream::getChar()
     196             : {
     197             : #ifdef unoptimized_version
     198             :     GByte chRead;
     199             :     if (bLimited && nCurrentPos >= nStart + nLength)
     200             :         return EOF;
     201             :     if (VSIFReadL(&chRead, 1, 1, f) == 0)
     202             :         return EOF;
     203             : #else
     204    31424600 :     if (nPosInBuffer == nBufferLength)
     205             :     {
     206       41271 :         if (!FillBuffer() || nPosInBuffer >= nBufferLength)
     207         165 :             return EOF;
     208             :     }
     209             : 
     210    31424500 :     GByte chRead = abyBuffer[nPosInBuffer];
     211    31424500 :     nPosInBuffer++;
     212             : #endif
     213    31424500 :     nCurrentPos++;
     214    31424500 :     return chRead;
     215             : }
     216             : 
     217             : /************************************************************************/
     218             : /*                       getUnfilteredChar()                            */
     219             : /************************************************************************/
     220             : 
     221           0 : int VSIPDFFileStream::getUnfilteredChar()
     222             : {
     223           0 :     return getChar();
     224             : }
     225             : 
     226             : /************************************************************************/
     227             : /*                               lookChar()                             */
     228             : /************************************************************************/
     229             : 
     230         528 : int VSIPDFFileStream::lookChar()
     231             : {
     232             : #ifdef unoptimized_version
     233             :     int nPosBefore = nCurrentPos;
     234             :     int chRead = getChar();
     235             :     if (chRead == EOF)
     236             :         return EOF;
     237             :     VSIFSeekL(f, nCurrentPos = nPosBefore, SEEK_SET);
     238             :     return chRead;
     239             : #else
     240         528 :     int chRead = getChar();
     241         528 :     if (chRead == EOF)
     242          28 :         return EOF;
     243         500 :     nPosInBuffer--;
     244         500 :     nCurrentPos--;
     245         500 :     return chRead;
     246             : #endif
     247             : }
     248             : 
     249             : /************************************************************************/
     250             : /*                                reset()                               */
     251             : /************************************************************************/
     252             : 
     253             : #if POPPLER_MAJOR_VERSION > 25
     254             : bool VSIPDFFileStream::rewind()
     255             : #elif POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2
     256             : bool VSIPDFFileStream::reset()
     257             : #else
     258       10414 : void VSIPDFFileStream::reset()
     259             : #endif
     260             : {
     261       10414 :     nSavedPos = VSIFTellL(f);
     262       10414 :     bHasSavedPos = TRUE;
     263       10414 :     VSIFSeekL(f, nCurrentPos = nStart, SEEK_SET);
     264       10414 :     nPosInBuffer = -1;
     265       10414 :     nBufferLength = -1;
     266             : #if POPPLER_MAJOR_VERSION > 25 ||                                              \
     267             :     (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2)
     268             :     return true;
     269             : #endif
     270       10414 : }
     271             : 
     272             : /************************************************************************/
     273             : /*                         unfilteredReset()                            */
     274             : /************************************************************************/
     275             : 
     276             : #if POPPLER_MAJOR_VERSION > 25
     277             : bool VSIPDFFileStream::unfilteredRewind()
     278             : {
     279             :     return rewind();
     280             : }
     281             : #elif POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 3
     282             : bool VSIPDFFileStream::unfilteredReset()
     283             : {
     284             :     return reset();
     285             : }
     286             : #else
     287           0 : void VSIPDFFileStream::unfilteredReset()
     288             : {
     289           0 :     reset();
     290           0 : }
     291             : #endif
     292             : 
     293             : /************************************************************************/
     294             : /*                                close()                               */
     295             : /************************************************************************/
     296             : 
     297       19474 : void VSIPDFFileStream::close()
     298             : {
     299       19474 :     if (bHasSavedPos)
     300             :     {
     301        9768 :         nCurrentPos = nSavedPos;
     302        9768 :         VSIFSeekL(f, nCurrentPos, SEEK_SET);
     303             :     }
     304       19474 :     bHasSavedPos = FALSE;
     305       19474 :     nSavedPos = 0;
     306       19474 : }
     307             : 
     308             : /************************************************************************/
     309             : /*                               setPos()                               */
     310             : /************************************************************************/
     311             : 
     312        2293 : void VSIPDFFileStream::setPos(Goffset pos, int dir)
     313             : {
     314        2293 :     if (dir >= 0)
     315             :     {
     316        1815 :         VSIFSeekL(f, nCurrentPos = pos, SEEK_SET);
     317             :     }
     318             :     else
     319             :     {
     320         478 :         if (bLimited == false)
     321             :         {
     322         478 :             VSIFSeekL(f, 0, SEEK_END);
     323             :         }
     324             :         else
     325             :         {
     326           0 :             VSIFSeekL(f, nStart + nLength, SEEK_SET);
     327             :         }
     328         478 :         vsi_l_offset size = VSIFTellL(f);
     329         478 :         vsi_l_offset newpos = static_cast<vsi_l_offset>(pos);
     330         478 :         if (newpos > size)
     331          38 :             newpos = size;
     332         478 :         VSIFSeekL(f, nCurrentPos = size - newpos, SEEK_SET);
     333             :     }
     334        2293 :     nPosInBuffer = -1;
     335        2293 :     nBufferLength = -1;
     336        2293 : }
     337             : 
     338             : /************************************************************************/
     339             : /*                            moveStart()                               */
     340             : /************************************************************************/
     341             : 
     342         233 : void VSIPDFFileStream::moveStart(Goffset delta)
     343             : {
     344         233 :     nStart += delta;
     345         233 :     nCurrentPos = nStart;
     346         233 :     VSIFSeekL(f, nCurrentPos, SEEK_SET);
     347         233 :     nPosInBuffer = -1;
     348         233 :     nBufferLength = -1;
     349         233 : }
     350             : 
     351             : /************************************************************************/
     352             : /*                          hasGetChars()                               */
     353             : /************************************************************************/
     354             : 
     355         880 : bool VSIPDFFileStream::hasGetChars()
     356             : {
     357         880 :     return true;
     358             : }
     359             : 
     360             : /************************************************************************/
     361             : /*                            getChars()                                */
     362             : /************************************************************************/
     363             : 
     364         880 : int VSIPDFFileStream::getChars(int nChars, unsigned char *buffer)
     365             : {
     366         880 :     int nRead = 0;
     367        2263 :     while (nRead < nChars)
     368             :     {
     369        1684 :         int nToRead = nChars - nRead;
     370        1684 :         if (nPosInBuffer == nBufferLength)
     371             :         {
     372        1136 :             if (!bLimited && nToRead > BUFFER_SIZE)
     373             :             {
     374             :                 int nJustRead =
     375           0 :                     static_cast<int>(VSIFReadL(buffer + nRead, 1, nToRead, f));
     376           0 :                 nPosInBuffer = -1;
     377           0 :                 nBufferLength = -1;
     378           0 :                 nCurrentPos += nJustRead;
     379           0 :                 nRead += nJustRead;
     380           0 :                 break;
     381             :             }
     382        1136 :             else if (!FillBuffer() || nPosInBuffer >= nBufferLength)
     383         301 :                 break;
     384             :         }
     385        1383 :         if (nToRead > nBufferLength - nPosInBuffer)
     386         804 :             nToRead = nBufferLength - nPosInBuffer;
     387             : 
     388        1383 :         memcpy(buffer + nRead, abyBuffer + nPosInBuffer, nToRead);
     389        1383 :         nPosInBuffer += nToRead;
     390        1383 :         nCurrentPos += nToRead;
     391        1383 :         nRead += nToRead;
     392             :     }
     393         880 :     return nRead;
     394             : }
     395             : 
     396             : #endif

Generated by: LCOV version 1.14