LCOV - code coverage report
Current view: top level - frmts/pdf - pdfio.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 115 137 83.9 %
Date: 2025-09-10 17:48:50 Functions: 18 22 81.8 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  *
       3             :  * Project:  PDF driver
       4             :  * Purpose:  GDALDataset driver for PDF dataset.
       5             :  * Author:   Even Rouault, <even dot rouault at spatialys.com>
       6             :  *
       7             :  ******************************************************************************
       8             :  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
       9             :  *
      10             :  * SPDX-License-Identifier: MIT
      11             :  ****************************************************************************/
      12             : 
      13             : #include "gdal_pdf.h"
      14             : 
      15             : #ifdef HAVE_POPPLER
      16             : 
      17             : #include "pdfio.h"
      18             : 
      19             : #include "cpl_vsi.h"
      20             : 
      21         228 : static vsi_l_offset VSIPDFFileStreamGetSize(VSILFILE *f)
      22             : {
      23         228 :     VSIFSeekL(f, 0, SEEK_END);
      24         228 :     vsi_l_offset nSize = VSIFTellL(f);
      25         228 :     VSIFSeekL(f, 0, SEEK_SET);
      26         228 :     return nSize;
      27             : }
      28             : 
      29             : /************************************************************************/
      30             : /*                         VSIPDFFileStream()                           */
      31             : /************************************************************************/
      32             : 
      33         228 : VSIPDFFileStream::VSIPDFFileStream(VSILFILE *fIn, const char *pszFilename,
      34         228 :                                    Object &&dictA)
      35         228 :     : BaseStream(std::move(dictA),
      36         456 :                  static_cast<Goffset>(VSIPDFFileStreamGetSize(fIn))),
      37         228 :       poParent(nullptr), poFilename(new GooString(pszFilename)), f(fIn)
      38             : {
      39         228 : }
      40             : 
      41             : /************************************************************************/
      42             : /*                         VSIPDFFileStream()                           */
      43             : /************************************************************************/
      44             : 
      45       10307 : VSIPDFFileStream::VSIPDFFileStream(VSIPDFFileStream *poParentIn,
      46             :                                    vsi_l_offset startA, bool limitedA,
      47       10307 :                                    vsi_l_offset lengthA, Object &&dictA)
      48       10307 :     : BaseStream(std::move(dictA), static_cast<Goffset>(lengthA)),
      49       10307 :       poParent(poParentIn), poFilename(poParentIn->poFilename),
      50       10307 :       f(poParentIn->f), nStart(startA), bLimited(limitedA), nLength(lengthA)
      51             : {
      52       10307 : }
      53             : 
      54             : /************************************************************************/
      55             : /*                        ~VSIPDFFileStream()                           */
      56             : /************************************************************************/
      57             : 
      58       21070 : VSIPDFFileStream::~VSIPDFFileStream()
      59             : {
      60       10535 :     close();
      61       10535 :     if (poParent == nullptr)
      62             :     {
      63         228 :         delete poFilename;
      64             :     }
      65       21070 : }
      66             : 
      67             : /************************************************************************/
      68             : /*                                  copy()                              */
      69             : /************************************************************************/
      70             : 
      71           0 : BaseStream *VSIPDFFileStream::copy()
      72             : {
      73           0 :     return new VSIPDFFileStream(poParent, nStart, bLimited, nLength,
      74           0 :                                 dict.copy());
      75             : }
      76             : 
      77             : /************************************************************************/
      78             : /*                             makeSubStream()                          */
      79             : /************************************************************************/
      80             : 
      81             : #if POPPLER_MAJOR_VERSION > 25 ||                                              \
      82             :     (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 5)
      83             : std::unique_ptr<Stream> VSIPDFFileStream::makeSubStream(Goffset startA,
      84             :                                                         bool limitedA,
      85             :                                                         Goffset lengthA,
      86             :                                                         Object &&dictA)
      87             : {
      88             :     return std::make_unique<VSIPDFFileStream>(this, startA, limitedA, lengthA,
      89             :                                               std::move(dictA));
      90             : }
      91             : #else
      92       10307 : Stream *VSIPDFFileStream::makeSubStream(Goffset startA, bool limitedA,
      93             :                                         Goffset lengthA, Object &&dictA)
      94             : {
      95             :     return new VSIPDFFileStream(this, startA, limitedA, lengthA,
      96       10307 :                                 std::move(dictA));
      97             : }
      98             : #endif
      99             : 
     100             : /************************************************************************/
     101             : /*                                 getPos()                             */
     102             : /************************************************************************/
     103             : 
     104        2524 : Goffset VSIPDFFileStream::getPos()
     105             : {
     106        2524 :     return static_cast<Goffset>(nCurrentPos);
     107             : }
     108             : 
     109             : /************************************************************************/
     110             : /*                                getStart()                            */
     111             : /************************************************************************/
     112             : 
     113         464 : Goffset VSIPDFFileStream::getStart()
     114             : {
     115         464 :     return static_cast<Goffset>(nStart);
     116             : }
     117             : 
     118             : /************************************************************************/
     119             : /*                             getKind()                                */
     120             : /************************************************************************/
     121             : 
     122           0 : StreamKind VSIPDFFileStream::getKind() const
     123             : {
                           // Always reports the strFile stream kind.
     124           0 :     return strFile;
     125             : }
     126             : 
     127             : /************************************************************************/
     128             : /*                           getFileName()                               */
     129             : /************************************************************************/
     130             : 
     131         456 : GooString *VSIPDFFileStream::getFileName()
     132             : {
                           // Hands out the stored poFilename pointer itself, not a
                           // copy: the returned object is the one held by this stream.
     133         456 :     return poFilename;
     134             : }
     135             : 
     136             : /************************************************************************/
     137             : /*                             FillBuffer()                             */
     138             : /************************************************************************/
     139             : 
     140       42232 : int VSIPDFFileStream::FillBuffer()
     141             : {
     142       42232 :     if (nBufferLength == 0)
     143           0 :         return FALSE;
     144       42232 :     if (nBufferLength != -1 && nBufferLength < BUFFER_SIZE)
     145         435 :         return FALSE;
     146             : 
     147       41797 :     nPosInBuffer = 0;
     148             :     int nToRead;
     149       41797 :     if (!bLimited)
     150       10840 :         nToRead = BUFFER_SIZE;
     151       30957 :     else if (nCurrentPos + BUFFER_SIZE > nStart + nLength)
     152        1282 :         nToRead = static_cast<int>(nStart + nLength - nCurrentPos);
     153             :     else
     154       29675 :         nToRead = BUFFER_SIZE;
     155       41797 :     if (nToRead < 0)
     156           0 :         return FALSE;
     157       41797 :     nBufferLength = static_cast<int>(VSIFReadL(abyBuffer, 1, nToRead, f));
     158       41797 :     if (nBufferLength == 0)
     159          11 :         return FALSE;
     160             : 
     161             :     // Since we now report a non-zero length (as BaseStream::length member),
     162             :     // PDFDoc::getPage() can go to the linearized mode if the file is
     163             :     // linearized, and thus create a pageCache. If so, in PDFDoc::~PDFDoc(), if
     164             :     // pageCache is not null, it would try to access the stream (str) through
     165             :     // getPageCount(), but we have just freed and nullify str before in
     166             :     // PDFFreeDoc(). So make as if the file is not linearized to avoid those
     167             :     // issues... All this is due to our attempt of avoiding cross-heap issues
     168             :     // with allocation and liberation of VSIPDFFileStream as PDFDoc::str member.
     169       41786 :     if (nCurrentPos == 0 || nCurrentPos == VSI_L_OFFSET_MAX)
     170             :     {
     171      496836 :         for (int i = 0;
     172      496836 :              i < nBufferLength - static_cast<int>(strlen("/Linearized ")); i++)
     173             :         {
     174      496326 :             if (memcmp(abyBuffer + i, "/Linearized ", strlen("/Linearized ")) ==
     175             :                 0)
     176             :             {
     177           0 :                 bFoundLinearizedHint = true;
     178           0 :                 memcpy(abyBuffer + i, "/XXXXXXXXXX ", strlen("/Linearized "));
     179           0 :                 break;
     180             :             }
     181             :         }
     182             :     }
     183             : 
     184       41786 :     return TRUE;
     185             : }
     186             : 
     187             : /************************************************************************/
     188             : /*                                getChar()                             */
     189             : /************************************************************************/
     190             : 
     191             : /* The unoptimized version performs a bit worse since we must go through */
     192             : /* the whole virtual I/O chain for each character read. We save a few */
     193             : /* percent with this extra internal caching. */
     194             : 
     195    31403500 : int VSIPDFFileStream::getChar()
     196             : {
     197             : #ifdef unoptimized_version
     198             :     GByte chRead;
     199             :     if (bLimited && nCurrentPos >= nStart + nLength)
     200             :         return EOF;
     201             :     if (VSIFReadL(&chRead, 1, 1, f) == 0)
     202             :         return EOF;
     203             : #else
     204    31403500 :     if (nPosInBuffer == nBufferLength)
     205             :     {
     206       41126 :         if (!FillBuffer() || nPosInBuffer >= nBufferLength)
     207         165 :             return EOF;
     208             :     }
     209             : 
     210    31403400 :     GByte chRead = abyBuffer[nPosInBuffer];
     211    31403400 :     nPosInBuffer++;
     212             : #endif
     213    31403400 :     nCurrentPos++;
     214    31403400 :     return chRead;
     215             : }
     216             : 
     217             : /************************************************************************/
     218             : /*                       getUnfilteredChar()                            */
     219             : /************************************************************************/
     220             : 
     221           0 : int VSIPDFFileStream::getUnfilteredChar()
     222             : {
                           // Simply forwards to getChar(): same return value and
                           // same position advance.
     223           0 :     return getChar();
     224             : }
     225             : 
     226             : /************************************************************************/
     227             : /*                               lookChar()                             */
     228             : /************************************************************************/
     229             : 
     230         528 : int VSIPDFFileStream::lookChar()
     231             : {
     232             : #ifdef unoptimized_version
     233             :     int nPosBefore = nCurrentPos;
     234             :     int chRead = getChar();
     235             :     if (chRead == EOF)
     236             :         return EOF;
     237             :     VSIFSeekL(f, nCurrentPos = nPosBefore, SEEK_SET);
     238             :     return chRead;
     239             : #else
     240         528 :     int chRead = getChar();
     241         528 :     if (chRead == EOF)
     242          28 :         return EOF;
     243         500 :     nPosInBuffer--;
     244         500 :     nCurrentPos--;
     245         500 :     return chRead;
     246             : #endif
     247             : }
     248             : 
     249             : /************************************************************************/
     250             : /*                                reset()                               */
     251             : /************************************************************************/
     252             : 
     253             : #if POPPLER_MAJOR_VERSION > 25 ||                                              \
     254             :     (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2)
     255             : bool VSIPDFFileStream::reset()
     256             : #else
     257       10276 : void VSIPDFFileStream::reset()
     258             : #endif
     259             : {
     260       10276 :     nSavedPos = VSIFTellL(f);
     261       10276 :     bHasSavedPos = TRUE;
     262       10276 :     VSIFSeekL(f, nCurrentPos = nStart, SEEK_SET);
     263       10276 :     nPosInBuffer = -1;
     264       10276 :     nBufferLength = -1;
     265             : #if POPPLER_MAJOR_VERSION > 25 ||                                              \
     266             :     (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2)
     267             :     return true;
     268             : #endif
     269       10276 : }
     270             : 
     271             : /************************************************************************/
     272             : /*                         unfilteredReset()                            */
     273             : /************************************************************************/
     274             : 
     275             : #if POPPLER_MAJOR_VERSION > 25 ||                                              \
     276             :     (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 3)
     277             : bool VSIPDFFileStream::unfilteredReset()
     278             : {
                           // Poppler >= 25.03 variant: forwards to reset() and
                           // passes its result back.
     279             :     return reset();
     280             : }
     281             : #else
     282           0 : void VSIPDFFileStream::unfilteredReset()
     283             : {
                           // Older Poppler variant: forwards to reset(), nothing
                           // to return.
     284           0 :     reset();
     285           0 : }
     286             : #endif
     287             : 
     288             : /************************************************************************/
     289             : /*                                close()                               */
     290             : /************************************************************************/
     291             : 
     292       19224 : void VSIPDFFileStream::close()
     293             : {
     294       19224 :     if (bHasSavedPos)
     295             :     {
     296        9642 :         nCurrentPos = nSavedPos;
     297        9642 :         VSIFSeekL(f, nCurrentPos, SEEK_SET);
     298             :     }
     299       19224 :     bHasSavedPos = FALSE;
     300       19224 :     nSavedPos = 0;
     301       19224 : }
     302             : 
     303             : /************************************************************************/
     304             : /*                               setPos()                               */
     305             : /************************************************************************/
     306             : 
     307        2266 : void VSIPDFFileStream::setPos(Goffset pos, int dir)
     308             : {
     309        2266 :     if (dir >= 0)
     310             :     {
     311        1798 :         VSIFSeekL(f, nCurrentPos = pos, SEEK_SET);
     312             :     }
     313             :     else
     314             :     {
     315         468 :         if (bLimited == false)
     316             :         {
     317         468 :             VSIFSeekL(f, 0, SEEK_END);
     318             :         }
     319             :         else
     320             :         {
     321           0 :             VSIFSeekL(f, nStart + nLength, SEEK_SET);
     322             :         }
     323         468 :         vsi_l_offset size = VSIFTellL(f);
     324         468 :         vsi_l_offset newpos = static_cast<vsi_l_offset>(pos);
     325         468 :         if (newpos > size)
     326          38 :             newpos = size;
     327         468 :         VSIFSeekL(f, nCurrentPos = size - newpos, SEEK_SET);
     328             :     }
     329        2266 :     nPosInBuffer = -1;
     330        2266 :     nBufferLength = -1;
     331        2266 : }
     332             : 
     333             : /************************************************************************/
     334             : /*                            moveStart()                               */
     335             : /************************************************************************/
     336             : 
     337         228 : void VSIPDFFileStream::moveStart(Goffset delta)
     338             : {
     339         228 :     nStart += delta;
     340         228 :     nCurrentPos = nStart;
     341         228 :     VSIFSeekL(f, nCurrentPos, SEEK_SET);
     342         228 :     nPosInBuffer = -1;
     343         228 :     nBufferLength = -1;
     344         228 : }
     345             : 
     346             : /************************************************************************/
     347             : /*                          hasGetChars()                               */
     348             : /************************************************************************/
     349             : 
     350         860 : bool VSIPDFFileStream::hasGetChars()
     351             : {
                           // Always true: this class provides its own
                           // getChars(int, unsigned char *) implementation.
     352         860 :     return true;
     353             : }
     354             : 
     355             : /************************************************************************/
     356             : /*                            getChars()                                */
     357             : /************************************************************************/
     358             : 
     359         860 : int VSIPDFFileStream::getChars(int nChars, unsigned char *buffer)
     360             : {
     361         860 :     int nRead = 0;
     362        2233 :     while (nRead < nChars)
     363             :     {
     364        1654 :         int nToRead = nChars - nRead;
     365        1654 :         if (nPosInBuffer == nBufferLength)
     366             :         {
     367        1106 :             if (!bLimited && nToRead > BUFFER_SIZE)
     368             :             {
     369             :                 int nJustRead =
     370           0 :                     static_cast<int>(VSIFReadL(buffer + nRead, 1, nToRead, f));
     371           0 :                 nPosInBuffer = -1;
     372           0 :                 nBufferLength = -1;
     373           0 :                 nCurrentPos += nJustRead;
     374           0 :                 nRead += nJustRead;
     375           0 :                 break;
     376             :             }
     377        1106 :             else if (!FillBuffer() || nPosInBuffer >= nBufferLength)
     378         281 :                 break;
     379             :         }
     380        1373 :         if (nToRead > nBufferLength - nPosInBuffer)
     381         794 :             nToRead = nBufferLength - nPosInBuffer;
     382             : 
     383        1373 :         memcpy(buffer + nRead, abyBuffer + nPosInBuffer, nToRead);
     384        1373 :         nPosInBuffer += nToRead;
     385        1373 :         nCurrentPos += nToRead;
     386        1373 :         nRead += nToRead;
     387             :     }
     388         860 :     return nRead;
     389             : }
     390             : 
     391             : #endif

Generated by: LCOV version 1.14