LCOV - code coverage report
Current view: top level - ogr/ogrsf_frmts/mitab - mitab_utils.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 125 177 70.6 %
Date: 2025-01-18 12:42:00 Functions: 10 11 90.9 %

          Line data    Source code
       1             : /**********************************************************************
       2             :  *
       3             :  * Name:     mitab_utils.cpp
       4             :  * Project:  MapInfo TAB Read/Write library
       5             :  * Language: C++
       6             :  * Purpose:  Misc. util. functions for the library
       7             :  * Author:   Daniel Morissette, dmorissette@dmsolutions.ca
       8             :  *
       9             :  **********************************************************************
      10             :  * Copyright (c) 1999-2001, Daniel Morissette
      11             :  *
      12             :  * SPDX-License-Identifier: MIT
      13             :  **********************************************************************/
      14             : 
      15             : #include "cpl_port.h"
      16             : #include "mitab_utils.h"
      17             : 
      18             : #include <cctype>
      19             : #include <climits>
      20             : #include <cmath>
      21             : #include <cstring>
      22             : #include <limits>
      23             : 
      24             : #include "mitab.h"
      25             : #include "cpl_conv.h"
      26             : #include "cpl_error.h"
      27             : #include "cpl_string.h"
      28             : #include "cpl_vsi.h"
      29             : 
      30             : /**********************************************************************
      31             :  *                       TABGenerateArc()
      32             :  *
      33             :  * Generate the coordinates for an arc and ADD the coordinates to the
      34             :  * geometry object.  If the geometry already contains some points then
      35             :  * these won't be lost.
      36             :  *
      37             :  * poLine can be a OGRLineString or one of its derived classes, such as
      38             :  *        OGRLinearRing
      39             :  * numPoints is the number of points to generate.
      40             :  * Angles are specified in radians, valid values are in the range [0..2*PI]
      41             :  *
      42             :  * Arcs are always generated counterclockwise, even if StartAngle > EndAngle
      43             :  *
      44             :  * Returns 0 on success, -1 on error.
      45             :  **********************************************************************/
      46        2793 : int TABGenerateArc(OGRLineString *poLine, int numPoints, double dCenterX,
      47             :                    double dCenterY, double dXRadius, double dYRadius,
      48             :                    double dStartAngle, double dEndAngle)
      49             : {
      50             :     // Adjust angles to go counterclockwise
      51        2793 :     if (dEndAngle < dStartAngle)
      52           0 :         dEndAngle += 2.0 * M_PI;
      53             : 
      54        2793 :     const double dAngleStep = (dEndAngle - dStartAngle) / (numPoints - 1.0);
      55             : 
      56        2793 :     double dAngle = 0.0;
      57      268981 :     for (int i = 0; i < numPoints; i++)
      58             :     {
      59      266188 :         dAngle = dStartAngle + i * dAngleStep;
      60      266188 :         const double dX = dCenterX + dXRadius * cos(dAngle);
      61      266188 :         const double dY = dCenterY + dYRadius * sin(dAngle);
      62      266188 :         poLine->addPoint(dX, dY);
      63             :     }
      64             : 
      65             :     // Complete the arc with the last EndAngle, to make sure that
      66             :     // the arc is correctly closed.
      67        2793 :     const double dX = dCenterX + dXRadius * cos(dAngle);
      68        2793 :     const double dY = dCenterY + dYRadius * sin(dAngle);
      69        2793 :     poLine->addPoint(dX, dY);
      70             : 
      71        2793 :     return 0;
      72             : }
      73             : 
      74             : /**********************************************************************
      75             :  *                       TABCloseRing()
      76             :  *
      77             :  * Check if a ring is closed, and add a point to close it if necessary.
      78             :  *
      79             :  * Returns 0 on success, -1 on error.
      80             :  **********************************************************************/
      81         829 : int TABCloseRing(OGRLineString *poRing)
      82             : {
      83         829 :     if (poRing->getNumPoints() > 0 && !poRing->get_IsClosed())
      84             :     {
      85         829 :         poRing->addPoint(poRing->getX(0), poRing->getY(0));
      86             :     }
      87             : 
      88         829 :     return 0;
      89             : }
      90             : 
      91             : /**********************************************************************
      92             :  *                     TABAdjustCaseSensitiveFilename()
      93             :  *
      94             :  * Scan a filename and its path, adjust uppercase/lowercases if
      95             :  * necessary.
      96             :  *
      97             :  * Returns TRUE if file found, or FALSE if it could not be located with
      98             :  * a case-insensitive search.
      99             :  *
     100             :  * This function works on the original buffer and returns a reference to it.
     101             :  * It does nothing on Windows systems where filenames are not case sensitive.
     102             :  **********************************************************************/
     103             : #ifdef _WIN32
     104             : static bool TABAdjustCaseSensitiveFilename(char * /* pszFname */)
     105             : {
     106             :     // Nothing to do on Windows.
     107             :     return true;
     108             : }
     109             : #else
     110             : // Unix case.
     111         688 : static bool TABAdjustCaseSensitiveFilename(char *pszFname)
     112             : {
     113             :     VSIStatBufL sStatBuf;
     114             : 
     115             :     // First check if the filename is OK as is.
     116         688 :     if (VSIStatL(pszFname, &sStatBuf) == 0)
     117             :     {
     118           0 :         return true;
     119             :     }
     120             : 
     121             :     // File either does not exist or has the wrong cases.
     122             :     // Go backwards until we find a portion of the path that is valid.
     123         688 :     char *pszTmpPath = CPLStrdup(pszFname);
     124         688 :     const int nTotalLen = static_cast<int>(strlen(pszTmpPath));
     125         688 :     int iTmpPtr = nTotalLen;
     126         688 :     bool bValidPath = false;
     127             : 
     128        1376 :     while (iTmpPtr > 0 && !bValidPath)
     129             :     {
     130             :         // Move back to the previous '/' separator.
     131         688 :         pszTmpPath[--iTmpPtr] = '\0';
     132        9710 :         while (iTmpPtr > 0 && pszTmpPath[iTmpPtr - 1] != '/')
     133             :         {
     134        9022 :             pszTmpPath[--iTmpPtr] = '\0';
     135             :         }
     136             : 
     137         688 :         if (iTmpPtr > 0 && VSIStatL(pszTmpPath, &sStatBuf) == 0)
     138         671 :             bValidPath = true;
     139             :     }
     140             : 
     141         688 :     CPLAssert(iTmpPtr >= 0);
     142             : 
     143             :     // Assume that CWD is valid.  Therefore an empty path is a valid.
     144         688 :     if (iTmpPtr == 0)
     145          17 :         bValidPath = true;
     146             : 
     147             :     // Now that we have a valid base, reconstruct the whole path
     148             :     // by scanning all the sub-directories.
     149             :     // If we get to a point where a path component does not exist then
     150             :     // we simply return the rest of the path as is.
     151        1376 :     while (bValidPath && static_cast<int>(strlen(pszTmpPath)) < nTotalLen)
     152             :     {
     153         688 :         int iLastPartStart = iTmpPtr;
     154         688 :         char **papszDir = VSIReadDir(pszTmpPath);
     155             : 
     156             :         // Add one component to the current path.
     157         688 :         pszTmpPath[iTmpPtr] = pszFname[iTmpPtr];
     158         688 :         iTmpPtr++;
     159        9710 :         for (; pszFname[iTmpPtr] != '\0' && pszFname[iTmpPtr] != '/'; iTmpPtr++)
     160             :         {
     161        9022 :             pszTmpPath[iTmpPtr] = pszFname[iTmpPtr];
     162             :         }
     163             : 
     164         688 :         while (iLastPartStart < iTmpPtr && pszTmpPath[iLastPartStart] == '/')
     165           0 :             iLastPartStart++;
     166             : 
     167             :         // And do a case insensitive search in the current dir.
     168        4258 :         for (int iEntry = 0; papszDir && papszDir[iEntry]; iEntry++)
     169             :         {
     170        3570 :             if (EQUAL(pszTmpPath + iLastPartStart, papszDir[iEntry]))
     171             :             {
     172             :                 // Fount it.
     173           0 :                 strcpy(pszTmpPath + iLastPartStart, papszDir[iEntry]);
     174           0 :                 break;
     175             :             }
     176             :         }
     177             : 
     178         688 :         if (iTmpPtr > 0 && VSIStatL(pszTmpPath, &sStatBuf) != 0)
     179         688 :             bValidPath = false;
     180             : 
     181         688 :         CSLDestroy(papszDir);
     182             :     }
     183             : 
     184             :     // We reached the last valid path component... just copy the rest
     185             :     // of the path as is.
     186         688 :     if (iTmpPtr < nTotalLen - 1)
     187             :     {
     188           0 :         strncpy(pszTmpPath + iTmpPtr, pszFname + iTmpPtr, nTotalLen - iTmpPtr);
     189             :     }
     190             : 
     191             :     // Update the source buffer and return.
     192         688 :     strcpy(pszFname, pszTmpPath);
     193         688 :     CPLFree(pszTmpPath);
     194             : 
     195         688 :     return bValidPath;
     196             : }
     197             : #endif  // Not win32.
     198             : 
     199             : /**********************************************************************
     200             :  *                       TABAdjustFilenameExtension()
     201             :  *
     202             :  * Because Unix filenames are case sensitive and MapInfo datasets often have
     203             :  * mixed cases filenames, we use this function to find the right filename
     204             :  * to use to open a specific file.
     205             :  *
     206             :  * This function works directly on the source string, so the filename it
     207             :  * contains at the end of the call is the one that should be used.
     208             :  *
     209             :  * Returns TRUE if one of the extensions worked, and FALSE otherwise.
     210             :  * If none of the extensions worked then the original extension will NOT be
     211             :  * restored.
     212             :  **********************************************************************/
     213        8227 : GBool TABAdjustFilenameExtension(char *pszFname)
     214             : {
     215             :     VSIStatBufL sStatBuf;
     216             : 
     217             :     // First try using filename as provided
     218        8227 :     if (VSIStatL(pszFname, &sStatBuf) == 0)
     219             :     {
     220        7539 :         return TRUE;
     221             :     }
     222             : 
     223             :     // Try using uppercase extension (we assume that fname contains a '.')
     224         688 :     for (int i = static_cast<int>(strlen(pszFname)) - 1;
     225        2630 :          i >= 0 && pszFname[i] != '.'; i--)
     226             :     {
     227        1942 :         pszFname[i] = static_cast<char>(
     228        1942 :             CPLToupper(static_cast<unsigned char>(pszFname[i])));
     229             :     }
     230             : 
     231         688 :     if (VSIStatL(pszFname, &sStatBuf) == 0)
     232             :     {
     233           0 :         return TRUE;
     234             :     }
     235             : 
     236             :     // Try using lowercase extension.
     237         688 :     for (int i = static_cast<int>(strlen(pszFname)) - 1;
     238        2630 :          i >= 0 && pszFname[i] != '.'; i--)
     239             :     {
     240        1942 :         pszFname[i] = static_cast<char>(
     241        1942 :             CPLTolower(static_cast<unsigned char>(pszFname[i])));
     242             :     }
     243             : 
     244         688 :     if (VSIStatL(pszFname, &sStatBuf) == 0)
     245             :     {
     246           0 :         return TRUE;
     247             :     }
     248             : 
     249             :     // None of the extensions worked.
     250             :     // Try adjusting cases in the whole path and filename.
     251         688 :     return TABAdjustCaseSensitiveFilename(pszFname);
     252             : }
     253             : 
     254             : /**********************************************************************
     255             :  *                       TABGetBasename()
     256             :  *
     257             :  * Extract the basename part of a complete file path.
     258             :  *
     259             :  * Returns a newly allocated string without the leading path (dirs) and
     260             :  * the extension.  The returned string should be freed using CPLFree().
     261             :  **********************************************************************/
     262        2396 : char *TABGetBasename(const char *pszFname)
     263             : {
     264             :     // Skip leading path or use whole name if no path dividers are encountered.
     265        2396 :     const char *pszTmp = pszFname + strlen(pszFname) - 1;
     266       39875 :     while (pszTmp != pszFname && *pszTmp != '/' && *pszTmp != '\\')
     267       37479 :         pszTmp--;
     268             : 
     269        2396 :     if (pszTmp != pszFname)
     270        2390 :         pszTmp++;
     271             : 
     272             :     // Now allocate our own copy and remove extension.
     273        2396 :     char *pszBasename = CPLStrdup(pszTmp);
     274        9584 :     for (int i = static_cast<int>(strlen(pszBasename)) - 1; i >= 0; i--)
     275             :     {
     276        9584 :         if (pszBasename[i] == '.')
     277             :         {
     278        2396 :             pszBasename[i] = '\0';
     279        2396 :             break;
     280             :         }
     281             :     }
     282             : 
     283        2396 :     return pszBasename;
     284             : }
     285             : 
     286             : /**********************************************************************
     287             :  *                       TAB_CSLLoad()
     288             :  *
     289             :  * Same as CSLLoad(), but does not produce an error if it fails... it
     290             :  * just returns NULL silently instead.
     291             :  *
     292             :  * Load a test file into a stringlist.
     293             :  *
     294             :  * Lines are limited in length by the size of the CPLReadLine() buffer.
     295             :  **********************************************************************/
     296        1313 : char **TAB_CSLLoad(const char *pszFname)
     297             : {
     298        2626 :     CPLStringList oList;
     299             : 
     300        1313 :     VSILFILE *fp = VSIFOpenL(pszFname, "rt");
     301             : 
     302        1313 :     if (fp)
     303             :     {
     304       12135 :         while (const char *pszLine = CPLReadLineL(fp))
     305             :         {
     306       10822 :             oList.AddString(pszLine);
     307       10822 :         }
     308             : 
     309        1313 :         VSIFCloseL(fp);
     310             :     }
     311             : 
     312        2626 :     return oList.StealList();
     313             : }
     314             : 
     315             : /**********************************************************************
     316             :  *                       TABUnEscapeString()
     317             :  *
     318             :  * Convert a string that can possibly contain escaped "\n" chars in
     319             :  * into into a new one with binary newlines in it.
     320             :  *
     321             :  * Tries to work on the original buffer unless bSrcIsConst=TRUE, in
     322             :  * which case the original is always untouched and a copy is allocated
     323             :  * ONLY IF NECESSARY.  This means that the caller should compare the
     324             :  * return value and the source (pszString) to see if a copy was returned,
     325             :  * in which case the caller becomes responsible of freeing both the
     326             :  * source and the copy.
     327             :  **********************************************************************/
     328         297 : char *TABUnEscapeString(char *pszString, GBool bSrcIsConst)
     329             : {
     330             :     // First check if we need to do any replacement.
     331         297 :     if (pszString == nullptr || strstr(pszString, "\\n") == nullptr)
     332             :     {
     333         297 :         return pszString;
     334             :     }
     335             : 
     336             :     // Yes, we need to replace at least one "\n".
     337             :     // We try to work on the original buffer unless we have bSrcIsConst=TRUE.
     338             :     //
     339             :     // Note that we do not worry about freeing the source buffer when we
     340             :     // return a copy.  It is up to the caller to decide if the source needs
     341             :     // to be freed based on context and by comparing pszString with
     342             :     // the returned pointer (pszWorkString) to see if they are identical.
     343           0 :     char *pszWorkString = nullptr;
     344           0 :     if (bSrcIsConst)
     345             :     {
     346             :         // We have to create a copy to work on.
     347             :         pszWorkString = static_cast<char *>(
     348           0 :             CPLMalloc(sizeof(char) * (strlen(pszString) + 1)));
     349             :     }
     350             :     else
     351             :     {
     352             :         // Work on the original.
     353           0 :         pszWorkString = pszString;
     354             :     }
     355             : 
     356           0 :     int i = 0;
     357           0 :     int j = 0;
     358           0 :     while (pszString[i])
     359             :     {
     360           0 :         if (pszString[i] == '\\' && pszString[i + 1] == 'n')
     361             :         {
     362           0 :             pszWorkString[j++] = '\n';
     363           0 :             i += 2;
     364             :         }
     365           0 :         else if (pszString[i] == '\\' && pszString[i + 1] == '\\')
     366             :         {
     367           0 :             pszWorkString[j++] = '\\';
     368           0 :             i += 2;
     369             :         }
     370             :         else
     371             :         {
     372           0 :             pszWorkString[j++] = pszString[i++];
     373             :         }
     374             :     }
     375           0 :     pszWorkString[j++] = '\0';
     376             : 
     377           0 :     return pszWorkString;
     378             : }
     379             : 
     380             : /**********************************************************************
     381             :  *                       TABEscapeString()
     382             :  *
     383             :  * Convert a string that can possibly contain binary "\n" chars in
     384             :  * into into a new one with escaped newlines ("\\" + "n") in it.
     385             :  *
     386             :  * The function returns the original string pointer if it did not need to
     387             :  * be modified, or a copy that has to be freed by the caller if the
     388             :  * string had to be modified.
     389             :  *
     390             :  * It is up to the caller to decide if the returned string needs to be
     391             :  * freed by comparing the source (pszString) pointer with the returned
     392             :  * pointer (pszWorkString) to see if they are identical.
     393             :  **********************************************************************/
     394           0 : char *TABEscapeString(char *pszString)
     395             : {
     396             :     // First check if we need to do any replacement
     397           0 :     if (pszString == nullptr || strchr(pszString, '\n') == nullptr)
     398             :     {
     399           0 :         return pszString;
     400             :     }
     401             : 
     402             :     // Need to do some replacements.  Alloc a copy big enough
     403             :     // to hold the worst possible case.
     404             :     char *pszWorkString = static_cast<char *>(
     405           0 :         CPLMalloc(2 * sizeof(char) * (strlen(pszString) + 1)));
     406             : 
     407           0 :     int i = 0;
     408           0 :     int j = 0;
     409             : 
     410           0 :     while (pszString[i])
     411             :     {
     412           0 :         if (pszString[i] == '\n')
     413             :         {
     414           0 :             pszWorkString[j++] = '\\';
     415           0 :             pszWorkString[j++] = 'n';
     416           0 :             i++;
     417             :         }
     418           0 :         else if (pszString[i] == '\\')
     419             :         {
     420           0 :             pszWorkString[j++] = '\\';
     421           0 :             pszWorkString[j++] = '\\';
     422           0 :             i++;
     423             :         }
     424             :         else
     425             :         {
     426           0 :             pszWorkString[j++] = pszString[i++];
     427             :         }
     428             :     }
     429           0 :     pszWorkString[j++] = '\0';
     430             : 
     431           0 :     return pszWorkString;
     432             : }
     433             : 
     434             : /**********************************************************************
     435             :  *                       TABCleanFieldName()
     436             :  *
     437             :  * Return a copy of pszSrcName that contains only valid characters for a
     438             :  * TAB field name.  All invalid characters are replaced by '_'.
     439             :  *
     440             :  * The returned string should be freed by the caller.
     441             :  **********************************************************************/
     442         403 : char *TABCleanFieldName(const char *pszSrcName, const char *pszEncoding,
     443             :                         bool bStrictLaundering)
     444             : {
     445         403 :     char *pszNewName = CPLStrdup(pszSrcName);
     446         403 :     int numInvalidChars = 0;
     447             : 
     448         403 :     if (bStrictLaundering)
     449             :     {
     450         399 :         if (strlen(pszNewName) > 31)
     451             :         {
     452           0 :             pszNewName[31] = '\0';
     453           0 :             CPLError(CE_Warning,
     454             :                      static_cast<CPLErrorNum>(TAB_WarningInvalidFieldName),
     455             :                      "Field name '%s' is longer than the max of 31 characters. "
     456             :                      "'%s' will be used instead.",
     457             :                      pszSrcName, pszNewName);
     458             :         }
     459             : 
     460             :         // According to the MapInfo User's Guide (p. 240, v5.5).
     461             :         // New Table Command:
     462             :         //  Name:
     463             :         // Displays the field name in the name box. You can also enter new field
     464             :         // names here. Defaults are Field1, Field2, etc. A field name can contain
     465             :         // up to 31 alphanumeric characters. Use letters, numbers, and the
     466             :         // underscore. Do not use spaces; instead, use the underscore character
     467             :         // (_) to separate words in a field name. Use upper and lower case for
     468             :         // legibility, but MapInfo is not case-sensitive.
     469             :         //
     470             :         // It was also verified that extended chars with accents are also
     471             :         // accepted.
     472         399 :         bool bNeutralCharset =
     473         399 :             (pszEncoding == nullptr || strlen(pszEncoding) == 0);
     474        2335 :         for (int i = 0; pszSrcName && pszSrcName[i] != '\0'; i++)
     475             :         {
     476        1936 :             if (pszSrcName[i] == '#')
     477             :             {
     478           0 :                 if (i == 0)
     479             :                 {
     480           0 :                     pszNewName[i] = '_';
     481           0 :                     numInvalidChars++;
     482             :                 }
     483             :             }
     484        3276 :             else if (!(pszSrcName[i] == '_' ||
     485        1519 :                        (i != 0 && pszSrcName[i] >= '0' &&
     486        1328 :                         pszSrcName[i] <= '9') ||
     487        1454 :                        (!bNeutralCharset ||
     488        1454 :                         ((pszSrcName[i] >= 'a' && pszSrcName[i] <= 'z') ||
     489         277 :                          (pszSrcName[i] >= 'A' && pszSrcName[i] <= 'Z') ||
     490           1 :                          static_cast<GByte>(pszSrcName[i]) >= 192))))
     491             :             {
     492           1 :                 pszNewName[i] = '_';
     493           1 :                 numInvalidChars++;
     494             :             }
     495             :         }
     496             :     }
     497             :     else
     498             :     {
     499             :         // There is a note at mapinfo-pro-v2021-user-guide.pdf
     500             :         // (p. 1425, Columns section: "Field names cannot have spaces".
     501             :         // There seem to be no other constraints.
     502          56 :         for (int i = 0; pszSrcName && pszSrcName[i] != '\0'; i++)
     503             :         {
     504          52 :             if (pszSrcName[i] == ' ')
     505             :             {
     506           4 :                 pszNewName[i] = '_';
     507           4 :                 numInvalidChars++;
     508             :             }
     509             :         }
     510             :     }
     511         403 :     if (numInvalidChars > 0)
     512             :     {
     513           5 :         CPLError(CE_Warning,
     514             :                  static_cast<CPLErrorNum>(TAB_WarningInvalidFieldName),
     515             :                  "Field name '%s' contains invalid characters. "
     516             :                  "'%s' will be used instead.",
     517             :                  pszSrcName, pszNewName);
     518             :     }
     519             : 
     520         403 :     return pszNewName;
     521             : }
     522             : 
     523             : /**********************************************************************
     524             :  *                       TABSaturatedAdd()
     525             :  ***********************************************************************/
     526             : 
     527       78400 : void TABSaturatedAdd(GInt32 &nVal, GInt32 nAdd)
     528             : {
     529       78400 :     const GInt32 int_max = std::numeric_limits<GInt32>::max();
     530       78400 :     const GInt32 int_min = std::numeric_limits<GInt32>::min();
     531             : 
     532       78400 :     if (nAdd >= 0 && nVal > int_max - nAdd)
     533           0 :         nVal = int_max;
     534       78400 :     else if (nAdd == int_min && nVal < 0)
     535           0 :         nVal = int_min;
     536       78400 :     else if (nAdd != int_min && nAdd < 0 && nVal < int_min - nAdd)
     537           0 :         nVal = int_min;
     538             :     else
     539       78400 :         nVal += nAdd;
     540       78400 : }
     541             : 
     542             : /**********************************************************************
     543             :  *                           TABInt16Diff()
     544             :  **********************************************************************/
     545             : 
     546       10880 : GInt16 TABInt16Diff(int a, int b)
     547             : {
     548       10880 :     GIntBig nDiff = static_cast<GIntBig>(a) - b;
     549             :     // Maybe we should error out instead of saturating ???
     550       10880 :     if (nDiff < -32768)
     551           0 :         return -32768;
     552       10880 :     if (nDiff > 32767)
     553           0 :         return 32767;
     554       10880 :     return static_cast<GInt16>(nDiff);
     555             : }

Generated by: LCOV version 1.14