Line data Source code
1 : /**********************************************************************
2 : *
3 : * Name: mitab_utils.cpp
4 : * Project: MapInfo TAB Read/Write library
5 : * Language: C++
6 : * Purpose: Misc. util. functions for the library
7 : * Author: Daniel Morissette, dmorissette@dmsolutions.ca
8 : *
9 : **********************************************************************
10 : * Copyright (c) 1999-2001, Daniel Morissette
11 : *
12 : * SPDX-License-Identifier: MIT
13 : **********************************************************************/
14 :
15 : #include "cpl_port.h"
16 : #include "mitab_utils.h"
17 :
18 : #include <cctype>
19 : #include <climits>
20 : #include <cmath>
21 : #include <cstring>
22 : #include <limits>
23 :
24 : #include "mitab.h"
25 : #include "cpl_conv.h"
26 : #include "cpl_error.h"
27 : #include "cpl_string.h"
28 : #include "cpl_vsi.h"
29 :
30 : /**********************************************************************
31 : * TABGenerateArc()
32 : *
33 : * Generate the coordinates for an arc and ADD the coordinates to the
34 : * geometry object. If the geometry already contains some points then
35 : * these won't be lost.
36 : *
37 : * poLine can be a OGRLineString or one of its derived classes, such as
38 : * OGRLinearRing
39 : * numPoints is the number of points to generate.
40 : * Angles are specified in radians, valid values are in the range [0..2*PI]
41 : *
42 : * Arcs are always generated counterclockwise, even if StartAngle > EndAngle
43 : *
44 : * Returns 0 on success, -1 on error.
45 : **********************************************************************/
46 2793 : int TABGenerateArc(OGRLineString *poLine, int numPoints, double dCenterX,
47 : double dCenterY, double dXRadius, double dYRadius,
48 : double dStartAngle, double dEndAngle)
49 : {
50 : // Adjust angles to go counterclockwise
51 2793 : if (dEndAngle < dStartAngle)
52 0 : dEndAngle += 2.0 * M_PI;
53 :
54 2793 : const double dAngleStep = (dEndAngle - dStartAngle) / (numPoints - 1.0);
55 :
56 2793 : double dAngle = 0.0;
57 268981 : for (int i = 0; i < numPoints; i++)
58 : {
59 266188 : dAngle = dStartAngle + i * dAngleStep;
60 266188 : const double dX = dCenterX + dXRadius * cos(dAngle);
61 266188 : const double dY = dCenterY + dYRadius * sin(dAngle);
62 266188 : poLine->addPoint(dX, dY);
63 : }
64 :
65 : // Complete the arc with the last EndAngle, to make sure that
66 : // the arc is correctly closed.
67 2793 : const double dX = dCenterX + dXRadius * cos(dAngle);
68 2793 : const double dY = dCenterY + dYRadius * sin(dAngle);
69 2793 : poLine->addPoint(dX, dY);
70 :
71 2793 : return 0;
72 : }
73 :
74 : /**********************************************************************
75 : * TABCloseRing()
76 : *
77 : * Check if a ring is closed, and add a point to close it if necessary.
78 : *
79 : * Returns 0 on success, -1 on error.
80 : **********************************************************************/
81 829 : int TABCloseRing(OGRLineString *poRing)
82 : {
83 829 : if (poRing->getNumPoints() > 0 && !poRing->get_IsClosed())
84 : {
85 829 : poRing->addPoint(poRing->getX(0), poRing->getY(0));
86 : }
87 :
88 829 : return 0;
89 : }
90 :
91 : /**********************************************************************
92 : * TABAdjustCaseSensitiveFilename()
93 : *
94 : * Scan a filename and its path, adjust uppercase/lowercases if
95 : * necessary.
96 : *
97 : * Returns TRUE if file found, or FALSE if it could not be located with
98 : * a case-insensitive search.
99 : *
100 : * This function works on the original buffer and returns a reference to it.
101 : * It does nothing on Windows systems where filenames are not case sensitive.
102 : **********************************************************************/
103 : #ifdef _WIN32
104 : static bool TABAdjustCaseSensitiveFilename(char * /* pszFname */)
105 : {
106 : // Nothing to do on Windows.
107 : return true;
108 : }
109 : #else
110 : // Unix case.
111 688 : static bool TABAdjustCaseSensitiveFilename(char *pszFname)
112 : {
113 : VSIStatBufL sStatBuf;
114 :
115 : // First check if the filename is OK as is.
116 688 : if (VSIStatL(pszFname, &sStatBuf) == 0)
117 : {
118 0 : return true;
119 : }
120 :
121 : // File either does not exist or has the wrong cases.
122 : // Go backwards until we find a portion of the path that is valid.
123 688 : char *pszTmpPath = CPLStrdup(pszFname);
124 688 : const int nTotalLen = static_cast<int>(strlen(pszTmpPath));
125 688 : int iTmpPtr = nTotalLen;
126 688 : bool bValidPath = false;
127 :
128 1376 : while (iTmpPtr > 0 && !bValidPath)
129 : {
130 : // Move back to the previous '/' separator.
131 688 : pszTmpPath[--iTmpPtr] = '\0';
132 9710 : while (iTmpPtr > 0 && pszTmpPath[iTmpPtr - 1] != '/')
133 : {
134 9022 : pszTmpPath[--iTmpPtr] = '\0';
135 : }
136 :
137 688 : if (iTmpPtr > 0 && VSIStatL(pszTmpPath, &sStatBuf) == 0)
138 671 : bValidPath = true;
139 : }
140 :
141 688 : CPLAssert(iTmpPtr >= 0);
142 :
143 : // Assume that CWD is valid. Therefore an empty path is a valid.
144 688 : if (iTmpPtr == 0)
145 17 : bValidPath = true;
146 :
147 : // Now that we have a valid base, reconstruct the whole path
148 : // by scanning all the sub-directories.
149 : // If we get to a point where a path component does not exist then
150 : // we simply return the rest of the path as is.
151 1376 : while (bValidPath && static_cast<int>(strlen(pszTmpPath)) < nTotalLen)
152 : {
153 688 : int iLastPartStart = iTmpPtr;
154 688 : char **papszDir = VSIReadDir(pszTmpPath);
155 :
156 : // Add one component to the current path.
157 688 : pszTmpPath[iTmpPtr] = pszFname[iTmpPtr];
158 688 : iTmpPtr++;
159 9710 : for (; pszFname[iTmpPtr] != '\0' && pszFname[iTmpPtr] != '/'; iTmpPtr++)
160 : {
161 9022 : pszTmpPath[iTmpPtr] = pszFname[iTmpPtr];
162 : }
163 :
164 688 : while (iLastPartStart < iTmpPtr && pszTmpPath[iLastPartStart] == '/')
165 0 : iLastPartStart++;
166 :
167 : // And do a case insensitive search in the current dir.
168 4258 : for (int iEntry = 0; papszDir && papszDir[iEntry]; iEntry++)
169 : {
170 3570 : if (EQUAL(pszTmpPath + iLastPartStart, papszDir[iEntry]))
171 : {
172 : // Fount it.
173 0 : strcpy(pszTmpPath + iLastPartStart, papszDir[iEntry]);
174 0 : break;
175 : }
176 : }
177 :
178 688 : if (iTmpPtr > 0 && VSIStatL(pszTmpPath, &sStatBuf) != 0)
179 688 : bValidPath = false;
180 :
181 688 : CSLDestroy(papszDir);
182 : }
183 :
184 : // We reached the last valid path component... just copy the rest
185 : // of the path as is.
186 688 : if (iTmpPtr < nTotalLen - 1)
187 : {
188 0 : strncpy(pszTmpPath + iTmpPtr, pszFname + iTmpPtr, nTotalLen - iTmpPtr);
189 : }
190 :
191 : // Update the source buffer and return.
192 688 : strcpy(pszFname, pszTmpPath);
193 688 : CPLFree(pszTmpPath);
194 :
195 688 : return bValidPath;
196 : }
197 : #endif // Not win32.
198 :
199 : /**********************************************************************
200 : * TABAdjustFilenameExtension()
201 : *
202 : * Because Unix filenames are case sensitive and MapInfo datasets often have
203 : * mixed cases filenames, we use this function to find the right filename
204 : * to use to open a specific file.
205 : *
206 : * This function works directly on the source string, so the filename it
207 : * contains at the end of the call is the one that should be used.
208 : *
209 : * Returns TRUE if one of the extensions worked, and FALSE otherwise.
210 : * If none of the extensions worked then the original extension will NOT be
211 : * restored.
212 : **********************************************************************/
213 8227 : GBool TABAdjustFilenameExtension(char *pszFname)
214 : {
215 : VSIStatBufL sStatBuf;
216 :
217 : // First try using filename as provided
218 8227 : if (VSIStatL(pszFname, &sStatBuf) == 0)
219 : {
220 7539 : return TRUE;
221 : }
222 :
223 : // Try using uppercase extension (we assume that fname contains a '.')
224 688 : for (int i = static_cast<int>(strlen(pszFname)) - 1;
225 2630 : i >= 0 && pszFname[i] != '.'; i--)
226 : {
227 1942 : pszFname[i] = static_cast<char>(
228 1942 : CPLToupper(static_cast<unsigned char>(pszFname[i])));
229 : }
230 :
231 688 : if (VSIStatL(pszFname, &sStatBuf) == 0)
232 : {
233 0 : return TRUE;
234 : }
235 :
236 : // Try using lowercase extension.
237 688 : for (int i = static_cast<int>(strlen(pszFname)) - 1;
238 2630 : i >= 0 && pszFname[i] != '.'; i--)
239 : {
240 1942 : pszFname[i] = static_cast<char>(
241 1942 : CPLTolower(static_cast<unsigned char>(pszFname[i])));
242 : }
243 :
244 688 : if (VSIStatL(pszFname, &sStatBuf) == 0)
245 : {
246 0 : return TRUE;
247 : }
248 :
249 : // None of the extensions worked.
250 : // Try adjusting cases in the whole path and filename.
251 688 : return TABAdjustCaseSensitiveFilename(pszFname);
252 : }
253 :
254 : /**********************************************************************
255 : * TABGetBasename()
256 : *
257 : * Extract the basename part of a complete file path.
258 : *
259 : * Returns a newly allocated string without the leading path (dirs) and
260 : * the extension. The returned string should be freed using CPLFree().
261 : **********************************************************************/
262 2396 : char *TABGetBasename(const char *pszFname)
263 : {
264 : // Skip leading path or use whole name if no path dividers are encountered.
265 2396 : const char *pszTmp = pszFname + strlen(pszFname) - 1;
266 39875 : while (pszTmp != pszFname && *pszTmp != '/' && *pszTmp != '\\')
267 37479 : pszTmp--;
268 :
269 2396 : if (pszTmp != pszFname)
270 2390 : pszTmp++;
271 :
272 : // Now allocate our own copy and remove extension.
273 2396 : char *pszBasename = CPLStrdup(pszTmp);
274 9584 : for (int i = static_cast<int>(strlen(pszBasename)) - 1; i >= 0; i--)
275 : {
276 9584 : if (pszBasename[i] == '.')
277 : {
278 2396 : pszBasename[i] = '\0';
279 2396 : break;
280 : }
281 : }
282 :
283 2396 : return pszBasename;
284 : }
285 :
286 : /**********************************************************************
287 : * TAB_CSLLoad()
288 : *
289 : * Same as CSLLoad(), but does not produce an error if it fails... it
290 : * just returns NULL silently instead.
291 : *
292 : * Load a test file into a stringlist.
293 : *
294 : * Lines are limited in length by the size of the CPLReadLine() buffer.
295 : **********************************************************************/
296 1313 : char **TAB_CSLLoad(const char *pszFname)
297 : {
298 2626 : CPLStringList oList;
299 :
300 1313 : VSILFILE *fp = VSIFOpenL(pszFname, "rt");
301 :
302 1313 : if (fp)
303 : {
304 12135 : while (const char *pszLine = CPLReadLineL(fp))
305 : {
306 10822 : oList.AddString(pszLine);
307 10822 : }
308 :
309 1313 : VSIFCloseL(fp);
310 : }
311 :
312 2626 : return oList.StealList();
313 : }
314 :
315 : /**********************************************************************
316 : * TABUnEscapeString()
317 : *
318 : * Convert a string that can possibly contain escaped "\n" chars in
319 : * into into a new one with binary newlines in it.
320 : *
321 : * Tries to work on the original buffer unless bSrcIsConst=TRUE, in
322 : * which case the original is always untouched and a copy is allocated
323 : * ONLY IF NECESSARY. This means that the caller should compare the
324 : * return value and the source (pszString) to see if a copy was returned,
325 : * in which case the caller becomes responsible of freeing both the
326 : * source and the copy.
327 : **********************************************************************/
328 297 : char *TABUnEscapeString(char *pszString, GBool bSrcIsConst)
329 : {
330 : // First check if we need to do any replacement.
331 297 : if (pszString == nullptr || strstr(pszString, "\\n") == nullptr)
332 : {
333 297 : return pszString;
334 : }
335 :
336 : // Yes, we need to replace at least one "\n".
337 : // We try to work on the original buffer unless we have bSrcIsConst=TRUE.
338 : //
339 : // Note that we do not worry about freeing the source buffer when we
340 : // return a copy. It is up to the caller to decide if the source needs
341 : // to be freed based on context and by comparing pszString with
342 : // the returned pointer (pszWorkString) to see if they are identical.
343 0 : char *pszWorkString = nullptr;
344 0 : if (bSrcIsConst)
345 : {
346 : // We have to create a copy to work on.
347 : pszWorkString = static_cast<char *>(
348 0 : CPLMalloc(sizeof(char) * (strlen(pszString) + 1)));
349 : }
350 : else
351 : {
352 : // Work on the original.
353 0 : pszWorkString = pszString;
354 : }
355 :
356 0 : int i = 0;
357 0 : int j = 0;
358 0 : while (pszString[i])
359 : {
360 0 : if (pszString[i] == '\\' && pszString[i + 1] == 'n')
361 : {
362 0 : pszWorkString[j++] = '\n';
363 0 : i += 2;
364 : }
365 0 : else if (pszString[i] == '\\' && pszString[i + 1] == '\\')
366 : {
367 0 : pszWorkString[j++] = '\\';
368 0 : i += 2;
369 : }
370 : else
371 : {
372 0 : pszWorkString[j++] = pszString[i++];
373 : }
374 : }
375 0 : pszWorkString[j++] = '\0';
376 :
377 0 : return pszWorkString;
378 : }
379 :
380 : /**********************************************************************
381 : * TABEscapeString()
382 : *
383 : * Convert a string that can possibly contain binary "\n" chars in
384 : * into into a new one with escaped newlines ("\\" + "n") in it.
385 : *
386 : * The function returns the original string pointer if it did not need to
387 : * be modified, or a copy that has to be freed by the caller if the
388 : * string had to be modified.
389 : *
390 : * It is up to the caller to decide if the returned string needs to be
391 : * freed by comparing the source (pszString) pointer with the returned
392 : * pointer (pszWorkString) to see if they are identical.
393 : **********************************************************************/
394 0 : char *TABEscapeString(char *pszString)
395 : {
396 : // First check if we need to do any replacement
397 0 : if (pszString == nullptr || strchr(pszString, '\n') == nullptr)
398 : {
399 0 : return pszString;
400 : }
401 :
402 : // Need to do some replacements. Alloc a copy big enough
403 : // to hold the worst possible case.
404 : char *pszWorkString = static_cast<char *>(
405 0 : CPLMalloc(2 * sizeof(char) * (strlen(pszString) + 1)));
406 :
407 0 : int i = 0;
408 0 : int j = 0;
409 :
410 0 : while (pszString[i])
411 : {
412 0 : if (pszString[i] == '\n')
413 : {
414 0 : pszWorkString[j++] = '\\';
415 0 : pszWorkString[j++] = 'n';
416 0 : i++;
417 : }
418 0 : else if (pszString[i] == '\\')
419 : {
420 0 : pszWorkString[j++] = '\\';
421 0 : pszWorkString[j++] = '\\';
422 0 : i++;
423 : }
424 : else
425 : {
426 0 : pszWorkString[j++] = pszString[i++];
427 : }
428 : }
429 0 : pszWorkString[j++] = '\0';
430 :
431 0 : return pszWorkString;
432 : }
433 :
434 : /**********************************************************************
435 : * TABCleanFieldName()
436 : *
437 : * Return a copy of pszSrcName that contains only valid characters for a
438 : * TAB field name. All invalid characters are replaced by '_'.
439 : *
440 : * The returned string should be freed by the caller.
441 : **********************************************************************/
442 403 : char *TABCleanFieldName(const char *pszSrcName, const char *pszEncoding,
443 : bool bStrictLaundering)
444 : {
445 403 : char *pszNewName = CPLStrdup(pszSrcName);
446 403 : int numInvalidChars = 0;
447 :
448 403 : if (bStrictLaundering)
449 : {
450 399 : if (strlen(pszNewName) > 31)
451 : {
452 0 : pszNewName[31] = '\0';
453 0 : CPLError(CE_Warning,
454 : static_cast<CPLErrorNum>(TAB_WarningInvalidFieldName),
455 : "Field name '%s' is longer than the max of 31 characters. "
456 : "'%s' will be used instead.",
457 : pszSrcName, pszNewName);
458 : }
459 :
460 : // According to the MapInfo User's Guide (p. 240, v5.5).
461 : // New Table Command:
462 : // Name:
463 : // Displays the field name in the name box. You can also enter new field
464 : // names here. Defaults are Field1, Field2, etc. A field name can contain
465 : // up to 31 alphanumeric characters. Use letters, numbers, and the
466 : // underscore. Do not use spaces; instead, use the underscore character
467 : // (_) to separate words in a field name. Use upper and lower case for
468 : // legibility, but MapInfo is not case-sensitive.
469 : //
470 : // It was also verified that extended chars with accents are also
471 : // accepted.
472 399 : bool bNeutralCharset =
473 399 : (pszEncoding == nullptr || strlen(pszEncoding) == 0);
474 2335 : for (int i = 0; pszSrcName && pszSrcName[i] != '\0'; i++)
475 : {
476 1936 : if (pszSrcName[i] == '#')
477 : {
478 0 : if (i == 0)
479 : {
480 0 : pszNewName[i] = '_';
481 0 : numInvalidChars++;
482 : }
483 : }
484 3276 : else if (!(pszSrcName[i] == '_' ||
485 1519 : (i != 0 && pszSrcName[i] >= '0' &&
486 1328 : pszSrcName[i] <= '9') ||
487 1454 : (!bNeutralCharset ||
488 1454 : ((pszSrcName[i] >= 'a' && pszSrcName[i] <= 'z') ||
489 277 : (pszSrcName[i] >= 'A' && pszSrcName[i] <= 'Z') ||
490 1 : static_cast<GByte>(pszSrcName[i]) >= 192))))
491 : {
492 1 : pszNewName[i] = '_';
493 1 : numInvalidChars++;
494 : }
495 : }
496 : }
497 : else
498 : {
499 : // There is a note at mapinfo-pro-v2021-user-guide.pdf
500 : // (p. 1425, Columns section: "Field names cannot have spaces".
501 : // There seem to be no other constraints.
502 56 : for (int i = 0; pszSrcName && pszSrcName[i] != '\0'; i++)
503 : {
504 52 : if (pszSrcName[i] == ' ')
505 : {
506 4 : pszNewName[i] = '_';
507 4 : numInvalidChars++;
508 : }
509 : }
510 : }
511 403 : if (numInvalidChars > 0)
512 : {
513 5 : CPLError(CE_Warning,
514 : static_cast<CPLErrorNum>(TAB_WarningInvalidFieldName),
515 : "Field name '%s' contains invalid characters. "
516 : "'%s' will be used instead.",
517 : pszSrcName, pszNewName);
518 : }
519 :
520 403 : return pszNewName;
521 : }
522 :
523 : /**********************************************************************
524 : * TABSaturatedAdd()
525 : ***********************************************************************/
526 :
527 78400 : void TABSaturatedAdd(GInt32 &nVal, GInt32 nAdd)
528 : {
529 78400 : const GInt32 int_max = std::numeric_limits<GInt32>::max();
530 78400 : const GInt32 int_min = std::numeric_limits<GInt32>::min();
531 :
532 78400 : if (nAdd >= 0 && nVal > int_max - nAdd)
533 0 : nVal = int_max;
534 78400 : else if (nAdd == int_min && nVal < 0)
535 0 : nVal = int_min;
536 78400 : else if (nAdd != int_min && nAdd < 0 && nVal < int_min - nAdd)
537 0 : nVal = int_min;
538 : else
539 78400 : nVal += nAdd;
540 78400 : }
541 :
542 : /**********************************************************************
543 : * TABInt16Diff()
544 : **********************************************************************/
545 :
546 10880 : GInt16 TABInt16Diff(int a, int b)
547 : {
548 10880 : GIntBig nDiff = static_cast<GIntBig>(a) - b;
549 : // Maybe we should error out instead of saturating ???
550 10880 : if (nDiff < -32768)
551 0 : return -32768;
552 10880 : if (nDiff > 32767)
553 0 : return 32767;
554 10880 : return static_cast<GInt16>(nDiff);
555 : }
|