Line data Source code
1 : /**********************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: Implementation of MiniXML Parser and handling.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2001, Frank Warmerdam
9 : * Copyright (c) 2007-2013, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : **********************************************************************
13 : *
14 : * Independent Security Audit 2003/04/05 Andrey Kiselev:
15 : * Completed audit of this module. Any documents may be parsed without
16 : * buffer overflows and stack corruptions.
17 : *
18 : * Security Audit 2003/03/28 warmerda:
19 : * Completed security audit. I believe that this module may be safely used
20 : * to parse, and serialize arbitrary documents provided by a potentially
21 : * hostile source.
22 : *
23 : */
24 :
25 : #include "cpl_minixml.h"
26 :
27 : #include <cctype>
28 : #include <climits>
29 : #include <cstddef>
30 : #include <cstdio>
31 : #include <cstring>
32 :
33 : #include <algorithm>
34 :
35 : #include "cpl_conv.h"
36 : #include "cpl_error.h"
37 : #include "cpl_string.h"
38 : #include "cpl_vsi.h"
39 :
// Lexical token categories produced by ReadToken().
enum XMLTokenType
{
    TNone,           // End of input, or tokenization stopped on an error.
    TString,         // Quoted attribute value, CDATA content or element text.
    TOpen,           // '<' seen while not already inside an element tag.
    TClose,          // '>' seen while inside an element tag.
    TEqual,          // '=' seen while inside an element tag.
    TToken,          // Bare name-like token inside an element tag.
    TSlashClose,     // "/>" terminator.
    TQuestionClose,  // "?>" terminator.
    TComment,        // Comment body, without the "<!--" / "-->" delimiters.
    TLiteral         // DOCTYPE declaration, kept verbatim.
};
53 :
54 : typedef struct
55 : {
56 : CPLXMLNode *psFirstNode;
57 : CPLXMLNode *psLastChild;
58 : } StackContext;
59 :
60 : typedef struct
61 : {
62 : const char *pszInput;
63 : int nInputOffset;
64 : int nInputLine;
65 : bool bInElement;
66 : XMLTokenType eTokenType;
67 : char *pszToken;
68 : size_t nTokenMaxSize;
69 : size_t nTokenSize;
70 :
71 : int nStackMaxSize;
72 : int nStackSize;
73 : StackContext *papsStack;
74 :
75 : CPLXMLNode *psFirstNode;
76 : CPLXMLNode *psLastNode;
77 : } ParseContext;
78 :
79 : static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
80 : const char *pszText);
81 :
82 : /************************************************************************/
83 : /* ReadChar() */
84 : /************************************************************************/
85 :
86 430674000 : static CPL_INLINE char ReadChar(ParseContext *psContext)
87 :
88 : {
89 430674000 : const char chReturn = psContext->pszInput[psContext->nInputOffset++];
90 :
91 430674000 : if (chReturn == '\0')
92 259500 : psContext->nInputOffset--;
93 430415000 : else if (chReturn == 10)
94 4200850 : psContext->nInputLine++;
95 :
96 430674000 : return chReturn;
97 : }
98 :
99 : /************************************************************************/
100 : /* UnreadChar() */
101 : /************************************************************************/
102 :
103 16653600 : static CPL_INLINE void UnreadChar(ParseContext *psContext, char chToUnread)
104 :
105 : {
106 16653600 : if (chToUnread == '\0')
107 34 : return;
108 :
109 16653600 : CPLAssert(chToUnread == psContext->pszInput[psContext->nInputOffset - 1]);
110 :
111 16653600 : psContext->nInputOffset--;
112 :
113 16653600 : if (chToUnread == 10)
114 314 : psContext->nInputLine--;
115 : }
116 :
117 : /************************************************************************/
118 : /* ReallocToken() */
119 : /************************************************************************/
120 :
121 1099300 : static bool ReallocToken(ParseContext *psContext)
122 : {
123 1099300 : if (psContext->nTokenMaxSize > INT_MAX / 2)
124 : {
125 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
126 : "Out of memory allocating %d*2 bytes",
127 0 : static_cast<int>(psContext->nTokenMaxSize));
128 0 : VSIFree(psContext->pszToken);
129 0 : psContext->pszToken = nullptr;
130 0 : return false;
131 : }
132 :
133 1099300 : psContext->nTokenMaxSize *= 2;
134 : char *pszToken = static_cast<char *>(
135 1099300 : VSIRealloc(psContext->pszToken, psContext->nTokenMaxSize));
136 1099300 : if (pszToken == nullptr)
137 : {
138 1 : CPLError(CE_Failure, CPLE_OutOfMemory,
139 : "Out of memory allocating %d bytes",
140 1 : static_cast<int>(psContext->nTokenMaxSize));
141 0 : VSIFree(psContext->pszToken);
142 0 : psContext->pszToken = nullptr;
143 0 : return false;
144 : }
145 1099300 : psContext->pszToken = pszToken;
146 1099300 : return true;
147 : }
148 :
149 : /************************************************************************/
150 : /* AddToToken() */
151 : /************************************************************************/
152 :
153 323215000 : static CPL_INLINE bool _AddToToken(ParseContext *psContext, char chNewChar)
154 :
155 : {
156 323215000 : if (psContext->nTokenSize >= psContext->nTokenMaxSize - 2)
157 : {
158 1099300 : if (!ReallocToken(psContext))
159 0 : return false;
160 : }
161 :
162 323215000 : psContext->pszToken[psContext->nTokenSize++] = chNewChar;
163 323215000 : psContext->pszToken[psContext->nTokenSize] = '\0';
164 323215000 : return true;
165 : }
166 :
// TODO(schwehr): Remove the goto.
// Append chNewChar to the current token and jump to the enclosing
// function's `fail` label if the token buffer cannot be grown.
// The body is wrapped in do { } while (0) so that the expansion is exactly
// one statement: a bare `if` would capture a following `else` or break
// `if (x) AddToToken(...); else ...`.
#define AddToToken(psContext, chNewChar)                                       \
    do                                                                         \
    {                                                                          \
        if (!_AddToToken(psContext, chNewChar))                                \
            goto fail;                                                         \
    } while (0)
171 :
172 : /************************************************************************/
173 : /* ReadToken() */
174 : /************************************************************************/
175 :
176 47553000 : static XMLTokenType ReadToken(ParseContext *psContext, CPLErr &eLastErrorType)
177 :
178 : {
179 47553000 : psContext->nTokenSize = 0;
180 47553000 : psContext->pszToken[0] = '\0';
181 :
182 47553000 : char chNext = ReadChar(psContext);
183 96616600 : while (isspace(static_cast<unsigned char>(chNext)))
184 49063800 : chNext = ReadChar(psContext);
185 :
186 : /* -------------------------------------------------------------------- */
187 : /* Handle comments. */
188 : /* -------------------------------------------------------------------- */
189 47552900 : if (chNext == '<' &&
190 6855900 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset, "!--"))
191 : {
192 56425 : psContext->eTokenType = TComment;
193 :
194 : // Skip "!--" characters.
195 56425 : ReadChar(psContext);
196 56425 : ReadChar(psContext);
197 56425 : ReadChar(psContext);
198 :
199 3517880 : while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
200 7092200 : "-->") &&
201 3517890 : (chNext = ReadChar(psContext)) != '\0')
202 3517890 : AddToToken(psContext, chNext);
203 :
204 : // Skip "-->" characters.
205 56425 : ReadChar(psContext);
206 56425 : ReadChar(psContext);
207 56425 : ReadChar(psContext);
208 : }
209 : /* -------------------------------------------------------------------- */
210 : /* Handle DOCTYPE. */
211 : /* -------------------------------------------------------------------- */
212 47496400 : else if (chNext == '<' &&
213 6799480 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
214 : "!DOCTYPE"))
215 : {
216 18 : bool bInQuotes = false;
217 18 : psContext->eTokenType = TLiteral;
218 :
219 18 : AddToToken(psContext, '<');
220 : do
221 : {
222 1153 : chNext = ReadChar(psContext);
223 1153 : if (chNext == '\0')
224 : {
225 0 : eLastErrorType = CE_Failure;
226 0 : CPLError(eLastErrorType, CPLE_AppDefined,
227 : "Parse error in DOCTYPE on or before line %d, "
228 : "reached end of file without '>'.",
229 : psContext->nInputLine);
230 :
231 0 : break;
232 : }
233 :
234 : /* The markup declaration block within a DOCTYPE tag consists of:
235 : * - a left square bracket [
236 : * - a list of declarations
237 : * - a right square bracket ]
238 : * Example:
239 : * <!DOCTYPE RootElement [ ...declarations... ]>
240 : */
241 1153 : if (chNext == '[')
242 : {
243 1 : AddToToken(psContext, chNext);
244 :
245 98 : do
246 : {
247 99 : chNext = ReadChar(psContext);
248 99 : if (chNext == ']')
249 0 : break;
250 99 : AddToToken(psContext, chNext);
251 99 : } while (chNext != '\0' &&
252 99 : !STARTS_WITH_CI(psContext->pszInput +
253 : psContext->nInputOffset,
254 : "]>"));
255 :
256 1 : if (chNext == '\0')
257 : {
258 0 : eLastErrorType = CE_Failure;
259 0 : CPLError(eLastErrorType, CPLE_AppDefined,
260 : "Parse error in DOCTYPE on or before line %d, "
261 : "reached end of file without ']'.",
262 : psContext->nInputLine);
263 0 : break;
264 : }
265 :
266 1 : if (chNext != ']')
267 : {
268 1 : chNext = ReadChar(psContext);
269 1 : AddToToken(psContext, chNext);
270 :
271 : // Skip ">" character, will be consumed below.
272 1 : chNext = ReadChar(psContext);
273 : }
274 : }
275 :
276 1153 : if (chNext == '\"')
277 46 : bInQuotes = !bInQuotes;
278 :
279 1153 : if (chNext == '>' && !bInQuotes)
280 : {
281 18 : AddToToken(psContext, '>');
282 18 : break;
283 : }
284 :
285 1135 : AddToToken(psContext, chNext);
286 18 : } while (true);
287 : }
288 : /* -------------------------------------------------------------------- */
289 : /* Handle CDATA. */
290 : /* -------------------------------------------------------------------- */
291 47496400 : else if (chNext == '<' &&
292 6799460 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
293 : "![CDATA["))
294 : {
295 175 : psContext->eTokenType = TString;
296 :
297 : // Skip !CDATA[
298 175 : ReadChar(psContext);
299 175 : ReadChar(psContext);
300 175 : ReadChar(psContext);
301 175 : ReadChar(psContext);
302 175 : ReadChar(psContext);
303 175 : ReadChar(psContext);
304 175 : ReadChar(psContext);
305 175 : ReadChar(psContext);
306 :
307 91846 : while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
308 183868 : "]]>") &&
309 91847 : (chNext = ReadChar(psContext)) != '\0')
310 91846 : AddToToken(psContext, chNext);
311 :
312 : // Skip "]]>" characters.
313 175 : ReadChar(psContext);
314 175 : ReadChar(psContext);
315 175 : ReadChar(psContext);
316 : }
317 : /* -------------------------------------------------------------------- */
318 : /* Simple single tokens of interest. */
319 : /* -------------------------------------------------------------------- */
320 47496300 : else if (chNext == '<' && !psContext->bInElement)
321 : {
322 6799290 : psContext->eTokenType = TOpen;
323 6799290 : psContext->bInElement = true;
324 : }
325 40697000 : else if (chNext == '>' && psContext->bInElement)
326 : {
327 4786290 : psContext->eTokenType = TClose;
328 4786290 : psContext->bInElement = false;
329 : }
330 35910700 : else if (chNext == '=' && psContext->bInElement)
331 : {
332 8492440 : psContext->eTokenType = TEqual;
333 : }
334 27418200 : else if (chNext == '\0')
335 : {
336 259461 : psContext->eTokenType = TNone;
337 : }
338 : /* -------------------------------------------------------------------- */
339 : /* Handle the /> token terminator. */
340 : /* -------------------------------------------------------------------- */
341 27158800 : else if (chNext == '/' && psContext->bInElement &&
342 4392770 : psContext->pszInput[psContext->nInputOffset] == '>')
343 : {
344 2004650 : chNext = ReadChar(psContext);
345 : (void)chNext;
346 2004650 : CPLAssert(chNext == '>');
347 :
348 2004650 : psContext->eTokenType = TSlashClose;
349 2004650 : psContext->bInElement = false;
350 : }
351 : /* -------------------------------------------------------------------- */
352 : /* Handle the ?> token terminator. */
353 : /* -------------------------------------------------------------------- */
354 25154100 : else if (chNext == '?' && psContext->bInElement &&
355 16599 : psContext->pszInput[psContext->nInputOffset] == '>')
356 : {
357 8299 : chNext = ReadChar(psContext);
358 : (void)chNext;
359 8299 : CPLAssert(chNext == '>');
360 :
361 8299 : psContext->eTokenType = TQuestionClose;
362 8299 : psContext->bInElement = false;
363 : }
364 : /* -------------------------------------------------------------------- */
365 : /* Collect a quoted string. */
366 : /* -------------------------------------------------------------------- */
367 25145800 : else if (psContext->bInElement && chNext == '"')
368 : {
369 6396760 : psContext->eTokenType = TString;
370 :
371 60650300 : while ((chNext = ReadChar(psContext)) != '"' && chNext != '\0')
372 54253600 : AddToToken(psContext, chNext);
373 :
374 6396720 : if (chNext != '"')
375 : {
376 0 : psContext->eTokenType = TNone;
377 0 : eLastErrorType = CE_Failure;
378 0 : CPLError(
379 : eLastErrorType, CPLE_AppDefined,
380 : "Parse error on line %d, reached EOF before closing quote.",
381 : psContext->nInputLine);
382 : }
383 :
384 : // Do we need to unescape it?
385 6396760 : if (strchr(psContext->pszToken, '&') != nullptr)
386 : {
387 227 : int nLength = 0;
388 : char *pszUnescaped =
389 227 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
390 227 : strcpy(psContext->pszToken, pszUnescaped);
391 227 : CPLFree(pszUnescaped);
392 227 : psContext->nTokenSize = strlen(psContext->pszToken);
393 6396760 : }
394 : }
395 18749100 : else if (psContext->bInElement && chNext == '\'')
396 : {
397 2095690 : psContext->eTokenType = TString;
398 :
399 56504000 : while ((chNext = ReadChar(psContext)) != '\'' && chNext != '\0')
400 54408200 : AddToToken(psContext, chNext);
401 :
402 2095690 : if (chNext != '\'')
403 : {
404 1 : psContext->eTokenType = TNone;
405 1 : eLastErrorType = CE_Failure;
406 1 : CPLError(
407 : eLastErrorType, CPLE_AppDefined,
408 : "Parse error on line %d, reached EOF before closing quote.",
409 : psContext->nInputLine);
410 : }
411 :
412 : // Do we need to unescape it?
413 2095690 : if (strchr(psContext->pszToken, '&') != nullptr)
414 : {
415 1342 : int nLength = 0;
416 : char *pszUnescaped =
417 1342 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
418 1342 : strcpy(psContext->pszToken, pszUnescaped);
419 1342 : CPLFree(pszUnescaped);
420 1342 : psContext->nTokenSize = strlen(psContext->pszToken);
421 2095690 : }
422 : }
423 : /* -------------------------------------------------------------------- */
424 : /* Collect an unquoted string, terminated by a open angle */
425 : /* bracket. */
426 : /* -------------------------------------------------------------------- */
427 16653400 : else if (!psContext->bInElement)
428 : {
429 1361920 : psContext->eTokenType = TString;
430 :
431 1361920 : AddToToken(psContext, chNext);
432 103111000 : while ((chNext = ReadChar(psContext)) != '<' && chNext != '\0')
433 101749000 : AddToToken(psContext, chNext);
434 1361910 : UnreadChar(psContext, chNext);
435 :
436 : // Do we need to unescape it?
437 1361910 : if (strchr(psContext->pszToken, '&') != nullptr)
438 : {
439 21094 : int nLength = 0;
440 : char *pszUnescaped =
441 21094 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
442 21094 : strcpy(psContext->pszToken, pszUnescaped);
443 21094 : CPLFree(pszUnescaped);
444 21094 : psContext->nTokenSize = strlen(psContext->pszToken);
445 : }
446 : }
447 :
448 : /* -------------------------------------------------------------------- */
449 : /* Collect a regular token terminated by white space, or */
450 : /* special character(s) like an equal sign. */
451 : /* -------------------------------------------------------------------- */
452 : else
453 : {
454 15291500 : psContext->eTokenType = TToken;
455 :
456 : // Add the first character to the token regardless of what it is.
457 15291500 : AddToToken(psContext, chNext);
458 :
459 107833000 : for (chNext = ReadChar(psContext);
460 107833000 : (chNext >= 'A' && chNext <= 'Z') ||
461 103105000 : (chNext >= 'a' && chNext <= 'z') || chNext == '-' ||
462 125080000 : chNext == '_' || chNext == '.' || chNext == ':' ||
463 12457000 : (chNext >= '0' && chNext <= '9');
464 92541600 : chNext = ReadChar(psContext))
465 : {
466 92541500 : AddToToken(psContext, chNext);
467 : }
468 :
469 15291700 : UnreadChar(psContext, chNext);
470 : }
471 :
472 47553000 : return psContext->eTokenType;
473 :
474 0 : fail:
475 0 : psContext->eTokenType = TNone;
476 0 : return TNone;
477 : }
478 :
479 : /************************************************************************/
480 : /* PushNode() */
481 : /************************************************************************/
482 :
483 4411150 : static bool PushNode(ParseContext *psContext, CPLXMLNode *psNode,
484 : CPLErr &eLastErrorType)
485 :
486 : {
487 4411150 : if (psContext->nStackMaxSize <= psContext->nStackSize)
488 : {
489 : // Somewhat arbitrary number.
490 261281 : if (psContext->nStackMaxSize >= 10000)
491 : {
492 1 : eLastErrorType = CE_Failure;
493 1 : CPLError(CE_Failure, CPLE_NotSupported,
494 : "XML element depth beyond 10000. Giving up");
495 1 : VSIFree(psContext->papsStack);
496 1 : psContext->papsStack = nullptr;
497 1 : return false;
498 : }
499 261280 : psContext->nStackMaxSize += 10;
500 :
501 : StackContext *papsStack = static_cast<StackContext *>(
502 522560 : VSIRealloc(psContext->papsStack,
503 261280 : sizeof(StackContext) * psContext->nStackMaxSize));
504 261280 : if (papsStack == nullptr)
505 : {
506 1 : eLastErrorType = CE_Failure;
507 1 : CPLError(CE_Failure, CPLE_OutOfMemory,
508 : "Out of memory allocating %d bytes",
509 : static_cast<int>(sizeof(StackContext)) *
510 1 : psContext->nStackMaxSize);
511 0 : VSIFree(psContext->papsStack);
512 0 : psContext->papsStack = nullptr;
513 0 : return false;
514 : }
515 261279 : psContext->papsStack = papsStack;
516 : }
517 : #ifdef DEBUG
518 : // To make Coverity happy, but cannot happen.
519 4411150 : if (psContext->papsStack == nullptr)
520 0 : return false;
521 : #endif
522 :
523 4411150 : psContext->papsStack[psContext->nStackSize].psFirstNode = psNode;
524 4411150 : psContext->papsStack[psContext->nStackSize].psLastChild = nullptr;
525 4411150 : psContext->nStackSize++;
526 :
527 4411150 : return true;
528 : }
529 :
530 : /************************************************************************/
531 : /* AttachNode() */
532 : /* */
533 : /* Attach the passed node as a child of the current node. */
534 : /* Special handling exists for adding siblings to psFirst if */
535 : /* there is nothing on the stack. */
536 : /************************************************************************/
537 :
538 14322100 : static void AttachNode(ParseContext *psContext, CPLXMLNode *psNode)
539 :
540 : {
541 14322100 : if (psContext->psFirstNode == nullptr)
542 : {
543 259461 : psContext->psFirstNode = psNode;
544 259461 : psContext->psLastNode = psNode;
545 : }
546 14062700 : else if (psContext->nStackSize == 0)
547 : {
548 10228 : psContext->psLastNode->psNext = psNode;
549 10228 : psContext->psLastNode = psNode;
550 : }
551 : else
552 : {
553 14052500 : if (psContext->papsStack[psContext->nStackSize - 1]
554 14052500 : .psFirstNode->psChild == nullptr)
555 : {
556 4388260 : psContext->papsStack[psContext->nStackSize - 1]
557 4388260 : .psFirstNode->psChild = psNode;
558 : }
559 : else
560 : {
561 9664200 : psContext->papsStack[psContext->nStackSize - 1]
562 9664200 : .psLastChild->psNext = psNode;
563 : }
564 14052500 : psContext->papsStack[psContext->nStackSize - 1].psLastChild = psNode;
565 : }
566 14322100 : }
567 :
568 : /************************************************************************/
569 : /* CPLParseXMLString() */
570 : /************************************************************************/
571 :
572 : /**
573 : * \brief Parse an XML string into tree form.
574 : *
575 : * The passed document is parsed into a CPLXMLNode tree representation.
576 : * If the document is not well formed XML then NULL is returned, and errors
577 : * are reported via CPLError(). No validation beyond wellformedness is
578 : * done. The CPLParseXMLFile() convenience function can be used to parse
579 : * from a file.
580 : *
581 : * The returned document tree is owned by the caller and should be freed
582 : * with CPLDestroyXMLNode() when no longer needed.
583 : *
584 : * If the document has more than one "root level" element then those after the
585 : * first will be attached to the first as siblings (via the psNext pointers)
586 : * even though there is no common parent. A document with no XML structure
587 : * (no angle brackets for instance) would be considered well formed, and
588 : * returned as a single CXT_Text node.
589 : *
590 : * @param pszString the document to parse.
591 : *
592 : * @return parsed tree or NULL on error.
593 : */
594 :
595 259487 : CPLXMLNode *CPLParseXMLString(const char *pszString)
596 :
597 : {
598 259487 : if (pszString == nullptr)
599 : {
600 0 : CPLError(CE_Failure, CPLE_AppDefined,
601 : "CPLParseXMLString() called with NULL pointer.");
602 0 : return nullptr;
603 : }
604 :
605 : // Save back error context.
606 259487 : const CPLErr eErrClass = CPLGetLastErrorType();
607 259486 : const CPLErrorNum nErrNum = CPLGetLastErrorNo();
608 518972 : const CPLString osErrMsg = CPLGetLastErrorMsg();
609 :
610 : // Reset it now.
611 259486 : CPLErrorSetState(CE_None, CPLE_AppDefined, "");
612 :
613 : /* -------------------------------------------------------------------- */
614 : /* Check for a UTF-8 BOM and skip if found */
615 : /* */
616 : /* TODO: BOM is variable-length parameter and depends on encoding. */
617 : /* Add BOM detection for other encodings. */
618 : /* -------------------------------------------------------------------- */
619 :
620 : // Used to skip to actual beginning of XML data.
621 259485 : if ((static_cast<unsigned char>(pszString[0]) == 0xEF) &&
622 4 : (static_cast<unsigned char>(pszString[1]) == 0xBB) &&
623 4 : (static_cast<unsigned char>(pszString[2]) == 0xBF))
624 : {
625 4 : pszString += 3;
626 : }
627 :
628 : /* -------------------------------------------------------------------- */
629 : /* Initialize parse context. */
630 : /* -------------------------------------------------------------------- */
631 : ParseContext sContext;
632 259485 : sContext.pszInput = pszString;
633 259485 : sContext.nInputOffset = 0;
634 259485 : sContext.nInputLine = 0;
635 259485 : sContext.bInElement = false;
636 259485 : sContext.nTokenMaxSize = 10;
637 259485 : sContext.pszToken = static_cast<char *>(VSIMalloc(sContext.nTokenMaxSize));
638 259486 : if (sContext.pszToken == nullptr)
639 0 : return nullptr;
640 259486 : sContext.nTokenSize = 0;
641 259486 : sContext.eTokenType = TNone;
642 259486 : sContext.nStackMaxSize = 0;
643 259486 : sContext.nStackSize = 0;
644 259486 : sContext.papsStack = nullptr;
645 259486 : sContext.psFirstNode = nullptr;
646 259486 : sContext.psLastNode = nullptr;
647 :
648 : #ifdef DEBUG
649 259486 : bool bRecoverableError = true;
650 : #endif
651 259486 : CPLErr eLastErrorType = CE_None;
652 :
653 : /* ==================================================================== */
654 : /* Loop reading tokens. */
655 : /* ==================================================================== */
656 21380800 : while (ReadToken(&sContext, eLastErrorType) != TNone)
657 : {
658 21121400 : loop_beginning:
659 : /* --------------------------------------------------------------------
660 : */
661 : /* Create a new element. */
662 : /* --------------------------------------------------------------------
663 : */
664 21121400 : if (sContext.eTokenType == TOpen)
665 : {
666 6799270 : if (ReadToken(&sContext, eLastErrorType) != TToken)
667 : {
668 2 : eLastErrorType = CE_Failure;
669 2 : CPLError(eLastErrorType, CPLE_AppDefined,
670 : "Line %d: Didn't find element token after "
671 : "open angle bracket.",
672 : sContext.nInputLine);
673 2 : break;
674 : }
675 :
676 6799270 : CPLXMLNode *psElement = nullptr;
677 6799270 : if (sContext.pszToken[0] != '/')
678 : {
679 : psElement =
680 4411150 : _CPLCreateXMLNode(nullptr, CXT_Element, sContext.pszToken);
681 4411160 : if (!psElement)
682 0 : break;
683 4411160 : AttachNode(&sContext, psElement);
684 4411160 : if (!PushNode(&sContext, psElement, eLastErrorType))
685 1 : break;
686 : }
687 : else
688 : {
689 2388120 : if (sContext.nStackSize == 0 ||
690 2388120 : !EQUAL(sContext.pszToken + 1,
691 : sContext.papsStack[sContext.nStackSize - 1]
692 : .psFirstNode->pszValue))
693 : {
694 : #ifdef DEBUG
695 : // Makes life of fuzzers easier if we accept somewhat
696 : // corrupted XML like <foo> ... </not_foo>.
697 14 : if (CPLTestBool(
698 : CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
699 : {
700 0 : eLastErrorType = CE_Warning;
701 0 : CPLError(
702 : eLastErrorType, CPLE_AppDefined,
703 : "Line %d: <%.500s> doesn't have matching <%.500s>.",
704 : sContext.nInputLine, sContext.pszToken,
705 0 : sContext.pszToken + 1);
706 0 : if (sContext.nStackSize == 0)
707 0 : break;
708 0 : goto end_processing_close;
709 : }
710 : else
711 : #endif
712 : {
713 16 : eLastErrorType = CE_Failure;
714 16 : CPLError(
715 : eLastErrorType, CPLE_AppDefined,
716 : "Line %d: <%.500s> doesn't have matching <%.500s>.",
717 : sContext.nInputLine, sContext.pszToken,
718 16 : sContext.pszToken + 1);
719 16 : break;
720 : }
721 : }
722 : else
723 : {
724 2388110 : if (strcmp(sContext.pszToken + 1,
725 2388110 : sContext.papsStack[sContext.nStackSize - 1]
726 2388110 : .psFirstNode->pszValue) != 0)
727 : {
728 : // TODO: At some point we could just error out like any
729 : // other sane XML parser would do.
730 1 : eLastErrorType = CE_Warning;
731 1 : CPLError(
732 : eLastErrorType, CPLE_AppDefined,
733 : "Line %d: <%.500s> matches <%.500s>, but the case "
734 : "isn't the same. Going on, but this is invalid "
735 : "XML that might be rejected in future versions.",
736 : sContext.nInputLine,
737 1 : sContext.papsStack[sContext.nStackSize - 1]
738 1 : .psFirstNode->pszValue,
739 : sContext.pszToken);
740 : }
741 : #ifdef DEBUG
742 2388100 : end_processing_close:
743 : #endif
744 2388110 : if (ReadToken(&sContext, eLastErrorType) != TClose)
745 : {
746 3 : eLastErrorType = CE_Failure;
747 3 : CPLError(eLastErrorType, CPLE_AppDefined,
748 : "Line %d: Missing close angle bracket "
749 : "after <%.500s.",
750 : sContext.nInputLine, sContext.pszToken);
751 3 : break;
752 : }
753 :
754 : // Pop element off stack
755 2388100 : sContext.nStackSize--;
756 : }
757 : }
758 : }
759 :
760 : /* --------------------------------------------------------------------
761 : */
762 : /* Add an attribute to a token. */
763 : /* --------------------------------------------------------------------
764 : */
765 14322100 : else if (sContext.eTokenType == TToken)
766 : {
767 : CPLXMLNode *psAttr =
768 8492450 : _CPLCreateXMLNode(nullptr, CXT_Attribute, sContext.pszToken);
769 8492460 : if (!psAttr)
770 0 : break;
771 8492460 : AttachNode(&sContext, psAttr);
772 :
773 8492450 : XMLTokenType nextToken = ReadToken(&sContext, eLastErrorType);
774 8492440 : if (nextToken != TEqual)
775 : {
776 : // Parse stuff like <?valbuddy_schematron
777 : // ../wmtsSimpleGetCapabilities.sch?>
778 5 : if (sContext.nStackSize > 0 &&
779 5 : sContext.papsStack[sContext.nStackSize - 1]
780 5 : .psFirstNode->pszValue[0] == '?')
781 : {
782 3 : psAttr->eType = CXT_Text;
783 3 : if (nextToken == TNone)
784 0 : break;
785 3 : goto loop_beginning;
786 : }
787 :
788 2 : eLastErrorType = CE_Failure;
789 2 : CPLError(eLastErrorType, CPLE_AppDefined,
790 : "Line %d: Didn't find expected '=' for value of "
791 : "attribute '%.500s'.",
792 : sContext.nInputLine, psAttr->pszValue);
793 : #ifdef DEBUG
794 : // Accepting an attribute without child text
795 : // would break too much assumptions in driver code
796 2 : bRecoverableError = false;
797 : #endif
798 2 : break;
799 : }
800 :
801 8492440 : if (ReadToken(&sContext, eLastErrorType) == TToken)
802 : {
803 : /* TODO: at some point we could just error out like any other */
804 : /* sane XML parser would do */
805 2 : eLastErrorType = CE_Warning;
806 2 : CPLError(eLastErrorType, CPLE_AppDefined,
807 : "Line %d: Attribute value should be single or double "
808 : "quoted. Going on, but this is invalid XML that "
809 : "might be rejected in future versions.",
810 : sContext.nInputLine);
811 : }
812 8492440 : else if (sContext.eTokenType != TString)
813 : {
814 1 : eLastErrorType = CE_Failure;
815 1 : CPLError(eLastErrorType, CPLE_AppDefined,
816 : "Line %d: Didn't find expected attribute value.",
817 : sContext.nInputLine);
818 : #ifdef DEBUG
819 : // Accepting an attribute without child text
820 : // would break too much assumptions in driver code
821 1 : bRecoverableError = false;
822 : #endif
823 1 : break;
824 : }
825 :
826 8492440 : if (!_CPLCreateXMLNode(psAttr, CXT_Text, sContext.pszToken))
827 0 : break;
828 : }
829 :
830 : /* --------------------------------------------------------------------
831 : */
832 : /* Close the start section of an element. */
833 : /* --------------------------------------------------------------------
834 : */
835 5829650 : else if (sContext.eTokenType == TClose)
836 : {
837 2398180 : if (sContext.nStackSize == 0)
838 : {
839 0 : eLastErrorType = CE_Failure;
840 0 : CPLError(eLastErrorType, CPLE_AppDefined,
841 : "Line %d: Found unbalanced '>'.", sContext.nInputLine);
842 0 : break;
843 : }
844 : }
845 :
846 : /* --------------------------------------------------------------------
847 : */
848 : /* Close the start section of an element, and pop it */
849 : /* immediately. */
850 : /* --------------------------------------------------------------------
851 : */
852 3431470 : else if (sContext.eTokenType == TSlashClose)
853 : {
854 2004640 : if (sContext.nStackSize == 0)
855 : {
856 0 : eLastErrorType = CE_Failure;
857 0 : CPLError(eLastErrorType, CPLE_AppDefined,
858 : "Line %d: Found unbalanced '/>'.",
859 : sContext.nInputLine);
860 0 : break;
861 : }
862 :
863 2004640 : sContext.nStackSize--;
864 : }
865 : /* --------------------------------------------------------------------
866 : */
867 : /* Close the start section of a <?...?> element, and pop it */
868 : /* immediately. */
869 : /* --------------------------------------------------------------------
870 : */
871 1426830 : else if (sContext.eTokenType == TQuestionClose)
872 : {
873 8299 : if (sContext.nStackSize == 0)
874 : {
875 0 : eLastErrorType = CE_Failure;
876 0 : CPLError(eLastErrorType, CPLE_AppDefined,
877 : "Line %d: Found unbalanced '?>'.",
878 : sContext.nInputLine);
879 0 : break;
880 : }
881 8299 : else if (sContext.papsStack[sContext.nStackSize - 1]
882 8299 : .psFirstNode->pszValue[0] != '?')
883 : {
884 1 : eLastErrorType = CE_Failure;
885 1 : CPLError(eLastErrorType, CPLE_AppDefined,
886 : "Line %d: Found '?>' without matching '<?'.",
887 : sContext.nInputLine);
888 1 : break;
889 : }
890 :
891 8298 : sContext.nStackSize--;
892 : }
893 : /* --------------------------------------------------------------------
894 : */
895 : /* Handle comments. They are returned as a whole token with the */
896 : /* prefix and postfix omitted. No processing of white space */
897 : /* will be done. */
898 : /* --------------------------------------------------------------------
899 : */
900 1418530 : else if (sContext.eTokenType == TComment)
901 : {
902 : CPLXMLNode *psValue =
903 56425 : _CPLCreateXMLNode(nullptr, CXT_Comment, sContext.pszToken);
904 56425 : if (!psValue)
905 0 : break;
906 56425 : AttachNode(&sContext, psValue);
907 : }
908 : /* --------------------------------------------------------------------
909 : */
910 : /* Handle literals. They are returned without processing. */
911 : /* --------------------------------------------------------------------
912 : */
913 1362100 : else if (sContext.eTokenType == TLiteral)
914 : {
915 : CPLXMLNode *psValue =
916 18 : _CPLCreateXMLNode(nullptr, CXT_Literal, sContext.pszToken);
917 18 : if (!psValue)
918 0 : break;
919 18 : AttachNode(&sContext, psValue);
920 : }
921 : /* --------------------------------------------------------------------
922 : */
923 : /* Add a text value node as a child of the current element. */
924 : /* --------------------------------------------------------------------
925 : */
926 1362080 : else if (sContext.eTokenType == TString && !sContext.bInElement)
927 : {
928 : CPLXMLNode *psValue =
929 1362090 : _CPLCreateXMLNode(nullptr, CXT_Text, sContext.pszToken);
930 1362090 : if (!psValue)
931 0 : break;
932 1362090 : AttachNode(&sContext, psValue);
933 : }
934 : /* --------------------------------------------------------------------
935 : */
936 : /* Anything else is an error. */
937 : /* --------------------------------------------------------------------
938 : */
939 : else
940 : {
941 0 : eLastErrorType = CE_Failure;
942 0 : CPLError(eLastErrorType, CPLE_AppDefined,
943 : "Parse error at line %d, unexpected token:%.500s",
944 : sContext.nInputLine, sContext.pszToken);
945 1 : break;
946 : }
947 : }
948 :
949 : /* -------------------------------------------------------------------- */
950 : /* Did we pop all the way out of our stack? */
951 : /* -------------------------------------------------------------------- */
952 259547 : if (CPLGetLastErrorType() != CE_Failure && sContext.nStackSize > 0 &&
953 50 : sContext.papsStack != nullptr)
954 : {
955 : #ifdef DEBUG
956 : // Makes life of fuzzers easier if we accept somewhat corrupted XML
957 : // like <x> ...
958 100 : if (bRecoverableError &&
959 50 : CPLTestBool(CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
960 : {
961 0 : eLastErrorType = CE_Warning;
962 : }
963 : else
964 : #endif
965 : {
966 50 : eLastErrorType = CE_Failure;
967 : }
968 50 : CPLError(
969 : eLastErrorType, CPLE_AppDefined,
970 : "Parse error at EOF, not all elements have been closed, "
971 : "starting with %.500s",
972 50 : sContext.papsStack[sContext.nStackSize - 1].psFirstNode->pszValue);
973 : }
974 :
975 : /* -------------------------------------------------------------------- */
976 : /* Cleanup */
977 : /* -------------------------------------------------------------------- */
978 259487 : CPLFree(sContext.pszToken);
979 259487 : if (sContext.papsStack != nullptr)
980 259452 : CPLFree(sContext.papsStack);
981 :
982 : // We do not trust CPLGetLastErrorType() as if CPLTurnFailureIntoWarning()
983 : // has been set we would never get failures
984 259487 : if (eLastErrorType == CE_Failure)
985 : {
986 78 : CPLDestroyXMLNode(sContext.psFirstNode);
987 78 : sContext.psFirstNode = nullptr;
988 78 : sContext.psLastNode = nullptr;
989 : }
990 :
991 259487 : if (eLastErrorType == CE_None)
992 : {
993 : // Restore initial error state.
994 259407 : CPLErrorSetState(eErrClass, nErrNum, osErrMsg);
995 : }
996 :
997 259487 : return sContext.psFirstNode;
998 : }
999 :
1000 : /************************************************************************/
1001 : /* _GrowBuffer() */
1002 : /************************************************************************/
1003 :
1004 2902920 : static bool _GrowBuffer(size_t nNeeded, char **ppszText, size_t *pnMaxLength)
1005 :
1006 : {
1007 2902920 : if (nNeeded + 1 >= *pnMaxLength)
1008 : {
1009 29708 : *pnMaxLength = std::max(*pnMaxLength * 2, nNeeded + 1);
1010 : char *pszTextNew =
1011 29708 : static_cast<char *>(VSIRealloc(*ppszText, *pnMaxLength));
1012 29708 : if (pszTextNew == nullptr)
1013 0 : return false;
1014 29708 : *ppszText = pszTextNew;
1015 : }
1016 2902920 : return true;
1017 : }
1018 :
1019 : /************************************************************************/
1020 : /* CPLSerializeXMLNode() */
1021 : /************************************************************************/
1022 :
1023 : // TODO(schwehr): Rewrite this whole thing using C++ string.
1024 : // CPLSerializeXMLNode has buffer overflows.
1025 1143950 : static bool CPLSerializeXMLNode(const CPLXMLNode *psNode, int nIndent,
1026 : char **ppszText, size_t *pnLength,
1027 : size_t *pnMaxLength)
1028 :
1029 : {
1030 1143950 : if (psNode == nullptr)
1031 0 : return true;
1032 :
1033 : /* -------------------------------------------------------------------- */
1034 : /* Ensure the buffer is plenty large to hold this additional */
1035 : /* string. */
1036 : /* -------------------------------------------------------------------- */
1037 1143950 : *pnLength += strlen(*ppszText + *pnLength);
1038 1143950 : if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
1039 : ppszText, pnMaxLength))
1040 0 : return false;
1041 :
1042 : /* -------------------------------------------------------------------- */
1043 : /* Text is just directly emitted. */
1044 : /* -------------------------------------------------------------------- */
1045 1143950 : if (psNode->eType == CXT_Text)
1046 : {
1047 : char *pszEscaped =
1048 197486 : CPLEscapeString(psNode->pszValue, -1, CPLES_XML_BUT_QUOTES);
1049 :
1050 197486 : CPLAssert(psNode->psChild == nullptr);
1051 :
1052 : // Escaped text might be bigger than expected.
1053 197486 : if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1054 : {
1055 0 : CPLFree(pszEscaped);
1056 0 : return false;
1057 : }
1058 197486 : strcat(*ppszText + *pnLength, pszEscaped);
1059 :
1060 197486 : CPLFree(pszEscaped);
1061 : }
1062 :
1063 : /* -------------------------------------------------------------------- */
1064 : /* Attributes require a little formatting. */
1065 : /* -------------------------------------------------------------------- */
1066 946460 : else if (psNode->eType == CXT_Attribute)
1067 : {
1068 487937 : CPLAssert(psNode->psChild != nullptr &&
1069 : psNode->psChild->eType == CXT_Text);
1070 :
1071 487937 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, " %s=\"",
1072 487937 : psNode->pszValue);
1073 487937 : *pnLength += strlen(*ppszText + *pnLength);
1074 :
1075 : char *pszEscaped =
1076 487937 : CPLEscapeString(psNode->psChild->pszValue, -1, CPLES_XML);
1077 :
1078 487937 : if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1079 : {
1080 0 : CPLFree(pszEscaped);
1081 0 : return false;
1082 : }
1083 487937 : strcat(*ppszText + *pnLength, pszEscaped);
1084 :
1085 487937 : CPLFree(pszEscaped);
1086 :
1087 487937 : *pnLength += strlen(*ppszText + *pnLength);
1088 487937 : if (!_GrowBuffer(3 + *pnLength, ppszText, pnMaxLength))
1089 0 : return false;
1090 487937 : strcat(*ppszText + *pnLength, "\"");
1091 : }
1092 :
1093 : /* -------------------------------------------------------------------- */
1094 : /* Handle comment output. */
1095 : /* -------------------------------------------------------------------- */
1096 458523 : else if (psNode->eType == CXT_Comment)
1097 : {
1098 4438 : CPLAssert(psNode->psChild == nullptr);
1099 :
1100 28908 : for (int i = 0; i < nIndent; i++)
1101 24470 : (*ppszText)[(*pnLength)++] = ' ';
1102 :
1103 4438 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<!--%s-->\n",
1104 4438 : psNode->pszValue);
1105 : }
1106 :
1107 : /* -------------------------------------------------------------------- */
1108 : /* Handle literal output (like <!DOCTYPE...>) */
1109 : /* -------------------------------------------------------------------- */
1110 454085 : else if (psNode->eType == CXT_Literal)
1111 : {
1112 4 : CPLAssert(psNode->psChild == nullptr);
1113 :
1114 28 : for (int i = 0; i < nIndent; i++)
1115 24 : (*ppszText)[(*pnLength)++] = ' ';
1116 :
1117 4 : strcpy(*ppszText + *pnLength, psNode->pszValue);
1118 4 : strcat(*ppszText + *pnLength, "\n");
1119 : }
1120 :
1121 : /* -------------------------------------------------------------------- */
1122 : /* Elements actually have to deal with general children, and */
1123 : /* various formatting issues. */
1124 : /* -------------------------------------------------------------------- */
1125 454081 : else if (psNode->eType == CXT_Element)
1126 : {
1127 454081 : if (nIndent)
1128 443239 : memset(*ppszText + *pnLength, ' ', nIndent);
1129 454081 : *pnLength += nIndent;
1130 454081 : (*ppszText)[*pnLength] = '\0';
1131 :
1132 454081 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<%s",
1133 454081 : psNode->pszValue);
1134 :
1135 454081 : if (psNode->pszValue[0] == '?')
1136 : {
1137 3231 : for (const CPLXMLNode *psChild = psNode->psChild;
1138 9703 : psChild != nullptr; psChild = psChild->psNext)
1139 : {
1140 6472 : if (psChild->eType == CXT_Text)
1141 : {
1142 3 : *pnLength += strlen(*ppszText + *pnLength);
1143 3 : if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1144 0 : return false;
1145 3 : strcat(*ppszText + *pnLength, " ");
1146 : }
1147 :
1148 6472 : if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1149 : pnMaxLength))
1150 : {
1151 0 : return false;
1152 : }
1153 : }
1154 3231 : if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1155 0 : return false;
1156 :
1157 3231 : strcat(*ppszText + *pnLength, "?>\n");
1158 : }
1159 : else
1160 : {
1161 450850 : bool bHasNonAttributeChildren = false;
1162 : // Serialize *all* the attribute children, regardless of order
1163 450850 : for (const CPLXMLNode *psChild = psNode->psChild;
1164 1576980 : psChild != nullptr; psChild = psChild->psNext)
1165 : {
1166 1126130 : if (psChild->eType == CXT_Attribute)
1167 : {
1168 481468 : if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1169 : pnMaxLength))
1170 0 : return false;
1171 : }
1172 : else
1173 644664 : bHasNonAttributeChildren = true;
1174 : }
1175 :
1176 450850 : if (!bHasNonAttributeChildren)
1177 : {
1178 121869 : if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1179 0 : return false;
1180 :
1181 121869 : strcat(*ppszText + *pnLength, " />\n");
1182 : }
1183 : else
1184 : {
1185 328981 : bool bJustText = true;
1186 :
1187 328981 : strcat(*ppszText + *pnLength, ">");
1188 :
1189 328981 : for (const CPLXMLNode *psChild = psNode->psChild;
1190 1126620 : psChild != nullptr; psChild = psChild->psNext)
1191 : {
1192 797641 : if (psChild->eType == CXT_Attribute)
1193 152977 : continue;
1194 :
1195 644664 : if (psChild->eType != CXT_Text && bJustText)
1196 : {
1197 131531 : bJustText = false;
1198 131531 : *pnLength += strlen(*ppszText + *pnLength);
1199 131531 : if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1200 0 : return false;
1201 131531 : strcat(*ppszText + *pnLength, "\n");
1202 : }
1203 :
1204 644664 : if (!CPLSerializeXMLNode(psChild, nIndent + 2, ppszText,
1205 : pnLength, pnMaxLength))
1206 0 : return false;
1207 : }
1208 :
1209 328981 : *pnLength += strlen(*ppszText + *pnLength);
1210 328981 : if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 +
1211 328981 : nIndent,
1212 : ppszText, pnMaxLength))
1213 0 : return false;
1214 :
1215 328981 : if (!bJustText)
1216 : {
1217 131531 : if (nIndent)
1218 124034 : memset(*ppszText + *pnLength, ' ', nIndent);
1219 131531 : *pnLength += nIndent;
1220 131531 : (*ppszText)[*pnLength] = '\0';
1221 : }
1222 :
1223 328981 : *pnLength += strlen(*ppszText + *pnLength);
1224 328981 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength,
1225 328981 : "</%s>\n", psNode->pszValue);
1226 : }
1227 : }
1228 : }
1229 :
1230 1143950 : return true;
1231 : }
1232 :
1233 : /************************************************************************/
1234 : /* CPLSerializeXMLTree() */
1235 : /************************************************************************/
1236 :
1237 : /**
1238 : * \brief Convert tree into string document.
1239 : *
1240 : * This function converts a CPLXMLNode tree representation of a document
1241 : * into a flat string representation. White space indentation is used
1242 : * visually preserve the tree structure of the document. The returned
1243 : * document becomes owned by the caller and should be freed with CPLFree()
1244 : * when no longer needed.
1245 : *
1246 : * @param psNode the node to serialize.
1247 : *
1248 : * @return the document on success or NULL on failure.
1249 : */
1250 :
1251 7623 : char *CPLSerializeXMLTree(const CPLXMLNode *psNode)
1252 :
1253 : {
1254 7623 : size_t nMaxLength = 100;
1255 7623 : char *pszText = static_cast<char *>(CPLCalloc(nMaxLength, sizeof(char)));
1256 7623 : if (pszText == nullptr)
1257 0 : return nullptr;
1258 :
1259 7623 : size_t nLength = 0;
1260 18965 : for (const CPLXMLNode *psThis = psNode; psThis != nullptr;
1261 11342 : psThis = psThis->psNext)
1262 : {
1263 11342 : if (!CPLSerializeXMLNode(psThis, 0, &pszText, &nLength, &nMaxLength))
1264 : {
1265 0 : VSIFree(pszText);
1266 0 : return nullptr;
1267 : }
1268 : }
1269 :
1270 7623 : return pszText;
1271 : }
1272 :
1273 : /************************************************************************/
1274 : /* CPLCreateXMLNode() */
1275 : /************************************************************************/
1276 :
1277 : #ifdef DEBUG
1278 : static CPLXMLNode *psDummyStaticNode;
1279 : #endif
1280 :
1281 : /**
1282 : * \brief Create an document tree item.
1283 : *
1284 : * Create a single CPLXMLNode object with the desired value and type, and
1285 : * attach it as a child of the indicated parent.
1286 : *
1287 : * @param poParent the parent to which this node should be attached as a
1288 : * child. May be NULL to keep as free standing.
1289 : * @param eType the type of the newly created node
1290 : * @param pszText the value of the newly created node
1291 : *
1292 : * @return the newly created node, now owned by the caller (or parent node).
1293 : */
1294 :
1295 971998 : CPLXMLNode *CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
1296 : const char *pszText)
1297 :
1298 : {
1299 971998 : auto ret = _CPLCreateXMLNode(poParent, eType, pszText);
1300 971998 : if (!ret)
1301 : {
1302 0 : CPLError(CE_Fatal, CPLE_OutOfMemory, "CPLCreateXMLNode() failed");
1303 : }
1304 971998 : return ret;
1305 : }
1306 :
1307 : /************************************************************************/
1308 : /* _CPLCreateXMLNode() */
1309 : /************************************************************************/
1310 :
1311 : /* Same as CPLCreateXMLNode() but can return NULL in case of out-of-memory */
1312 : /* situation */
1313 :
1314 23786600 : static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
1315 : const char *pszText)
1316 :
1317 : {
1318 :
1319 : /* -------------------------------------------------------------------- */
1320 : /* Create new node. */
1321 : /* -------------------------------------------------------------------- */
1322 : CPLXMLNode *psNode =
1323 23786600 : static_cast<CPLXMLNode *>(VSICalloc(sizeof(CPLXMLNode), 1));
1324 23786600 : if (psNode == nullptr)
1325 : {
1326 4 : CPLError(CE_Failure, CPLE_OutOfMemory, "Cannot allocate CPLXMLNode");
1327 0 : return nullptr;
1328 : }
1329 :
1330 23786600 : psNode->eType = eType;
1331 23786600 : psNode->pszValue = VSIStrdup(pszText ? pszText : "");
1332 23786600 : if (psNode->pszValue == nullptr)
1333 : {
1334 7 : CPLError(CE_Failure, CPLE_OutOfMemory,
1335 : "Cannot allocate psNode->pszValue");
1336 0 : VSIFree(psNode);
1337 0 : return nullptr;
1338 : }
1339 :
1340 : /* -------------------------------------------------------------------- */
1341 : /* Attach to parent, if provided. */
1342 : /* -------------------------------------------------------------------- */
1343 23786600 : if (poParent != nullptr)
1344 : {
1345 9288960 : if (poParent->psChild == nullptr)
1346 8965320 : poParent->psChild = psNode;
1347 : else
1348 : {
1349 323638 : CPLXMLNode *psLink = poParent->psChild;
1350 323638 : if (psLink->psNext == nullptr && eType == CXT_Attribute &&
1351 60833 : psLink->eType == CXT_Text)
1352 : {
1353 14658 : psNode->psNext = psLink;
1354 14658 : poParent->psChild = psNode;
1355 : }
1356 : else
1357 : {
1358 1262250 : while (psLink->psNext != nullptr)
1359 : {
1360 957493 : if (eType == CXT_Attribute &&
1361 275394 : psLink->psNext->eType == CXT_Text)
1362 : {
1363 4224 : psNode->psNext = psLink->psNext;
1364 4224 : break;
1365 : }
1366 :
1367 953269 : psLink = psLink->psNext;
1368 : }
1369 :
1370 308980 : psLink->psNext = psNode;
1371 : }
1372 : }
1373 : }
1374 : #ifdef DEBUG
1375 : else
1376 : {
1377 : // Coverity sometimes doesn't realize that this function is passed
1378 : // with a non NULL parent and thinks that this branch is taken, leading
1379 : // to creating object being leak by caller. This ugly hack hopefully
1380 : // makes it believe that someone will reference it.
1381 14497600 : psDummyStaticNode = psNode;
1382 : }
1383 : #endif
1384 :
1385 23786600 : return psNode;
1386 : }
1387 :
1388 : /************************************************************************/
1389 : /* CPLDestroyXMLNode() */
1390 : /************************************************************************/
1391 :
1392 : /**
1393 : * \brief Destroy a tree.
1394 : *
1395 : * This function frees resources associated with a CPLXMLNode and all its
1396 : * children nodes.
1397 : *
1398 : * @param psNode the tree to free.
1399 : */
1400 :
1401 24103200 : void CPLDestroyXMLNode(CPLXMLNode *psNode)
1402 :
1403 : {
1404 24103200 : while (psNode != nullptr)
1405 : {
1406 23825700 : if (psNode->pszValue != nullptr)
1407 23825700 : CPLFree(psNode->pszValue);
1408 :
1409 23825700 : if (psNode->psChild != nullptr)
1410 : {
1411 13442100 : CPLXMLNode *psNext = psNode->psNext;
1412 13442100 : psNode->psNext = psNode->psChild;
1413 : // Move the child and its siblings as the next
1414 : // siblings of the current node.
1415 13442100 : if (psNext != nullptr)
1416 : {
1417 12806500 : CPLXMLNode *psIter = psNode->psChild;
1418 21252000 : while (psIter->psNext != nullptr)
1419 8445520 : psIter = psIter->psNext;
1420 12806500 : psIter->psNext = psNext;
1421 : }
1422 : }
1423 :
1424 23825700 : CPLXMLNode *psNext = psNode->psNext;
1425 :
1426 23825700 : CPLFree(psNode);
1427 :
1428 23825700 : psNode = psNext;
1429 : }
1430 277524 : }
1431 :
1432 : /************************************************************************/
1433 : /* CPLSearchXMLNode() */
1434 : /************************************************************************/
1435 :
1436 : /**
1437 : * \brief Search for a node in document.
1438 : *
1439 : * Searches the children (and potentially siblings) of the documented
1440 : * passed in for the named element or attribute. To search following
1441 : * siblings as well as children, prefix the pszElement name with an equal
1442 : * sign. This function does an in-order traversal of the document tree.
1443 : * So it will first match against the current node, then its first child,
1444 : * that child's first child, and so on.
1445 : *
1446 : * Use CPLGetXMLNode() to find a specific child, or along a specific
1447 : * node path.
1448 : *
1449 : * @param psRoot the subtree to search. This should be a node of type
1450 : * CXT_Element. NULL is safe.
1451 : *
1452 : * @param pszElement the name of the element or attribute to search for.
1453 : *
1454 : * @return The matching node or NULL on failure.
1455 : */
1456 :
1457 94865 : CPLXMLNode *CPLSearchXMLNode(CPLXMLNode *psRoot, const char *pszElement)
1458 :
1459 : {
1460 94865 : if (psRoot == nullptr || pszElement == nullptr)
1461 0 : return nullptr;
1462 :
1463 94865 : bool bSideSearch = false;
1464 :
1465 94865 : if (*pszElement == '=')
1466 : {
1467 5391 : bSideSearch = true;
1468 5391 : pszElement++;
1469 : }
1470 :
1471 : /* -------------------------------------------------------------------- */
1472 : /* Does this node match? */
1473 : /* -------------------------------------------------------------------- */
1474 94865 : if ((psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute) &&
1475 94694 : EQUAL(pszElement, psRoot->pszValue))
1476 4098 : return psRoot;
1477 :
1478 : /* -------------------------------------------------------------------- */
1479 : /* Search children. */
1480 : /* -------------------------------------------------------------------- */
1481 90767 : CPLXMLNode *psChild = nullptr;
1482 242250 : for (psChild = psRoot->psChild; psChild != nullptr;
1483 151483 : psChild = psChild->psNext)
1484 : {
1485 152753 : if ((psChild->eType == CXT_Element ||
1486 91414 : psChild->eType == CXT_Attribute) &&
1487 88512 : EQUAL(pszElement, psChild->pszValue))
1488 426 : return psChild;
1489 :
1490 152327 : if (psChild->psChild != nullptr)
1491 : {
1492 84352 : CPLXMLNode *psResult = CPLSearchXMLNode(psChild, pszElement);
1493 84352 : if (psResult != nullptr)
1494 844 : return psResult;
1495 : }
1496 : }
1497 :
1498 : /* -------------------------------------------------------------------- */
1499 : /* Search siblings if we are in side search mode. */
1500 : /* -------------------------------------------------------------------- */
1501 89497 : if (bSideSearch)
1502 : {
1503 5784 : for (psRoot = psRoot->psNext; psRoot != nullptr;
1504 843 : psRoot = psRoot->psNext)
1505 : {
1506 4866 : CPLXMLNode *psResult = CPLSearchXMLNode(psRoot, pszElement);
1507 4866 : if (psResult != nullptr)
1508 4023 : return psResult;
1509 : }
1510 : }
1511 :
1512 85474 : return nullptr;
1513 : }
1514 :
1515 : /************************************************************************/
1516 : /* CPLGetXMLNode() */
1517 : /************************************************************************/
1518 :
1519 : /**
1520 : * \brief Find node by path.
1521 : *
1522 : * Searches the document or subdocument indicated by psRoot for an element
1523 : * (or attribute) with the given path. The path should consist of a set of
1524 : * element names separated by dots, not including the name of the root
1525 : * element (psRoot). If the requested element is not found NULL is returned.
1526 : *
1527 : * Attribute names may only appear as the last item in the path.
1528 : *
1529 : * The search is done from the root nodes children, but all intermediate
1530 : * nodes in the path must be specified. Searching for "name" would only find
1531 : * a name element or attribute if it is a direct child of the root, not at any
1532 : * level in the subdocument.
1533 : *
1534 : * If the pszPath is prefixed by "=" then the search will begin with the
1535 : * root node, and its siblings, instead of the root nodes children. This
1536 : * is particularly useful when searching within a whole document which is
1537 : * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1538 : *
1539 : * @param psRoot the subtree in which to search. This should be a node of
1540 : * type CXT_Element. NULL is safe.
1541 : *
1542 : * @param pszPath the list of element names in the path (dot separated).
1543 : *
1544 : * @return the requested element node, or NULL if not found.
1545 : */
1546 :
1547 3447380 : CPLXMLNode *CPLGetXMLNode(CPLXMLNode *psRoot, const char *pszPath)
1548 :
1549 : {
1550 3447380 : if (psRoot == nullptr || pszPath == nullptr)
1551 1053 : return nullptr;
1552 :
1553 3446330 : bool bSideSearch = false;
1554 :
1555 3446330 : if (*pszPath == '=')
1556 : {
1557 237254 : bSideSearch = true;
1558 237254 : pszPath++;
1559 : }
1560 :
1561 3446330 : const char *const apszTokens[2] = {pszPath, nullptr};
1562 :
1563 : // Slight optimization: avoid using CSLTokenizeStringComplex that
1564 : // does memory allocations when it is not really necessary.
1565 3446330 : bool bFreeTokens = false;
1566 3446330 : char **papszTokensToFree = nullptr;
1567 : const char *const *papszTokens;
1568 3446330 : if (strchr(pszPath, '.'))
1569 : {
1570 : papszTokensToFree =
1571 246731 : CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
1572 246731 : papszTokens = papszTokensToFree;
1573 246731 : bFreeTokens = true;
1574 : }
1575 : else
1576 : {
1577 3199600 : papszTokens = apszTokens;
1578 : }
1579 :
1580 3446330 : int iToken = 0;
1581 5275120 : while (papszTokens[iToken] != nullptr && psRoot != nullptr)
1582 : {
1583 3698050 : CPLXMLNode *psChild = nullptr;
1584 :
1585 3698050 : if (bSideSearch)
1586 : {
1587 237254 : psChild = psRoot;
1588 237254 : bSideSearch = false;
1589 : }
1590 : else
1591 3460800 : psChild = psRoot->psChild;
1592 :
1593 14783500 : for (; psChild != nullptr; psChild = psChild->psNext)
1594 : {
1595 12914200 : if (psChild->eType != CXT_Text &&
1596 12659700 : EQUAL(papszTokens[iToken], psChild->pszValue))
1597 1828800 : break;
1598 : }
1599 :
1600 3698050 : if (psChild == nullptr)
1601 : {
1602 1869260 : psRoot = nullptr;
1603 1869260 : break;
1604 : }
1605 :
1606 1828790 : psRoot = psChild;
1607 1828790 : iToken++;
1608 : }
1609 :
1610 3446330 : if (bFreeTokens)
1611 246731 : CSLDestroy(papszTokensToFree);
1612 3446330 : return psRoot;
1613 : }
1614 :
1615 : /************************************************************************/
1616 : /* CPLGetXMLValue() */
1617 : /************************************************************************/
1618 :
1619 : /**
1620 : * \brief Fetch element/attribute value.
1621 : *
1622 : * Searches the document for the element/attribute value associated with
1623 : * the path. The corresponding node is internally found with CPLGetXMLNode()
1624 : * (see there for details on path handling). Once found, the value is
1625 : * considered to be the first CXT_Text child of the node.
1626 : *
1627 : * If the attribute/element search fails, or if the found node has no
1628 : * value then the passed default value is returned.
1629 : *
1630 : * The returned value points to memory within the document tree, and should
1631 : * not be altered or freed.
1632 : *
1633 : * @param psRoot the subtree in which to search. This should be a node of
1634 : * type CXT_Element. NULL is safe.
1635 : *
1636 : * @param pszPath the list of element names in the path (dot separated). An
1637 : * empty path means get the value of the psRoot node.
1638 : *
1639 : * @param pszDefault the value to return if a corresponding value is not
1640 : * found, may be NULL.
1641 : *
1642 : * @return the requested value or pszDefault if not found.
1643 : */
1644 :
1645 2988680 : const char *CPLGetXMLValue(const CPLXMLNode *psRoot, const char *pszPath,
1646 : const char *pszDefault)
1647 :
1648 : {
1649 2988680 : const CPLXMLNode *psTarget = nullptr;
1650 :
1651 2988680 : if (pszPath == nullptr || *pszPath == '\0')
1652 193016 : psTarget = psRoot;
1653 : else
1654 2795670 : psTarget = CPLGetXMLNode(psRoot, pszPath);
1655 :
1656 2988680 : if (psTarget == nullptr)
1657 1401110 : return pszDefault;
1658 :
1659 1587570 : if (psTarget->eType == CXT_Attribute)
1660 : {
1661 949160 : CPLAssert(psTarget->psChild != nullptr &&
1662 : psTarget->psChild->eType == CXT_Text);
1663 :
1664 949164 : return psTarget->psChild->pszValue;
1665 : }
1666 :
1667 638407 : if (psTarget->eType == CXT_Element)
1668 : {
1669 : // Find first non-attribute child, and verify it is a single text
1670 : // with no siblings.
1671 :
1672 638387 : psTarget = psTarget->psChild;
1673 :
1674 692770 : while (psTarget != nullptr && psTarget->eType == CXT_Attribute)
1675 54383 : psTarget = psTarget->psNext;
1676 :
1677 638387 : if (psTarget != nullptr && psTarget->eType == CXT_Text &&
1678 635805 : psTarget->psNext == nullptr)
1679 635805 : return psTarget->pszValue;
1680 : }
1681 :
1682 2602 : return pszDefault;
1683 : }
1684 :
1685 : /************************************************************************/
1686 : /* CPLAddXMLChild() */
1687 : /************************************************************************/
1688 :
1689 : /**
1690 : * \brief Add child node to parent.
1691 : *
1692 : * The passed child is added to the list of children of the indicated
1693 : * parent. Normally the child is added at the end of the parents child
1694 : * list, but attributes (CXT_Attribute) will be inserted after any other
1695 : * attributes but before any other element type. Ownership of the child
1696 : * node is effectively assumed by the parent node. If the child has
1697 : * siblings (its psNext is not NULL) they will be trimmed, but if the child
1698 : * has children they are carried with it.
1699 : *
1700 : * @param psParent the node to attach the child to. May not be NULL.
1701 : *
1702 : * @param psChild the child to add to the parent. May not be NULL. Should
1703 : * not be a child of any other parent.
1704 : */
1705 :
1706 12042 : void CPLAddXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1707 :
1708 : {
1709 12042 : if (psParent->psChild == nullptr)
1710 : {
1711 2817 : psParent->psChild = psChild;
1712 2817 : return;
1713 : }
1714 :
1715 : // Insert at head of list if first child is not attribute.
1716 9225 : if (psChild->eType == CXT_Attribute &&
1717 21 : psParent->psChild->eType != CXT_Attribute)
1718 : {
1719 0 : psChild->psNext = psParent->psChild;
1720 0 : psParent->psChild = psChild;
1721 0 : return;
1722 : }
1723 :
1724 : // Search for end of list.
1725 9225 : CPLXMLNode *psSib = nullptr;
1726 64640 : for (psSib = psParent->psChild; psSib->psNext != nullptr;
1727 55415 : psSib = psSib->psNext)
1728 : {
1729 : // Insert attributes if the next node is not an attribute.
1730 55416 : if (psChild->eType == CXT_Attribute && psSib->psNext != nullptr &&
1731 5 : psSib->psNext->eType != CXT_Attribute)
1732 : {
1733 1 : psChild->psNext = psSib->psNext;
1734 1 : psSib->psNext = psChild;
1735 1 : return;
1736 : }
1737 : }
1738 :
1739 9224 : psSib->psNext = psChild;
1740 : }
1741 :
1742 : /************************************************************************/
1743 : /* CPLRemoveXMLChild() */
1744 : /************************************************************************/
1745 :
1746 : /**
1747 : * \brief Remove child node from parent.
1748 : *
1749 : * The passed child is removed from the child list of the passed parent,
1750 : * but the child is not destroyed. The child retains ownership of its
1751 : * own children, but is cleanly removed from the child list of the parent.
1752 : *
1753 : * @param psParent the node to the child is attached to.
1754 : *
1755 : * @param psChild the child to remove.
1756 : *
1757 : * @return TRUE on success or FALSE if the child was not found.
1758 : */
1759 :
1760 4764 : int CPLRemoveXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1761 :
1762 : {
1763 4764 : if (psParent == nullptr)
1764 0 : return FALSE;
1765 :
1766 4764 : CPLXMLNode *psLast = nullptr;
1767 4764 : CPLXMLNode *psThis = nullptr;
1768 9611 : for (psThis = psParent->psChild; psThis != nullptr; psThis = psThis->psNext)
1769 : {
1770 7137 : if (psThis == psChild)
1771 : {
1772 2290 : if (psLast == nullptr)
1773 1612 : psParent->psChild = psThis->psNext;
1774 : else
1775 678 : psLast->psNext = psThis->psNext;
1776 :
1777 2290 : psThis->psNext = nullptr;
1778 2290 : return TRUE;
1779 : }
1780 4847 : psLast = psThis;
1781 : }
1782 :
1783 2474 : return FALSE;
1784 : }
1785 :
1786 : /************************************************************************/
1787 : /* CPLAddXMLSibling() */
1788 : /************************************************************************/
1789 :
1790 : /**
1791 : * \brief Add new sibling.
1792 : *
1793 : * The passed psNewSibling is added to the end of siblings of the
1794 : * psOlderSibling node. That is, it is added to the end of the psNext
1795 : * chain. There is no special handling if psNewSibling is an attribute.
1796 : * If this is required, use CPLAddXMLChild().
1797 : *
1798 : * @param psOlderSibling the node to attach the sibling after.
1799 : *
1800 : * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1801 : * chain.
1802 : */
1803 :
1804 4859 : void CPLAddXMLSibling(CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling)
1805 :
1806 : {
1807 4859 : if (psOlderSibling == nullptr)
1808 0 : return;
1809 :
1810 4990 : while (psOlderSibling->psNext != nullptr)
1811 131 : psOlderSibling = psOlderSibling->psNext;
1812 :
1813 4859 : psOlderSibling->psNext = psNewSibling;
1814 : }
1815 :
1816 : /************************************************************************/
1817 : /* CPLCreateXMLElementAndValue() */
1818 : /************************************************************************/
1819 :
1820 : /**
1821 : * \brief Create an element and text value.
1822 : *
1823 : * This is function is a convenient short form for:
1824 : *
1825 : * \code
1826 : * CPLXMLNode *psTextNode;
1827 : * CPLXMLNode *psElementNode;
1828 : *
1829 : * psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
1830 : * psTextNode = CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
1831 : *
1832 : * return psElementNode;
1833 : * \endcode
1834 : *
1835 : * It creates a CXT_Element node, with a CXT_Text child, and
1836 : * attaches the element to the passed parent.
1837 : *
1838 : * @param psParent the parent node to which the resulting node should
1839 : * be attached. May be NULL to keep as freestanding.
1840 : *
1841 : * @param pszName the element name to create.
1842 : * @param pszValue the text to attach to the element. Must not be NULL.
1843 : *
1844 : * @return the pointer to the new element node.
1845 : */
1846 :
1847 94233 : CPLXMLNode *CPLCreateXMLElementAndValue(CPLXMLNode *psParent,
1848 : const char *pszName,
1849 : const char *pszValue)
1850 :
1851 : {
1852 : CPLXMLNode *psElementNode =
1853 94233 : CPLCreateXMLNode(psParent, CXT_Element, pszName);
1854 94233 : CPLCreateXMLNode(psElementNode, CXT_Text, pszValue);
1855 :
1856 94233 : return psElementNode;
1857 : }
1858 :
1859 : /************************************************************************/
1860 : /* CPLCreateXMLElementAndValue() */
1861 : /************************************************************************/
1862 :
1863 : /**
1864 : * \brief Create an attribute and text value.
1865 : *
1866 : * This is function is a convenient short form for:
1867 : *
1868 : * \code
1869 : * CPLXMLNode *psAttributeNode;
1870 : *
1871 : * psAttributeNode = CPLCreateXMLNode( psParent, CXT_Attribute, pszName );
1872 : * CPLCreateXMLNode( psAttributeNode, CXT_Text, pszValue );
1873 : * \endcode
1874 : *
1875 : * It creates a CXT_Attribute node, with a CXT_Text child, and
1876 : * attaches the element to the passed parent.
1877 : *
1878 : * @param psParent the parent node to which the resulting node should
1879 : * be attached. Must not be NULL.
1880 : * @param pszName the attribute name to create.
1881 : * @param pszValue the text to attach to the attribute. Must not be NULL.
1882 : *
1883 : */
1884 :
1885 58551 : void CPLAddXMLAttributeAndValue(CPLXMLNode *psParent, const char *pszName,
1886 : const char *pszValue)
1887 : {
1888 58551 : CPLAssert(psParent != nullptr);
1889 : CPLXMLNode *psAttributeNode =
1890 58551 : CPLCreateXMLNode(psParent, CXT_Attribute, pszName);
1891 58551 : CPLCreateXMLNode(psAttributeNode, CXT_Text, pszValue);
1892 58551 : }
1893 :
1894 : /************************************************************************/
1895 : /* CPLCloneXMLTree() */
1896 : /************************************************************************/
1897 :
1898 : /**
1899 : * \brief Copy tree.
1900 : *
1901 : * Creates a deep copy of a CPLXMLNode tree.
1902 : *
1903 : * @param psTree the tree to duplicate.
1904 : *
1905 : * @return a copy of the whole tree.
1906 : */
1907 :
1908 55571 : CPLXMLNode *CPLCloneXMLTree(const CPLXMLNode *psTree)
1909 :
1910 : {
1911 55571 : CPLXMLNode *psPrevious = nullptr;
1912 55571 : CPLXMLNode *psReturn = nullptr;
1913 :
1914 144532 : while (psTree != nullptr)
1915 : {
1916 : CPLXMLNode *psCopy =
1917 88961 : CPLCreateXMLNode(nullptr, psTree->eType, psTree->pszValue);
1918 88961 : if (psReturn == nullptr)
1919 55571 : psReturn = psCopy;
1920 88961 : if (psPrevious != nullptr)
1921 33390 : psPrevious->psNext = psCopy;
1922 :
1923 88961 : if (psTree->psChild != nullptr)
1924 51066 : psCopy->psChild = CPLCloneXMLTree(psTree->psChild);
1925 :
1926 88961 : psPrevious = psCopy;
1927 88961 : psTree = psTree->psNext;
1928 : }
1929 :
1930 55571 : return psReturn;
1931 : }
1932 :
1933 : /************************************************************************/
1934 : /* CPLSetXMLValue() */
1935 : /************************************************************************/
1936 :
1937 : /**
1938 : * \brief Set element value by path.
1939 : *
1940 : * Find (or create) the target element or attribute specified in the
1941 : * path, and assign it the indicated value.
1942 : *
1943 : * Any path elements that do not already exist will be created. The target
1944 : * nodes value (the first CXT_Text child) will be replaced with the provided
1945 : * value.
1946 : *
1947 : * If the target node is an attribute instead of an element, the name
1948 : * should be prefixed with a #.
1949 : *
1950 : * Example:
1951 : * CPLSetXMLValue( "Citation.Id.Description", "DOQ dataset" );
1952 : * CPLSetXMLValue( "Citation.Id.Description.#name", "doq" );
1953 : *
1954 : * @param psRoot the subdocument to be updated.
1955 : *
1956 : * @param pszPath the dot separated path to the target element/attribute.
1957 : *
1958 : * @param pszValue the text value to assign.
1959 : *
1960 : * @return TRUE on success.
1961 : */
1962 :
1963 196566 : int CPLSetXMLValue(CPLXMLNode *psRoot, const char *pszPath,
1964 : const char *pszValue)
1965 :
1966 : {
1967 196566 : char **papszTokens = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
1968 196566 : int iToken = 0;
1969 :
1970 426663 : while (papszTokens[iToken] != nullptr)
1971 : {
1972 230097 : bool bIsAttribute = false;
1973 230097 : const char *pszName = papszTokens[iToken];
1974 :
1975 230097 : if (pszName[0] == '#')
1976 : {
1977 185012 : bIsAttribute = true;
1978 185012 : pszName++;
1979 : }
1980 :
1981 230097 : if (psRoot->eType != CXT_Element)
1982 : {
1983 0 : CSLDestroy(papszTokens);
1984 0 : return FALSE;
1985 : }
1986 :
1987 230097 : CPLXMLNode *psChild = nullptr;
1988 735886 : for (psChild = psRoot->psChild; psChild != nullptr;
1989 505789 : psChild = psChild->psNext)
1990 : {
1991 535871 : if (psChild->eType != CXT_Text && EQUAL(pszName, psChild->pszValue))
1992 30082 : break;
1993 : }
1994 :
1995 230097 : if (psChild == nullptr)
1996 : {
1997 200015 : if (bIsAttribute)
1998 184382 : psChild = CPLCreateXMLNode(psRoot, CXT_Attribute, pszName);
1999 : else
2000 15633 : psChild = CPLCreateXMLNode(psRoot, CXT_Element, pszName);
2001 : }
2002 :
2003 230097 : psRoot = psChild;
2004 230097 : iToken++;
2005 : }
2006 :
2007 196566 : CSLDestroy(papszTokens);
2008 :
2009 : /* -------------------------------------------------------------------- */
2010 : /* Find the "text" child if there is one. */
2011 : /* -------------------------------------------------------------------- */
2012 196566 : CPLXMLNode *psTextChild = psRoot->psChild;
2013 :
2014 196764 : while (psTextChild != nullptr && psTextChild->eType != CXT_Text)
2015 198 : psTextChild = psTextChild->psNext;
2016 :
2017 : /* -------------------------------------------------------------------- */
2018 : /* Now set a value node under this node. */
2019 : /* -------------------------------------------------------------------- */
2020 :
2021 196566 : if (psTextChild == nullptr)
2022 195672 : CPLCreateXMLNode(psRoot, CXT_Text, pszValue);
2023 : else
2024 : {
2025 894 : CPLFree(psTextChild->pszValue);
2026 894 : psTextChild->pszValue = CPLStrdup(pszValue);
2027 : }
2028 :
2029 196566 : return TRUE;
2030 : }
2031 :
2032 : /************************************************************************/
2033 : /* CPLStripXMLNamespace() */
2034 : /************************************************************************/
2035 :
2036 : /**
2037 : * \brief Strip indicated namespaces.
2038 : *
2039 : * The subdocument (psRoot) is recursively examined, and any elements
2040 : * with the indicated namespace prefix will have the namespace prefix
2041 : * stripped from the element names. If the passed namespace is NULL, then
2042 : * all namespace prefixes will be stripped.
2043 : *
2044 : * Nodes other than elements should remain unaffected. The changes are
2045 : * made "in place", and should not alter any node locations, only the
2046 : * pszValue field of affected nodes.
2047 : *
2048 : * @param psRoot the document to operate on.
2049 : * @param pszNamespace the name space prefix (not including colon), or NULL.
2050 : * @param bRecurse TRUE to recurse over whole document, or FALSE to only
2051 : * operate on the passed node.
2052 : */
2053 :
2054 1633130 : void CPLStripXMLNamespace(CPLXMLNode *psRoot, const char *pszNamespace,
2055 : int bRecurse)
2056 :
2057 : {
2058 1633130 : size_t nNameSpaceLen = (pszNamespace) ? strlen(pszNamespace) : 0;
2059 :
2060 4196250 : while (psRoot != nullptr)
2061 : {
2062 2563120 : if (psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute)
2063 : {
2064 1440100 : if (pszNamespace != nullptr)
2065 : {
2066 597 : if (EQUALN(pszNamespace, psRoot->pszValue, nNameSpaceLen) &&
2067 177 : psRoot->pszValue[nNameSpaceLen] == ':')
2068 : {
2069 177 : memmove(psRoot->pszValue,
2070 177 : psRoot->pszValue + nNameSpaceLen + 1,
2071 177 : strlen(psRoot->pszValue + nNameSpaceLen + 1) + 1);
2072 : }
2073 : }
2074 : else
2075 : {
2076 8956830 : for (const char *pszCheck = psRoot->pszValue; *pszCheck != '\0';
2077 : pszCheck++)
2078 : {
2079 8520530 : if (*pszCheck == ':')
2080 : {
2081 1003210 : memmove(psRoot->pszValue, pszCheck + 1,
2082 1003210 : strlen(pszCheck + 1) + 1);
2083 1003210 : break;
2084 : }
2085 : }
2086 : }
2087 : }
2088 :
2089 2563120 : if (bRecurse)
2090 : {
2091 2563120 : if (psRoot->psChild != nullptr)
2092 1425070 : CPLStripXMLNamespace(psRoot->psChild, pszNamespace, 1);
2093 :
2094 2563120 : psRoot = psRoot->psNext;
2095 : }
2096 : else
2097 : {
2098 0 : break;
2099 : }
2100 : }
2101 1633130 : }
2102 :
2103 : /************************************************************************/
2104 : /* CPLParseXMLFile() */
2105 : /************************************************************************/
2106 :
2107 : /**
2108 : * \brief Parse XML file into tree.
2109 : *
2110 : * The named file is opened, loaded into memory as a big string, and
2111 : * parsed with CPLParseXMLString(). Errors in reading the file or parsing
2112 : * the XML will be reported by CPLError().
2113 : *
2114 : * The "large file" API is used, so XML files can come from virtualized
2115 : * files.
2116 : *
2117 : * @param pszFilename the file to open.
2118 : *
2119 : * @return NULL on failure, or the document tree on success.
2120 : */
2121 :
2122 4945 : CPLXMLNode *CPLParseXMLFile(const char *pszFilename)
2123 :
2124 : {
2125 : /* -------------------------------------------------------------------- */
2126 : /* Ingest the file. */
2127 : /* -------------------------------------------------------------------- */
2128 4945 : GByte *pabyOut = nullptr;
2129 4945 : if (!VSIIngestFile(nullptr, pszFilename, &pabyOut, nullptr, -1))
2130 63 : return nullptr;
2131 :
2132 4882 : char *pszDoc = reinterpret_cast<char *>(pabyOut);
2133 :
2134 : /* -------------------------------------------------------------------- */
2135 : /* Parse it. */
2136 : /* -------------------------------------------------------------------- */
2137 4882 : CPLXMLNode *psTree = CPLParseXMLString(pszDoc);
2138 4882 : CPLFree(pszDoc);
2139 :
2140 4882 : return psTree;
2141 : }
2142 :
2143 : /************************************************************************/
2144 : /* CPLSerializeXMLTreeToFile() */
2145 : /************************************************************************/
2146 :
2147 : /**
2148 : * \brief Write document tree to a file.
2149 : *
2150 : * The passed document tree is converted into one big string (with
2151 : * CPLSerializeXMLTree()) and then written to the named file. Errors writing
2152 : * the file will be reported by CPLError(). The source document tree is
2153 : * not altered. If the output file already exists it will be overwritten.
2154 : *
2155 : * @param psTree the document tree to write.
2156 : * @param pszFilename the name of the file to write to.
2157 : * @return TRUE on success, FALSE otherwise.
2158 : */
2159 :
2160 2552 : int CPLSerializeXMLTreeToFile(const CPLXMLNode *psTree, const char *pszFilename)
2161 :
2162 : {
2163 : /* -------------------------------------------------------------------- */
2164 : /* Serialize document. */
2165 : /* -------------------------------------------------------------------- */
2166 2552 : char *pszDoc = CPLSerializeXMLTree(psTree);
2167 2552 : if (pszDoc == nullptr)
2168 0 : return FALSE;
2169 :
2170 2552 : const vsi_l_offset nLength = strlen(pszDoc);
2171 :
2172 : /* -------------------------------------------------------------------- */
2173 : /* Create file. */
2174 : /* -------------------------------------------------------------------- */
2175 2552 : VSILFILE *fp = VSIFOpenL(pszFilename, "wt");
2176 2552 : if (fp == nullptr)
2177 : {
2178 7 : CPLError(CE_Failure, CPLE_OpenFailed, "Failed to open %.500s to write.",
2179 : pszFilename);
2180 7 : CPLFree(pszDoc);
2181 7 : return FALSE;
2182 : }
2183 :
2184 : /* -------------------------------------------------------------------- */
2185 : /* Write file. */
2186 : /* -------------------------------------------------------------------- */
2187 2545 : if (VSIFWriteL(pszDoc, 1, static_cast<size_t>(nLength), fp) != nLength)
2188 : {
2189 75 : CPLError(CE_Failure, CPLE_FileIO,
2190 : "Failed to write whole XML document (%.500s).", pszFilename);
2191 75 : CPL_IGNORE_RET_VAL(VSIFCloseL(fp));
2192 75 : CPLFree(pszDoc);
2193 75 : return FALSE;
2194 : }
2195 :
2196 : /* -------------------------------------------------------------------- */
2197 : /* Cleanup */
2198 : /* -------------------------------------------------------------------- */
2199 2470 : const bool bRet = VSIFCloseL(fp) == 0;
2200 2470 : if (!bRet)
2201 : {
2202 0 : CPLError(CE_Failure, CPLE_FileIO,
2203 : "Failed to write whole XML document (%.500s).", pszFilename);
2204 : }
2205 2470 : CPLFree(pszDoc);
2206 :
2207 2470 : return bRet;
2208 : }
2209 :
/************************************************************************/
/*                       CPLCleanXMLElementName()                       */
/************************************************************************/

/**
 * \brief Make string into safe XML token.
 *
 * Modifies a string in place to try and make it into a legal
 * XML token that can be used as an element name.  Every byte is kept if it
 * has its high bit set, is alphanumeric according to isalnum(), or is an
 * underscore or a dot; any other byte is overwritten with an underscore.
 * The length of the string never changes.
 *
 * NOTE: This function should implement the rules in section 2.3 of
 * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly.  We
 * only do a rough approximation of that.
 *
 * @param pszTarget the string to be adjusted.  It is altered in place.
 * Passing NULL is allowed and does nothing.
 */

void CPLCleanXMLElementName(char *pszTarget)
{
    if (pszTarget == nullptr)
        return;

    for (char *pchCur = pszTarget; *pchCur != '\0'; ++pchCur)
    {
        // Work on the unsigned value: it is what isalnum() expects, and it
        // makes the high-bit test independent of the signedness of char.
        const unsigned char uchCur = static_cast<unsigned char>(*pchCur);

        const bool bKeep = (uchCur & 0x80) != 0 || isalnum(uchCur) ||
                           uchCur == '_' || uchCur == '.';
        if (!bKeep)
            *pchCur = '_';
    }
}
2247 :
2248 : /************************************************************************/
2249 : /* CPLXMLNodeGetRAMUsageEstimate() */
2250 : /************************************************************************/
2251 :
2252 133154 : static size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode,
2253 : bool bVisitSiblings)
2254 : {
2255 133154 : size_t nRet = sizeof(CPLXMLNode);
2256 : // malloc() aligns on 16-byte boundaries on 64 bit.
2257 133154 : nRet += std::max(2 * sizeof(void *), strlen(psNode->pszValue) + 1);
2258 133154 : if (bVisitSiblings)
2259 : {
2260 133154 : for (const CPLXMLNode *psIter = psNode->psNext; psIter;
2261 55593 : psIter = psIter->psNext)
2262 : {
2263 55593 : nRet += CPLXMLNodeGetRAMUsageEstimate(psIter, false);
2264 : }
2265 : }
2266 133154 : if (psNode->psChild)
2267 : {
2268 74027 : nRet += CPLXMLNodeGetRAMUsageEstimate(psNode->psChild, true);
2269 : }
2270 133154 : return nRet;
2271 : }
2272 :
2273 : /** Return a conservative estimate of the RAM usage of this node, its children
2274 : * and siblings. The returned values is in bytes.
2275 : *
2276 : * @since 3.9
2277 : */
2278 3534 : size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode)
2279 : {
2280 3534 : return CPLXMLNodeGetRAMUsageEstimate(psNode, true);
2281 : }
2282 :
2283 : /************************************************************************/
2284 : /* CPLXMLTreeCloser::getDocumentElement() */
2285 : /************************************************************************/
2286 :
2287 72 : CPLXMLNode *CPLXMLTreeCloser::getDocumentElement()
2288 : {
2289 72 : CPLXMLNode *doc = get();
2290 : // skip the Declaration and assume the next is the root element
2291 120 : while (doc != nullptr &&
2292 120 : (doc->eType != CXT_Element || doc->pszValue[0] == '?'))
2293 : {
2294 48 : doc = doc->psNext;
2295 : }
2296 72 : return doc;
2297 : }
|