Line data Source code
1 : /**********************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: Implementation of MiniXML Parser and handling.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2001, Frank Warmerdam
9 : * Copyright (c) 2007-2013, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * SPDX-License-Identifier: MIT
12 : **********************************************************************
13 : *
14 : * Independent Security Audit 2003/04/05 Andrey Kiselev:
15 : * Completed audit of this module. Any documents may be parsed without
16 : * buffer overflows and stack corruptions.
17 : *
18 : * Security Audit 2003/03/28 warmerda:
19 : * Completed security audit. I believe that this module may be safely used
20 : * to parse, and serialize arbitrary documents provided by a potentially
21 : * hostile source.
22 : *
23 : */
24 :
25 : #include "cpl_minixml.h"
26 :
27 : #include <cctype>
28 : #include <climits>
29 : #include <cstddef>
30 : #include <cstdio>
31 : #include <cstring>
32 :
33 : #include <algorithm>
34 :
35 : #include "cpl_conv.h"
36 : #include "cpl_error.h"
37 : #include "cpl_string.h"
38 : #include "cpl_vsi.h"
39 :
// Kinds of lexical tokens produced by ReadToken().
enum XMLTokenType
{
    TNone,           // End of input, or the token could not be read.
    TString,         // Quoted value, CDATA content or free text.
    TOpen,           // '<' seen outside of an element.
    TClose,          // '>' seen inside of an element.
    TEqual,          // '=' seen inside of an element.
    TToken,          // Name-like token inside of an element.
    TSlashClose,     // "/>" terminator.
    TQuestionClose,  // "?>" terminator.
    TComment,        // Text found between "<!--" and "-->".
    TLiteral         // "<!DOCTYPE ...>" block, kept verbatim.
};
53 :
54 : typedef struct
55 : {
56 : CPLXMLNode *psFirstNode;
57 : CPLXMLNode *psLastChild;
58 : } StackContext;
59 :
60 : typedef struct
61 : {
62 : const char *pszInput;
63 : int nInputOffset;
64 : int nInputLine;
65 : bool bInElement;
66 : XMLTokenType eTokenType;
67 : char *pszToken;
68 : size_t nTokenMaxSize;
69 : size_t nTokenSize;
70 :
71 : int nStackMaxSize;
72 : int nStackSize;
73 : StackContext *papsStack;
74 :
75 : CPLXMLNode *psFirstNode;
76 : CPLXMLNode *psLastNode;
77 : } ParseContext;
78 :
79 : static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
80 : const char *pszText);
81 :
82 : /************************************************************************/
83 : /* ReadChar() */
84 : /************************************************************************/
85 :
86 375038000 : static CPL_INLINE char ReadChar(ParseContext *psContext)
87 :
88 : {
89 375038000 : const char chReturn = psContext->pszInput[psContext->nInputOffset++];
90 :
91 375038000 : if (chReturn == '\0')
92 227825 : psContext->nInputOffset--;
93 374810000 : else if (chReturn == 10)
94 4395100 : psContext->nInputLine++;
95 :
96 375038000 : return chReturn;
97 : }
98 :
99 : /************************************************************************/
100 : /* UnreadChar() */
101 : /************************************************************************/
102 :
103 15370600 : static CPL_INLINE void UnreadChar(ParseContext *psContext, char chToUnread)
104 :
105 : {
106 15370600 : if (chToUnread == '\0')
107 35 : return;
108 :
109 15370500 : CPLAssert(chToUnread == psContext->pszInput[psContext->nInputOffset - 1]);
110 :
111 15370500 : psContext->nInputOffset--;
112 :
113 15370500 : if (chToUnread == 10)
114 305 : psContext->nInputLine--;
115 : }
116 :
117 : /************************************************************************/
118 : /* ReallocToken() */
119 : /************************************************************************/
120 :
121 966668 : static bool ReallocToken(ParseContext *psContext)
122 : {
123 966668 : if (psContext->nTokenMaxSize > INT_MAX / 2)
124 : {
125 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
126 : "Out of memory allocating %d*2 bytes",
127 0 : static_cast<int>(psContext->nTokenMaxSize));
128 0 : VSIFree(psContext->pszToken);
129 0 : psContext->pszToken = nullptr;
130 0 : return false;
131 : }
132 :
133 966668 : psContext->nTokenMaxSize *= 2;
134 : char *pszToken = static_cast<char *>(
135 966668 : VSIRealloc(psContext->pszToken, psContext->nTokenMaxSize));
136 966668 : if (pszToken == nullptr)
137 : {
138 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
139 : "Out of memory allocating %d bytes",
140 0 : static_cast<int>(psContext->nTokenMaxSize));
141 0 : VSIFree(psContext->pszToken);
142 0 : psContext->pszToken = nullptr;
143 0 : return false;
144 : }
145 966668 : psContext->pszToken = pszToken;
146 966668 : return true;
147 : }
148 :
149 : /************************************************************************/
150 : /* AddToToken() */
151 : /************************************************************************/
152 :
153 268467000 : static CPL_INLINE bool _AddToToken(ParseContext *psContext, char chNewChar)
154 :
155 : {
156 268467000 : if (psContext->nTokenSize >= psContext->nTokenMaxSize - 2)
157 : {
158 966668 : if (!ReallocToken(psContext))
159 0 : return false;
160 : }
161 :
162 268467000 : psContext->pszToken[psContext->nTokenSize++] = chNewChar;
163 268467000 : psContext->pszToken[psContext->nTokenSize] = '\0';
164 268467000 : return true;
165 : }
166 :
// TODO(schwehr): Remove the goto.
// Append chNewChar to the current token and jump to the `fail` label of the
// enclosing function when the token buffer cannot be grown.
// The body is wrapped in do { } while (0) so that the expansion behaves as a
// single statement: it can be used safely as the body of an if/else or of a
// loop, and always requires its trailing semicolon.
#define AddToToken(psContext, chNewChar)                                       \
    do                                                                         \
    {                                                                          \
        if (!_AddToToken(psContext, chNewChar))                                \
            goto fail;                                                         \
    } while (0)
171 :
172 : /************************************************************************/
173 : /* ReadToken() */
174 : /************************************************************************/
175 :
176 44322100 : static XMLTokenType ReadToken(ParseContext *psContext, CPLErr &eLastErrorType)
177 :
178 : {
179 44322100 : psContext->nTokenSize = 0;
180 44322100 : psContext->pszToken[0] = '\0';
181 :
182 44322100 : char chNext = ReadChar(psContext);
183 95770500 : while (isspace(static_cast<unsigned char>(chNext)))
184 51448400 : chNext = ReadChar(psContext);
185 :
186 : /* -------------------------------------------------------------------- */
187 : /* Handle comments. */
188 : /* -------------------------------------------------------------------- */
189 44322100 : if (chNext == '<' &&
190 6014270 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset, "!--"))
191 : {
192 70678 : psContext->eTokenType = TComment;
193 :
194 : // Skip "!--" characters.
195 70678 : ReadChar(psContext);
196 70678 : ReadChar(psContext);
197 70678 : ReadChar(psContext);
198 :
199 4060150 : while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
200 8190970 : "-->") &&
201 4060150 : (chNext = ReadChar(psContext)) != '\0')
202 4060150 : AddToToken(psContext, chNext);
203 :
204 : // Skip "-->" characters.
205 70678 : ReadChar(psContext);
206 70678 : ReadChar(psContext);
207 70678 : ReadChar(psContext);
208 : }
209 : /* -------------------------------------------------------------------- */
210 : /* Handle DOCTYPE. */
211 : /* -------------------------------------------------------------------- */
212 44251400 : else if (chNext == '<' &&
213 5943590 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
214 : "!DOCTYPE"))
215 : {
216 16 : bool bInQuotes = false;
217 16 : psContext->eTokenType = TLiteral;
218 :
219 16 : AddToToken(psContext, '<');
220 : do
221 : {
222 1039 : chNext = ReadChar(psContext);
223 1039 : if (chNext == '\0')
224 : {
225 0 : eLastErrorType = CE_Failure;
226 0 : CPLError(eLastErrorType, CPLE_AppDefined,
227 : "Parse error in DOCTYPE on or before line %d, "
228 : "reached end of file without '>'.",
229 : psContext->nInputLine);
230 :
231 0 : break;
232 : }
233 :
234 : /* The markup declaration block within a DOCTYPE tag consists of:
235 : * - a left square bracket [
236 : * - a list of declarations
237 : * - a right square bracket ]
238 : * Example:
239 : * <!DOCTYPE RootElement [ ...declarations... ]>
240 : */
241 1039 : if (chNext == '[')
242 : {
243 1 : AddToToken(psContext, chNext);
244 :
245 98 : do
246 : {
247 99 : chNext = ReadChar(psContext);
248 99 : if (chNext == ']')
249 0 : break;
250 99 : AddToToken(psContext, chNext);
251 99 : } while (chNext != '\0' &&
252 99 : !STARTS_WITH_CI(psContext->pszInput +
253 : psContext->nInputOffset,
254 : "]>"));
255 :
256 1 : if (chNext == '\0')
257 : {
258 0 : eLastErrorType = CE_Failure;
259 0 : CPLError(eLastErrorType, CPLE_AppDefined,
260 : "Parse error in DOCTYPE on or before line %d, "
261 : "reached end of file without ']'.",
262 : psContext->nInputLine);
263 0 : break;
264 : }
265 :
266 1 : if (chNext != ']')
267 : {
268 1 : chNext = ReadChar(psContext);
269 1 : AddToToken(psContext, chNext);
270 :
271 : // Skip ">" character, will be consumed below.
272 1 : chNext = ReadChar(psContext);
273 : }
274 : }
275 :
276 1039 : if (chNext == '\"')
277 46 : bInQuotes = !bInQuotes;
278 :
279 1039 : if (chNext == '>' && !bInQuotes)
280 : {
281 16 : AddToToken(psContext, '>');
282 16 : break;
283 : }
284 :
285 1023 : AddToToken(psContext, chNext);
286 16 : } while (true);
287 : }
288 : /* -------------------------------------------------------------------- */
289 : /* Handle CDATA. */
290 : /* -------------------------------------------------------------------- */
291 44251400 : else if (chNext == '<' &&
292 5943580 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
293 : "![CDATA["))
294 : {
295 173 : psContext->eTokenType = TString;
296 :
297 : // Skip !CDATA[
298 173 : ReadChar(psContext);
299 173 : ReadChar(psContext);
300 173 : ReadChar(psContext);
301 173 : ReadChar(psContext);
302 173 : ReadChar(psContext);
303 173 : ReadChar(psContext);
304 173 : ReadChar(psContext);
305 173 : ReadChar(psContext);
306 :
307 91580 : while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
308 183334 : "]]>") &&
309 91581 : (chNext = ReadChar(psContext)) != '\0')
310 91580 : AddToToken(psContext, chNext);
311 :
312 : // Skip "]]>" characters.
313 173 : ReadChar(psContext);
314 173 : ReadChar(psContext);
315 173 : ReadChar(psContext);
316 : }
317 : /* -------------------------------------------------------------------- */
318 : /* Simple single tokens of interest. */
319 : /* -------------------------------------------------------------------- */
320 44251200 : else if (chNext == '<' && !psContext->bInElement)
321 : {
322 5943400 : psContext->eTokenType = TOpen;
323 5943400 : psContext->bInElement = true;
324 : }
325 38307800 : else if (chNext == '>' && psContext->bInElement)
326 : {
327 3952100 : psContext->eTokenType = TClose;
328 3952100 : psContext->bInElement = false;
329 : }
330 34355700 : else if (chNext == '=' && psContext->bInElement)
331 : {
332 8383050 : psContext->eTokenType = TEqual;
333 : }
334 25972600 : else if (chNext == '\0')
335 : {
336 227785 : psContext->eTokenType = TNone;
337 : }
338 : /* -------------------------------------------------------------------- */
339 : /* Handle the /> token terminator. */
340 : /* -------------------------------------------------------------------- */
341 25744900 : else if (chNext == '/' && psContext->bInElement &&
342 3953740 : psContext->pszInput[psContext->nInputOffset] == '>')
343 : {
344 1982700 : chNext = ReadChar(psContext);
345 : (void)chNext;
346 1982700 : CPLAssert(chNext == '>');
347 :
348 1982700 : psContext->eTokenType = TSlashClose;
349 1982700 : psContext->bInElement = false;
350 : }
351 : /* -------------------------------------------------------------------- */
352 : /* Handle the ?> token terminator. */
353 : /* -------------------------------------------------------------------- */
354 23762200 : else if (chNext == '?' && psContext->bInElement &&
355 17105 : psContext->pszInput[psContext->nInputOffset] == '>')
356 : {
357 8551 : chNext = ReadChar(psContext);
358 : (void)chNext;
359 8551 : CPLAssert(chNext == '>');
360 :
361 8551 : psContext->eTokenType = TQuestionClose;
362 8551 : psContext->bInElement = false;
363 : }
364 : /* -------------------------------------------------------------------- */
365 : /* Collect a quoted string. */
366 : /* -------------------------------------------------------------------- */
367 23753600 : else if (psContext->bInElement && chNext == '"')
368 : {
369 7211510 : psContext->eTokenType = TString;
370 :
371 62340000 : while ((chNext = ReadChar(psContext)) != '"' && chNext != '\0')
372 55128500 : AddToToken(psContext, chNext);
373 :
374 7211510 : if (chNext != '"')
375 : {
376 0 : psContext->eTokenType = TNone;
377 0 : eLastErrorType = CE_Failure;
378 0 : CPLError(
379 : eLastErrorType, CPLE_AppDefined,
380 : "Parse error on line %d, reached EOF before closing quote.",
381 : psContext->nInputLine);
382 : }
383 :
384 : // Do we need to unescape it?
385 7211510 : if (strchr(psContext->pszToken, '&') != nullptr)
386 : {
387 220 : int nLength = 0;
388 : char *pszUnescaped =
389 220 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
390 220 : strcpy(psContext->pszToken, pszUnescaped);
391 220 : CPLFree(pszUnescaped);
392 220 : psContext->nTokenSize = strlen(psContext->pszToken);
393 7211510 : }
394 : }
395 16542100 : else if (psContext->bInElement && chNext == '\'')
396 : {
397 1171540 : psContext->eTokenType = TString;
398 :
399 20403100 : while ((chNext = ReadChar(psContext)) != '\'' && chNext != '\0')
400 19231500 : AddToToken(psContext, chNext);
401 :
402 1171540 : if (chNext != '\'')
403 : {
404 1 : psContext->eTokenType = TNone;
405 1 : eLastErrorType = CE_Failure;
406 1 : CPLError(
407 : eLastErrorType, CPLE_AppDefined,
408 : "Parse error on line %d, reached EOF before closing quote.",
409 : psContext->nInputLine);
410 : }
411 :
412 : // Do we need to unescape it?
413 1171540 : if (strchr(psContext->pszToken, '&') != nullptr)
414 : {
415 1681 : int nLength = 0;
416 : char *pszUnescaped =
417 1681 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
418 1681 : strcpy(psContext->pszToken, pszUnescaped);
419 1681 : CPLFree(pszUnescaped);
420 1681 : psContext->nTokenSize = strlen(psContext->pszToken);
421 1171540 : }
422 : }
423 : /* -------------------------------------------------------------------- */
424 : /* Collect an unquoted string, terminated by a open angle */
425 : /* bracket. */
426 : /* -------------------------------------------------------------------- */
427 15370600 : else if (!psContext->bInElement)
428 : {
429 1044090 : psContext->eTokenType = TString;
430 :
431 1044090 : AddToToken(psContext, chNext);
432 93907100 : while ((chNext = ReadChar(psContext)) != '<' && chNext != '\0')
433 92863000 : AddToToken(psContext, chNext);
434 1044090 : UnreadChar(psContext, chNext);
435 :
436 : // Do we need to unescape it?
437 1044090 : if (strchr(psContext->pszToken, '&') != nullptr)
438 : {
439 19785 : int nLength = 0;
440 : char *pszUnescaped =
441 19785 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
442 19785 : strcpy(psContext->pszToken, pszUnescaped);
443 19785 : CPLFree(pszUnescaped);
444 19785 : psContext->nTokenSize = strlen(psContext->pszToken);
445 : }
446 : }
447 :
448 : /* -------------------------------------------------------------------- */
449 : /* Collect a regular token terminated by white space, or */
450 : /* special character(s) like an equal sign. */
451 : /* -------------------------------------------------------------------- */
452 : else
453 : {
454 14326500 : psContext->eTokenType = TToken;
455 :
456 : // Add the first character to the token regardless of what it is.
457 14326500 : AddToToken(psContext, chNext);
458 :
459 96047500 : for (chNext = ReadChar(psContext);
460 96047500 : (chNext >= 'A' && chNext <= 'Z') ||
461 92351900 : (chNext >= 'a' && chNext <= 'z') || chNext == '-' ||
462 112265000 : chNext == '_' || chNext == '.' || chNext == ':' ||
463 11516300 : (chNext >= '0' && chNext <= '9');
464 81721100 : chNext = ReadChar(psContext))
465 : {
466 81721100 : AddToToken(psContext, chNext);
467 : }
468 :
469 14326500 : UnreadChar(psContext, chNext);
470 : }
471 :
472 44322100 : return psContext->eTokenType;
473 :
474 0 : fail:
475 0 : psContext->eTokenType = TNone;
476 0 : return TNone;
477 : }
478 :
479 : /************************************************************************/
480 : /* PushNode() */
481 : /************************************************************************/
482 :
483 3972370 : static bool PushNode(ParseContext *psContext, CPLXMLNode *psNode,
484 : CPLErr &eLastErrorType)
485 :
486 : {
487 3972370 : if (psContext->nStackMaxSize <= psContext->nStackSize)
488 : {
489 : // Somewhat arbitrary number.
490 229727 : if (psContext->nStackMaxSize >= 10000)
491 : {
492 1 : eLastErrorType = CE_Failure;
493 1 : CPLError(CE_Failure, CPLE_NotSupported,
494 : "XML element depth beyond 10000. Giving up");
495 1 : VSIFree(psContext->papsStack);
496 1 : psContext->papsStack = nullptr;
497 1 : return false;
498 : }
499 229726 : psContext->nStackMaxSize += 10;
500 :
501 : StackContext *papsStack = static_cast<StackContext *>(
502 459452 : VSIRealloc(psContext->papsStack,
503 229726 : sizeof(StackContext) * psContext->nStackMaxSize));
504 229726 : if (papsStack == nullptr)
505 : {
506 0 : eLastErrorType = CE_Failure;
507 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
508 : "Out of memory allocating %d bytes",
509 : static_cast<int>(sizeof(StackContext)) *
510 0 : psContext->nStackMaxSize);
511 0 : VSIFree(psContext->papsStack);
512 0 : psContext->papsStack = nullptr;
513 0 : return false;
514 : }
515 229726 : psContext->papsStack = papsStack;
516 : }
517 : #ifdef DEBUG
518 : // To make Coverity happy, but cannot happen.
519 3972370 : if (psContext->papsStack == nullptr)
520 0 : return false;
521 : #endif
522 :
523 3972370 : psContext->papsStack[psContext->nStackSize].psFirstNode = psNode;
524 3972370 : psContext->papsStack[psContext->nStackSize].psLastChild = nullptr;
525 3972370 : psContext->nStackSize++;
526 :
527 3972370 : return true;
528 : }
529 :
530 : /************************************************************************/
531 : /* AttachNode() */
532 : /* */
533 : /* Attach the passed node as a child of the current node. */
534 : /* Special handling exists for adding siblings to psFirst if */
535 : /* there is nothing on the stack. */
536 : /************************************************************************/
537 :
538 13470400 : static void AttachNode(ParseContext *psContext, CPLXMLNode *psNode)
539 :
540 : {
541 13470400 : if (psContext->psFirstNode == nullptr)
542 : {
543 227783 : psContext->psFirstNode = psNode;
544 227783 : psContext->psLastNode = psNode;
545 : }
546 13242600 : else if (psContext->nStackSize == 0)
547 : {
548 10744 : psContext->psLastNode->psNext = psNode;
549 10744 : psContext->psLastNode = psNode;
550 : }
551 : else
552 : {
553 13231900 : if (psContext->papsStack[psContext->nStackSize - 1]
554 13231900 : .psFirstNode->psChild == nullptr)
555 : {
556 3948840 : psContext->papsStack[psContext->nStackSize - 1]
557 3948840 : .psFirstNode->psChild = psNode;
558 : }
559 : else
560 : {
561 9283020 : psContext->papsStack[psContext->nStackSize - 1]
562 9283020 : .psLastChild->psNext = psNode;
563 : }
564 13231900 : psContext->papsStack[psContext->nStackSize - 1].psLastChild = psNode;
565 : }
566 13470400 : }
567 :
568 : /************************************************************************/
569 : /* CPLParseXMLString() */
570 : /************************************************************************/
571 :
572 : /**
573 : * \brief Parse an XML string into tree form.
574 : *
575 : * The passed document is parsed into a CPLXMLNode tree representation.
576 : * If the document is not well formed XML then NULL is returned, and errors
577 : * are reported via CPLError(). No validation beyond wellformedness is
578 : * done. The CPLParseXMLFile() convenience function can be used to parse
579 : * from a file.
580 : *
581 : * The returned document tree is owned by the caller and should be freed
582 : * with CPLDestroyXMLNode() when no longer needed.
583 : *
584 : * If the document has more than one "root level" element then those after the
585 : * first will be attached to the first as siblings (via the psNext pointers)
586 : * even though there is no common parent. A document with no XML structure
587 : * (no angle brackets for instance) would be considered well formed, and
588 : * returned as a single CXT_Text node.
589 : *
590 : * @param pszString the document to parse.
591 : *
592 : * @return parsed tree or NULL on error.
593 : */
594 :
595 227808 : CPLXMLNode *CPLParseXMLString(const char *pszString)
596 :
597 : {
598 227808 : if (pszString == nullptr)
599 : {
600 0 : CPLError(CE_Failure, CPLE_AppDefined,
601 : "CPLParseXMLString() called with NULL pointer.");
602 0 : return nullptr;
603 : }
604 :
605 : // Save back error context.
606 227808 : const CPLErr eErrClass = CPLGetLastErrorType();
607 227808 : const CPLErrorNum nErrNum = CPLGetLastErrorNo();
608 455616 : const CPLString osErrMsg = CPLGetLastErrorMsg();
609 :
610 : // Reset it now.
611 227808 : CPLErrorSetState(CE_None, CPLE_AppDefined, "");
612 :
613 : /* -------------------------------------------------------------------- */
614 : /* Check for a UTF-8 BOM and skip if found */
615 : /* */
616 : /* TODO: BOM is variable-length parameter and depends on encoding. */
617 : /* Add BOM detection for other encodings. */
618 : /* -------------------------------------------------------------------- */
619 :
620 : // Used to skip to actual beginning of XML data.
621 227808 : if ((static_cast<unsigned char>(pszString[0]) == 0xEF) &&
622 4 : (static_cast<unsigned char>(pszString[1]) == 0xBB) &&
623 4 : (static_cast<unsigned char>(pszString[2]) == 0xBF))
624 : {
625 4 : pszString += 3;
626 : }
627 :
628 : /* -------------------------------------------------------------------- */
629 : /* Initialize parse context. */
630 : /* -------------------------------------------------------------------- */
631 : ParseContext sContext;
632 227808 : sContext.pszInput = pszString;
633 227808 : sContext.nInputOffset = 0;
634 227808 : sContext.nInputLine = 0;
635 227808 : sContext.bInElement = false;
636 227808 : sContext.nTokenMaxSize = 10;
637 227808 : sContext.pszToken = static_cast<char *>(VSIMalloc(sContext.nTokenMaxSize));
638 227808 : if (sContext.pszToken == nullptr)
639 0 : return nullptr;
640 227808 : sContext.nTokenSize = 0;
641 227808 : sContext.eTokenType = TNone;
642 227808 : sContext.nStackMaxSize = 0;
643 227808 : sContext.nStackSize = 0;
644 227808 : sContext.papsStack = nullptr;
645 227808 : sContext.psFirstNode = nullptr;
646 227808 : sContext.psLastNode = nullptr;
647 :
648 : #ifdef DEBUG
649 227808 : bool bRecoverableError = true;
650 : #endif
651 227808 : CPLErr eLastErrorType = CE_None;
652 :
653 : /* ==================================================================== */
654 : /* Loop reading tokens. */
655 : /* ==================================================================== */
656 19641500 : while (ReadToken(&sContext, eLastErrorType) != TNone)
657 : {
658 19413800 : loop_beginning:
659 : /* --------------------------------------------------------------------
660 : */
661 : /* Create a new element. */
662 : /* --------------------------------------------------------------------
663 : */
664 19413800 : if (sContext.eTokenType == TOpen)
665 : {
666 5943400 : if (ReadToken(&sContext, eLastErrorType) != TToken)
667 : {
668 2 : eLastErrorType = CE_Failure;
669 2 : CPLError(eLastErrorType, CPLE_AppDefined,
670 : "Line %d: Didn't find element token after "
671 : "open angle bracket.",
672 : sContext.nInputLine);
673 2 : break;
674 : }
675 :
676 5943400 : CPLXMLNode *psElement = nullptr;
677 5943400 : if (sContext.pszToken[0] != '/')
678 : {
679 : psElement =
680 3972370 : _CPLCreateXMLNode(nullptr, CXT_Element, sContext.pszToken);
681 3972370 : if (!psElement)
682 0 : break;
683 3972370 : AttachNode(&sContext, psElement);
684 3972370 : if (!PushNode(&sContext, psElement, eLastErrorType))
685 1 : break;
686 : }
687 : else
688 : {
689 1971030 : if (sContext.nStackSize == 0 ||
690 1971030 : !EQUAL(sContext.pszToken + 1,
691 : sContext.papsStack[sContext.nStackSize - 1]
692 : .psFirstNode->pszValue))
693 : {
694 : #ifdef DEBUG
695 : // Makes life of fuzzers easier if we accept somewhat
696 : // corrupted XML like <foo> ... </not_foo>.
697 14 : if (CPLTestBool(
698 : CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
699 : {
700 0 : eLastErrorType = CE_Warning;
701 0 : CPLError(
702 : eLastErrorType, CPLE_AppDefined,
703 : "Line %d: <%.500s> doesn't have matching <%.500s>.",
704 : sContext.nInputLine, sContext.pszToken,
705 0 : sContext.pszToken + 1);
706 0 : if (sContext.nStackSize == 0)
707 0 : break;
708 0 : goto end_processing_close;
709 : }
710 : else
711 : #endif
712 : {
713 14 : eLastErrorType = CE_Failure;
714 14 : CPLError(
715 : eLastErrorType, CPLE_AppDefined,
716 : "Line %d: <%.500s> doesn't have matching <%.500s>.",
717 : sContext.nInputLine, sContext.pszToken,
718 14 : sContext.pszToken + 1);
719 14 : break;
720 : }
721 : }
722 : else
723 : {
724 1971020 : if (strcmp(sContext.pszToken + 1,
725 1971020 : sContext.papsStack[sContext.nStackSize - 1]
726 1971020 : .psFirstNode->pszValue) != 0)
727 : {
728 : // TODO: At some point we could just error out like any
729 : // other sane XML parser would do.
730 1 : eLastErrorType = CE_Warning;
731 1 : CPLError(
732 : eLastErrorType, CPLE_AppDefined,
733 : "Line %d: <%.500s> matches <%.500s>, but the case "
734 : "isn't the same. Going on, but this is invalid "
735 : "XML that might be rejected in future versions.",
736 : sContext.nInputLine,
737 1 : sContext.papsStack[sContext.nStackSize - 1]
738 1 : .psFirstNode->pszValue,
739 : sContext.pszToken);
740 : }
741 : #ifdef DEBUG
742 1971020 : end_processing_close:
743 : #endif
744 1971020 : if (ReadToken(&sContext, eLastErrorType) != TClose)
745 : {
746 3 : eLastErrorType = CE_Failure;
747 3 : CPLError(eLastErrorType, CPLE_AppDefined,
748 : "Line %d: Missing close angle bracket "
749 : "after <%.500s.",
750 : sContext.nInputLine, sContext.pszToken);
751 3 : break;
752 : }
753 :
754 : // Pop element off stack
755 1971010 : sContext.nStackSize--;
756 : }
757 : }
758 : }
759 :
760 : /* --------------------------------------------------------------------
761 : */
762 : /* Add an attribute to a token. */
763 : /* --------------------------------------------------------------------
764 : */
765 13470400 : else if (sContext.eTokenType == TToken)
766 : {
767 : CPLXMLNode *psAttr =
768 8383060 : _CPLCreateXMLNode(nullptr, CXT_Attribute, sContext.pszToken);
769 8383060 : if (!psAttr)
770 0 : break;
771 8383060 : AttachNode(&sContext, psAttr);
772 :
773 8383060 : XMLTokenType nextToken = ReadToken(&sContext, eLastErrorType);
774 8383060 : if (nextToken != TEqual)
775 : {
776 : // Parse stuff like <?valbuddy_schematron
777 : // ../wmtsSimpleGetCapabilities.sch?>
778 5 : if (sContext.nStackSize > 0 &&
779 5 : sContext.papsStack[sContext.nStackSize - 1]
780 5 : .psFirstNode->pszValue[0] == '?')
781 : {
782 3 : psAttr->eType = CXT_Text;
783 3 : if (nextToken == TNone)
784 0 : break;
785 3 : goto loop_beginning;
786 : }
787 :
788 2 : eLastErrorType = CE_Failure;
789 2 : CPLError(eLastErrorType, CPLE_AppDefined,
790 : "Line %d: Didn't find expected '=' for value of "
791 : "attribute '%.500s'.",
792 : sContext.nInputLine, psAttr->pszValue);
793 : #ifdef DEBUG
794 : // Accepting an attribute without child text
795 : // would break too much assumptions in driver code
796 2 : bRecoverableError = false;
797 : #endif
798 2 : break;
799 : }
800 :
801 8383050 : if (ReadToken(&sContext, eLastErrorType) == TToken)
802 : {
803 : /* TODO: at some point we could just error out like any other */
804 : /* sane XML parser would do */
805 2 : eLastErrorType = CE_Warning;
806 2 : CPLError(eLastErrorType, CPLE_AppDefined,
807 : "Line %d: Attribute value should be single or double "
808 : "quoted. Going on, but this is invalid XML that "
809 : "might be rejected in future versions.",
810 : sContext.nInputLine);
811 : }
812 8383050 : else if (sContext.eTokenType != TString)
813 : {
814 1 : eLastErrorType = CE_Failure;
815 1 : CPLError(eLastErrorType, CPLE_AppDefined,
816 : "Line %d: Didn't find expected attribute value.",
817 : sContext.nInputLine);
818 : #ifdef DEBUG
819 : // Accepting an attribute without child text
820 : // would break too much assumptions in driver code
821 1 : bRecoverableError = false;
822 : #endif
823 1 : break;
824 : }
825 :
826 8383050 : if (!_CPLCreateXMLNode(psAttr, CXT_Text, sContext.pszToken))
827 0 : break;
828 : }
829 :
830 : /* --------------------------------------------------------------------
831 : */
832 : /* Close the start section of an element. */
833 : /* --------------------------------------------------------------------
834 : */
835 5087300 : else if (sContext.eTokenType == TClose)
836 : {
837 1981090 : if (sContext.nStackSize == 0)
838 : {
839 0 : eLastErrorType = CE_Failure;
840 0 : CPLError(eLastErrorType, CPLE_AppDefined,
841 : "Line %d: Found unbalanced '>'.", sContext.nInputLine);
842 0 : break;
843 : }
844 : }
845 :
846 : /* --------------------------------------------------------------------
847 : */
848 : /* Close the start section of an element, and pop it */
849 : /* immediately. */
850 : /* --------------------------------------------------------------------
851 : */
852 3106220 : else if (sContext.eTokenType == TSlashClose)
853 : {
854 1982700 : if (sContext.nStackSize == 0)
855 : {
856 0 : eLastErrorType = CE_Failure;
857 0 : CPLError(eLastErrorType, CPLE_AppDefined,
858 : "Line %d: Found unbalanced '/>'.",
859 : sContext.nInputLine);
860 0 : break;
861 : }
862 :
863 1982700 : sContext.nStackSize--;
864 : }
865 : /* --------------------------------------------------------------------
866 : */
867 : /* Close the start section of a <?...?> element, and pop it */
868 : /* immediately. */
869 : /* --------------------------------------------------------------------
870 : */
871 1123510 : else if (sContext.eTokenType == TQuestionClose)
872 : {
873 8551 : if (sContext.nStackSize == 0)
874 : {
875 0 : eLastErrorType = CE_Failure;
876 0 : CPLError(eLastErrorType, CPLE_AppDefined,
877 : "Line %d: Found unbalanced '?>'.",
878 : sContext.nInputLine);
879 0 : break;
880 : }
881 8551 : else if (sContext.papsStack[sContext.nStackSize - 1]
882 8551 : .psFirstNode->pszValue[0] != '?')
883 : {
884 1 : eLastErrorType = CE_Failure;
885 1 : CPLError(eLastErrorType, CPLE_AppDefined,
886 : "Line %d: Found '?>' without matching '<?'.",
887 : sContext.nInputLine);
888 1 : break;
889 : }
890 :
891 8550 : sContext.nStackSize--;
892 : }
893 : /* --------------------------------------------------------------------
894 : */
895 : /* Handle comments. They are returned as a whole token with the */
896 : /* prefix and postfix omitted. No processing of white space */
897 : /* will be done. */
898 : /* --------------------------------------------------------------------
899 : */
900 1114960 : else if (sContext.eTokenType == TComment)
901 : {
902 : CPLXMLNode *psValue =
903 70678 : _CPLCreateXMLNode(nullptr, CXT_Comment, sContext.pszToken);
904 70678 : if (!psValue)
905 0 : break;
906 70678 : AttachNode(&sContext, psValue);
907 : }
908 : /* --------------------------------------------------------------------
909 : */
910 : /* Handle literals. They are returned without processing. */
911 : /* --------------------------------------------------------------------
912 : */
913 1044280 : else if (sContext.eTokenType == TLiteral)
914 : {
915 : CPLXMLNode *psValue =
916 16 : _CPLCreateXMLNode(nullptr, CXT_Literal, sContext.pszToken);
917 16 : if (!psValue)
918 0 : break;
919 16 : AttachNode(&sContext, psValue);
920 : }
921 : /* --------------------------------------------------------------------
922 : */
923 : /* Add a text value node as a child of the current element. */
924 : /* --------------------------------------------------------------------
925 : */
926 1044270 : else if (sContext.eTokenType == TString && !sContext.bInElement)
927 : {
928 : CPLXMLNode *psValue =
929 1044270 : _CPLCreateXMLNode(nullptr, CXT_Text, sContext.pszToken);
930 1044270 : if (!psValue)
931 0 : break;
932 1044270 : AttachNode(&sContext, psValue);
933 : }
934 : /* --------------------------------------------------------------------
935 : */
936 : /* Anything else is an error. */
937 : /* --------------------------------------------------------------------
938 : */
939 : else
940 : {
941 1 : eLastErrorType = CE_Failure;
942 1 : CPLError(eLastErrorType, CPLE_AppDefined,
943 : "Parse error at line %d, unexpected token:%.500s",
944 : sContext.nInputLine, sContext.pszToken);
945 1 : break;
946 : }
947 : }
948 :
949 : /* -------------------------------------------------------------------- */
950 : /* Did we pop all the way out of our stack? */
951 : /* -------------------------------------------------------------------- */
952 227858 : if (CPLGetLastErrorType() != CE_Failure && sContext.nStackSize > 0 &&
953 50 : sContext.papsStack != nullptr)
954 : {
955 : #ifdef DEBUG
956 : // Makes life of fuzzers easier if we accept somewhat corrupted XML
957 : // like <x> ...
958 100 : if (bRecoverableError &&
959 50 : CPLTestBool(CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
960 : {
961 0 : eLastErrorType = CE_Warning;
962 : }
963 : else
964 : #endif
965 : {
966 50 : eLastErrorType = CE_Failure;
967 : }
968 50 : CPLError(
969 : eLastErrorType, CPLE_AppDefined,
970 : "Parse error at EOF, not all elements have been closed, "
971 : "starting with %.500s",
972 50 : sContext.papsStack[sContext.nStackSize - 1].psFirstNode->pszValue);
973 : }
974 :
975 : /* -------------------------------------------------------------------- */
976 : /* Cleanup */
977 : /* -------------------------------------------------------------------- */
978 227808 : CPLFree(sContext.pszToken);
979 227808 : if (sContext.papsStack != nullptr)
980 227772 : CPLFree(sContext.papsStack);
981 :
982 : // We do not trust CPLGetLastErrorType() as if CPLTurnFailureIntoWarning()
983 : // has been set we would never get failures
984 227808 : if (eLastErrorType == CE_Failure)
985 : {
986 76 : CPLDestroyXMLNode(sContext.psFirstNode);
987 76 : sContext.psFirstNode = nullptr;
988 76 : sContext.psLastNode = nullptr;
989 : }
990 :
991 227808 : if (eLastErrorType == CE_None)
992 : {
993 : // Restore initial error state.
994 227730 : CPLErrorSetState(eErrClass, nErrNum, osErrMsg);
995 : }
996 :
997 227808 : return sContext.psFirstNode;
998 : }
999 :
1000 : /************************************************************************/
1001 : /* _GrowBuffer() */
1002 : /************************************************************************/
1003 :
1004 2566120 : static bool _GrowBuffer(size_t nNeeded, char **ppszText, size_t *pnMaxLength)
1005 :
1006 : {
1007 2566120 : if (nNeeded + 1 >= *pnMaxLength)
1008 : {
1009 27246 : *pnMaxLength = std::max(*pnMaxLength * 2, nNeeded + 1);
1010 : char *pszTextNew =
1011 27246 : static_cast<char *>(VSIRealloc(*ppszText, *pnMaxLength));
1012 27246 : if (pszTextNew == nullptr)
1013 0 : return false;
1014 27246 : *ppszText = pszTextNew;
1015 : }
1016 2566120 : return true;
1017 : }
1018 :
1019 : /************************************************************************/
1020 : /* CPLSerializeXMLNode() */
1021 : /************************************************************************/
1022 :
1023 : // TODO(schwehr): Rewrite this whole thing using C++ string.
1024 : // CPLSerializeXMLNode has buffer overflows.
1025 1008260 : static bool CPLSerializeXMLNode(const CPLXMLNode *psNode, int nIndent,
1026 : char **ppszText, size_t *pnLength,
1027 : size_t *pnMaxLength)
1028 :
1029 : {
1030 1008260 : if (psNode == nullptr)
1031 0 : return true;
1032 :
1033 : /* -------------------------------------------------------------------- */
1034 : /* Ensure the buffer is plenty large to hold this additional */
1035 : /* string. */
1036 : /* -------------------------------------------------------------------- */
1037 1008260 : *pnLength += strlen(*ppszText + *pnLength);
1038 1008260 : if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
1039 : ppszText, pnMaxLength))
1040 0 : return false;
1041 :
1042 : /* -------------------------------------------------------------------- */
1043 : /* Text is just directly emitted. */
1044 : /* -------------------------------------------------------------------- */
1045 1008260 : if (psNode->eType == CXT_Text)
1046 : {
1047 : char *pszEscaped =
1048 157653 : CPLEscapeString(psNode->pszValue, -1, CPLES_XML_BUT_QUOTES);
1049 :
1050 157653 : CPLAssert(psNode->psChild == nullptr);
1051 :
1052 : // Escaped text might be bigger than expected.
1053 157653 : if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1054 : {
1055 0 : CPLFree(pszEscaped);
1056 0 : return false;
1057 : }
1058 157653 : strcat(*ppszText + *pnLength, pszEscaped);
1059 :
1060 157653 : CPLFree(pszEscaped);
1061 : }
1062 :
1063 : /* -------------------------------------------------------------------- */
1064 : /* Attributes require a little formatting. */
1065 : /* -------------------------------------------------------------------- */
1066 850603 : else if (psNode->eType == CXT_Attribute)
1067 : {
1068 440930 : CPLAssert(psNode->psChild != nullptr &&
1069 : psNode->psChild->eType == CXT_Text);
1070 :
1071 440930 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, " %s=\"",
1072 440930 : psNode->pszValue);
1073 440930 : *pnLength += strlen(*ppszText + *pnLength);
1074 :
1075 : char *pszEscaped =
1076 440930 : CPLEscapeString(psNode->psChild->pszValue, -1, CPLES_XML);
1077 :
1078 440930 : if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1079 : {
1080 0 : CPLFree(pszEscaped);
1081 0 : return false;
1082 : }
1083 440930 : strcat(*ppszText + *pnLength, pszEscaped);
1084 :
1085 440930 : CPLFree(pszEscaped);
1086 :
1087 440930 : *pnLength += strlen(*ppszText + *pnLength);
1088 440930 : if (!_GrowBuffer(3 + *pnLength, ppszText, pnMaxLength))
1089 0 : return false;
1090 440930 : strcat(*ppszText + *pnLength, "\"");
1091 : }
1092 :
1093 : /* -------------------------------------------------------------------- */
1094 : /* Handle comment output. */
1095 : /* -------------------------------------------------------------------- */
1096 409673 : else if (psNode->eType == CXT_Comment)
1097 : {
1098 12552 : CPLAssert(psNode->psChild == nullptr);
1099 :
1100 82406 : for (int i = 0; i < nIndent; i++)
1101 69854 : (*ppszText)[(*pnLength)++] = ' ';
1102 :
1103 12552 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<!--%s-->\n",
1104 12552 : psNode->pszValue);
1105 : }
1106 :
1107 : /* -------------------------------------------------------------------- */
1108 : /* Handle literal output (like <!DOCTYPE...>) */
1109 : /* -------------------------------------------------------------------- */
1110 397121 : else if (psNode->eType == CXT_Literal)
1111 : {
1112 4 : CPLAssert(psNode->psChild == nullptr);
1113 :
1114 28 : for (int i = 0; i < nIndent; i++)
1115 24 : (*ppszText)[(*pnLength)++] = ' ';
1116 :
1117 4 : strcpy(*ppszText + *pnLength, psNode->pszValue);
1118 4 : strcat(*ppszText + *pnLength, "\n");
1119 : }
1120 :
1121 : /* -------------------------------------------------------------------- */
1122 : /* Elements actually have to deal with general children, and */
1123 : /* various formatting issues. */
1124 : /* -------------------------------------------------------------------- */
1125 397117 : else if (psNode->eType == CXT_Element)
1126 : {
1127 397117 : if (nIndent)
1128 386440 : memset(*ppszText + *pnLength, ' ', nIndent);
1129 397117 : *pnLength += nIndent;
1130 397117 : (*ppszText)[*pnLength] = '\0';
1131 :
1132 397117 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<%s",
1133 397117 : psNode->pszValue);
1134 :
1135 397117 : if (psNode->pszValue[0] == '?')
1136 : {
1137 2723 : for (const CPLXMLNode *psChild = psNode->psChild;
1138 8182 : psChild != nullptr; psChild = psChild->psNext)
1139 : {
1140 5459 : if (psChild->eType == CXT_Text)
1141 : {
1142 3 : *pnLength += strlen(*ppszText + *pnLength);
1143 3 : if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1144 0 : return false;
1145 3 : strcat(*ppszText + *pnLength, " ");
1146 : }
1147 :
1148 5459 : if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1149 : pnMaxLength))
1150 : {
1151 0 : return false;
1152 : }
1153 : }
1154 2723 : if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1155 0 : return false;
1156 :
1157 2723 : strcat(*ppszText + *pnLength, "?>\n");
1158 : }
1159 : else
1160 : {
1161 394394 : bool bHasNonAttributeChildren = false;
1162 : // Serialize *all* the attribute children, regardless of order
1163 394394 : for (const CPLXMLNode *psChild = psNode->psChild;
1164 1386010 : psChild != nullptr; psChild = psChild->psNext)
1165 : {
1166 991612 : if (psChild->eType == CXT_Attribute)
1167 : {
1168 435474 : if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1169 : pnMaxLength))
1170 0 : return false;
1171 : }
1172 : else
1173 556138 : bHasNonAttributeChildren = true;
1174 : }
1175 :
1176 394394 : if (!bHasNonAttributeChildren)
1177 : {
1178 115541 : if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1179 0 : return false;
1180 :
1181 115541 : strcat(*ppszText + *pnLength, " />\n");
1182 : }
1183 : else
1184 : {
1185 278853 : bool bJustText = true;
1186 :
1187 278853 : strcat(*ppszText + *pnLength, ">");
1188 :
1189 278853 : for (const CPLXMLNode *psChild = psNode->psChild;
1190 969736 : psChild != nullptr; psChild = psChild->psNext)
1191 : {
1192 690883 : if (psChild->eType == CXT_Attribute)
1193 134745 : continue;
1194 :
1195 556138 : if (psChild->eType != CXT_Text && bJustText)
1196 : {
1197 121236 : bJustText = false;
1198 121236 : *pnLength += strlen(*ppszText + *pnLength);
1199 121236 : if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1200 0 : return false;
1201 121236 : strcat(*ppszText + *pnLength, "\n");
1202 : }
1203 :
1204 556138 : if (!CPLSerializeXMLNode(psChild, nIndent + 2, ppszText,
1205 : pnLength, pnMaxLength))
1206 0 : return false;
1207 : }
1208 :
1209 278853 : *pnLength += strlen(*ppszText + *pnLength);
1210 278853 : if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 +
1211 278853 : nIndent,
1212 : ppszText, pnMaxLength))
1213 0 : return false;
1214 :
1215 278853 : if (!bJustText)
1216 : {
1217 121236 : if (nIndent)
1218 113386 : memset(*ppszText + *pnLength, ' ', nIndent);
1219 121236 : *pnLength += nIndent;
1220 121236 : (*ppszText)[*pnLength] = '\0';
1221 : }
1222 :
1223 278853 : *pnLength += strlen(*ppszText + *pnLength);
1224 278853 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength,
1225 278853 : "</%s>\n", psNode->pszValue);
1226 : }
1227 : }
1228 : }
1229 :
1230 1008260 : return true;
1231 : }
1232 :
1233 : /************************************************************************/
1234 : /* CPLSerializeXMLTree() */
1235 : /************************************************************************/
1236 :
1237 : /**
1238 : * \brief Convert tree into string document.
1239 : *
1240 : * This function converts a CPLXMLNode tree representation of a document
1241 : * into a flat string representation. White space indentation is used
1242 : * visually preserve the tree structure of the document. The returned
1243 : * document becomes owned by the caller and should be freed with CPLFree()
1244 : * when no longer needed.
1245 : *
1246 : * @param psNode the node to serialize.
1247 : *
1248 : * @return the document on success or NULL on failure.
1249 : */
1250 :
1251 7962 : char *CPLSerializeXMLTree(const CPLXMLNode *psNode)
1252 :
1253 : {
1254 7962 : size_t nMaxLength = 100;
1255 7962 : char *pszText = static_cast<char *>(CPLCalloc(nMaxLength, sizeof(char)));
1256 7962 : if (pszText == nullptr)
1257 0 : return nullptr;
1258 :
1259 7962 : size_t nLength = 0;
1260 19147 : for (const CPLXMLNode *psThis = psNode; psThis != nullptr;
1261 11185 : psThis = psThis->psNext)
1262 : {
1263 11185 : if (!CPLSerializeXMLNode(psThis, 0, &pszText, &nLength, &nMaxLength))
1264 : {
1265 0 : VSIFree(pszText);
1266 0 : return nullptr;
1267 : }
1268 : }
1269 :
1270 7962 : return pszText;
1271 : }
1272 :
1273 : /************************************************************************/
1274 : /* CPLCreateXMLNode() */
1275 : /************************************************************************/
1276 :
#ifdef DEBUG
// Debug-only sink: _CPLCreateXMLNode() stores each parentless node it
// creates here so that static analysis sees a reference to it. It is not
// read anywhere in the code visible here.
static CPLXMLNode *psDummyStaticNode;
#endif
1280 :
1281 : /**
1282 : * \brief Create an document tree item.
1283 : *
1284 : * Create a single CPLXMLNode object with the desired value and type, and
1285 : * attach it as a child of the indicated parent.
1286 : *
1287 : * @param poParent the parent to which this node should be attached as a
1288 : * child. May be NULL to keep as free standing.
1289 : * @param eType the type of the newly created node
1290 : * @param pszText the value of the newly created node
1291 : *
1292 : * @return the newly created node, now owned by the caller (or parent node).
1293 : */
1294 :
1295 814239 : CPLXMLNode *CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
1296 : const char *pszText)
1297 :
1298 : {
1299 814239 : auto ret = _CPLCreateXMLNode(poParent, eType, pszText);
1300 814239 : if (!ret)
1301 : {
1302 0 : CPLError(CE_Fatal, CPLE_OutOfMemory, "CPLCreateXMLNode() failed");
1303 : }
1304 814239 : return ret;
1305 : }
1306 :
1307 : /************************************************************************/
1308 : /* _CPLCreateXMLNode() */
1309 : /************************************************************************/
1310 :
1311 : /* Same as CPLCreateXMLNode() but can return NULL in case of out-of-memory */
1312 : /* situation */
1313 :
1314 22667700 : static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
1315 : const char *pszText)
1316 :
1317 : {
1318 :
1319 : /* -------------------------------------------------------------------- */
1320 : /* Create new node. */
1321 : /* -------------------------------------------------------------------- */
1322 : CPLXMLNode *psNode =
1323 22667700 : static_cast<CPLXMLNode *>(VSICalloc(sizeof(CPLXMLNode), 1));
1324 22667700 : if (psNode == nullptr)
1325 : {
1326 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "Cannot allocate CPLXMLNode");
1327 0 : return nullptr;
1328 : }
1329 :
1330 22667700 : psNode->eType = eType;
1331 22667700 : psNode->pszValue = VSIStrdup(pszText ? pszText : "");
1332 22667700 : if (psNode->pszValue == nullptr)
1333 : {
1334 1 : CPLError(CE_Failure, CPLE_OutOfMemory,
1335 : "Cannot allocate psNode->pszValue");
1336 0 : VSIFree(psNode);
1337 0 : return nullptr;
1338 : }
1339 :
1340 : /* -------------------------------------------------------------------- */
1341 : /* Attach to parent, if provided. */
1342 : /* -------------------------------------------------------------------- */
1343 22667700 : if (poParent != nullptr)
1344 : {
1345 9064580 : if (poParent->psChild == nullptr)
1346 8790040 : poParent->psChild = psNode;
1347 : else
1348 : {
1349 274538 : CPLXMLNode *psLink = poParent->psChild;
1350 274538 : if (psLink->psNext == nullptr && eType == CXT_Attribute &&
1351 53625 : psLink->eType == CXT_Text)
1352 : {
1353 10766 : psNode->psNext = psLink;
1354 10766 : poParent->psChild = psNode;
1355 : }
1356 : else
1357 : {
1358 988566 : while (psLink->psNext != nullptr)
1359 : {
1360 729018 : if (eType == CXT_Attribute &&
1361 272342 : psLink->psNext->eType == CXT_Text)
1362 : {
1363 4224 : psNode->psNext = psLink->psNext;
1364 4224 : break;
1365 : }
1366 :
1367 724794 : psLink = psLink->psNext;
1368 : }
1369 :
1370 263772 : psLink->psNext = psNode;
1371 : }
1372 : }
1373 : }
1374 : #ifdef DEBUG
1375 : else
1376 : {
1377 : // Coverity sometimes doesn't realize that this function is passed
1378 : // with a non NULL parent and thinks that this branch is taken, leading
1379 : // to creating object being leak by caller. This ugly hack hopefully
1380 : // makes it believe that someone will reference it.
1381 13603100 : psDummyStaticNode = psNode;
1382 : }
1383 : #endif
1384 :
1385 22667700 : return psNode;
1386 : }
1387 :
1388 : /************************************************************************/
1389 : /* CPLDestroyXMLNode() */
1390 : /************************************************************************/
1391 :
1392 : /**
1393 : * \brief Destroy a tree.
1394 : *
1395 : * This function frees resources associated with a CPLXMLNode and all its
1396 : * children nodes.
1397 : *
1398 : * @param psNode the tree to free.
1399 : */
1400 :
1401 22937800 : void CPLDestroyXMLNode(CPLXMLNode *psNode)
1402 :
1403 : {
1404 22937800 : while (psNode != nullptr)
1405 : {
1406 22694000 : if (psNode->pszValue != nullptr)
1407 22694000 : CPLFree(psNode->pszValue);
1408 :
1409 22694000 : if (psNode->psChild != nullptr)
1410 : {
1411 12797900 : CPLXMLNode *psNext = psNode->psNext;
1412 12797900 : psNode->psNext = psNode->psChild;
1413 : // Move the child and its siblings as the next
1414 : // siblings of the current node.
1415 12797900 : if (psNext != nullptr)
1416 : {
1417 12245600 : CPLXMLNode *psIter = psNode->psChild;
1418 20684200 : while (psIter->psNext != nullptr)
1419 8438660 : psIter = psIter->psNext;
1420 12245600 : psIter->psNext = psNext;
1421 : }
1422 : }
1423 :
1424 22694000 : CPLXMLNode *psNext = psNode->psNext;
1425 :
1426 22694000 : CPLFree(psNode);
1427 :
1428 22694000 : psNode = psNext;
1429 : }
1430 243774 : }
1431 :
1432 : /************************************************************************/
1433 : /* CPLSearchXMLNode() */
1434 : /************************************************************************/
1435 :
1436 : /**
1437 : * \brief Search for a node in document.
1438 : *
1439 : * Searches the children (and potentially siblings) of the documented
1440 : * passed in for the named element or attribute. To search following
1441 : * siblings as well as children, prefix the pszElement name with an equal
1442 : * sign. This function does an in-order traversal of the document tree.
1443 : * So it will first match against the current node, then its first child,
1444 : * that child's first child, and so on.
1445 : *
1446 : * Use CPLGetXMLNode() to find a specific child, or along a specific
1447 : * node path.
1448 : *
1449 : * @param psRoot the subtree to search. This should be a node of type
1450 : * CXT_Element. NULL is safe.
1451 : *
1452 : * @param pszElement the name of the element or attribute to search for.
1453 : *
1454 : * @return The matching node or NULL on failure.
1455 : */
1456 :
1457 94687 : CPLXMLNode *CPLSearchXMLNode(CPLXMLNode *psRoot, const char *pszElement)
1458 :
1459 : {
1460 94687 : if (psRoot == nullptr || pszElement == nullptr)
1461 0 : return nullptr;
1462 :
1463 94687 : bool bSideSearch = false;
1464 :
1465 94687 : if (*pszElement == '=')
1466 : {
1467 5375 : bSideSearch = true;
1468 5375 : pszElement++;
1469 : }
1470 :
1471 : /* -------------------------------------------------------------------- */
1472 : /* Does this node match? */
1473 : /* -------------------------------------------------------------------- */
1474 94687 : if ((psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute) &&
1475 94516 : EQUAL(pszElement, psRoot->pszValue))
1476 4082 : return psRoot;
1477 :
1478 : /* -------------------------------------------------------------------- */
1479 : /* Search children. */
1480 : /* -------------------------------------------------------------------- */
1481 90605 : CPLXMLNode *psChild = nullptr;
1482 241807 : for (psChild = psRoot->psChild; psChild != nullptr;
1483 151202 : psChild = psChild->psNext)
1484 : {
1485 152465 : if ((psChild->eType == CXT_Element ||
1486 91279 : psChild->eType == CXT_Attribute) &&
1487 88362 : EQUAL(pszElement, psChild->pszValue))
1488 423 : return psChild;
1489 :
1490 152042 : if (psChild->psChild != nullptr)
1491 : {
1492 84206 : CPLXMLNode *psResult = CPLSearchXMLNode(psChild, pszElement);
1493 84206 : if (psResult != nullptr)
1494 840 : return psResult;
1495 : }
1496 : }
1497 :
1498 : /* -------------------------------------------------------------------- */
1499 : /* Search siblings if we are in side search mode. */
1500 : /* -------------------------------------------------------------------- */
1501 89342 : if (bSideSearch)
1502 : {
1503 5772 : for (psRoot = psRoot->psNext; psRoot != nullptr;
1504 840 : psRoot = psRoot->psNext)
1505 : {
1506 4853 : CPLXMLNode *psResult = CPLSearchXMLNode(psRoot, pszElement);
1507 4853 : if (psResult != nullptr)
1508 4013 : return psResult;
1509 : }
1510 : }
1511 :
1512 85329 : return nullptr;
1513 : }
1514 :
1515 : /************************************************************************/
1516 : /* CPLGetXMLNode() */
1517 : /************************************************************************/
1518 :
1519 : /**
1520 : * \brief Find node by path.
1521 : *
1522 : * Searches the document or subdocument indicated by psRoot for an element
1523 : * (or attribute) with the given path. The path should consist of a set of
1524 : * element names separated by dots, not including the name of the root
1525 : * element (psRoot). If the requested element is not found NULL is returned.
1526 : *
1527 : * Attribute names may only appear as the last item in the path.
1528 : *
1529 : * The search is done from the root nodes children, but all intermediate
1530 : * nodes in the path must be specified. Searching for "name" would only find
1531 : * a name element or attribute if it is a direct child of the root, not at any
1532 : * level in the subdocument.
1533 : *
1534 : * If the pszPath is prefixed by "=" then the search will begin with the
1535 : * root node, and its siblings, instead of the root nodes children. This
1536 : * is particularly useful when searching within a whole document which is
1537 : * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1538 : *
1539 : * @param psRoot the subtree in which to search. This should be a node of
1540 : * type CXT_Element. NULL is safe.
1541 : *
1542 : * @param pszPath the list of element names in the path (dot separated).
1543 : *
1544 : * @return the requested element node, or NULL if not found.
1545 : */
1546 :
1547 1760830 : CPLXMLNode *CPLGetXMLNode(CPLXMLNode *psRoot, const char *pszPath)
1548 :
1549 : {
1550 1760830 : if (psRoot == nullptr || pszPath == nullptr)
1551 1073 : return nullptr;
1552 :
1553 1759760 : bool bSideSearch = false;
1554 :
1555 1759760 : if (*pszPath == '=')
1556 : {
1557 216584 : bSideSearch = true;
1558 216584 : pszPath++;
1559 : }
1560 :
1561 1759760 : const char *const apszTokens[2] = {pszPath, nullptr};
1562 :
1563 : // Slight optimization: avoid using CSLTokenizeStringComplex that
1564 : // does memory allocations when it is not really necessary.
1565 1759760 : bool bFreeTokens = false;
1566 1759760 : char **papszTokensToFree = nullptr;
1567 : const char *const *papszTokens;
1568 1759760 : if (strchr(pszPath, '.'))
1569 : {
1570 : papszTokensToFree =
1571 231176 : CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
1572 231176 : papszTokens = papszTokensToFree;
1573 231176 : bFreeTokens = true;
1574 : }
1575 : else
1576 : {
1577 1528580 : papszTokens = apszTokens;
1578 : }
1579 :
1580 1759760 : int iToken = 0;
1581 3077130 : while (papszTokens[iToken] != nullptr && psRoot != nullptr)
1582 : {
1583 1996790 : CPLXMLNode *psChild = nullptr;
1584 :
1585 1996790 : if (bSideSearch)
1586 : {
1587 216584 : psChild = psRoot;
1588 216584 : bSideSearch = false;
1589 : }
1590 : else
1591 1780200 : psChild = psRoot->psChild;
1592 :
1593 8008410 : for (; psChild != nullptr; psChild = psChild->psNext)
1594 : {
1595 7329000 : if (psChild->eType != CXT_Text &&
1596 7287140 : EQUAL(papszTokens[iToken], psChild->pszValue))
1597 1317370 : break;
1598 : }
1599 :
1600 1996790 : if (psChild == nullptr)
1601 : {
1602 679413 : psRoot = nullptr;
1603 679413 : break;
1604 : }
1605 :
1606 1317370 : psRoot = psChild;
1607 1317370 : iToken++;
1608 : }
1609 :
1610 1759760 : if (bFreeTokens)
1611 231176 : CSLDestroy(papszTokensToFree);
1612 1759760 : return psRoot;
1613 : }
1614 :
1615 : /************************************************************************/
1616 : /* CPLGetXMLValue() */
1617 : /************************************************************************/
1618 :
1619 : /**
1620 : * \brief Fetch element/attribute value.
1621 : *
1622 : * Searches the document for the element/attribute value associated with
1623 : * the path. The corresponding node is internally found with CPLGetXMLNode()
1624 : * (see there for details on path handling). Once found, the value is
1625 : * considered to be the first CXT_Text child of the node.
1626 : *
1627 : * If the attribute/element search fails, or if the found node has no
1628 : * value then the passed default value is returned.
1629 : *
1630 : * The returned value points to memory within the document tree, and should
1631 : * not be altered or freed.
1632 : *
1633 : * @param psRoot the subtree in which to search. This should be a node of
1634 : * type CXT_Element. NULL is safe.
1635 : *
1636 : * @param pszPath the list of element names in the path (dot separated). An
1637 : * empty path means get the value of the psRoot node.
1638 : *
1639 : * @param pszDefault the value to return if a corresponding value is not
1640 : * found, may be NULL.
1641 : *
1642 : * @return the requested value or pszDefault if not found.
1643 : */
1644 :
1645 1755400 : const char *CPLGetXMLValue(const CPLXMLNode *psRoot, const char *pszPath,
1646 : const char *pszDefault)
1647 :
1648 : {
1649 1755400 : const CPLXMLNode *psTarget = nullptr;
1650 :
1651 1755400 : if (pszPath == nullptr || *pszPath == '\0')
1652 107797 : psTarget = psRoot;
1653 : else
1654 1647600 : psTarget = CPLGetXMLNode(psRoot, pszPath);
1655 :
1656 1755400 : if (psTarget == nullptr)
1657 630625 : return pszDefault;
1658 :
1659 1124780 : if (psTarget->eType == CXT_Attribute)
1660 : {
1661 688713 : CPLAssert(psTarget->psChild != nullptr &&
1662 : psTarget->psChild->eType == CXT_Text);
1663 :
1664 688713 : return psTarget->psChild->pszValue;
1665 : }
1666 :
1667 436064 : if (psTarget->eType == CXT_Element)
1668 : {
1669 : // Find first non-attribute child, and verify it is a single text
1670 : // with no siblings.
1671 :
1672 436040 : psTarget = psTarget->psChild;
1673 :
1674 486593 : while (psTarget != nullptr && psTarget->eType == CXT_Attribute)
1675 50553 : psTarget = psTarget->psNext;
1676 :
1677 436040 : if (psTarget != nullptr && psTarget->eType == CXT_Text &&
1678 433367 : psTarget->psNext == nullptr)
1679 433367 : return psTarget->pszValue;
1680 : }
1681 :
1682 2697 : return pszDefault;
1683 : }
1684 :
1685 : /************************************************************************/
1686 : /* CPLAddXMLChild() */
1687 : /************************************************************************/
1688 :
1689 : /**
1690 : * \brief Add child node to parent.
1691 : *
1692 : * The passed child is added to the list of children of the indicated
1693 : * parent. Normally the child is added at the end of the parents child
1694 : * list, but attributes (CXT_Attribute) will be inserted after any other
1695 : * attributes but before any other element type. Ownership of the child
1696 : * node is effectively assumed by the parent node. If the child has
1697 : * siblings (its psNext is not NULL) they will be trimmed, but if the child
1698 : * has children they are carried with it.
1699 : *
1700 : * @param psParent the node to attach the child to. May not be NULL.
1701 : *
1702 : * @param psChild the child to add to the parent. May not be NULL. Should
1703 : * not be a child of any other parent.
1704 : */
1705 :
1706 5877 : void CPLAddXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1707 :
1708 : {
1709 5877 : if (psParent->psChild == nullptr)
1710 : {
1711 2439 : psParent->psChild = psChild;
1712 2439 : return;
1713 : }
1714 :
1715 : // Insert at head of list if first child is not attribute.
1716 3438 : if (psChild->eType == CXT_Attribute &&
1717 21 : psParent->psChild->eType != CXT_Attribute)
1718 : {
1719 0 : psChild->psNext = psParent->psChild;
1720 0 : psParent->psChild = psChild;
1721 0 : return;
1722 : }
1723 :
1724 : // Search for end of list.
1725 3438 : CPLXMLNode *psSib = nullptr;
1726 14985 : for (psSib = psParent->psChild; psSib->psNext != nullptr;
1727 11547 : psSib = psSib->psNext)
1728 : {
1729 : // Insert attributes if the next node is not an attribute.
1730 11548 : if (psChild->eType == CXT_Attribute && psSib->psNext != nullptr &&
1731 5 : psSib->psNext->eType != CXT_Attribute)
1732 : {
1733 1 : psChild->psNext = psSib->psNext;
1734 1 : psSib->psNext = psChild;
1735 1 : return;
1736 : }
1737 : }
1738 :
1739 3437 : psSib->psNext = psChild;
1740 : }
1741 :
1742 : /************************************************************************/
1743 : /* CPLRemoveXMLChild() */
1744 : /************************************************************************/
1745 :
1746 : /**
1747 : * \brief Remove child node from parent.
1748 : *
1749 : * The passed child is removed from the child list of the passed parent,
1750 : * but the child is not destroyed. The child retains ownership of its
1751 : * own children, but is cleanly removed from the child list of the parent.
1752 : *
1753 : * @param psParent the node to the child is attached to.
1754 : *
1755 : * @param psChild the child to remove.
1756 : *
1757 : * @return TRUE on success or FALSE if the child was not found.
1758 : */
1759 :
1760 2596 : int CPLRemoveXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1761 :
1762 : {
1763 2596 : if (psParent == nullptr)
1764 0 : return FALSE;
1765 :
1766 2596 : CPLXMLNode *psLast = nullptr;
1767 2596 : CPLXMLNode *psThis = nullptr;
1768 5690 : for (psThis = psParent->psChild; psThis != nullptr; psThis = psThis->psNext)
1769 : {
1770 4456 : if (psThis == psChild)
1771 : {
1772 1362 : if (psLast == nullptr)
1773 875 : psParent->psChild = psThis->psNext;
1774 : else
1775 487 : psLast->psNext = psThis->psNext;
1776 :
1777 1362 : psThis->psNext = nullptr;
1778 1362 : return TRUE;
1779 : }
1780 3094 : psLast = psThis;
1781 : }
1782 :
1783 1234 : return FALSE;
1784 : }
1785 :
1786 : /************************************************************************/
1787 : /* CPLAddXMLSibling() */
1788 : /************************************************************************/
1789 :
1790 : /**
1791 : * \brief Add new sibling.
1792 : *
1793 : * The passed psNewSibling is added to the end of siblings of the
1794 : * psOlderSibling node. That is, it is added to the end of the psNext
1795 : * chain. There is no special handling if psNewSibling is an attribute.
1796 : * If this is required, use CPLAddXMLChild().
1797 : *
1798 : * @param psOlderSibling the node to attach the sibling after.
1799 : *
1800 : * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1801 : * chain.
1802 : */
1803 :
1804 4327 : void CPLAddXMLSibling(CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling)
1805 :
1806 : {
1807 4327 : if (psOlderSibling == nullptr)
1808 0 : return;
1809 :
1810 4438 : while (psOlderSibling->psNext != nullptr)
1811 111 : psOlderSibling = psOlderSibling->psNext;
1812 :
1813 4327 : psOlderSibling->psNext = psNewSibling;
1814 : }
1815 :
1816 : /************************************************************************/
1817 : /* CPLCreateXMLElementAndValue() */
1818 : /************************************************************************/
1819 :
1820 : /**
1821 : * \brief Create an element and text value.
1822 : *
1823 : * This is function is a convenient short form for:
1824 : *
1825 : * \code
1826 : * CPLXMLNode *psTextNode;
1827 : * CPLXMLNode *psElementNode;
1828 : *
1829 : * psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
1830 : * psTextNode = CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
1831 : *
1832 : * return psElementNode;
1833 : * \endcode
1834 : *
1835 : * It creates a CXT_Element node, with a CXT_Text child, and
1836 : * attaches the element to the passed parent.
1837 : *
1838 : * @param psParent the parent node to which the resulting node should
1839 : * be attached. May be NULL to keep as freestanding.
1840 : *
1841 : * @param pszName the element name to create.
1842 : * @param pszValue the text to attach to the element. Must not be NULL.
1843 : *
1844 : * @return the pointer to the new element node.
1845 : */
1846 :
1847 64488 : CPLXMLNode *CPLCreateXMLElementAndValue(CPLXMLNode *psParent,
1848 : const char *pszName,
1849 : const char *pszValue)
1850 :
1851 : {
1852 : CPLXMLNode *psElementNode =
1853 64488 : CPLCreateXMLNode(psParent, CXT_Element, pszName);
1854 64488 : CPLCreateXMLNode(psElementNode, CXT_Text, pszValue);
1855 :
1856 64488 : return psElementNode;
1857 : }
1858 :
1859 : /************************************************************************/
1860 : /* CPLCreateXMLElementAndValue() */
1861 : /************************************************************************/
1862 :
1863 : /**
1864 : * \brief Create an attribute and text value.
1865 : *
1866 : * This is function is a convenient short form for:
1867 : *
1868 : * \code
1869 : * CPLXMLNode *psAttributeNode;
1870 : *
1871 : * psAttributeNode = CPLCreateXMLNode( psParent, CXT_Attribute, pszName );
1872 : * CPLCreateXMLNode( psAttributeNode, CXT_Text, pszValue );
1873 : * \endcode
1874 : *
1875 : * It creates a CXT_Attribute node, with a CXT_Text child, and
1876 : * attaches the element to the passed parent.
1877 : *
1878 : * @param psParent the parent node to which the resulting node should
1879 : * be attached. Must not be NULL.
1880 : * @param pszName the attribute name to create.
1881 : * @param pszValue the text to attach to the attribute. Must not be NULL.
1882 : *
1883 : * @since GDAL 2.0
1884 : */
1885 :
1886 28743 : void CPLAddXMLAttributeAndValue(CPLXMLNode *psParent, const char *pszName,
1887 : const char *pszValue)
1888 : {
1889 28743 : CPLAssert(psParent != nullptr);
1890 : CPLXMLNode *psAttributeNode =
1891 28743 : CPLCreateXMLNode(psParent, CXT_Attribute, pszName);
1892 28743 : CPLCreateXMLNode(psAttributeNode, CXT_Text, pszValue);
1893 28743 : }
1894 :
1895 : /************************************************************************/
1896 : /* CPLCloneXMLTree() */
1897 : /************************************************************************/
1898 :
1899 : /**
1900 : * \brief Copy tree.
1901 : *
1902 : * Creates a deep copy of a CPLXMLNode tree.
1903 : *
1904 : * @param psTree the tree to duplicate.
1905 : *
1906 : * @return a copy of the whole tree.
1907 : */
1908 :
1909 33846 : CPLXMLNode *CPLCloneXMLTree(const CPLXMLNode *psTree)
1910 :
1911 : {
1912 33846 : CPLXMLNode *psPrevious = nullptr;
1913 33846 : CPLXMLNode *psReturn = nullptr;
1914 :
1915 89049 : while (psTree != nullptr)
1916 : {
1917 : CPLXMLNode *psCopy =
1918 55203 : CPLCreateXMLNode(nullptr, psTree->eType, psTree->pszValue);
1919 55203 : if (psReturn == nullptr)
1920 33846 : psReturn = psCopy;
1921 55203 : if (psPrevious != nullptr)
1922 21357 : psPrevious->psNext = psCopy;
1923 :
1924 55203 : if (psTree->psChild != nullptr)
1925 32272 : psCopy->psChild = CPLCloneXMLTree(psTree->psChild);
1926 :
1927 55203 : psPrevious = psCopy;
1928 55203 : psTree = psTree->psNext;
1929 : }
1930 :
1931 33846 : return psReturn;
1932 : }
1933 :
1934 : /************************************************************************/
1935 : /* CPLSetXMLValue() */
1936 : /************************************************************************/
1937 :
1938 : /**
1939 : * \brief Set element value by path.
1940 : *
1941 : * Find (or create) the target element or attribute specified in the
1942 : * path, and assign it the indicated value.
1943 : *
1944 : * Any path elements that do not already exist will be created. The target
1945 : * nodes value (the first CXT_Text child) will be replaced with the provided
1946 : * value.
1947 : *
1948 : * If the target node is an attribute instead of an element, the name
1949 : * should be prefixed with a #.
1950 : *
1951 : * Example:
1952 : * CPLSetXMLValue( "Citation.Id.Description", "DOQ dataset" );
1953 : * CPLSetXMLValue( "Citation.Id.Description.#name", "doq" );
1954 : *
1955 : * @param psRoot the subdocument to be updated.
1956 : *
1957 : * @param pszPath the dot separated path to the target element/attribute.
1958 : *
1959 : * @param pszValue the text value to assign.
1960 : *
1961 : * @return TRUE on success.
1962 : */
1963 :
1964 195530 : int CPLSetXMLValue(CPLXMLNode *psRoot, const char *pszPath,
1965 : const char *pszValue)
1966 :
1967 : {
1968 195530 : char **papszTokens = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
1969 195530 : int iToken = 0;
1970 :
1971 423469 : while (papszTokens[iToken] != nullptr)
1972 : {
1973 227939 : bool bIsAttribute = false;
1974 227939 : const char *pszName = papszTokens[iToken];
1975 :
1976 227939 : if (pszName[0] == '#')
1977 : {
1978 184229 : bIsAttribute = true;
1979 184229 : pszName++;
1980 : }
1981 :
1982 227939 : if (psRoot->eType != CXT_Element)
1983 : {
1984 0 : CSLDestroy(papszTokens);
1985 0 : return FALSE;
1986 : }
1987 :
1988 227939 : CPLXMLNode *psChild = nullptr;
1989 727780 : for (psChild = psRoot->psChild; psChild != nullptr;
1990 499841 : psChild = psChild->psNext)
1991 : {
1992 528728 : if (psChild->eType != CXT_Text && EQUAL(pszName, psChild->pszValue))
1993 28887 : break;
1994 : }
1995 :
1996 227939 : if (psChild == nullptr)
1997 : {
1998 199052 : if (bIsAttribute)
1999 183914 : psChild = CPLCreateXMLNode(psRoot, CXT_Attribute, pszName);
2000 : else
2001 15138 : psChild = CPLCreateXMLNode(psRoot, CXT_Element, pszName);
2002 : }
2003 :
2004 227939 : psRoot = psChild;
2005 227939 : iToken++;
2006 : }
2007 :
2008 195530 : CSLDestroy(papszTokens);
2009 :
2010 : /* -------------------------------------------------------------------- */
2011 : /* Find the "text" child if there is one. */
2012 : /* -------------------------------------------------------------------- */
2013 195530 : CPLXMLNode *psTextChild = psRoot->psChild;
2014 :
2015 195727 : while (psTextChild != nullptr && psTextChild->eType != CXT_Text)
2016 197 : psTextChild = psTextChild->psNext;
2017 :
2018 : /* -------------------------------------------------------------------- */
2019 : /* Now set a value node under this node. */
2020 : /* -------------------------------------------------------------------- */
2021 :
2022 195530 : if (psTextChild == nullptr)
2023 194952 : CPLCreateXMLNode(psRoot, CXT_Text, pszValue);
2024 : else
2025 : {
2026 578 : CPLFree(psTextChild->pszValue);
2027 578 : psTextChild->pszValue = CPLStrdup(pszValue);
2028 : }
2029 :
2030 195530 : return TRUE;
2031 : }
2032 :
2033 : /************************************************************************/
2034 : /* CPLStripXMLNamespace() */
2035 : /************************************************************************/
2036 :
2037 : /**
2038 : * \brief Strip indicated namespaces.
2039 : *
2040 : * The subdocument (psRoot) is recursively examined, and any elements
2041 : * with the indicated namespace prefix will have the namespace prefix
2042 : * stripped from the element names. If the passed namespace is NULL, then
2043 : * all namespace prefixes will be stripped.
2044 : *
2045 : * Nodes other than elements should remain unaffected. The changes are
2046 : * made "in place", and should not alter any node locations, only the
2047 : * pszValue field of affected nodes.
2048 : *
2049 : * @param psRoot the document to operate on.
2050 : * @param pszNamespace the name space prefix (not including colon), or NULL.
2051 : * @param bRecurse TRUE to recurse over whole document, or FALSE to only
2052 : * operate on the passed node.
2053 : */
2054 :
2055 1533360 : void CPLStripXMLNamespace(CPLXMLNode *psRoot, const char *pszNamespace,
2056 : int bRecurse)
2057 :
2058 : {
2059 1533360 : size_t nNameSpaceLen = (pszNamespace) ? strlen(pszNamespace) : 0;
2060 :
2061 3948440 : while (psRoot != nullptr)
2062 : {
2063 2415080 : if (psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute)
2064 : {
2065 1356960 : if (pszNamespace != nullptr)
2066 : {
2067 597 : if (EQUALN(pszNamespace, psRoot->pszValue, nNameSpaceLen) &&
2068 177 : psRoot->pszValue[nNameSpaceLen] == ':')
2069 : {
2070 177 : memmove(psRoot->pszValue,
2071 177 : psRoot->pszValue + nNameSpaceLen + 1,
2072 177 : strlen(psRoot->pszValue + nNameSpaceLen + 1) + 1);
2073 : }
2074 : }
2075 : else
2076 : {
2077 8470600 : for (const char *pszCheck = psRoot->pszValue; *pszCheck != '\0';
2078 : pszCheck++)
2079 : {
2080 8050960 : if (*pszCheck == ':')
2081 : {
2082 936722 : memmove(psRoot->pszValue, pszCheck + 1,
2083 936722 : strlen(pszCheck + 1) + 1);
2084 936722 : break;
2085 : }
2086 : }
2087 : }
2088 : }
2089 :
2090 2415080 : if (bRecurse)
2091 : {
2092 2415080 : if (psRoot->psChild != nullptr)
2093 1341990 : CPLStripXMLNamespace(psRoot->psChild, pszNamespace, 1);
2094 :
2095 2415080 : psRoot = psRoot->psNext;
2096 : }
2097 : else
2098 : {
2099 0 : break;
2100 : }
2101 : }
2102 1533360 : }
2103 :
2104 : /************************************************************************/
2105 : /* CPLParseXMLFile() */
2106 : /************************************************************************/
2107 :
2108 : /**
2109 : * \brief Parse XML file into tree.
2110 : *
2111 : * The named file is opened, loaded into memory as a big string, and
2112 : * parsed with CPLParseXMLString(). Errors in reading the file or parsing
2113 : * the XML will be reported by CPLError().
2114 : *
2115 : * The "large file" API is used, so XML files can come from virtualized
2116 : * files.
2117 : *
2118 : * @param pszFilename the file to open.
2119 : *
2120 : * @return NULL on failure, or the document tree on success.
2121 : */
2122 :
2123 5110 : CPLXMLNode *CPLParseXMLFile(const char *pszFilename)
2124 :
2125 : {
2126 : /* -------------------------------------------------------------------- */
2127 : /* Ingest the file. */
2128 : /* -------------------------------------------------------------------- */
2129 5110 : GByte *pabyOut = nullptr;
2130 5110 : if (!VSIIngestFile(nullptr, pszFilename, &pabyOut, nullptr, -1))
2131 62 : return nullptr;
2132 :
2133 5048 : char *pszDoc = reinterpret_cast<char *>(pabyOut);
2134 :
2135 : /* -------------------------------------------------------------------- */
2136 : /* Parse it. */
2137 : /* -------------------------------------------------------------------- */
2138 5048 : CPLXMLNode *psTree = CPLParseXMLString(pszDoc);
2139 5048 : CPLFree(pszDoc);
2140 :
2141 5048 : return psTree;
2142 : }
2143 :
2144 : /************************************************************************/
2145 : /* CPLSerializeXMLTreeToFile() */
2146 : /************************************************************************/
2147 :
2148 : /**
2149 : * \brief Write document tree to a file.
2150 : *
2151 : * The passed document tree is converted into one big string (with
2152 : * CPLSerializeXMLTree()) and then written to the named file. Errors writing
2153 : * the file will be reported by CPLError(). The source document tree is
2154 : * not altered. If the output file already exists it will be overwritten.
2155 : *
2156 : * @param psTree the document tree to write.
2157 : * @param pszFilename the name of the file to write to.
2158 : * @return TRUE on success, FALSE otherwise.
2159 : */
2160 :
2161 2650 : int CPLSerializeXMLTreeToFile(const CPLXMLNode *psTree, const char *pszFilename)
2162 :
2163 : {
2164 : /* -------------------------------------------------------------------- */
2165 : /* Serialize document. */
2166 : /* -------------------------------------------------------------------- */
2167 2650 : char *pszDoc = CPLSerializeXMLTree(psTree);
2168 2650 : if (pszDoc == nullptr)
2169 0 : return FALSE;
2170 :
2171 2650 : const vsi_l_offset nLength = strlen(pszDoc);
2172 :
2173 : /* -------------------------------------------------------------------- */
2174 : /* Create file. */
2175 : /* -------------------------------------------------------------------- */
2176 2650 : VSILFILE *fp = VSIFOpenL(pszFilename, "wt");
2177 2650 : if (fp == nullptr)
2178 : {
2179 7 : CPLError(CE_Failure, CPLE_OpenFailed, "Failed to open %.500s to write.",
2180 : pszFilename);
2181 7 : CPLFree(pszDoc);
2182 7 : return FALSE;
2183 : }
2184 :
2185 : /* -------------------------------------------------------------------- */
2186 : /* Write file. */
2187 : /* -------------------------------------------------------------------- */
2188 2643 : if (VSIFWriteL(pszDoc, 1, static_cast<size_t>(nLength), fp) != nLength)
2189 : {
2190 77 : CPLError(CE_Failure, CPLE_FileIO,
2191 : "Failed to write whole XML document (%.500s).", pszFilename);
2192 77 : CPL_IGNORE_RET_VAL(VSIFCloseL(fp));
2193 77 : CPLFree(pszDoc);
2194 77 : return FALSE;
2195 : }
2196 :
2197 : /* -------------------------------------------------------------------- */
2198 : /* Cleanup */
2199 : /* -------------------------------------------------------------------- */
2200 2566 : const bool bRet = VSIFCloseL(fp) == 0;
2201 2566 : if (!bRet)
2202 : {
2203 0 : CPLError(CE_Failure, CPLE_FileIO,
2204 : "Failed to write whole XML document (%.500s).", pszFilename);
2205 : }
2206 2566 : CPLFree(pszDoc);
2207 :
2208 2566 : return bRet;
2209 : }
2210 :
2211 : /************************************************************************/
2212 : /* CPLCleanXMLElementName() */
2213 : /************************************************************************/
2214 :
2215 : /**
2216 : * \brief Make string into safe XML token.
2217 : *
2218 : * Modifies a string in place to try and make it into a legal
2219 : * XML token that can be used as an element name. This is accomplished
2220 : * by changing any characters not legal in a token into an underscore.
2221 : *
2222 : * NOTE: This function should implement the rules in section 2.3 of
2223 : * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly. We
2224 : * only do a rough approximation of that.
2225 : *
2226 : * @param pszTarget the string to be adjusted. It is altered in place.
2227 : */
2228 :
void CPLCleanXMLElementName(char *pszTarget)
{
    if (pszTarget == nullptr)
        return;

    while (*pszTarget != '\0')
    {
        const unsigned char chCur = static_cast<unsigned char>(*pszTarget);

        // Kept as-is: bytes with the high bit set, alphanumeric
        // characters, '_' and '.'.
        const bool bKeep = (chCur & 0x80) != 0 || isalnum(chCur) ||
                           chCur == '_' || chCur == '.';

        // Everything else is replaced by an underscore.
        if (!bKeep)
            *pszTarget = '_';

        ++pszTarget;
    }
}
2248 :
2249 : /************************************************************************/
2250 : /* CPLXMLNodeGetRAMUsageEstimate() */
2251 : /************************************************************************/
2252 :
2253 106219 : static size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode,
2254 : bool bVisitSiblings)
2255 : {
2256 106219 : size_t nRet = sizeof(CPLXMLNode);
2257 : // malloc() aligns on 16-byte boundaries on 64 bit.
2258 106219 : nRet += std::max(2 * sizeof(void *), strlen(psNode->pszValue) + 1);
2259 106219 : if (bVisitSiblings)
2260 : {
2261 106219 : for (const CPLXMLNode *psIter = psNode->psNext; psIter;
2262 43856 : psIter = psIter->psNext)
2263 : {
2264 43856 : nRet += CPLXMLNodeGetRAMUsageEstimate(psIter, false);
2265 : }
2266 : }
2267 106219 : if (psNode->psChild)
2268 : {
2269 59023 : nRet += CPLXMLNodeGetRAMUsageEstimate(psNode->psChild, true);
2270 : }
2271 106219 : return nRet;
2272 : }
2273 :
2274 : /** Return a conservative estimate of the RAM usage of this node, its children
2275 : * and siblings. The returned values is in bytes.
2276 : *
2277 : * @since 3.9
2278 : */
2279 3340 : size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode)
2280 : {
2281 3340 : return CPLXMLNodeGetRAMUsageEstimate(psNode, true);
2282 : }
2283 :
2284 : /************************************************************************/
2285 : /* CPLXMLTreeCloser::getDocumentElement() */
2286 : /************************************************************************/
2287 :
2288 72 : CPLXMLNode *CPLXMLTreeCloser::getDocumentElement()
2289 : {
2290 72 : CPLXMLNode *doc = get();
2291 : // skip the Declaration and assume the next is the root element
2292 120 : while (doc != nullptr &&
2293 120 : (doc->eType != CXT_Element || doc->pszValue[0] == '?'))
2294 : {
2295 48 : doc = doc->psNext;
2296 : }
2297 72 : return doc;
2298 : }
|