Line data Source code
1 : /**********************************************************************
2 : *
3 : * Project: CPL - Common Portability Library
4 : * Purpose: Implementation of MiniXML Parser and handling.
5 : * Author: Frank Warmerdam, warmerdam@pobox.com
6 : *
7 : **********************************************************************
8 : * Copyright (c) 2001, Frank Warmerdam
9 : * Copyright (c) 2007-2013, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : **********************************************************************
29 : *
30 : * Independent Security Audit 2003/04/05 Andrey Kiselev:
31 : * Completed audit of this module. Any documents may be parsed without
32 : * buffer overflows and stack corruptions.
33 : *
34 : * Security Audit 2003/03/28 warmerda:
35 : * Completed security audit. I believe that this module may be safely used
36 : * to parse, and serialize arbitrary documents provided by a potentially
37 : * hostile source.
38 : *
39 : */
40 :
41 : #include "cpl_minixml.h"
42 :
43 : #include <cctype>
44 : #include <climits>
45 : #include <cstddef>
46 : #include <cstdio>
47 : #include <cstring>
48 :
49 : #include <algorithm>
50 :
51 : #include "cpl_conv.h"
52 : #include "cpl_error.h"
53 : #include "cpl_string.h"
54 : #include "cpl_vsi.h"
55 :
/** Kinds of lexical token that ReadToken() can report. */
enum XMLTokenType
{
    TNone,          /**< End of input, or a tokenizer failure. */
    TString,        /**< Quoted value, CDATA section or free text. */
    TOpen,          /**< '<' found outside of an element. */
    TClose,         /**< '>' found inside an element. */
    TEqual,         /**< '=' found inside an element. */
    TToken,         /**< Regular (name-like) token inside an element. */
    TSlashClose,    /**< "/>" element terminator. */
    TQuestionClose, /**< "?>" element terminator. */
    TComment,       /**< Body of a "<!-- ... -->" comment. */
    TLiteral        /**< A "<!DOCTYPE ...>" declaration, kept verbatim. */
};
69 :
70 : typedef struct
71 : {
72 : CPLXMLNode *psFirstNode;
73 : CPLXMLNode *psLastChild;
74 : } StackContext;
75 :
76 : typedef struct
77 : {
78 : const char *pszInput;
79 : int nInputOffset;
80 : int nInputLine;
81 : bool bInElement;
82 : XMLTokenType eTokenType;
83 : char *pszToken;
84 : size_t nTokenMaxSize;
85 : size_t nTokenSize;
86 :
87 : int nStackMaxSize;
88 : int nStackSize;
89 : StackContext *papsStack;
90 :
91 : CPLXMLNode *psFirstNode;
92 : CPLXMLNode *psLastNode;
93 : } ParseContext;
94 :
95 : static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
96 : const char *pszText);
97 :
98 : /************************************************************************/
99 : /* ReadChar() */
100 : /************************************************************************/
101 :
102 357279000 : static CPL_INLINE char ReadChar(ParseContext *psContext)
103 :
104 : {
105 357279000 : const char chReturn = psContext->pszInput[psContext->nInputOffset++];
106 :
107 357279000 : if (chReturn == '\0')
108 224151 : psContext->nInputOffset--;
109 357054000 : else if (chReturn == 10)
110 4211680 : psContext->nInputLine++;
111 :
112 357279000 : return chReturn;
113 : }
114 :
115 : /************************************************************************/
116 : /* UnreadChar() */
117 : /************************************************************************/
118 :
119 14432400 : static CPL_INLINE void UnreadChar(ParseContext *psContext, char chToUnread)
120 :
121 : {
122 14432400 : if (chToUnread == '\0')
123 36 : return;
124 :
125 14432300 : CPLAssert(chToUnread == psContext->pszInput[psContext->nInputOffset - 1]);
126 :
127 14432300 : psContext->nInputOffset--;
128 :
129 14432300 : if (chToUnread == 10)
130 273 : psContext->nInputLine--;
131 : }
132 :
133 : /************************************************************************/
134 : /* ReallocToken() */
135 : /************************************************************************/
136 :
137 957922 : static bool ReallocToken(ParseContext *psContext)
138 : {
139 957922 : if (psContext->nTokenMaxSize > INT_MAX / 2)
140 : {
141 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
142 : "Out of memory allocating %d*2 bytes",
143 0 : static_cast<int>(psContext->nTokenMaxSize));
144 0 : VSIFree(psContext->pszToken);
145 0 : psContext->pszToken = nullptr;
146 0 : return false;
147 : }
148 :
149 957922 : psContext->nTokenMaxSize *= 2;
150 : char *pszToken = static_cast<char *>(
151 957922 : VSIRealloc(psContext->pszToken, psContext->nTokenMaxSize));
152 957922 : if (pszToken == nullptr)
153 : {
154 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
155 : "Out of memory allocating %d bytes",
156 0 : static_cast<int>(psContext->nTokenMaxSize));
157 0 : VSIFree(psContext->pszToken);
158 0 : psContext->pszToken = nullptr;
159 0 : return false;
160 : }
161 957922 : psContext->pszToken = pszToken;
162 957922 : return true;
163 : }
164 :
165 : /************************************************************************/
166 : /* AddToToken() */
167 : /************************************************************************/
168 :
169 257949000 : static CPL_INLINE bool _AddToToken(ParseContext *psContext, char chNewChar)
170 :
171 : {
172 257949000 : if (psContext->nTokenSize >= psContext->nTokenMaxSize - 2)
173 : {
174 957922 : if (!ReallocToken(psContext))
175 0 : return false;
176 : }
177 :
178 257949000 : psContext->pszToken[psContext->nTokenSize++] = chNewChar;
179 257949000 : psContext->pszToken[psContext->nTokenSize] = '\0';
180 257949000 : return true;
181 : }
182 :
// TODO(schwehr): Remove the goto.
// Append chNewChar to the current token through _AddToToken(); when that
// fails, jump to the label `fail`, which the calling function must define.
// The do/while wrapper makes the expansion a single statement, so the macro
// can safely be used as the body of an if/else or of an unbraced loop.
#define AddToToken(psContext, chNewChar)                                       \
    do                                                                         \
    {                                                                          \
        if (!_AddToToken(psContext, chNewChar))                                \
            goto fail;                                                         \
    } while (false)
187 :
188 : /************************************************************************/
189 : /* ReadToken() */
190 : /************************************************************************/
191 :
192 41593900 : static XMLTokenType ReadToken(ParseContext *psContext, CPLErr &eLastErrorType)
193 :
194 : {
195 41593900 : psContext->nTokenSize = 0;
196 41593900 : psContext->pszToken[0] = '\0';
197 :
198 41593900 : char chNext = ReadChar(psContext);
199 89270200 : while (isspace(static_cast<unsigned char>(chNext)))
200 47676400 : chNext = ReadChar(psContext);
201 :
202 : /* -------------------------------------------------------------------- */
203 : /* Handle comments. */
204 : /* -------------------------------------------------------------------- */
205 41593900 : if (chNext == '<' &&
206 5710700 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset, "!--"))
207 : {
208 67331 : psContext->eTokenType = TComment;
209 :
210 : // Skip "!--" characters.
211 67331 : ReadChar(psContext);
212 67331 : ReadChar(psContext);
213 67331 : ReadChar(psContext);
214 :
215 5121350 : while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
216 10310000 : "-->") &&
217 5121350 : (chNext = ReadChar(psContext)) != '\0')
218 5121350 : AddToToken(psContext, chNext);
219 :
220 : // Skip "-->" characters.
221 67331 : ReadChar(psContext);
222 67331 : ReadChar(psContext);
223 67331 : ReadChar(psContext);
224 : }
225 : /* -------------------------------------------------------------------- */
226 : /* Handle DOCTYPE. */
227 : /* -------------------------------------------------------------------- */
228 41526600 : else if (chNext == '<' &&
229 5643370 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
230 : "!DOCTYPE"))
231 : {
232 18 : bool bInQuotes = false;
233 18 : psContext->eTokenType = TLiteral;
234 :
235 18 : AddToToken(psContext, '<');
236 : do
237 : {
238 1233 : chNext = ReadChar(psContext);
239 1233 : if (chNext == '\0')
240 : {
241 0 : eLastErrorType = CE_Failure;
242 0 : CPLError(eLastErrorType, CPLE_AppDefined,
243 : "Parse error in DOCTYPE on or before line %d, "
244 : "reached end of file without '>'.",
245 : psContext->nInputLine);
246 :
247 0 : break;
248 : }
249 :
250 : /* The markup declaration block within a DOCTYPE tag consists of:
251 : * - a left square bracket [
252 : * - a list of declarations
253 : * - a right square bracket ]
254 : * Example:
255 : * <!DOCTYPE RootElement [ ...declarations... ]>
256 : */
257 1233 : if (chNext == '[')
258 : {
259 1 : AddToToken(psContext, chNext);
260 :
261 98 : do
262 : {
263 99 : chNext = ReadChar(psContext);
264 99 : if (chNext == ']')
265 0 : break;
266 99 : AddToToken(psContext, chNext);
267 99 : } while (chNext != '\0' &&
268 99 : !STARTS_WITH_CI(psContext->pszInput +
269 : psContext->nInputOffset,
270 : "]>"));
271 :
272 1 : if (chNext == '\0')
273 : {
274 0 : eLastErrorType = CE_Failure;
275 0 : CPLError(eLastErrorType, CPLE_AppDefined,
276 : "Parse error in DOCTYPE on or before line %d, "
277 : "reached end of file without ']'.",
278 : psContext->nInputLine);
279 0 : break;
280 : }
281 :
282 1 : if (chNext != ']')
283 : {
284 1 : chNext = ReadChar(psContext);
285 1 : AddToToken(psContext, chNext);
286 :
287 : // Skip ">" character, will be consumed below.
288 1 : chNext = ReadChar(psContext);
289 : }
290 : }
291 :
292 1233 : if (chNext == '\"')
293 54 : bInQuotes = !bInQuotes;
294 :
295 1233 : if (chNext == '>' && !bInQuotes)
296 : {
297 18 : AddToToken(psContext, '>');
298 18 : break;
299 : }
300 :
301 1215 : AddToToken(psContext, chNext);
302 18 : } while (true);
303 : }
304 : /* -------------------------------------------------------------------- */
305 : /* Handle CDATA. */
306 : /* -------------------------------------------------------------------- */
307 41526500 : else if (chNext == '<' &&
308 5643350 : STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
309 : "![CDATA["))
310 : {
311 173 : psContext->eTokenType = TString;
312 :
313 : // Skip !CDATA[
314 173 : ReadChar(psContext);
315 173 : ReadChar(psContext);
316 173 : ReadChar(psContext);
317 173 : ReadChar(psContext);
318 173 : ReadChar(psContext);
319 173 : ReadChar(psContext);
320 173 : ReadChar(psContext);
321 173 : ReadChar(psContext);
322 :
323 91580 : while (!STARTS_WITH_CI(psContext->pszInput + psContext->nInputOffset,
324 183334 : "]]>") &&
325 91581 : (chNext = ReadChar(psContext)) != '\0')
326 91580 : AddToToken(psContext, chNext);
327 :
328 : // Skip "]]>" characters.
329 173 : ReadChar(psContext);
330 173 : ReadChar(psContext);
331 173 : ReadChar(psContext);
332 : }
333 : /* -------------------------------------------------------------------- */
334 : /* Simple single tokens of interest. */
335 : /* -------------------------------------------------------------------- */
336 41526400 : else if (chNext == '<' && !psContext->bInElement)
337 : {
338 5643180 : psContext->eTokenType = TOpen;
339 5643180 : psContext->bInElement = true;
340 : }
341 35883200 : else if (chNext == '>' && psContext->bInElement)
342 : {
343 3781250 : psContext->eTokenType = TClose;
344 3781250 : psContext->bInElement = false;
345 : }
346 32101900 : else if (chNext == '=' && psContext->bInElement)
347 : {
348 7791800 : psContext->eTokenType = TEqual;
349 : }
350 24310100 : else if (chNext == '\0')
351 : {
352 224110 : psContext->eTokenType = TNone;
353 : }
354 : /* -------------------------------------------------------------------- */
355 : /* Handle the /> token terminator. */
356 : /* -------------------------------------------------------------------- */
357 24086000 : else if (chNext == '/' && psContext->bInElement &&
358 3739380 : psContext->pszInput[psContext->nInputOffset] == '>')
359 : {
360 1853780 : chNext = ReadChar(psContext);
361 : (void)chNext;
362 1853780 : CPLAssert(chNext == '>');
363 :
364 1853780 : psContext->eTokenType = TSlashClose;
365 1853780 : psContext->bInElement = false;
366 : }
367 : /* -------------------------------------------------------------------- */
368 : /* Handle the ?> token terminator. */
369 : /* -------------------------------------------------------------------- */
370 22232200 : else if (chNext == '?' && psContext->bInElement &&
371 16201 : psContext->pszInput[psContext->nInputOffset] == '>')
372 : {
373 8099 : chNext = ReadChar(psContext);
374 : (void)chNext;
375 8099 : CPLAssert(chNext == '>');
376 :
377 8099 : psContext->eTokenType = TQuestionClose;
378 8099 : psContext->bInElement = false;
379 : }
380 : /* -------------------------------------------------------------------- */
381 : /* Collect a quoted string. */
382 : /* -------------------------------------------------------------------- */
383 22224100 : else if (psContext->bInElement && chNext == '"')
384 : {
385 6706140 : psContext->eTokenType = TString;
386 :
387 58249700 : while ((chNext = ReadChar(psContext)) != '"' && chNext != '\0')
388 51543500 : AddToToken(psContext, chNext);
389 :
390 6706140 : if (chNext != '"')
391 : {
392 0 : psContext->eTokenType = TNone;
393 0 : eLastErrorType = CE_Failure;
394 0 : CPLError(
395 : eLastErrorType, CPLE_AppDefined,
396 : "Parse error on line %d, reached EOF before closing quote.",
397 : psContext->nInputLine);
398 : }
399 :
400 : // Do we need to unescape it?
401 6706140 : if (strchr(psContext->pszToken, '&') != nullptr)
402 : {
403 208 : int nLength = 0;
404 : char *pszUnescaped =
405 208 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
406 208 : strcpy(psContext->pszToken, pszUnescaped);
407 208 : CPLFree(pszUnescaped);
408 208 : psContext->nTokenSize = strlen(psContext->pszToken);
409 6706140 : }
410 : }
411 15518000 : else if (psContext->bInElement && chNext == '\'')
412 : {
413 1085660 : psContext->eTokenType = TString;
414 :
415 19058000 : while ((chNext = ReadChar(psContext)) != '\'' && chNext != '\0')
416 17972400 : AddToToken(psContext, chNext);
417 :
418 1085630 : if (chNext != '\'')
419 : {
420 1 : psContext->eTokenType = TNone;
421 1 : eLastErrorType = CE_Failure;
422 1 : CPLError(
423 : eLastErrorType, CPLE_AppDefined,
424 : "Parse error on line %d, reached EOF before closing quote.",
425 : psContext->nInputLine);
426 : }
427 :
428 : // Do we need to unescape it?
429 1085660 : if (strchr(psContext->pszToken, '&') != nullptr)
430 : {
431 1552 : int nLength = 0;
432 : char *pszUnescaped =
433 1552 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
434 1552 : strcpy(psContext->pszToken, pszUnescaped);
435 1552 : CPLFree(pszUnescaped);
436 1552 : psContext->nTokenSize = strlen(psContext->pszToken);
437 1085660 : }
438 : }
439 : /* -------------------------------------------------------------------- */
440 : /* Collect an unquoted string, terminated by a open angle */
441 : /* bracket. */
442 : /* -------------------------------------------------------------------- */
443 14432300 : else if (!psContext->bInElement)
444 : {
445 997381 : psContext->eTokenType = TString;
446 :
447 997381 : AddToToken(psContext, chNext);
448 92239900 : while ((chNext = ReadChar(psContext)) != '<' && chNext != '\0')
449 91242500 : AddToToken(psContext, chNext);
450 997381 : UnreadChar(psContext, chNext);
451 :
452 : // Do we need to unescape it?
453 997381 : if (strchr(psContext->pszToken, '&') != nullptr)
454 : {
455 19039 : int nLength = 0;
456 : char *pszUnescaped =
457 19039 : CPLUnescapeString(psContext->pszToken, &nLength, CPLES_XML);
458 19039 : strcpy(psContext->pszToken, pszUnescaped);
459 19039 : CPLFree(pszUnescaped);
460 19039 : psContext->nTokenSize = strlen(psContext->pszToken);
461 : }
462 : }
463 :
464 : /* -------------------------------------------------------------------- */
465 : /* Collect a regular token terminated by white space, or */
466 : /* special character(s) like an equal sign. */
467 : /* -------------------------------------------------------------------- */
468 : else
469 : {
470 13435000 : psContext->eTokenType = TToken;
471 :
472 : // Add the first character to the token regardless of what it is.
473 13435000 : AddToToken(psContext, chNext);
474 :
475 90979300 : for (chNext = ReadChar(psContext);
476 90979300 : (chNext >= 'A' && chNext <= 'Z') ||
477 87571700 : (chNext >= 'a' && chNext <= 'z') || chNext == '-' ||
478 106295000 : chNext == '_' || chNext == '.' || chNext == ':' ||
479 10791600 : (chNext >= '0' && chNext <= '9');
480 77544400 : chNext = ReadChar(psContext))
481 : {
482 77544400 : AddToToken(psContext, chNext);
483 : }
484 :
485 13435000 : UnreadChar(psContext, chNext);
486 : }
487 :
488 41593900 : return psContext->eTokenType;
489 :
490 0 : fail:
491 0 : psContext->eTokenType = TNone;
492 0 : return TNone;
493 : }
494 :
495 : /************************************************************************/
496 : /* PushNode() */
497 : /************************************************************************/
498 :
499 3757570 : static bool PushNode(ParseContext *psContext, CPLXMLNode *psNode,
500 : CPLErr &eLastErrorType)
501 :
502 : {
503 3757570 : if (psContext->nStackMaxSize <= psContext->nStackSize)
504 : {
505 : // Somewhat arbitrary number.
506 226010 : if (psContext->nStackMaxSize >= 10000)
507 : {
508 1 : eLastErrorType = CE_Failure;
509 1 : CPLError(CE_Failure, CPLE_NotSupported,
510 : "XML element depth beyond 10000. Giving up");
511 1 : VSIFree(psContext->papsStack);
512 1 : psContext->papsStack = nullptr;
513 1 : return false;
514 : }
515 226009 : psContext->nStackMaxSize += 10;
516 :
517 : StackContext *papsStack = static_cast<StackContext *>(
518 452018 : VSIRealloc(psContext->papsStack,
519 226009 : sizeof(StackContext) * psContext->nStackMaxSize));
520 226009 : if (papsStack == nullptr)
521 : {
522 0 : eLastErrorType = CE_Failure;
523 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
524 : "Out of memory allocating %d bytes",
525 : static_cast<int>(sizeof(StackContext)) *
526 0 : psContext->nStackMaxSize);
527 0 : VSIFree(psContext->papsStack);
528 0 : psContext->papsStack = nullptr;
529 0 : return false;
530 : }
531 226009 : psContext->papsStack = papsStack;
532 : }
533 : #ifdef DEBUG
534 : // To make Coverity happy, but cannot happen.
535 3757570 : if (psContext->papsStack == nullptr)
536 0 : return false;
537 : #endif
538 :
539 3757570 : psContext->papsStack[psContext->nStackSize].psFirstNode = psNode;
540 3757570 : psContext->papsStack[psContext->nStackSize].psLastChild = nullptr;
541 3757570 : psContext->nStackSize++;
542 :
543 3757570 : return true;
544 : }
545 :
546 : /************************************************************************/
547 : /* AttachNode() */
548 : /* */
549 : /* Attach the passed node as a child of the current node. */
550 : /* Special handling exists for adding siblings to psFirst if */
551 : /* there is nothing on the stack. */
552 : /************************************************************************/
553 :
554 12614300 : static void AttachNode(ParseContext *psContext, CPLXMLNode *psNode)
555 :
556 : {
557 12614300 : if (psContext->psFirstNode == nullptr)
558 : {
559 224110 : psContext->psFirstNode = psNode;
560 224110 : psContext->psLastNode = psNode;
561 : }
562 12390200 : else if (psContext->nStackSize == 0)
563 : {
564 10207 : psContext->psLastNode->psNext = psNode;
565 10207 : psContext->psLastNode = psNode;
566 : }
567 : else
568 : {
569 12380000 : if (psContext->papsStack[psContext->nStackSize - 1]
570 12380000 : .psFirstNode->psChild == nullptr)
571 : {
572 3735670 : psContext->papsStack[psContext->nStackSize - 1]
573 3735670 : .psFirstNode->psChild = psNode;
574 : }
575 : else
576 : {
577 8644290 : psContext->papsStack[psContext->nStackSize - 1]
578 8644290 : .psLastChild->psNext = psNode;
579 : }
580 12380000 : psContext->papsStack[psContext->nStackSize - 1].psLastChild = psNode;
581 : }
582 12614300 : }
583 :
584 : /************************************************************************/
585 : /* CPLParseXMLString() */
586 : /************************************************************************/
587 :
588 : /**
589 : * \brief Parse an XML string into tree form.
590 : *
591 : * The passed document is parsed into a CPLXMLNode tree representation.
592 : * If the document is not well formed XML then NULL is returned, and errors
593 : * are reported via CPLError(). No validation beyond wellformedness is
594 : * done. The CPLParseXMLFile() convenience function can be used to parse
595 : * from a file.
596 : *
597 : * The returned document tree is owned by the caller and should be freed
598 : * with CPLDestroyXMLNode() when no longer needed.
599 : *
600 : * If the document has more than one "root level" element then those after the
601 : * first will be attached to the first as siblings (via the psNext pointers)
602 : * even though there is no common parent. A document with no XML structure
603 : * (no angle brackets for instance) would be considered well formed, and
604 : * returned as a single CXT_Text node.
605 : *
606 : * @param pszString the document to parse.
607 : *
608 : * @return parsed tree or NULL on error.
609 : */
610 :
611 224135 : CPLXMLNode *CPLParseXMLString(const char *pszString)
612 :
613 : {
614 224135 : if (pszString == nullptr)
615 : {
616 0 : CPLError(CE_Failure, CPLE_AppDefined,
617 : "CPLParseXMLString() called with NULL pointer.");
618 0 : return nullptr;
619 : }
620 :
621 : // Save back error context.
622 224135 : const CPLErr eErrClass = CPLGetLastErrorType();
623 224135 : const CPLErrorNum nErrNum = CPLGetLastErrorNo();
624 448270 : const CPLString osErrMsg = CPLGetLastErrorMsg();
625 :
626 : // Reset it now.
627 224135 : CPLErrorSetState(CE_None, CPLE_AppDefined, "");
628 :
629 : /* -------------------------------------------------------------------- */
630 : /* Check for a UTF-8 BOM and skip if found */
631 : /* */
632 : /* TODO: BOM is variable-length parameter and depends on encoding. */
633 : /* Add BOM detection for other encodings. */
634 : /* -------------------------------------------------------------------- */
635 :
636 : // Used to skip to actual beginning of XML data.
637 224135 : if ((static_cast<unsigned char>(pszString[0]) == 0xEF) &&
638 3 : (static_cast<unsigned char>(pszString[1]) == 0xBB) &&
639 3 : (static_cast<unsigned char>(pszString[2]) == 0xBF))
640 : {
641 3 : pszString += 3;
642 : }
643 :
644 : /* -------------------------------------------------------------------- */
645 : /* Initialize parse context. */
646 : /* -------------------------------------------------------------------- */
647 : ParseContext sContext;
648 224135 : sContext.pszInput = pszString;
649 224135 : sContext.nInputOffset = 0;
650 224135 : sContext.nInputLine = 0;
651 224135 : sContext.bInElement = false;
652 224135 : sContext.nTokenMaxSize = 10;
653 224135 : sContext.pszToken = static_cast<char *>(VSIMalloc(sContext.nTokenMaxSize));
654 224135 : if (sContext.pszToken == nullptr)
655 0 : return nullptr;
656 224135 : sContext.nTokenSize = 0;
657 224135 : sContext.eTokenType = TNone;
658 224135 : sContext.nStackMaxSize = 0;
659 224135 : sContext.nStackSize = 0;
660 224135 : sContext.papsStack = nullptr;
661 224135 : sContext.psFirstNode = nullptr;
662 224135 : sContext.psLastNode = nullptr;
663 :
664 : #ifdef DEBUG
665 224135 : bool bRecoverableError = true;
666 : #endif
667 224135 : CPLErr eLastErrorType = CE_None;
668 :
669 : /* ==================================================================== */
670 : /* Loop reading tokens. */
671 : /* ==================================================================== */
672 18481500 : while (ReadToken(&sContext, eLastErrorType) != TNone)
673 : {
674 18257400 : loop_beginning:
675 : /* --------------------------------------------------------------------
676 : */
677 : /* Create a new element. */
678 : /* --------------------------------------------------------------------
679 : */
680 18256800 : if (sContext.eTokenType == TOpen)
681 : {
682 5643180 : if (ReadToken(&sContext, eLastErrorType) != TToken)
683 : {
684 2 : eLastErrorType = CE_Failure;
685 2 : CPLError(eLastErrorType, CPLE_AppDefined,
686 : "Line %d: Didn't find element token after "
687 : "open angle bracket.",
688 : sContext.nInputLine);
689 2 : break;
690 : }
691 :
692 5643180 : CPLXMLNode *psElement = nullptr;
693 5643180 : if (sContext.pszToken[0] != '/')
694 : {
695 : psElement =
696 3757570 : _CPLCreateXMLNode(nullptr, CXT_Element, sContext.pszToken);
697 3757570 : if (!psElement)
698 0 : break;
699 3757570 : AttachNode(&sContext, psElement);
700 3757570 : if (!PushNode(&sContext, psElement, eLastErrorType))
701 1 : break;
702 : }
703 : else
704 : {
705 1885600 : if (sContext.nStackSize == 0 ||
706 1885600 : !EQUAL(sContext.pszToken + 1,
707 : sContext.papsStack[sContext.nStackSize - 1]
708 : .psFirstNode->pszValue))
709 : {
710 : #ifdef DEBUG
711 : // Makes life of fuzzers easier if we accept somewhat
712 : // corrupted XML like <foo> ... </not_foo>.
713 16 : if (CPLTestBool(
714 : CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
715 : {
716 0 : eLastErrorType = CE_Warning;
717 0 : CPLError(
718 : eLastErrorType, CPLE_AppDefined,
719 : "Line %d: <%.500s> doesn't have matching <%.500s>.",
720 : sContext.nInputLine, sContext.pszToken,
721 0 : sContext.pszToken + 1);
722 0 : if (sContext.nStackSize == 0)
723 0 : break;
724 0 : goto end_processing_close;
725 : }
726 : else
727 : #endif
728 : {
729 16 : eLastErrorType = CE_Failure;
730 16 : CPLError(
731 : eLastErrorType, CPLE_AppDefined,
732 : "Line %d: <%.500s> doesn't have matching <%.500s>.",
733 : sContext.nInputLine, sContext.pszToken,
734 16 : sContext.pszToken + 1);
735 16 : break;
736 : }
737 : }
738 : else
739 : {
740 1885590 : if (strcmp(sContext.pszToken + 1,
741 1885590 : sContext.papsStack[sContext.nStackSize - 1]
742 1885590 : .psFirstNode->pszValue) != 0)
743 : {
744 : // TODO: At some point we could just error out like any
745 : // other sane XML parser would do.
746 1 : eLastErrorType = CE_Warning;
747 1 : CPLError(
748 : eLastErrorType, CPLE_AppDefined,
749 : "Line %d: <%.500s> matches <%.500s>, but the case "
750 : "isn't the same. Going on, but this is invalid "
751 : "XML that might be rejected in future versions.",
752 : sContext.nInputLine,
753 1 : sContext.papsStack[sContext.nStackSize - 1]
754 1 : .psFirstNode->pszValue,
755 : sContext.pszToken);
756 : }
757 : #ifdef DEBUG
758 1885590 : end_processing_close:
759 : #endif
760 1885590 : if (ReadToken(&sContext, eLastErrorType) != TClose)
761 : {
762 3 : eLastErrorType = CE_Failure;
763 3 : CPLError(eLastErrorType, CPLE_AppDefined,
764 : "Line %d: Missing close angle bracket "
765 : "after <%.500s.",
766 : sContext.nInputLine, sContext.pszToken);
767 3 : break;
768 : }
769 :
770 : // Pop element off stack
771 1885580 : sContext.nStackSize--;
772 : }
773 : }
774 : }
775 :
776 : /* --------------------------------------------------------------------
777 : */
778 : /* Add an attribute to a token. */
779 : /* --------------------------------------------------------------------
780 : */
781 12613600 : else if (sContext.eTokenType == TToken)
782 : {
783 : CPLXMLNode *psAttr =
784 7791800 : _CPLCreateXMLNode(nullptr, CXT_Attribute, sContext.pszToken);
785 7791810 : if (!psAttr)
786 0 : break;
787 7791810 : AttachNode(&sContext, psAttr);
788 :
789 7791800 : XMLTokenType nextToken = ReadToken(&sContext, eLastErrorType);
790 7791810 : if (nextToken != TEqual)
791 : {
792 : // Parse stuff like <?valbuddy_schematron
793 : // ../wmtsSimpleGetCapabilities.sch?>
794 5 : if (sContext.nStackSize > 0 &&
795 5 : sContext.papsStack[sContext.nStackSize - 1]
796 5 : .psFirstNode->pszValue[0] == '?')
797 : {
798 3 : psAttr->eType = CXT_Text;
799 3 : if (nextToken == TNone)
800 612 : break;
801 0 : goto loop_beginning;
802 : }
803 :
804 2 : eLastErrorType = CE_Failure;
805 2 : CPLError(eLastErrorType, CPLE_AppDefined,
806 : "Line %d: Didn't find expected '=' for value of "
807 : "attribute '%.500s'.",
808 : sContext.nInputLine, psAttr->pszValue);
809 : #ifdef DEBUG
810 : // Accepting an attribute without child text
811 : // would break too much assumptions in driver code
812 2 : bRecoverableError = false;
813 : #endif
814 2 : break;
815 : }
816 :
817 7791800 : if (ReadToken(&sContext, eLastErrorType) == TToken)
818 : {
819 : /* TODO: at some point we could just error out like any other */
820 : /* sane XML parser would do */
821 2 : eLastErrorType = CE_Warning;
822 2 : CPLError(eLastErrorType, CPLE_AppDefined,
823 : "Line %d: Attribute value should be single or double "
824 : "quoted. Going on, but this is invalid XML that "
825 : "might be rejected in future versions.",
826 : sContext.nInputLine);
827 : }
828 7791800 : else if (sContext.eTokenType != TString)
829 : {
830 1 : eLastErrorType = CE_Failure;
831 1 : CPLError(eLastErrorType, CPLE_AppDefined,
832 : "Line %d: Didn't find expected attribute value.",
833 : sContext.nInputLine);
834 : #ifdef DEBUG
835 : // Accepting an attribute without child text
836 : // would break too much assumptions in driver code
837 1 : bRecoverableError = false;
838 : #endif
839 1 : break;
840 : }
841 :
842 7791800 : if (!_CPLCreateXMLNode(psAttr, CXT_Text, sContext.pszToken))
843 0 : break;
844 : }
845 :
846 : /* --------------------------------------------------------------------
847 : */
848 : /* Close the start section of an element. */
849 : /* --------------------------------------------------------------------
850 : */
851 4821830 : else if (sContext.eTokenType == TClose)
852 : {
853 1895660 : if (sContext.nStackSize == 0)
854 : {
855 0 : eLastErrorType = CE_Failure;
856 0 : CPLError(eLastErrorType, CPLE_AppDefined,
857 : "Line %d: Found unbalanced '>'.", sContext.nInputLine);
858 0 : break;
859 : }
860 : }
861 :
862 : /* --------------------------------------------------------------------
863 : */
864 : /* Close the start section of an element, and pop it */
865 : /* immediately. */
866 : /* --------------------------------------------------------------------
867 : */
868 2926170 : else if (sContext.eTokenType == TSlashClose)
869 : {
870 1853780 : if (sContext.nStackSize == 0)
871 : {
872 0 : eLastErrorType = CE_Failure;
873 0 : CPLError(eLastErrorType, CPLE_AppDefined,
874 : "Line %d: Found unbalanced '/>'.",
875 : sContext.nInputLine);
876 0 : break;
877 : }
878 :
879 1853780 : sContext.nStackSize--;
880 : }
881 : /* --------------------------------------------------------------------
882 : */
883 : /* Close the start section of a <?...?> element, and pop it */
884 : /* immediately. */
885 : /* --------------------------------------------------------------------
886 : */
887 1072390 : else if (sContext.eTokenType == TQuestionClose)
888 : {
889 8099 : if (sContext.nStackSize == 0)
890 : {
891 0 : eLastErrorType = CE_Failure;
892 0 : CPLError(eLastErrorType, CPLE_AppDefined,
893 : "Line %d: Found unbalanced '?>'.",
894 : sContext.nInputLine);
895 0 : break;
896 : }
897 8099 : else if (sContext.papsStack[sContext.nStackSize - 1]
898 8099 : .psFirstNode->pszValue[0] != '?')
899 : {
900 1 : eLastErrorType = CE_Failure;
901 1 : CPLError(eLastErrorType, CPLE_AppDefined,
902 : "Line %d: Found '?>' without matching '<?'.",
903 : sContext.nInputLine);
904 1 : break;
905 : }
906 :
907 8098 : sContext.nStackSize--;
908 : }
909 : /* --------------------------------------------------------------------
910 : */
911 : /* Handle comments. They are returned as a whole token with the */
912 : /* prefix and postfix omitted. No processing of white space */
913 : /* will be done. */
914 : /* --------------------------------------------------------------------
915 : */
916 1064290 : else if (sContext.eTokenType == TComment)
917 : {
918 : CPLXMLNode *psValue =
919 67331 : _CPLCreateXMLNode(nullptr, CXT_Comment, sContext.pszToken);
920 67331 : if (!psValue)
921 0 : break;
922 67331 : AttachNode(&sContext, psValue);
923 : }
924 : /* --------------------------------------------------------------------
925 : */
926 : /* Handle literals. They are returned without processing. */
927 : /* --------------------------------------------------------------------
928 : */
929 996958 : else if (sContext.eTokenType == TLiteral)
930 : {
931 : CPLXMLNode *psValue =
932 18 : _CPLCreateXMLNode(nullptr, CXT_Literal, sContext.pszToken);
933 18 : if (!psValue)
934 0 : break;
935 18 : AttachNode(&sContext, psValue);
936 : }
937 : /* --------------------------------------------------------------------
938 : */
939 : /* Add a text value node as a child of the current element. */
940 : /* --------------------------------------------------------------------
941 : */
942 996940 : else if (sContext.eTokenType == TString && !sContext.bInElement)
943 : {
944 : CPLXMLNode *psValue =
945 997554 : _CPLCreateXMLNode(nullptr, CXT_Text, sContext.pszToken);
946 997554 : if (!psValue)
947 0 : break;
948 997554 : AttachNode(&sContext, psValue);
949 : }
950 : /* --------------------------------------------------------------------
951 : */
952 : /* Anything else is an error. */
953 : /* --------------------------------------------------------------------
954 : */
955 : else
956 : {
957 0 : eLastErrorType = CE_Failure;
958 0 : CPLError(eLastErrorType, CPLE_AppDefined,
959 : "Parse error at line %d, unexpected token:%.500s",
960 : sContext.nInputLine, sContext.pszToken);
961 1 : break;
962 : }
963 : }
964 :
965 : /* -------------------------------------------------------------------- */
966 : /* Did we pop all the way out of our stack? */
967 : /* -------------------------------------------------------------------- */
968 224797 : if (CPLGetLastErrorType() != CE_Failure && sContext.nStackSize > 0 &&
969 50 : sContext.papsStack != nullptr)
970 : {
971 : #ifdef DEBUG
972 : // Makes life of fuzzers easier if we accept somewhat corrupted XML
973 : // like <x> ...
974 100 : if (bRecoverableError &&
975 50 : CPLTestBool(CPLGetConfigOption("CPL_MINIXML_RELAXED", "FALSE")))
976 : {
977 0 : eLastErrorType = CE_Warning;
978 : }
979 : else
980 : #endif
981 : {
982 50 : eLastErrorType = CE_Failure;
983 : }
984 50 : CPLError(
985 : eLastErrorType, CPLE_AppDefined,
986 : "Parse error at EOF, not all elements have been closed, "
987 : "starting with %.500s",
988 50 : sContext.papsStack[sContext.nStackSize - 1].psFirstNode->pszValue);
989 : }
990 :
991 : /* -------------------------------------------------------------------- */
992 : /* Cleanup */
993 : /* -------------------------------------------------------------------- */
994 224135 : CPLFree(sContext.pszToken);
995 224135 : if (sContext.papsStack != nullptr)
996 224098 : CPLFree(sContext.papsStack);
997 :
998 : // We do not trust CPLGetLastErrorType() as if CPLTurnFailureIntoWarning()
999 : // has been set we would never get failures
1000 224135 : if (eLastErrorType == CE_Failure)
1001 : {
1002 78 : CPLDestroyXMLNode(sContext.psFirstNode);
1003 78 : sContext.psFirstNode = nullptr;
1004 78 : sContext.psLastNode = nullptr;
1005 : }
1006 :
1007 224135 : if (eLastErrorType == CE_None)
1008 : {
1009 : // Restore initial error state.
1010 224055 : CPLErrorSetState(eErrClass, nErrNum, osErrMsg);
1011 : }
1012 :
1013 224135 : return sContext.psFirstNode;
1014 : }
1015 :
1016 : /************************************************************************/
1017 : /* _GrowBuffer() */
1018 : /************************************************************************/
1019 :
1020 1995670 : static bool _GrowBuffer(size_t nNeeded, char **ppszText, size_t *pnMaxLength)
1021 :
1022 : {
1023 1995670 : if (nNeeded + 1 >= *pnMaxLength)
1024 : {
1025 25308 : *pnMaxLength = std::max(*pnMaxLength * 2, nNeeded + 1);
1026 : char *pszTextNew =
1027 25308 : static_cast<char *>(VSIRealloc(*ppszText, *pnMaxLength));
1028 25308 : if (pszTextNew == nullptr)
1029 0 : return false;
1030 25308 : *ppszText = pszTextNew;
1031 : }
1032 1995670 : return true;
1033 : }
1034 :
1035 : /************************************************************************/
1036 : /* CPLSerializeXMLNode() */
1037 : /************************************************************************/
1038 :
1039 : // TODO(schwehr): Rewrite this whole thing using C++ string.
1040 : // CPLSerializeXMLNode has buffer overflows.
1041 803776 : static bool CPLSerializeXMLNode(const CPLXMLNode *psNode, int nIndent,
1042 : char **ppszText, size_t *pnLength,
1043 : size_t *pnMaxLength)
1044 :
1045 : {
1046 803776 : if (psNode == nullptr)
1047 0 : return true;
1048 :
1049 : /* -------------------------------------------------------------------- */
1050 : /* Ensure the buffer is plenty large to hold this additional */
1051 : /* string. */
1052 : /* -------------------------------------------------------------------- */
1053 803776 : *pnLength += strlen(*ppszText + *pnLength);
1054 803776 : if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 + nIndent,
1055 : ppszText, pnMaxLength))
1056 0 : return false;
1057 :
1058 : /* -------------------------------------------------------------------- */
1059 : /* Text is just directly emitted. */
1060 : /* -------------------------------------------------------------------- */
1061 803776 : if (psNode->eType == CXT_Text)
1062 : {
1063 : char *pszEscaped =
1064 150113 : CPLEscapeString(psNode->pszValue, -1, CPLES_XML_BUT_QUOTES);
1065 :
1066 150113 : CPLAssert(psNode->psChild == nullptr);
1067 :
1068 : // Escaped text might be bigger than expected.
1069 150113 : if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1070 : {
1071 0 : CPLFree(pszEscaped);
1072 0 : return false;
1073 : }
1074 150113 : strcat(*ppszText + *pnLength, pszEscaped);
1075 :
1076 150113 : CPLFree(pszEscaped);
1077 : }
1078 :
1079 : /* -------------------------------------------------------------------- */
1080 : /* Attributes require a little formatting. */
1081 : /* -------------------------------------------------------------------- */
1082 653663 : else if (psNode->eType == CXT_Attribute)
1083 : {
1084 282675 : CPLAssert(psNode->psChild != nullptr &&
1085 : psNode->psChild->eType == CXT_Text);
1086 :
1087 282675 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, " %s=\"",
1088 282675 : psNode->pszValue);
1089 282675 : *pnLength += strlen(*ppszText + *pnLength);
1090 :
1091 : char *pszEscaped =
1092 282675 : CPLEscapeString(psNode->psChild->pszValue, -1, CPLES_XML);
1093 :
1094 282675 : if (!_GrowBuffer(strlen(pszEscaped) + *pnLength, ppszText, pnMaxLength))
1095 : {
1096 0 : CPLFree(pszEscaped);
1097 0 : return false;
1098 : }
1099 282675 : strcat(*ppszText + *pnLength, pszEscaped);
1100 :
1101 282675 : CPLFree(pszEscaped);
1102 :
1103 282675 : *pnLength += strlen(*ppszText + *pnLength);
1104 282675 : if (!_GrowBuffer(3 + *pnLength, ppszText, pnMaxLength))
1105 0 : return false;
1106 282675 : strcat(*ppszText + *pnLength, "\"");
1107 : }
1108 :
1109 : /* -------------------------------------------------------------------- */
1110 : /* Handle comment output. */
1111 : /* -------------------------------------------------------------------- */
1112 370988 : else if (psNode->eType == CXT_Comment)
1113 : {
1114 12451 : CPLAssert(psNode->psChild == nullptr);
1115 :
1116 81757 : for (int i = 0; i < nIndent; i++)
1117 69306 : (*ppszText)[(*pnLength)++] = ' ';
1118 :
1119 12451 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<!--%s-->\n",
1120 12451 : psNode->pszValue);
1121 : }
1122 :
1123 : /* -------------------------------------------------------------------- */
1124 : /* Handle literal output (like <!DOCTYPE...>) */
1125 : /* -------------------------------------------------------------------- */
1126 358537 : else if (psNode->eType == CXT_Literal)
1127 : {
1128 4 : CPLAssert(psNode->psChild == nullptr);
1129 :
1130 28 : for (int i = 0; i < nIndent; i++)
1131 24 : (*ppszText)[(*pnLength)++] = ' ';
1132 :
1133 4 : strcpy(*ppszText + *pnLength, psNode->pszValue);
1134 4 : strcat(*ppszText + *pnLength, "\n");
1135 : }
1136 :
1137 : /* -------------------------------------------------------------------- */
1138 : /* Elements actually have to deal with general children, and */
1139 : /* various formatting issues. */
1140 : /* -------------------------------------------------------------------- */
1141 358533 : else if (psNode->eType == CXT_Element)
1142 : {
1143 358533 : if (nIndent)
1144 348198 : memset(*ppszText + *pnLength, ' ', nIndent);
1145 358533 : *pnLength += nIndent;
1146 358533 : (*ppszText)[*pnLength] = '\0';
1147 :
1148 358533 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength, "<%s",
1149 358533 : psNode->pszValue);
1150 :
1151 358533 : if (psNode->pszValue[0] == '?')
1152 : {
1153 2665 : for (const CPLXMLNode *psChild = psNode->psChild;
1154 8008 : psChild != nullptr; psChild = psChild->psNext)
1155 : {
1156 5343 : if (psChild->eType == CXT_Text)
1157 : {
1158 3 : *pnLength += strlen(*ppszText + *pnLength);
1159 3 : if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1160 0 : return false;
1161 3 : strcat(*ppszText + *pnLength, " ");
1162 : }
1163 :
1164 5343 : if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1165 : pnMaxLength))
1166 : {
1167 0 : return false;
1168 : }
1169 : }
1170 2665 : if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1171 0 : return false;
1172 :
1173 2665 : strcat(*ppszText + *pnLength, "?>\n");
1174 : }
1175 : else
1176 : {
1177 355868 : bool bHasNonAttributeChildren = false;
1178 : // Serialize *all* the attribute children, regardless of order
1179 355868 : for (const CPLXMLNode *psChild = psNode->psChild;
1180 1143460 : psChild != nullptr; psChild = psChild->psNext)
1181 : {
1182 787593 : if (psChild->eType == CXT_Attribute)
1183 : {
1184 277335 : if (!CPLSerializeXMLNode(psChild, 0, ppszText, pnLength,
1185 : pnMaxLength))
1186 0 : return false;
1187 : }
1188 : else
1189 510258 : bHasNonAttributeChildren = true;
1190 : }
1191 :
1192 355868 : if (!bHasNonAttributeChildren)
1193 : {
1194 87896 : if (!_GrowBuffer(*pnLength + 40, ppszText, pnMaxLength))
1195 0 : return false;
1196 :
1197 87896 : strcat(*ppszText + *pnLength, " />\n");
1198 : }
1199 : else
1200 : {
1201 267972 : bool bJustText = true;
1202 :
1203 267972 : strcat(*ppszText + *pnLength, ">");
1204 :
1205 267972 : for (const CPLXMLNode *psChild = psNode->psChild;
1206 908641 : psChild != nullptr; psChild = psChild->psNext)
1207 : {
1208 640669 : if (psChild->eType == CXT_Attribute)
1209 130411 : continue;
1210 :
1211 510258 : if (psChild->eType != CXT_Text && bJustText)
1212 : {
1213 117895 : bJustText = false;
1214 117895 : *pnLength += strlen(*ppszText + *pnLength);
1215 117895 : if (!_GrowBuffer(1 + *pnLength, ppszText, pnMaxLength))
1216 0 : return false;
1217 117895 : strcat(*ppszText + *pnLength, "\n");
1218 : }
1219 :
1220 510258 : if (!CPLSerializeXMLNode(psChild, nIndent + 2, ppszText,
1221 : pnLength, pnMaxLength))
1222 0 : return false;
1223 : }
1224 :
1225 267972 : *pnLength += strlen(*ppszText + *pnLength);
1226 267972 : if (!_GrowBuffer(strlen(psNode->pszValue) + *pnLength + 40 +
1227 267972 : nIndent,
1228 : ppszText, pnMaxLength))
1229 0 : return false;
1230 :
1231 267972 : if (!bJustText)
1232 : {
1233 117895 : if (nIndent)
1234 110328 : memset(*ppszText + *pnLength, ' ', nIndent);
1235 117895 : *pnLength += nIndent;
1236 117895 : (*ppszText)[*pnLength] = '\0';
1237 : }
1238 :
1239 267972 : *pnLength += strlen(*ppszText + *pnLength);
1240 267972 : snprintf(*ppszText + *pnLength, *pnMaxLength - *pnLength,
1241 267972 : "</%s>\n", psNode->pszValue);
1242 : }
1243 : }
1244 : }
1245 :
1246 803776 : return true;
1247 : }
1248 :
1249 : /************************************************************************/
1250 : /* CPLSerializeXMLTree() */
1251 : /************************************************************************/
1252 :
1253 : /**
1254 : * \brief Convert tree into string document.
1255 : *
1256 : * This function converts a CPLXMLNode tree representation of a document
1257 : * into a flat string representation. White space indentation is used
1258 : * visually preserve the tree structure of the document. The returned
1259 : * document becomes owned by the caller and should be freed with CPLFree()
1260 : * when no longer needed.
1261 : *
1262 : * @param psNode the node to serialize.
1263 : *
1264 : * @return the document on success or NULL on failure.
1265 : */
1266 :
1267 7678 : char *CPLSerializeXMLTree(const CPLXMLNode *psNode)
1268 :
1269 : {
1270 7678 : size_t nMaxLength = 100;
1271 7678 : char *pszText = static_cast<char *>(CPLCalloc(nMaxLength, sizeof(char)));
1272 7678 : if (pszText == nullptr)
1273 0 : return nullptr;
1274 :
1275 7678 : size_t nLength = 0;
1276 18518 : for (const CPLXMLNode *psThis = psNode; psThis != nullptr;
1277 10840 : psThis = psThis->psNext)
1278 : {
1279 10840 : if (!CPLSerializeXMLNode(psThis, 0, &pszText, &nLength, &nMaxLength))
1280 : {
1281 0 : VSIFree(pszText);
1282 0 : return nullptr;
1283 : }
1284 : }
1285 :
1286 7678 : return pszText;
1287 : }
1288 :
1289 : /************************************************************************/
1290 : /* CPLCreateXMLNode() */
1291 : /************************************************************************/
1292 :
#ifdef DEBUG
// Debug-only sink: _CPLCreateXMLNode() stores parentless nodes here so that
// static analysis sees a live reference to them.
static CPLXMLNode *psDummyStaticNode = nullptr;
#endif
1296 :
1297 : /**
1298 : * \brief Create an document tree item.
1299 : *
1300 : * Create a single CPLXMLNode object with the desired value and type, and
1301 : * attach it as a child of the indicated parent.
1302 : *
1303 : * @param poParent the parent to which this node should be attached as a
1304 : * child. May be NULL to keep as free standing.
1305 : * @param eType the type of the newly created node
1306 : * @param pszText the value of the newly created node
1307 : *
1308 : * @return the newly created node, now owned by the caller (or parent node).
1309 : */
1310 :
1311 451334 : CPLXMLNode *CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
1312 : const char *pszText)
1313 :
1314 : {
1315 451334 : auto ret = _CPLCreateXMLNode(poParent, eType, pszText);
1316 451334 : if (!ret)
1317 : {
1318 0 : CPLError(CE_Fatal, CPLE_OutOfMemory, "CPLCreateXMLNode() failed");
1319 : }
1320 451334 : return ret;
1321 : }
1322 :
1323 : /************************************************************************/
1324 : /* _CPLCreateXMLNode() */
1325 : /************************************************************************/
1326 :
1327 : /* Same as CPLCreateXMLNode() but can return NULL in case of out-of-memory */
1328 : /* situation */
1329 :
1330 20857400 : static CPLXMLNode *_CPLCreateXMLNode(CPLXMLNode *poParent, CPLXMLNodeType eType,
1331 : const char *pszText)
1332 :
1333 : {
1334 :
1335 : /* -------------------------------------------------------------------- */
1336 : /* Create new node. */
1337 : /* -------------------------------------------------------------------- */
1338 : CPLXMLNode *psNode =
1339 20857400 : static_cast<CPLXMLNode *>(VSICalloc(sizeof(CPLXMLNode), 1));
1340 20857400 : if (psNode == nullptr)
1341 : {
1342 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "Cannot allocate CPLXMLNode");
1343 0 : return nullptr;
1344 : }
1345 :
1346 20857400 : psNode->eType = eType;
1347 20857400 : psNode->pszValue = VSIStrdup(pszText ? pszText : "");
1348 20857400 : if (psNode->pszValue == nullptr)
1349 : {
1350 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
1351 : "Cannot allocate psNode->pszValue");
1352 0 : VSIFree(psNode);
1353 0 : return nullptr;
1354 : }
1355 :
1356 : /* -------------------------------------------------------------------- */
1357 : /* Attach to parent, if provided. */
1358 : /* -------------------------------------------------------------------- */
1359 20857400 : if (poParent != nullptr)
1360 : {
1361 8141060 : if (poParent->psChild == nullptr)
1362 8005290 : poParent->psChild = psNode;
1363 : else
1364 : {
1365 135766 : CPLXMLNode *psLink = poParent->psChild;
1366 135766 : if (psLink->psNext == nullptr && eType == CXT_Attribute &&
1367 24636 : psLink->eType == CXT_Text)
1368 : {
1369 8995 : psNode->psNext = psLink;
1370 8995 : poParent->psChild = psNode;
1371 : }
1372 : else
1373 : {
1374 578899 : while (psLink->psNext != nullptr)
1375 : {
1376 456352 : if (eType == CXT_Attribute &&
1377 34080 : psLink->psNext->eType == CXT_Text)
1378 : {
1379 4224 : psNode->psNext = psLink->psNext;
1380 4224 : break;
1381 : }
1382 :
1383 452128 : psLink = psLink->psNext;
1384 : }
1385 :
1386 126771 : psLink->psNext = psNode;
1387 : }
1388 : }
1389 : }
1390 : #ifdef DEBUG
1391 : else
1392 : {
1393 : // Coverity sometimes doesn't realize that this function is passed
1394 : // with a non NULL parent and thinks that this branch is taken, leading
1395 : // to creating object being leak by caller. This ugly hack hopefully
1396 : // makes it believe that someone will reference it.
1397 12716400 : psDummyStaticNode = psNode;
1398 : }
1399 : #endif
1400 :
1401 20857400 : return psNode;
1402 : }
1403 :
1404 : /************************************************************************/
1405 : /* CPLDestroyXMLNode() */
1406 : /************************************************************************/
1407 :
1408 : /**
1409 : * \brief Destroy a tree.
1410 : *
1411 : * This function frees resources associated with a CPLXMLNode and all its
1412 : * children nodes.
1413 : *
1414 : * @param psNode the tree to free.
1415 : */
1416 :
1417 21122800 : void CPLDestroyXMLNode(CPLXMLNode *psNode)
1418 :
1419 : {
1420 21122800 : while (psNode != nullptr)
1421 : {
1422 20883200 : if (psNode->pszValue != nullptr)
1423 20883200 : CPLFree(psNode->pszValue);
1424 :
1425 20883200 : if (psNode->psChild != nullptr)
1426 : {
1427 11796300 : CPLXMLNode *psNext = psNode->psNext;
1428 11796300 : psNode->psNext = psNode->psChild;
1429 : // Move the child and its siblings as the next
1430 : // siblings of the current node.
1431 11796300 : if (psNext != nullptr)
1432 : {
1433 11256700 : CPLXMLNode *psIter = psNode->psChild;
1434 18981200 : while (psIter->psNext != nullptr)
1435 7724500 : psIter = psIter->psNext;
1436 11256700 : psIter->psNext = psNext;
1437 : }
1438 : }
1439 :
1440 20883200 : CPLXMLNode *psNext = psNode->psNext;
1441 :
1442 20883200 : CPLFree(psNode);
1443 :
1444 20883200 : psNode = psNext;
1445 : }
1446 239649 : }
1447 :
1448 : /************************************************************************/
1449 : /* CPLSearchXMLNode() */
1450 : /************************************************************************/
1451 :
1452 : /**
1453 : * \brief Search for a node in document.
1454 : *
1455 : * Searches the children (and potentially siblings) of the documented
1456 : * passed in for the named element or attribute. To search following
1457 : * siblings as well as children, prefix the pszElement name with an equal
1458 : * sign. This function does an in-order traversal of the document tree.
1459 : * So it will first match against the current node, then its first child,
1460 : * that child's first child, and so on.
1461 : *
1462 : * Use CPLGetXMLNode() to find a specific child, or along a specific
1463 : * node path.
1464 : *
1465 : * @param psRoot the subtree to search. This should be a node of type
1466 : * CXT_Element. NULL is safe.
1467 : *
1468 : * @param pszElement the name of the element or attribute to search for.
1469 : *
1470 : * @return The matching node or NULL on failure.
1471 : */
1472 :
1473 91489 : CPLXMLNode *CPLSearchXMLNode(CPLXMLNode *psRoot, const char *pszElement)
1474 :
1475 : {
1476 91489 : if (psRoot == nullptr || pszElement == nullptr)
1477 0 : return nullptr;
1478 :
1479 91489 : bool bSideSearch = false;
1480 :
1481 91489 : if (*pszElement == '=')
1482 : {
1483 5104 : bSideSearch = true;
1484 5104 : pszElement++;
1485 : }
1486 :
1487 : /* -------------------------------------------------------------------- */
1488 : /* Does this node match? */
1489 : /* -------------------------------------------------------------------- */
1490 91489 : if ((psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute) &&
1491 91318 : EQUAL(pszElement, psRoot->pszValue))
1492 3859 : return psRoot;
1493 :
1494 : /* -------------------------------------------------------------------- */
1495 : /* Search children. */
1496 : /* -------------------------------------------------------------------- */
1497 87630 : CPLXMLNode *psChild = nullptr;
1498 233489 : for (psChild = psRoot->psChild; psChild != nullptr;
1499 145859 : psChild = psChild->psNext)
1500 : {
1501 147122 : if ((psChild->eType == CXT_Element ||
1502 87983 : psChild->eType == CXT_Attribute) &&
1503 85430 : EQUAL(pszElement, psChild->pszValue))
1504 423 : return psChild;
1505 :
1506 146699 : if (psChild->psChild != nullptr)
1507 : {
1508 81468 : CPLXMLNode *psResult = CPLSearchXMLNode(psChild, pszElement);
1509 81468 : if (psResult != nullptr)
1510 840 : return psResult;
1511 : }
1512 : }
1513 :
1514 : /* -------------------------------------------------------------------- */
1515 : /* Search siblings if we are in side search mode. */
1516 : /* -------------------------------------------------------------------- */
1517 86367 : if (bSideSearch)
1518 : {
1519 5542 : for (psRoot = psRoot->psNext; psRoot != nullptr;
1520 810 : psRoot = psRoot->psNext)
1521 : {
1522 4671 : CPLXMLNode *psResult = CPLSearchXMLNode(psRoot, pszElement);
1523 4671 : if (psResult != nullptr)
1524 3861 : return psResult;
1525 : }
1526 : }
1527 :
1528 82506 : return nullptr;
1529 : }
1530 :
1531 : /************************************************************************/
1532 : /* CPLGetXMLNode() */
1533 : /************************************************************************/
1534 :
1535 : /**
1536 : * \brief Find node by path.
1537 : *
1538 : * Searches the document or subdocument indicated by psRoot for an element
1539 : * (or attribute) with the given path. The path should consist of a set of
1540 : * element names separated by dots, not including the name of the root
1541 : * element (psRoot). If the requested element is not found NULL is returned.
1542 : *
1543 : * Attribute names may only appear as the last item in the path.
1544 : *
1545 : * The search is done from the root nodes children, but all intermediate
1546 : * nodes in the path must be specified. Searching for "name" would only find
1547 : * a name element or attribute if it is a direct child of the root, not at any
1548 : * level in the subdocument.
1549 : *
1550 : * If the pszPath is prefixed by "=" then the search will begin with the
1551 : * root node, and its siblings, instead of the root nodes children. This
1552 : * is particularly useful when searching within a whole document which is
1553 : * often prefixed by one or more "junk" nodes like the <?xml> declaration.
1554 : *
1555 : * @param psRoot the subtree in which to search. This should be a node of
1556 : * type CXT_Element. NULL is safe.
1557 : *
1558 : * @param pszPath the list of element names in the path (dot separated).
1559 : *
1560 : * @return the requested element node, or NULL if not found.
1561 : */
1562 :
1563 1483460 : CPLXMLNode *CPLGetXMLNode(CPLXMLNode *psRoot, const char *pszPath)
1564 :
1565 : {
1566 1483460 : if (psRoot == nullptr || pszPath == nullptr)
1567 1050 : return nullptr;
1568 :
1569 1482420 : bool bSideSearch = false;
1570 :
1571 1482420 : if (*pszPath == '=')
1572 : {
1573 213607 : bSideSearch = true;
1574 213607 : pszPath++;
1575 : }
1576 :
1577 1482420 : const char *const apszTokens[2] = {pszPath, nullptr};
1578 :
1579 : // Slight optimization: avoid using CSLTokenizeStringComplex that
1580 : // does memory allocations when it is not really necessary.
1581 1482420 : bool bFreeTokens = false;
1582 1482420 : char **papszTokensToFree = nullptr;
1583 : const char *const *papszTokens;
1584 1482420 : if (strchr(pszPath, '.'))
1585 : {
1586 : papszTokensToFree =
1587 227730 : CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
1588 227730 : papszTokens = papszTokensToFree;
1589 227730 : bFreeTokens = true;
1590 : }
1591 : else
1592 : {
1593 1254680 : papszTokens = apszTokens;
1594 : }
1595 :
1596 1482420 : int iToken = 0;
1597 2588950 : while (papszTokens[iToken] != nullptr && psRoot != nullptr)
1598 : {
1599 1715130 : CPLXMLNode *psChild = nullptr;
1600 :
1601 1715130 : if (bSideSearch)
1602 : {
1603 213607 : psChild = psRoot;
1604 213607 : bSideSearch = false;
1605 : }
1606 : else
1607 1501520 : psChild = psRoot->psChild;
1608 :
1609 6784840 : for (; psChild != nullptr; psChild = psChild->psNext)
1610 : {
1611 6176250 : if (psChild->eType != CXT_Text &&
1612 6140710 : EQUAL(papszTokens[iToken], psChild->pszValue))
1613 1106540 : break;
1614 : }
1615 :
1616 1715130 : if (psChild == nullptr)
1617 : {
1618 608593 : psRoot = nullptr;
1619 608593 : break;
1620 : }
1621 :
1622 1106540 : psRoot = psChild;
1623 1106540 : iToken++;
1624 : }
1625 :
1626 1482420 : if (bFreeTokens)
1627 227730 : CSLDestroy(papszTokensToFree);
1628 1482420 : return psRoot;
1629 : }
1630 :
1631 : /************************************************************************/
1632 : /* CPLGetXMLValue() */
1633 : /************************************************************************/
1634 :
1635 : /**
1636 : * \brief Fetch element/attribute value.
1637 : *
1638 : * Searches the document for the element/attribute value associated with
1639 : * the path. The corresponding node is internally found with CPLGetXMLNode()
1640 : * (see there for details on path handling). Once found, the value is
1641 : * considered to be the first CXT_Text child of the node.
1642 : *
1643 : * If the attribute/element search fails, or if the found node has no
1644 : * value then the passed default value is returned.
1645 : *
1646 : * The returned value points to memory within the document tree, and should
1647 : * not be altered or freed.
1648 : *
1649 : * @param psRoot the subtree in which to search. This should be a node of
1650 : * type CXT_Element. NULL is safe.
1651 : *
1652 : * @param pszPath the list of element names in the path (dot separated). An
1653 : * empty path means get the value of the psRoot node.
1654 : *
1655 : * @param pszDefault the value to return if a corresponding value is not
1656 : * found, may be NULL.
1657 : *
1658 : * @return the requested value or pszDefault if not found.
1659 : */
1660 :
1661 1490240 : const char *CPLGetXMLValue(const CPLXMLNode *psRoot, const char *pszPath,
1662 : const char *pszDefault)
1663 :
1664 : {
1665 1490240 : const CPLXMLNode *psTarget = nullptr;
1666 :
1667 1490240 : if (pszPath == nullptr || *pszPath == '\0')
1668 102008 : psTarget = psRoot;
1669 : else
1670 1388230 : psTarget = CPLGetXMLNode(psRoot, pszPath);
1671 :
1672 1490240 : if (psTarget == nullptr)
1673 566143 : return pszDefault;
1674 :
1675 924093 : if (psTarget->eType == CXT_Attribute)
1676 : {
1677 501624 : CPLAssert(psTarget->psChild != nullptr &&
1678 : psTarget->psChild->eType == CXT_Text);
1679 :
1680 501624 : return psTarget->psChild->pszValue;
1681 : }
1682 :
1683 422469 : if (psTarget->eType == CXT_Element)
1684 : {
1685 : // Find first non-attribute child, and verify it is a single text
1686 : // with no siblings.
1687 :
1688 422445 : psTarget = psTarget->psChild;
1689 :
1690 468356 : while (psTarget != nullptr && psTarget->eType == CXT_Attribute)
1691 45911 : psTarget = psTarget->psNext;
1692 :
1693 422445 : if (psTarget != nullptr && psTarget->eType == CXT_Text &&
1694 419923 : psTarget->psNext == nullptr)
1695 419923 : return psTarget->pszValue;
1696 : }
1697 :
1698 2546 : return pszDefault;
1699 : }
1700 :
1701 : /************************************************************************/
1702 : /* CPLAddXMLChild() */
1703 : /************************************************************************/
1704 :
1705 : /**
1706 : * \brief Add child node to parent.
1707 : *
1708 : * The passed child is added to the list of children of the indicated
1709 : * parent. Normally the child is added at the end of the parents child
1710 : * list, but attributes (CXT_Attribute) will be inserted after any other
1711 : * attributes but before any other element type. Ownership of the child
1712 : * node is effectively assumed by the parent node. If the child has
1713 : * siblings (its psNext is not NULL) they will be trimmed, but if the child
1714 : * has children they are carried with it.
1715 : *
1716 : * @param psParent the node to attach the child to. May not be NULL.
1717 : *
1718 : * @param psChild the child to add to the parent. May not be NULL. Should
1719 : * not be a child of any other parent.
1720 : */
1721 :
1722 5646 : void CPLAddXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1723 :
1724 : {
1725 5646 : if (psParent->psChild == nullptr)
1726 : {
1727 2353 : psParent->psChild = psChild;
1728 2353 : return;
1729 : }
1730 :
1731 : // Insert at head of list if first child is not attribute.
1732 3293 : if (psChild->eType == CXT_Attribute &&
1733 21 : psParent->psChild->eType != CXT_Attribute)
1734 : {
1735 0 : psChild->psNext = psParent->psChild;
1736 0 : psParent->psChild = psChild;
1737 0 : return;
1738 : }
1739 :
1740 : // Search for end of list.
1741 3293 : CPLXMLNode *psSib = nullptr;
1742 14460 : for (psSib = psParent->psChild; psSib->psNext != nullptr;
1743 11167 : psSib = psSib->psNext)
1744 : {
1745 : // Insert attributes if the next node is not an attribute.
1746 11168 : if (psChild->eType == CXT_Attribute && psSib->psNext != nullptr &&
1747 5 : psSib->psNext->eType != CXT_Attribute)
1748 : {
1749 1 : psChild->psNext = psSib->psNext;
1750 1 : psSib->psNext = psChild;
1751 1 : return;
1752 : }
1753 : }
1754 :
1755 3292 : psSib->psNext = psChild;
1756 : }
1757 :
1758 : /************************************************************************/
1759 : /* CPLRemoveXMLChild() */
1760 : /************************************************************************/
1761 :
1762 : /**
1763 : * \brief Remove child node from parent.
1764 : *
1765 : * The passed child is removed from the child list of the passed parent,
1766 : * but the child is not destroyed. The child retains ownership of its
1767 : * own children, but is cleanly removed from the child list of the parent.
1768 : *
1769 : * @param psParent the node to the child is attached to.
1770 : *
1771 : * @param psChild the child to remove.
1772 : *
1773 : * @return TRUE on success or FALSE if the child was not found.
1774 : */
1775 :
1776 2592 : int CPLRemoveXMLChild(CPLXMLNode *psParent, CPLXMLNode *psChild)
1777 :
1778 : {
1779 2592 : if (psParent == nullptr)
1780 0 : return FALSE;
1781 :
1782 2592 : CPLXMLNode *psLast = nullptr;
1783 2592 : CPLXMLNode *psThis = nullptr;
1784 5685 : for (psThis = psParent->psChild; psThis != nullptr; psThis = psThis->psNext)
1785 : {
1786 4451 : if (psThis == psChild)
1787 : {
1788 1358 : if (psLast == nullptr)
1789 872 : psParent->psChild = psThis->psNext;
1790 : else
1791 486 : psLast->psNext = psThis->psNext;
1792 :
1793 1358 : psThis->psNext = nullptr;
1794 1358 : return TRUE;
1795 : }
1796 3093 : psLast = psThis;
1797 : }
1798 :
1799 1234 : return FALSE;
1800 : }
1801 :
1802 : /************************************************************************/
1803 : /* CPLAddXMLSibling() */
1804 : /************************************************************************/
1805 :
1806 : /**
1807 : * \brief Add new sibling.
1808 : *
1809 : * The passed psNewSibling is added to the end of siblings of the
1810 : * psOlderSibling node. That is, it is added to the end of the psNext
1811 : * chain. There is no special handling if psNewSibling is an attribute.
1812 : * If this is required, use CPLAddXMLChild().
1813 : *
1814 : * @param psOlderSibling the node to attach the sibling after.
1815 : *
1816 : * @param psNewSibling the node to add at the end of psOlderSiblings psNext
1817 : * chain.
1818 : */
1819 :
1820 4131 : void CPLAddXMLSibling(CPLXMLNode *psOlderSibling, CPLXMLNode *psNewSibling)
1821 :
1822 : {
1823 4131 : if (psOlderSibling == nullptr)
1824 0 : return;
1825 :
1826 4202 : while (psOlderSibling->psNext != nullptr)
1827 71 : psOlderSibling = psOlderSibling->psNext;
1828 :
1829 4131 : psOlderSibling->psNext = psNewSibling;
1830 : }
1831 :
1832 : /************************************************************************/
1833 : /* CPLCreateXMLElementAndValue() */
1834 : /************************************************************************/
1835 :
1836 : /**
1837 : * \brief Create an element and text value.
1838 : *
1839 : * This is function is a convenient short form for:
1840 : *
1841 : * \code
1842 : * CPLXMLNode *psTextNode;
1843 : * CPLXMLNode *psElementNode;
1844 : *
1845 : * psElementNode = CPLCreateXMLNode( psParent, CXT_Element, pszName );
1846 : * psTextNode = CPLCreateXMLNode( psElementNode, CXT_Text, pszValue );
1847 : *
1848 : * return psElementNode;
1849 : * \endcode
1850 : *
1851 : * It creates a CXT_Element node, with a CXT_Text child, and
1852 : * attaches the element to the passed parent.
1853 : *
1854 : * @param psParent the parent node to which the resulting node should
1855 : * be attached. May be NULL to keep as freestanding.
1856 : *
1857 : * @param pszName the element name to create.
1858 : * @param pszValue the text to attach to the element. Must not be NULL.
1859 : *
1860 : * @return the pointer to the new element node.
1861 : */
1862 :
1863 61852 : CPLXMLNode *CPLCreateXMLElementAndValue(CPLXMLNode *psParent,
1864 : const char *pszName,
1865 : const char *pszValue)
1866 :
1867 : {
1868 : CPLXMLNode *psElementNode =
1869 61852 : CPLCreateXMLNode(psParent, CXT_Element, pszName);
1870 61852 : CPLCreateXMLNode(psElementNode, CXT_Text, pszValue);
1871 :
1872 61852 : return psElementNode;
1873 : }
1874 :
1875 : /************************************************************************/
1876 : /* CPLAddXMLAttributeAndValue() */
1877 : /************************************************************************/
1878 :
1879 : /**
1880 : * \brief Create an attribute and text value.
1881 : *
1882 : * This is function is a convenient short form for:
1883 : *
1884 : * \code
1885 : * CPLXMLNode *psAttributeNode;
1886 : *
1887 : * psAttributeNode = CPLCreateXMLNode( psParent, CXT_Attribute, pszName );
1888 : * CPLCreateXMLNode( psAttributeNode, CXT_Text, pszValue );
1889 : * \endcode
1890 : *
1891 : * It creates a CXT_Attribute node, with a CXT_Text child, and
1892 : * attaches the element to the passed parent.
1893 : *
1894 : * @param psParent the parent node to which the resulting node should
1895 : * be attached. Must not be NULL.
1896 : * @param pszName the attribute name to create.
1897 : * @param pszValue the text to attach to the attribute. Must not be NULL.
1898 : *
1899 : * @since GDAL 2.0
1900 : */
1901 :
1902 27536 : void CPLAddXMLAttributeAndValue(CPLXMLNode *psParent, const char *pszName,
1903 : const char *pszValue)
1904 : {
1905 27536 : CPLAssert(psParent != nullptr);
1906 : CPLXMLNode *psAttributeNode =
1907 27536 : CPLCreateXMLNode(psParent, CXT_Attribute, pszName);
1908 27536 : CPLCreateXMLNode(psAttributeNode, CXT_Text, pszValue);
1909 27536 : }
1910 :
1911 : /************************************************************************/
1912 : /* CPLCloneXMLTree() */
1913 : /************************************************************************/
1914 :
1915 : /**
1916 : * \brief Copy tree.
1917 : *
1918 : * Creates a deep copy of a CPLXMLNode tree.
1919 : *
1920 : * @param psTree the tree to duplicate.
1921 : *
1922 : * @return a copy of the whole tree.
1923 : */
1924 :
1925 30593 : CPLXMLNode *CPLCloneXMLTree(const CPLXMLNode *psTree)
1926 :
1927 : {
1928 30593 : CPLXMLNode *psPrevious = nullptr;
1929 30593 : CPLXMLNode *psReturn = nullptr;
1930 :
1931 80450 : while (psTree != nullptr)
1932 : {
1933 : CPLXMLNode *psCopy =
1934 49857 : CPLCreateXMLNode(nullptr, psTree->eType, psTree->pszValue);
1935 49857 : if (psReturn == nullptr)
1936 30593 : psReturn = psCopy;
1937 49857 : if (psPrevious != nullptr)
1938 19264 : psPrevious->psNext = psCopy;
1939 :
1940 49857 : if (psTree->psChild != nullptr)
1941 29208 : psCopy->psChild = CPLCloneXMLTree(psTree->psChild);
1942 :
1943 49857 : psPrevious = psCopy;
1944 49857 : psTree = psTree->psNext;
1945 : }
1946 :
1947 30593 : return psReturn;
1948 : }
1949 :
1950 : /************************************************************************/
1951 : /* CPLSetXMLValue() */
1952 : /************************************************************************/
1953 :
1954 : /**
1955 : * \brief Set element value by path.
1956 : *
1957 : * Find (or create) the target element or attribute specified in the
1958 : * path, and assign it the indicated value.
1959 : *
1960 : * Any path elements that do not already exist will be created. The target
1961 : * nodes value (the first CXT_Text child) will be replaced with the provided
1962 : * value.
1963 : *
1964 : * If the target node is an attribute instead of an element, the name
1965 : * should be prefixed with a #.
1966 : *
1967 : * Example:
1968 : * CPLSetXMLValue( "Citation.Id.Description", "DOQ dataset" );
1969 : * CPLSetXMLValue( "Citation.Id.Description.#name", "doq" );
1970 : *
1971 : * @param psRoot the subdocument to be updated.
1972 : *
1973 : * @param pszPath the dot separated path to the target element/attribute.
1974 : *
1975 : * @param pszValue the text value to assign.
1976 : *
1977 : * @return TRUE on success.
1978 : */
1979 :
1980 39043 : int CPLSetXMLValue(CPLXMLNode *psRoot, const char *pszPath,
1981 : const char *pszValue)
1982 :
1983 : {
1984 39043 : char **papszTokens = CSLTokenizeStringComplex(pszPath, ".", FALSE, FALSE);
1985 39043 : int iToken = 0;
1986 :
1987 89633 : while (papszTokens[iToken] != nullptr)
1988 : {
1989 50590 : bool bIsAttribute = false;
1990 50590 : const char *pszName = papszTokens[iToken];
1991 :
1992 50590 : if (pszName[0] == '#')
1993 : {
1994 31279 : bIsAttribute = true;
1995 31279 : pszName++;
1996 : }
1997 :
1998 50590 : if (psRoot->eType != CXT_Element)
1999 : {
2000 0 : CSLDestroy(papszTokens);
2001 0 : return FALSE;
2002 : }
2003 :
2004 50590 : CPLXMLNode *psChild = nullptr;
2005 123632 : for (psChild = psRoot->psChild; psChild != nullptr;
2006 73042 : psChild = psChild->psNext)
2007 : {
2008 85788 : if (psChild->eType != CXT_Text && EQUAL(pszName, psChild->pszValue))
2009 12746 : break;
2010 : }
2011 :
2012 50590 : if (psChild == nullptr)
2013 : {
2014 37844 : if (bIsAttribute)
2015 30964 : psChild = CPLCreateXMLNode(psRoot, CXT_Attribute, pszName);
2016 : else
2017 6880 : psChild = CPLCreateXMLNode(psRoot, CXT_Element, pszName);
2018 : }
2019 :
2020 50590 : psRoot = psChild;
2021 50590 : iToken++;
2022 : }
2023 :
2024 39043 : CSLDestroy(papszTokens);
2025 :
2026 : /* -------------------------------------------------------------------- */
2027 : /* Find the "text" child if there is one. */
2028 : /* -------------------------------------------------------------------- */
2029 39043 : CPLXMLNode *psTextChild = psRoot->psChild;
2030 :
2031 39233 : while (psTextChild != nullptr && psTextChild->eType != CXT_Text)
2032 190 : psTextChild = psTextChild->psNext;
2033 :
2034 : /* -------------------------------------------------------------------- */
2035 : /* Now set a value node under this node. */
2036 : /* -------------------------------------------------------------------- */
2037 :
2038 39043 : if (psTextChild == nullptr)
2039 38472 : CPLCreateXMLNode(psRoot, CXT_Text, pszValue);
2040 : else
2041 : {
2042 571 : CPLFree(psTextChild->pszValue);
2043 571 : psTextChild->pszValue = CPLStrdup(pszValue);
2044 : }
2045 :
2046 39043 : return TRUE;
2047 : }
2048 :
2049 : /************************************************************************/
2050 : /* CPLStripXMLNamespace() */
2051 : /************************************************************************/
2052 :
2053 : /**
2054 : * \brief Strip indicated namespaces.
2055 : *
2056 : * The subdocument (psRoot) is recursively examined, and any elements
2057 : * with the indicated namespace prefix will have the namespace prefix
2058 : * stripped from the element names. If the passed namespace is NULL, then
2059 : * all namespace prefixes will be stripped.
2060 : *
2061 : * Nodes other than elements should remain unaffected. The changes are
2062 : * made "in place", and should not alter any node locations, only the
2063 : * pszValue field of affected nodes.
2064 : *
2065 : * @param psRoot the document to operate on.
2066 : * @param pszNamespace the name space prefix (not including colon), or NULL.
2067 : * @param bRecurse TRUE to recurse over whole document, or FALSE to only
2068 : * operate on the passed node.
2069 : */
2070 :
2071 1509970 : void CPLStripXMLNamespace(CPLXMLNode *psRoot, const char *pszNamespace,
2072 : int bRecurse)
2073 :
2074 : {
2075 1509970 : size_t nNameSpaceLen = (pszNamespace) ? strlen(pszNamespace) : 0;
2076 :
2077 3881400 : while (psRoot != nullptr)
2078 : {
2079 2371420 : if (psRoot->eType == CXT_Element || psRoot->eType == CXT_Attribute)
2080 : {
2081 1332700 : if (pszNamespace != nullptr)
2082 : {
2083 597 : if (EQUALN(pszNamespace, psRoot->pszValue, nNameSpaceLen) &&
2084 177 : psRoot->pszValue[nNameSpaceLen] == ':')
2085 : {
2086 177 : memmove(psRoot->pszValue,
2087 177 : psRoot->pszValue + nNameSpaceLen + 1,
2088 177 : strlen(psRoot->pszValue + nNameSpaceLen + 1) + 1);
2089 : }
2090 : }
2091 : else
2092 : {
2093 8254640 : for (const char *pszCheck = psRoot->pszValue; *pszCheck != '\0';
2094 : pszCheck++)
2095 : {
2096 7854370 : if (*pszCheck == ':')
2097 : {
2098 931840 : memmove(psRoot->pszValue, pszCheck + 1,
2099 931840 : strlen(pszCheck + 1) + 1);
2100 931840 : break;
2101 : }
2102 : }
2103 : }
2104 : }
2105 :
2106 2371420 : if (bRecurse)
2107 : {
2108 2371420 : if (psRoot->psChild != nullptr)
2109 1318940 : CPLStripXMLNamespace(psRoot->psChild, pszNamespace, 1);
2110 :
2111 2371420 : psRoot = psRoot->psNext;
2112 : }
2113 : else
2114 : {
2115 0 : break;
2116 : }
2117 : }
2118 1509970 : }
2119 :
2120 : /************************************************************************/
2121 : /* CPLParseXMLFile() */
2122 : /************************************************************************/
2123 :
2124 : /**
2125 : * \brief Parse XML file into tree.
2126 : *
2127 : * The named file is opened, loaded into memory as a big string, and
2128 : * parsed with CPLParseXMLString(). Errors in reading the file or parsing
2129 : * the XML will be reported by CPLError().
2130 : *
2131 : * The "large file" API is used, so XML files can come from virtualized
2132 : * files.
2133 : *
2134 : * @param pszFilename the file to open.
2135 : *
2136 : * @return NULL on failure, or the document tree on success.
2137 : */
2138 :
2139 4910 : CPLXMLNode *CPLParseXMLFile(const char *pszFilename)
2140 :
2141 : {
2142 : /* -------------------------------------------------------------------- */
2143 : /* Ingest the file. */
2144 : /* -------------------------------------------------------------------- */
2145 4910 : GByte *pabyOut = nullptr;
2146 4910 : if (!VSIIngestFile(nullptr, pszFilename, &pabyOut, nullptr, -1))
2147 60 : return nullptr;
2148 :
2149 4850 : char *pszDoc = reinterpret_cast<char *>(pabyOut);
2150 :
2151 : /* -------------------------------------------------------------------- */
2152 : /* Parse it. */
2153 : /* -------------------------------------------------------------------- */
2154 4850 : CPLXMLNode *psTree = CPLParseXMLString(pszDoc);
2155 4850 : CPLFree(pszDoc);
2156 :
2157 4850 : return psTree;
2158 : }
2159 :
2160 : /************************************************************************/
2161 : /* CPLSerializeXMLTreeToFile() */
2162 : /************************************************************************/
2163 :
2164 : /**
2165 : * \brief Write document tree to a file.
2166 : *
2167 : * The passed document tree is converted into one big string (with
2168 : * CPLSerializeXMLTree()) and then written to the named file. Errors writing
2169 : * the file will be reported by CPLError(). The source document tree is
2170 : * not altered. If the output file already exists it will be overwritten.
2171 : *
2172 : * @param psTree the document tree to write.
2173 : * @param pszFilename the name of the file to write to.
2174 : * @return TRUE on success, FALSE otherwise.
2175 : */
2176 :
2177 2587 : int CPLSerializeXMLTreeToFile(const CPLXMLNode *psTree, const char *pszFilename)
2178 :
2179 : {
2180 : /* -------------------------------------------------------------------- */
2181 : /* Serialize document. */
2182 : /* -------------------------------------------------------------------- */
2183 2587 : char *pszDoc = CPLSerializeXMLTree(psTree);
2184 2587 : if (pszDoc == nullptr)
2185 0 : return FALSE;
2186 :
2187 2587 : const vsi_l_offset nLength = strlen(pszDoc);
2188 :
2189 : /* -------------------------------------------------------------------- */
2190 : /* Create file. */
2191 : /* -------------------------------------------------------------------- */
2192 2587 : VSILFILE *fp = VSIFOpenL(pszFilename, "wt");
2193 2587 : if (fp == nullptr)
2194 : {
2195 6 : CPLError(CE_Failure, CPLE_OpenFailed, "Failed to open %.500s to write.",
2196 : pszFilename);
2197 6 : CPLFree(pszDoc);
2198 6 : return FALSE;
2199 : }
2200 :
2201 : /* -------------------------------------------------------------------- */
2202 : /* Write file. */
2203 : /* -------------------------------------------------------------------- */
2204 2581 : if (VSIFWriteL(pszDoc, 1, static_cast<size_t>(nLength), fp) != nLength)
2205 : {
2206 77 : CPLError(CE_Failure, CPLE_FileIO,
2207 : "Failed to write whole XML document (%.500s).", pszFilename);
2208 77 : CPL_IGNORE_RET_VAL(VSIFCloseL(fp));
2209 77 : CPLFree(pszDoc);
2210 77 : return FALSE;
2211 : }
2212 :
2213 : /* -------------------------------------------------------------------- */
2214 : /* Cleanup */
2215 : /* -------------------------------------------------------------------- */
2216 2504 : const bool bRet = VSIFCloseL(fp) == 0;
2217 2504 : if (!bRet)
2218 : {
2219 0 : CPLError(CE_Failure, CPLE_FileIO,
2220 : "Failed to write whole XML document (%.500s).", pszFilename);
2221 : }
2222 2504 : CPLFree(pszDoc);
2223 :
2224 2504 : return bRet;
2225 : }
2226 :
2227 : /************************************************************************/
2228 : /* CPLCleanXMLElementName() */
2229 : /************************************************************************/
2230 :
/**
 * \brief Make string into safe XML token.
 *
 * Modifies a string in place to try and make it into a legal
 * XML token that can be used as an element name.  Bytes with the high bit
 * set, ASCII letters and digits, '_' and '.' are kept; every other
 * character is changed into an underscore.
 *
 * NOTE: This function should implement the rules in section 2.3 of
 * http://www.w3.org/TR/xml11/ but it doesn't yet do that properly.  We
 * only do a rough approximation of that.
 *
 * @param pszTarget the string to be adjusted.  It is altered in place.
 * A NULL pointer is accepted and ignored.
 */

void CPLCleanXMLElementName(char *pszTarget)
{
    if (pszTarget == nullptr)
        return;

    for (char *pch = pszTarget; *pch != '\0'; ++pch)
    {
        const unsigned char uch = static_cast<unsigned char>(*pch);

        // High-bit bytes are passed through untouched.
        const bool bKeep = (uch & 0x80) != 0 || isalnum(uch) || uch == '_' ||
                           uch == '.';
        if (!bKeep)
            *pch = '_';
    }
}
2264 :
2265 : /************************************************************************/
2266 : /* CPLXMLNodeGetRAMUsageEstimate() */
2267 : /************************************************************************/
2268 :
2269 47451 : static size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode,
2270 : bool bVisitSiblings)
2271 : {
2272 47451 : size_t nRet = sizeof(CPLXMLNode);
2273 : // malloc() aligns on 16-byte boundaries on 64 bit.
2274 47451 : nRet += std::max(2 * sizeof(void *), strlen(psNode->pszValue) + 1);
2275 47451 : if (bVisitSiblings)
2276 : {
2277 47451 : for (const CPLXMLNode *psIter = psNode->psNext; psIter;
2278 19410 : psIter = psIter->psNext)
2279 : {
2280 19410 : nRet += CPLXMLNodeGetRAMUsageEstimate(psIter, false);
2281 : }
2282 : }
2283 47451 : if (psNode->psChild)
2284 : {
2285 26374 : nRet += CPLXMLNodeGetRAMUsageEstimate(psNode->psChild, true);
2286 : }
2287 47451 : return nRet;
2288 : }
2289 :
2290 : /** Return a conservative estimate of the RAM usage of this node, its children
2291 : * and siblings. The returned values is in bytes.
2292 : *
2293 : * @since 3.9
2294 : */
2295 1667 : size_t CPLXMLNodeGetRAMUsageEstimate(const CPLXMLNode *psNode)
2296 : {
2297 1667 : return CPLXMLNodeGetRAMUsageEstimate(psNode, true);
2298 : }
2299 :
2300 : /************************************************************************/
2301 : /* CPLXMLTreeCloser::getDocumentElement() */
2302 : /************************************************************************/
2303 :
2304 72 : CPLXMLNode *CPLXMLTreeCloser::getDocumentElement()
2305 : {
2306 72 : CPLXMLNode *doc = get();
2307 : // skip the Declaration and assume the next is the root element
2308 120 : while (doc != nullptr &&
2309 120 : (doc->eType != CXT_Element || doc->pszValue[0] == '?'))
2310 : {
2311 48 : doc = doc->psNext;
2312 : }
2313 72 : return doc;
2314 : }
|