Line data Source code
1 : /******************************************************************************
2 : * Project: OGR
3 : * Purpose: OGRGMLASDriver implementation
4 : * Author: Even Rouault, <even dot rouault at spatialys dot com>
5 : *
6 : * Initial development funded by the European Earth observation programme
7 : * Copernicus
8 : *
9 : ******************************************************************************
10 : * Copyright (c) 2016, Even Rouault, <even dot rouault at spatialys dot com>
11 : *
12 : * SPDX-License-Identifier: MIT
13 : ****************************************************************************/
14 :
15 : #include "ogr_gmlas.h"
16 :
17 : #include "ogr_p.h"
18 :
19 : #include "cpl_json_header.h"
20 :
21 : #include <algorithm>
22 :
23 : /************************************************************************/
24 : /* GMLASBinInputStream */
25 : /************************************************************************/
26 :
27 : class GMLASBinInputStream : public BinInputStream
28 : {
29 : VSILFILE *m_fp = nullptr;
30 :
31 : CPL_DISALLOW_COPY_ASSIGN(GMLASBinInputStream)
32 :
33 : public:
34 : explicit GMLASBinInputStream(VSILFILE *fp);
35 : virtual ~GMLASBinInputStream();
36 :
37 : virtual XMLFilePos curPos() const override;
38 : virtual XMLSize_t readBytes(XMLByte *const toFill,
39 : const XMLSize_t maxToRead) override;
40 : virtual const XMLCh *getContentType() const override;
41 : };
42 :
43 : /************************************************************************/
44 : /* GMLASBinInputStream() */
45 : /************************************************************************/
46 :
47 3323 : GMLASBinInputStream::GMLASBinInputStream(VSILFILE *fp)
48 : {
49 3323 : m_fp = fp;
50 3323 : VSIFSeekL(fp, 0, SEEK_SET);
51 3323 : }
52 :
53 : /************************************************************************/
54 : /* ~GMLASBinInputStream() */
55 : /************************************************************************/
56 :
57 6646 : GMLASBinInputStream::~GMLASBinInputStream()
58 : {
59 6646 : }
60 :
61 : /************************************************************************/
62 : /* curPos() */
63 : /************************************************************************/
64 :
65 0 : XMLFilePos GMLASBinInputStream::curPos() const
66 : {
67 0 : return static_cast<XMLFilePos>(VSIFTellL(m_fp));
68 : }
69 :
70 : /************************************************************************/
71 : /* readBytes() */
72 : /************************************************************************/
73 :
74 8838 : XMLSize_t GMLASBinInputStream::readBytes(XMLByte *const toFill,
75 : const XMLSize_t maxToRead)
76 : {
77 8838 : return static_cast<XMLSize_t>(VSIFReadL(toFill, 1, maxToRead, m_fp));
78 : }
79 :
80 : /************************************************************************/
81 : /* getContentType() */
82 : /************************************************************************/
83 :
84 0 : const XMLCh *GMLASBinInputStream::getContentType() const
85 : {
86 0 : return nullptr;
87 : }
88 :
89 : /************************************************************************/
90 : /* GMLASInputSource() */
91 : /************************************************************************/
92 :
93 4070 : GMLASInputSource::GMLASInputSource(const char *pszFilename,
94 : const std::shared_ptr<VSIVirtualHandle> &fp,
95 4070 : MemoryManager *const manager)
96 4070 : : InputSource(manager), m_fp(fp), m_pnCounter(&m_nCounter),
97 4070 : m_osFilename(pszFilename)
98 : {
99 : try
100 : {
101 4070 : XMLCh *pFilename = XMLString::transcode(pszFilename);
102 4070 : setPublicId(pFilename);
103 4070 : setSystemId(pFilename);
104 4070 : XMLString::release(&pFilename);
105 : }
106 0 : catch (const TranscodingException &e)
107 : {
108 0 : CPLError(CE_Failure, CPLE_AppDefined, "TranscodingException: %s",
109 0 : transcode(e.getMessage()).c_str());
110 : }
111 4070 : }
112 :
113 : /************************************************************************/
114 : /* SetClosingCallback() */
115 : /************************************************************************/
116 :
117 1107 : void GMLASInputSource::SetClosingCallback(IGMLASInputSourceClosing *cbk)
118 : {
119 1107 : m_cbk = cbk;
120 1107 : }
121 :
122 : /************************************************************************/
123 : /* ~GMLASInputSource() */
124 : /************************************************************************/
125 :
126 6569 : GMLASInputSource::~GMLASInputSource()
127 : {
128 4070 : if (m_cbk)
129 1107 : m_cbk->notifyClosing(m_osFilename);
130 6569 : }
131 :
132 : /************************************************************************/
133 : /* makeStream() */
134 : /************************************************************************/
135 :
136 3325 : BinInputStream *GMLASInputSource::makeStream() const
137 : {
138 : // This is a lovely cheating around the const qualifier of this method !
139 : // We cannot modify m_nCounter directly, but we can change the value
140 : // pointed by m_pnCounter...
141 3325 : if (*m_pnCounter != 0)
142 : {
143 0 : CPLError(CE_Failure, CPLE_AppDefined,
144 : "makeStream() called several times on same GMLASInputSource");
145 0 : return nullptr;
146 : }
147 3325 : (*m_pnCounter)++;
148 3325 : if (m_fp == nullptr)
149 2 : return nullptr;
150 3323 : return new GMLASBinInputStream(m_fp.get());
151 : }
152 :
153 : /************************************************************************/
154 : /* warning() */
155 : /************************************************************************/
156 :
157 2 : void GMLASErrorHandler::warning(const SAXParseException &e)
158 : {
159 2 : handle(e, CE_Warning);
160 2 : }
161 :
162 : /************************************************************************/
163 : /* error() */
164 : /************************************************************************/
165 :
166 226 : void GMLASErrorHandler::error(const SAXParseException &e)
167 : {
168 226 : m_bFailed = true;
169 226 : handle(e, CE_Failure);
170 226 : }
171 :
172 : /************************************************************************/
173 : /* fatalError() */
174 : /************************************************************************/
175 :
176 4 : void GMLASErrorHandler::fatalError(const SAXParseException &e)
177 : {
178 4 : m_bFailed = true;
179 4 : handle(e, CE_Failure);
180 4 : }
181 :
182 : /************************************************************************/
183 : /* handle() */
184 : /************************************************************************/
185 :
186 232 : void GMLASErrorHandler::handle(const SAXParseException &e, CPLErr eErr)
187 : {
188 232 : const XMLCh *resourceId(e.getPublicId());
189 :
190 232 : if (resourceId == nullptr || resourceId[0] == 0)
191 112 : resourceId = e.getSystemId();
192 :
193 464 : CPLString osErrorMsg(transcode(e.getMessage()));
194 464 : if (m_bSchemaFullChecking &&
195 232 : osErrorMsg.find("forbidden restriction of any particle") !=
196 : std::string::npos)
197 : {
198 0 : osErrorMsg += ". You may retry with the " +
199 0 : CPLString(szSCHEMA_FULL_CHECKING_OPTION) +
200 0 : "=NO open option";
201 : }
202 464 : else if (!m_bHandleMultipleImports &&
203 232 : osErrorMsg.find("not found") != std::string::npos)
204 : {
205 8 : osErrorMsg += ". You may retry with the " +
206 24 : CPLString(szHANDLE_MULTIPLE_IMPORTS_OPTION) +
207 8 : "=YES open option";
208 : }
209 :
210 464 : CPLString osFullErrorMsg;
211 232 : osFullErrorMsg.Printf("%s:%d:%d %s", transcode(resourceId).c_str(),
212 464 : static_cast<int>(e.getLineNumber()),
213 232 : static_cast<int>(e.getColumnNumber()),
214 464 : osErrorMsg.c_str());
215 :
216 445 : if (m_bHideGMLTypeNotFound && m_osGMLTypeNotFoundError.empty() &&
217 213 : osErrorMsg.find(
218 : "http://www.opengis.net/gml/3.2:AbstractCRS' not found") !=
219 : std::string::npos)
220 : {
221 2 : m_osGMLTypeNotFoundError = std::move(osFullErrorMsg);
222 : }
223 230 : else if (m_bHideGMLTypeNotFound && !m_osGMLTypeNotFoundError.empty())
224 : {
225 : // do nothing
226 : }
227 : else
228 : {
229 224 : CPLError(eErr, CPLE_AppDefined, "%s", osFullErrorMsg.c_str());
230 : }
231 232 : }
232 :
233 : /************************************************************************/
234 : /* GMLASBaseEntityResolver() */
235 : /************************************************************************/
236 :
237 240 : GMLASBaseEntityResolver::GMLASBaseEntityResolver(const CPLString &osBasePath,
238 240 : GMLASXSDCache &oCache)
239 240 : : m_oCache(oCache)
240 : {
241 240 : m_aosPathStack.push_back(osBasePath);
242 240 : }
243 :
244 : /************************************************************************/
245 : /* ~GMLASBaseEntityResolver() */
246 : /************************************************************************/
247 :
248 290 : GMLASBaseEntityResolver::~GMLASBaseEntityResolver()
249 : {
250 240 : CPLAssert(m_aosPathStack.size() == 1);
251 290 : }
252 :
253 : /************************************************************************/
254 : /* notifyClosing() */
255 : /************************************************************************/
256 :
257 : /* Called by GMLASInputSource destructor. This is useful for use to */
258 : /* know where a .xsd has been finished from processing. Note that we */
259 : /* strongly depend on Xerces behavior here... */
260 1107 : void GMLASBaseEntityResolver::notifyClosing(const CPLString &osFilename)
261 : {
262 1107 : CPLDebug("GMLAS", "Closing %s", osFilename.c_str());
263 :
264 1107 : CPLAssert(m_aosPathStack.back() ==
265 : CPLString(CPLGetDirnameSafe(osFilename)));
266 1107 : m_aosPathStack.pop_back();
267 1107 : }
268 :
269 : /************************************************************************/
270 : /* SetBasePath() */
271 : /************************************************************************/
272 :
273 281 : void GMLASBaseEntityResolver::SetBasePath(const CPLString &osBasePath)
274 : {
275 281 : CPLAssert(m_aosPathStack.size() == 1);
276 281 : m_aosPathStack[0] = osBasePath;
277 281 : }
278 :
279 : /************************************************************************/
280 : /* DoExtraSchemaProcessing() */
281 : /************************************************************************/
282 :
283 254 : void GMLASBaseEntityResolver::DoExtraSchemaProcessing(
284 : const CPLString & /*osFilename*/,
285 : const std::shared_ptr<VSIVirtualHandle> & /*fp*/)
286 : {
287 254 : }
288 :
289 : /************************************************************************/
290 : /* resolveEntity() */
291 : /************************************************************************/
292 :
293 : InputSource *
294 1107 : GMLASBaseEntityResolver::resolveEntity(const XMLCh *const /*publicId*/,
295 : const XMLCh *const systemId)
296 : {
297 : // Can happen on things like <xs:import
298 : // namespace="http://www.w3.org/XML/1998/namespace"/>
299 1107 : if (systemId == nullptr)
300 0 : return nullptr;
301 :
302 2214 : CPLString osSystemId(transcode(systemId));
303 :
304 1107 : if (osSystemId.find("/gml/2.1.2/") != std::string::npos)
305 0 : m_osGMLVersionFound = "2.1.2";
306 1107 : else if (osSystemId.find("/gml/3.1.1/") != std::string::npos)
307 19 : m_osGMLVersionFound = "3.1.1";
308 1088 : else if (osSystemId.find("/gml/3.2.1/") != std::string::npos)
309 15 : m_osGMLVersionFound = "3.2.1";
310 :
311 1107 : constexpr const char *GML_321_LOC_SUFFIX = "/gml/3.2.1/gml.xsd";
312 1107 : constexpr const char *GML_321_OGC_SCHEMA_LOC =
313 : "http://schemas.opengis.net/gml/3.2.1/gml.xsd";
314 1107 : if (osSystemId.size() > strlen(GML_321_LOC_SUFFIX) &&
315 742 : strcmp(osSystemId.c_str() + osSystemId.size() -
316 : strlen(GML_321_LOC_SUFFIX),
317 1849 : GML_321_LOC_SUFFIX) == 0 &&
318 15 : osSystemId != GML_321_OGC_SCHEMA_LOC)
319 : {
320 2 : m_bFoundNonOfficialGMLSchemaLocation = true;
321 2 : if (m_bSubstituteWithOGCSchemaLocation)
322 0 : osSystemId = GML_321_OGC_SCHEMA_LOC;
323 : }
324 :
325 2214 : CPLString osNewPath;
326 : auto fp = std::shared_ptr<VSIVirtualHandle>(
327 1107 : m_oCache.Open(osSystemId, m_aosPathStack.back(), osNewPath),
328 1107 : VSIVirtualHandleCloser{});
329 :
330 1107 : if (fp != nullptr)
331 : {
332 1105 : m_oSetSchemaURLs.insert(osNewPath);
333 :
334 1105 : CPLDebug("GMLAS", "Opening %s", osNewPath.c_str());
335 1105 : DoExtraSchemaProcessing(osNewPath, fp);
336 : }
337 :
338 1107 : m_aosPathStack.push_back(CPLGetDirnameSafe(osNewPath).c_str());
339 1107 : GMLASInputSource *poIS = new GMLASInputSource(osNewPath, fp);
340 1107 : poIS->SetClosingCallback(this);
341 1107 : return poIS;
342 : }
343 :
344 : /************************************************************************/
345 : /* Dump() */
346 : /************************************************************************/
347 :
348 3517 : void GMLASReader::Context::Dump() const
349 : {
350 3517 : CPLDebug("GMLAS", "Context");
351 3517 : CPLDebug("GMLAS", " m_nLevel = %d", m_nLevel);
352 3517 : CPLDebug("GMLAS", " m_poFeature = %p", m_poFeature);
353 3517 : if (CPLIsDebugEnabled())
354 : {
355 0 : if (m_poFeature)
356 0 : m_poFeature->DumpReadable(stderr);
357 : }
358 3517 : CPLDebug("GMLAS", " m_poLayer = %p (%s)", m_poLayer,
359 3517 : m_poLayer ? m_poLayer->GetName() : "");
360 3517 : CPLDebug("GMLAS", " m_poGroupLayer = %p (%s)", m_poGroupLayer,
361 3517 : m_poGroupLayer ? m_poGroupLayer->GetName() : "");
362 3517 : CPLDebug("GMLAS", " m_nGroupLayerLevel = %d", m_nGroupLayerLevel);
363 3517 : CPLDebug("GMLAS", " m_nLastFieldIdxGroupLayer = %d",
364 3517 : m_nLastFieldIdxGroupLayer);
365 3517 : CPLDebug("GMLAS", " m_osCurSubXPath = %s", m_osCurSubXPath.c_str());
366 3517 : }
367 :
368 : /************************************************************************/
369 : /* GMLASReader() */
370 : /************************************************************************/
371 :
372 1392 : GMLASReader::GMLASReader(GMLASXSDCache &oCache,
373 : const GMLASXPathMatcher &oIgnoredXPathMatcher,
374 1392 : GMLASXLinkResolver &oXLinkResolver)
375 : : m_oCache(oCache), m_oIgnoredXPathMatcher(oIgnoredXPathMatcher),
376 : m_oXLinkResolver(oXLinkResolver),
377 2784 : m_nMaxLevel(atoi(CPLGetConfigOption("GMLAS_XML_MAX_LEVEL", "100"))),
378 1392 : m_nMaxContentSize(static_cast<size_t>(
379 2784 : atoi(CPLGetConfigOption("GMLAS_XML_MAX_CONTENT_SIZE", "512000000")))),
380 : m_bWarnUnexpected(
381 1392 : CPLTestBool(CPLGetConfigOption("GMLAS_WARN_UNEXPECTED", "FALSE")))
382 : {
383 1392 : }
384 :
385 : /************************************************************************/
386 : /* ~GMLASReader() */
387 : /************************************************************************/
388 :
389 2784 : GMLASReader::~GMLASReader()
390 : {
391 1653 : if (m_oCurCtxt.m_poFeature != nullptr && !m_aoStackContext.empty() &&
392 261 : m_oCurCtxt.m_poFeature != m_aoStackContext.back().m_poFeature)
393 : {
394 10 : CPLDebug("GMLAS", "Delete feature m_oCurCtxt.m_poFeature=%p",
395 : m_oCurCtxt.m_poFeature);
396 10 : delete m_oCurCtxt.m_poFeature;
397 : }
398 1854 : for (size_t i = 0; i < m_aoStackContext.size(); i++)
399 : {
400 663 : if (i == 0 || m_aoStackContext[i].m_poFeature !=
401 201 : m_aoStackContext[i - 1].m_poFeature)
402 : {
403 462 : CPLDebug("GMLAS",
404 : "Delete feature m_aoStackContext[%d].m_poFeature=%p",
405 462 : static_cast<int>(i), m_aoStackContext[i].m_poFeature);
406 462 : delete m_aoStackContext[i].m_poFeature;
407 : }
408 : }
409 :
410 1392 : if (!m_apsXMLNodeStack.empty())
411 : {
412 0 : CPLDestroyXMLNode(m_apsXMLNodeStack[0].psNode);
413 : }
414 2784 : }
415 :
416 : /************************************************************************/
417 : /* SetLayerOfInterest() */
418 : /************************************************************************/
419 :
420 1258 : void GMLASReader::SetLayerOfInterest(OGRGMLASLayer *poLayer)
421 : {
422 1258 : m_poLayerOfInterest = poLayer;
423 1258 : }
424 :
425 : /************************************************************************/
426 : /* SetSWEDataArrayLayersRef() */
427 : /************************************************************************/
428 :
429 1267 : void GMLASReader::SetSWEDataArrayLayersRef(
430 : const std::vector<OGRGMLASLayer *> &ar)
431 : {
432 1267 : m_apoSWEDataArrayLayersRef = ar;
433 1267 : m_bProcessSWEDataArray = !ar.empty();
434 1267 : }
435 :
436 : /************************************************************************/
437 : /* LoadXSDInParser() */
438 : /************************************************************************/
439 :
440 284 : bool GMLASReader::LoadXSDInParser(
441 : SAX2XMLReader *poParser, GMLASXSDCache &oCache,
442 : GMLASBaseEntityResolver &oXSDEntityResolver, const CPLString &osBaseDirname,
443 : const CPLString &osXSDFilename, Grammar **ppoGrammar,
444 : bool bSchemaFullChecking, bool bHandleMultipleImports)
445 : {
446 284 : if (ppoGrammar != nullptr)
447 283 : *ppoGrammar = nullptr;
448 :
449 : const CPLString osModifXSDFilename(
450 269 : (osXSDFilename.find("http://") != 0 &&
451 269 : osXSDFilename.find("https://") != 0 &&
452 150 : CPLIsFilenameRelative(osXSDFilename))
453 284 : ? CPLString(
454 386 : CPLFormFilenameSafe(osBaseDirname, osXSDFilename, nullptr))
455 852 : : osXSDFilename);
456 :
457 286 : for (int iPass = 0; iPass <= 1; ++iPass)
458 : {
459 286 : CPLString osResolvedFilename;
460 : auto fpXSD = std::shared_ptr<VSIVirtualHandle>(
461 286 : oCache.Open(osModifXSDFilename, CPLString(), osResolvedFilename),
462 286 : VSIVirtualHandleCloser{});
463 286 : if (fpXSD == nullptr)
464 : {
465 5 : return false;
466 : }
467 :
468 281 : poParser->setFeature(XMLUni::fgXercesSchemaFullChecking,
469 281 : bSchemaFullChecking);
470 281 : poParser->setFeature(XMLUni::fgXercesHandleMultipleImports,
471 281 : bHandleMultipleImports);
472 :
473 : // Install a temporary entity resolved based on the current XSD
474 281 : CPLString osXSDDirname(CPLGetDirnameSafe(osModifXSDFilename));
475 550 : if (osXSDFilename.find("http://") == 0 ||
476 269 : osXSDFilename.find("https://") == 0)
477 : {
478 131 : osXSDDirname = osXSDFilename.substr(0, osXSDFilename.rfind('/'));
479 : }
480 281 : oXSDEntityResolver.SetBasePath(osXSDDirname);
481 281 : oXSDEntityResolver.DoExtraSchemaProcessing(osResolvedFilename, fpXSD);
482 281 : if (iPass == 1)
483 2 : oXSDEntityResolver.SetSubstituteWithOGCSchemaLocation(true);
484 :
485 281 : EntityResolver *poOldEntityResolver = poParser->getEntityResolver();
486 281 : poParser->setEntityResolver(&oXSDEntityResolver);
487 :
488 : // Install a temporary error handler
489 281 : GMLASErrorHandler oErrorHandler;
490 281 : oErrorHandler.SetSchemaFullCheckingEnabled(bSchemaFullChecking);
491 281 : oErrorHandler.SetHandleMultipleImportsEnabled(bHandleMultipleImports);
492 281 : if (iPass == 0)
493 279 : oErrorHandler.SetHideGMLTypeNotFound(true);
494 281 : ErrorHandler *poOldErrorHandler = poParser->getErrorHandler();
495 281 : poParser->setErrorHandler(&oErrorHandler);
496 :
497 281 : GMLASInputSource oSource(osResolvedFilename, fpXSD);
498 281 : const bool bCacheGrammar = true;
499 281 : Grammar *poGrammar = nullptr;
500 281 : std::string osLoadGrammarErrorMsg("loadGrammar failed");
501 :
502 : const int nMaxMem = std::min(
503 281 : 2048, std::max(0, atoi(CPLGetConfigOption(
504 281 : "OGR_GMLAS_XERCES_MAX_MEMORY", "500"))));
505 : const std::string osMsgMaxMem = CPLSPrintf(
506 : "Xerces-C memory allocation exceeds %d MB. "
507 : "This can happen on schemas with a big value for maxOccurs. "
508 : "Define the OGR_GMLAS_XERCES_MAX_MEMORY configuration option to a "
509 : "bigger value (in MB) to increase that limitation, "
510 : "or 0 to remove it completely.",
511 281 : nMaxMem);
512 : const double dfTimeout =
513 281 : CPLAtof(CPLGetConfigOption("OGR_GMLAS_XERCES_MAX_TIME", "2"));
514 : const std::string osMsgTimeout = CPLSPrintf(
515 : "Processing in Xerces exceeded maximum allowed of %.3f s. "
516 : "This can happen on schemas with a big value for maxOccurs. "
517 : "Define the OGR_GMLAS_XERCES_MAX_TIME configuration option to a "
518 : "bigger value (in second) to increase that limitation, "
519 : "or 0 to remove it completely.",
520 281 : dfTimeout);
521 281 : OGRStartXercesLimitsForThisThread(
522 281 : static_cast<size_t>(nMaxMem) * 1024 * 1024, osMsgMaxMem.c_str(),
523 : dfTimeout, osMsgTimeout.c_str());
524 : try
525 : {
526 278 : poGrammar = poParser->loadGrammar(
527 281 : oSource, Grammar::SchemaGrammarType, bCacheGrammar);
528 : }
529 0 : catch (const SAXException &e)
530 : {
531 0 : osLoadGrammarErrorMsg += ": " + transcode(e.getMessage());
532 : }
533 0 : catch (const XMLException &e)
534 : {
535 0 : osLoadGrammarErrorMsg += ": " + transcode(e.getMessage());
536 : }
537 4 : catch (const OutOfMemoryException &e)
538 : {
539 2 : if (strstr(CPLGetLastErrorMsg(), "configuration option") == nullptr)
540 : {
541 0 : osLoadGrammarErrorMsg += ": " + transcode(e.getMessage());
542 : }
543 : }
544 1 : catch (const DOMException &e)
545 : {
546 : // Can happen with a .xsd that has a bad <?xml version="
547 : // declaration.
548 1 : osLoadGrammarErrorMsg += ": " + transcode(e.getMessage());
549 : }
550 281 : OGRStopXercesLimitsForThisThread();
551 :
552 : // Restore previous handlers
553 281 : poParser->setEntityResolver(poOldEntityResolver);
554 281 : poParser->setErrorHandler(poOldErrorHandler);
555 :
556 281 : if (poGrammar == nullptr)
557 : {
558 3 : if (!osLoadGrammarErrorMsg.empty())
559 : {
560 3 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
561 : osLoadGrammarErrorMsg.c_str());
562 : }
563 3 : return false;
564 : }
565 278 : if (oErrorHandler.hasFailed())
566 : {
567 4 : if (iPass == 0 && !oErrorHandler.GetGMLTypeNotFoundError().empty())
568 : {
569 2 : if (oXSDEntityResolver.GetFoundNonOfficialGMLSchemaLocation())
570 : {
571 2 : CPLDebug(
572 : "GMLAS",
573 : "Error '%s' encountered, but non-official GML schema "
574 : "location has been imported. Retry with official one",
575 2 : oErrorHandler.GetGMLTypeNotFoundError().c_str());
576 2 : continue;
577 : }
578 : else
579 : {
580 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
581 0 : oErrorHandler.GetGMLTypeNotFoundError().c_str());
582 : }
583 : }
584 2 : return false;
585 : }
586 :
587 274 : if (ppoGrammar != nullptr)
588 273 : *ppoGrammar = poGrammar;
589 :
590 274 : break;
591 : }
592 :
593 274 : return true;
594 : }
595 :
596 : /************************************************************************/
597 : /* Init() */
598 : /************************************************************************/
599 :
600 1392 : bool GMLASReader::Init(const char *pszFilename,
601 : const std::shared_ptr<VSIVirtualHandle> &fp,
602 : const std::map<CPLString, CPLString> &oMapURIToPrefix,
603 : std::vector<std::unique_ptr<OGRGMLASLayer>> &apoLayers,
604 : bool bValidate,
605 : const std::vector<PairURIFilename> &aoXSDs,
606 : bool bSchemaFullChecking, bool bHandleMultipleImports)
607 : {
608 1392 : m_oMapURIToPrefix = oMapURIToPrefix;
609 1392 : m_apoLayers = &apoLayers;
610 1392 : m_bValidate = bValidate;
611 :
612 1392 : m_poSAXReader.reset(XMLReaderFactory::createXMLReader());
613 :
614 : // Commonly useful configuration.
615 : //
616 1392 : m_poSAXReader->setFeature(XMLUni::fgSAX2CoreNameSpaces, true);
617 1392 : m_poSAXReader->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, true);
618 :
619 1392 : m_poSAXReader->setContentHandler(this);
620 1392 : m_poSAXReader->setLexicalHandler(this);
621 1392 : m_poSAXReader->setDTDHandler(this);
622 1392 : m_poSAXReader->setFeature(XMLUni::fgXercesDisableDefaultEntityResolution,
623 1392 : true);
624 :
625 1392 : m_oErrorHandler.SetSchemaFullCheckingEnabled(bSchemaFullChecking);
626 1392 : m_oErrorHandler.SetHandleMultipleImportsEnabled(bHandleMultipleImports);
627 1392 : m_poSAXReader->setErrorHandler(&m_oErrorHandler);
628 :
629 1392 : m_poSAXReader->setFeature(XMLUni::fgXercesSchemaFullChecking,
630 1392 : bSchemaFullChecking);
631 1392 : m_poSAXReader->setFeature(XMLUni::fgXercesHandleMultipleImports,
632 1392 : bHandleMultipleImports);
633 :
634 1392 : if (bValidate)
635 : {
636 : // Enable validation.
637 50 : m_poSAXReader->setFeature(XMLUni::fgSAX2CoreValidation, true);
638 50 : m_poSAXReader->setFeature(XMLUni::fgXercesSchema, true);
639 :
640 : // We want all errors to be reported
641 : // coverity[unsafe_xml_parse_config]
642 50 : m_poSAXReader->setFeature(XMLUni::fgXercesValidationErrorAsFatal,
643 50 : false);
644 :
645 50 : CPLString osBaseDirname(CPLGetDirnameSafe(pszFilename));
646 :
647 : // In the case the schemas are explicitly passed, we must do special
648 : // processing
649 50 : if (!aoXSDs.empty())
650 : {
651 1 : GMLASBaseEntityResolver oXSDEntityResolver(CPLString(), m_oCache);
652 2 : for (size_t i = 0; i < aoXSDs.size(); i++)
653 : {
654 1 : const CPLString osXSDFilename(aoXSDs[i].second);
655 1 : if (!LoadXSDInParser(
656 : m_poSAXReader.get(), m_oCache, oXSDEntityResolver,
657 : osBaseDirname, osXSDFilename, nullptr,
658 : bSchemaFullChecking, bHandleMultipleImports))
659 : {
660 0 : return false;
661 : }
662 : }
663 :
664 : // Make sure our previously loaded schemas are used
665 1 : m_poSAXReader->setFeature(XMLUni::fgXercesUseCachedGrammarInParse,
666 1 : true);
667 :
668 : // Don't load schemas from any other source (e.g., from XML
669 : // document's xsi:schemaLocation attributes).
670 : //
671 1 : m_poSAXReader->setFeature(XMLUni::fgXercesLoadSchema, false);
672 : }
673 :
674 : // Install entity resolver based on XML file
675 : m_poEntityResolver =
676 50 : std::make_unique<GMLASBaseEntityResolver>(osBaseDirname, m_oCache);
677 50 : m_poSAXReader->setEntityResolver(m_poEntityResolver.get());
678 : }
679 : else
680 : {
681 : // Don't load schemas from any other source (e.g., from XML document's
682 : // xsi:schemaLocation attributes).
683 : //
684 1342 : m_poSAXReader->setFeature(XMLUni::fgXercesLoadSchema, false);
685 1342 : m_poSAXReader->setEntityResolver(this);
686 : }
687 :
688 1392 : m_fp = fp;
689 1392 : m_GMLInputSource = std::make_unique<GMLASInputSource>(pszFilename, m_fp);
690 :
691 : // Establish a map from layer's XPath to layer to speed-up parsing
692 65653 : for (auto &poLayer : *m_apoLayers)
693 : {
694 : const CPLString *posLayerXPath =
695 64261 : &(poLayer->GetFeatureClass().GetXPath());
696 64261 : if (poLayer->GetFeatureClass().IsRepeatedSequence())
697 : {
698 5487 : size_t iPosExtra = posLayerXPath->find(szEXTRA_SUFFIX);
699 5487 : if (iPosExtra != std::string::npos)
700 : {
701 3320 : m_osLayerXPath = *posLayerXPath;
702 3320 : m_osLayerXPath.resize(iPosExtra);
703 3320 : posLayerXPath = &m_osLayerXPath;
704 : }
705 : }
706 :
707 64261 : const bool bIsGroup = poLayer->GetFeatureClass().IsGroup();
708 64261 : if (!bIsGroup)
709 : {
710 62024 : if (m_oMapXPathToLayer.find(*posLayerXPath) ==
711 124048 : m_oMapXPathToLayer.end())
712 60905 : m_oMapXPathToLayer[*posLayerXPath] = poLayer.get();
713 : }
714 : else
715 : {
716 28392 : for (const auto &[xpath, idx] :
717 30629 : poLayer->GetMapFieldXPathToOGRFieldIdx())
718 : {
719 28392 : if (idx != -1 && m_oMapFieldXPathToGroupLayer.find(xpath) ==
720 28392 : m_oMapFieldXPathToGroupLayer.end())
721 14196 : m_oMapFieldXPathToGroupLayer[xpath] = poLayer.get();
722 : }
723 : }
724 :
725 64261 : if (poLayer->GetFeatureClass().IsRepeatedSequence())
726 5487 : m_oMapXPathToLayerRepeadedSequence[*posLayerXPath].push_back(
727 5487 : poLayer.get());
728 : }
729 :
730 1392 : return true;
731 : }
732 :
733 : /************************************************************************/
734 : /* IsArrayType() */
735 : /************************************************************************/
736 :
737 221088 : static bool IsArrayType(OGRFieldType eType)
738 : {
739 206454 : return eType == OFTIntegerList || eType == OFTInteger64List ||
740 427542 : eType == OFTRealList || eType == OFTStringList;
741 : }
742 :
743 : /************************************************************************/
744 : /* SetField() */
745 : /************************************************************************/
746 :
747 119725 : void GMLASReader::SetField(OGRFeature *poFeature, OGRGMLASLayer *poLayer,
748 : int nAttrIdx, const CPLString &osAttrValue)
749 : {
750 119725 : const OGRFieldType eType(poFeature->GetFieldDefnRef(nAttrIdx)->GetType());
751 119725 : if (osAttrValue.empty())
752 : {
753 4599 : if (eType == OFTString &&
754 1238 : !poFeature->GetFieldDefnRef(nAttrIdx)->IsNullable())
755 : {
756 36 : poFeature->SetField(nAttrIdx, "");
757 : }
758 : }
759 116364 : else if (eType == OFTDate || eType == OFTDateTime)
760 : {
761 : OGRField sField;
762 36016 : if (OGRParseXMLDateTime((m_bInitialPass) ? "1970-01-01T00:00:00"
763 17846 : : osAttrValue.c_str(),
764 18170 : &sField))
765 : {
766 18170 : poFeature->SetField(nAttrIdx, &sField);
767 18170 : }
768 : }
769 : // Transform boolean values to something that OGR understands
770 117826 : else if (eType == OFTInteger &&
771 19632 : poFeature->GetFieldDefnRef(nAttrIdx)->GetSubType() == OFSTBoolean)
772 : {
773 3200 : if (osAttrValue == "true")
774 3179 : poFeature->SetField(nAttrIdx, TRUE);
775 : else
776 21 : poFeature->SetField(nAttrIdx, FALSE);
777 : }
778 94994 : else if (eType == OFTBinary)
779 : {
780 : const int nFCFieldIdx =
781 2142 : poLayer->GetFCFieldIndexFromOGRFieldIdx(nAttrIdx);
782 2142 : if (nFCFieldIdx >= 0)
783 : {
784 : const GMLASField &oField(
785 2142 : poLayer->GetFeatureClass().GetFields()[nFCFieldIdx]);
786 2142 : if (m_bInitialPass)
787 : {
788 36 : GByte b = 'X';
789 36 : poFeature->SetField(nAttrIdx, 1, &b);
790 : }
791 2106 : else if (oField.GetType() == GMLAS_FT_BASE64BINARY)
792 : {
793 : GByte *pabyBuffer =
794 1053 : reinterpret_cast<GByte *>(CPLStrdup(osAttrValue));
795 1053 : int nBytes = CPLBase64DecodeInPlace(pabyBuffer);
796 1053 : poFeature->SetField(nAttrIdx, nBytes, pabyBuffer);
797 1053 : CPLFree(pabyBuffer);
798 : }
799 : else
800 : {
801 1053 : int nBytes = 0;
802 1053 : GByte *pabyBuffer = CPLHexToBinary(osAttrValue, &nBytes);
803 1053 : poFeature->SetField(nAttrIdx, nBytes, pabyBuffer);
804 1053 : CPLFree(pabyBuffer);
805 : }
806 : }
807 : }
808 92852 : else if (IsArrayType(eType))
809 : {
810 : const int nFCFieldIdx =
811 6436 : poLayer->GetFCFieldIndexFromOGRFieldIdx(nAttrIdx);
812 12872 : if (nFCFieldIdx >= 0 &&
813 6436 : poLayer->GetFeatureClass().GetFields()[nFCFieldIdx].IsList())
814 : {
815 : char **papszTokens =
816 6420 : CSLTokenizeString2(osAttrValue.c_str(), " ", 0);
817 9600 : if (eType == OFTIntegerList &&
818 3180 : poFeature->GetFieldDefnRef(nAttrIdx)->GetSubType() ==
819 : OFSTBoolean)
820 : {
821 3162 : for (char **papszIter = papszTokens; *papszIter != nullptr;
822 : ++papszIter)
823 : {
824 2102 : if (strcmp(*papszIter, "true") == 0)
825 : {
826 1042 : (*papszIter)[0] = '1';
827 1042 : (*papszIter)[1] = '\0';
828 : }
829 1060 : else if (strcmp(*papszIter, "false") == 0)
830 : {
831 1042 : (*papszIter)[0] = '0';
832 1042 : (*papszIter)[1] = '\0';
833 : }
834 : }
835 : }
836 6420 : poFeature->SetField(nAttrIdx, papszTokens);
837 6420 : CSLDestroy(papszTokens);
838 : }
839 16 : else if (eType == OFTStringList)
840 : {
841 4 : OGRField *psRawField = poFeature->GetRawFieldRef(nAttrIdx);
842 4 : if (OGR_RawField_IsUnset(psRawField))
843 : {
844 2 : poFeature->SetField(nAttrIdx, osAttrValue.c_str());
845 : }
846 : else
847 : {
848 2 : ++psRawField->StringList.nCount;
849 2 : psRawField->StringList.paList = CSLAddString(
850 : psRawField->StringList.paList, osAttrValue.c_str());
851 : }
852 : }
853 12 : else if (eType == OFTIntegerList)
854 : {
855 4 : OGRField *psRawField = poFeature->GetRawFieldRef(nAttrIdx);
856 4 : if (OGR_RawField_IsUnset(psRawField))
857 : {
858 2 : psRawField->IntegerList.nCount = 1;
859 2 : psRawField->IntegerList.paList = static_cast<int *>(
860 2 : CPLMalloc(psRawField->IntegerList.nCount * sizeof(int)));
861 : }
862 : else
863 : {
864 2 : ++psRawField->IntegerList.nCount;
865 2 : psRawField->IntegerList.paList = static_cast<int *>(
866 2 : CPLRealloc(psRawField->IntegerList.paList,
867 2 : psRawField->IntegerList.nCount * sizeof(int)));
868 : }
869 4 : psRawField->IntegerList.paList[psRawField->IntegerList.nCount - 1] =
870 4 : atoi(osAttrValue.c_str());
871 : }
872 8 : else if (eType == OFTInteger64List)
873 : {
874 4 : OGRField *psRawField = poFeature->GetRawFieldRef(nAttrIdx);
875 4 : if (OGR_RawField_IsUnset(psRawField))
876 : {
877 2 : psRawField->Integer64List.nCount = 1;
878 2 : psRawField->Integer64List.paList =
879 2 : static_cast<GIntBig *>(CPLMalloc(
880 2 : psRawField->Integer64List.nCount * sizeof(GIntBig)));
881 : }
882 : else
883 : {
884 2 : ++psRawField->Integer64List.nCount;
885 2 : psRawField->Integer64List.paList =
886 2 : static_cast<GIntBig *>(CPLRealloc(
887 2 : psRawField->Integer64List.paList,
888 2 : psRawField->Integer64List.nCount * sizeof(GIntBig)));
889 : }
890 : psRawField->Integer64List
891 8 : .paList[psRawField->Integer64List.nCount - 1] =
892 4 : CPLAtoGIntBig(osAttrValue.c_str());
893 : }
894 : else
895 : {
896 4 : CPLAssert(eType == OFTRealList);
897 4 : OGRField *psRawField = poFeature->GetRawFieldRef(nAttrIdx);
898 4 : if (OGR_RawField_IsUnset(psRawField))
899 : {
900 2 : psRawField->RealList.nCount = 1;
901 2 : psRawField->RealList.paList = static_cast<double *>(
902 2 : CPLMalloc(psRawField->RealList.nCount * sizeof(double)));
903 : }
904 : else
905 : {
906 2 : ++psRawField->RealList.nCount;
907 2 : psRawField->RealList.paList = static_cast<double *>(
908 2 : CPLRealloc(psRawField->RealList.paList,
909 2 : psRawField->RealList.nCount * sizeof(double)));
910 : }
911 8 : psRawField->RealList.paList[psRawField->RealList.nCount - 1] =
912 4 : CPLAtof(osAttrValue.c_str());
913 : }
914 : }
915 : else
916 : {
917 86416 : poFeature->SetField(nAttrIdx, osAttrValue.c_str());
918 : }
919 119725 : }
920 :
921 : /************************************************************************/
922 : /* PushFeatureReady() */
923 : /************************************************************************/
924 :
925 56567 : void GMLASReader::PushFeatureReady(std::unique_ptr<OGRFeature> &&poFeature,
926 : OGRGMLASLayer *poLayer)
927 : {
928 : #ifdef DEBUG_VERBOSE
929 : CPLDebug("GMLAS", "PushFeatureReady(%p / %s / %s)", poFeature,
930 : poFeature->GetDefnRef()->GetName(), poLayer->GetName());
931 : #endif
932 :
933 : m_aoFeaturesReady.emplace_back(
934 56567 : std::make_pair(std::move(poFeature), poLayer));
935 56567 : }
936 :
937 : /************************************************************************/
938 : /* CreateNewFeature */
939 : /************************************************************************/
940 :
941 52425 : void GMLASReader::CreateNewFeature(const CPLString &osLocalname)
942 : {
943 52425 : m_oCurCtxt.m_poFeature =
944 52425 : new OGRFeature(m_oCurCtxt.m_poLayer->GetLayerDefn());
945 : #ifdef DEBUG_VERBOSE
946 : CPLDebug("GMLAS", "CreateNewFeature(element=%s / layer=%s) = %p",
947 : osLocalname.c_str(), m_oCurCtxt.m_poLayer->GetName(),
948 : m_oCurCtxt.m_poFeature);
949 : #endif
950 : // Assign FID (1, ...). Only for OGR compliance, but definitely
951 : // not a unique ID among datasets with the same schema
952 52425 : ++m_oMapGlobalCounter[m_oCurCtxt.m_poLayer];
953 52425 : const int nGlobalCounter = m_oMapGlobalCounter[m_oCurCtxt.m_poLayer];
954 52425 : m_oCurCtxt.m_poFeature->SetFID(nGlobalCounter);
955 :
956 : // Find parent ID
957 52425 : CPLString osParentId;
958 103585 : if (!m_aoStackContext.empty() &&
959 51160 : m_oCurCtxt.m_poLayer->GetParentIDFieldIdx() >= 0)
960 : {
961 39407 : CPLAssert(m_aoStackContext.back().m_poLayer->GetIDFieldIdx() >= 0);
962 39407 : osParentId = m_aoStackContext.back().m_poFeature->GetFieldAsString(
963 39407 : m_aoStackContext.back().m_poLayer->GetIDFieldIdx());
964 78814 : m_oCurCtxt.m_poFeature->SetField(
965 39407 : m_oCurCtxt.m_poLayer->GetParentIDFieldIdx(), osParentId.c_str());
966 : }
967 :
968 : // Should we generate a unique (child) ID from the parent ID ?
969 52425 : if (m_oCurCtxt.m_poLayer->IsGeneratedIDField())
970 : {
971 : // Local IDs (ie related to a parent feature are fine, but when
972 : // we might have cycles, that doesn't work anymore
973 : /*
974 : ++m_oCurCtxt.m_oMapCounter[m_oCurCtxt.m_poLayer];
975 : const int nCounter =
976 : m_oCurCtxt.m_oMapCounter[m_oCurCtxt.m_poLayer];*/
977 51337 : const int nCounter = nGlobalCounter;
978 :
979 51337 : CPLString osGeneratedID = (osParentId.empty() ? m_osHash : osParentId) +
980 154011 : "_" + osLocalname +
981 102674 : CPLSPrintf("_%d", nCounter);
982 51337 : m_oCurCtxt.m_poFeature->SetField(m_oCurCtxt.m_poLayer->GetIDFieldIdx(),
983 : osGeneratedID.c_str());
984 : }
985 :
986 52425 : m_nCurFieldIdx = -1;
987 52425 : }
988 :
989 : /************************************************************************/
990 : /* AttachAsLastChild() */
991 : /************************************************************************/
992 :
993 : /* Attach element as the last child of its parent */
994 2027 : void GMLASReader::AttachAsLastChild(CPLXMLNode *psNode)
995 : {
996 2027 : NodeLastChild &sNodeLastChild = m_apsXMLNodeStack.back();
997 2027 : CPLXMLNode *psLastChildParent = sNodeLastChild.psLastChild;
998 :
999 2027 : if (psLastChildParent == nullptr)
1000 : {
1001 774 : CPLAssert(sNodeLastChild.psNode);
1002 774 : sNodeLastChild.psNode->psChild = psNode;
1003 : }
1004 : else
1005 : {
1006 1253 : psLastChildParent->psNext = psNode;
1007 : }
1008 2027 : sNodeLastChild.psLastChild = psNode;
1009 2027 : }
1010 :
1011 : /************************************************************************/
1012 : /* BuildXMLBlobStartElement() */
1013 : /************************************************************************/
1014 :
1015 9607 : void GMLASReader::BuildXMLBlobStartElement(const CPLString &osXPath,
1016 : const Attributes &attrs)
1017 : {
1018 9607 : if (FillTextContent())
1019 : {
1020 6357 : m_osTextContent += "<";
1021 6357 : m_osTextContent += osXPath;
1022 : }
1023 :
1024 9607 : CPLXMLNode *psNode = nullptr;
1025 9607 : if (m_nCurGeomFieldIdx >= 0 || m_nSWEDataArrayLevel >= 0 ||
1026 7909 : m_nSWEDataRecordLevel >= 0)
1027 : {
1028 1785 : psNode = CPLCreateXMLNode(nullptr, CXT_Element, osXPath);
1029 1785 : if (!m_apsXMLNodeStack.empty())
1030 : {
1031 1348 : AttachAsLastChild(psNode);
1032 : }
1033 : }
1034 :
1035 9607 : CPLXMLNode *psLastChild = nullptr;
1036 11993 : for (unsigned int i = 0; i < attrs.getLength(); i++)
1037 : {
1038 : const CPLString &osAttrNSPrefix(
1039 : m_osAttrNSPrefix =
1040 2386 : m_oMapURIToPrefix[transcode(attrs.getURI(i), m_osAttrNSUri)]);
1041 : const CPLString &osAttrLocalname(
1042 2386 : transcode(attrs.getLocalName(i), m_osAttrLocalName));
1043 : const CPLString &osAttrValue(
1044 2386 : transcode(attrs.getValue(i), m_osAttrValue));
1045 2386 : CPLString &osAttrXPath(m_osAttrXPath);
1046 2386 : if (!osAttrNSPrefix.empty())
1047 : {
1048 1210 : osAttrXPath.reserve(osAttrNSPrefix.size() + 1 +
1049 605 : osAttrLocalname.size());
1050 605 : osAttrXPath = osAttrNSPrefix;
1051 605 : osAttrXPath += ":";
1052 605 : osAttrXPath += osAttrLocalname;
1053 : }
1054 : else
1055 : {
1056 1781 : osAttrXPath = osAttrLocalname;
1057 : }
1058 :
1059 2386 : if (psNode != nullptr)
1060 : {
1061 : CPLXMLNode *psAttrNode =
1062 879 : CPLCreateXMLNode(nullptr, CXT_Attribute, osAttrXPath);
1063 879 : CPLCreateXMLNode(psAttrNode, CXT_Text, osAttrValue);
1064 :
1065 879 : if (psLastChild == nullptr)
1066 : {
1067 773 : psNode->psChild = psAttrNode;
1068 : }
1069 : else
1070 : {
1071 106 : psLastChild->psNext = psAttrNode;
1072 : }
1073 879 : psLastChild = psAttrNode;
1074 : }
1075 :
1076 2386 : if (FillTextContent())
1077 : {
1078 1054 : m_osTextContent += " ";
1079 1054 : m_osTextContent += osAttrXPath;
1080 1054 : m_osTextContent += "=\"";
1081 1054 : char *pszEscaped = CPLEscapeString(
1082 1054 : osAttrValue.c_str(), static_cast<int>(osAttrValue.size()),
1083 : CPLES_XML);
1084 1054 : m_osTextContent += pszEscaped;
1085 1054 : CPLFree(pszEscaped);
1086 1054 : m_osTextContent += '"';
1087 : }
1088 : }
1089 9607 : if (FillTextContent())
1090 6357 : m_osTextContent += ">";
1091 :
1092 9607 : if (psNode != nullptr)
1093 : {
1094 : /* Push the element on the stack */
1095 1785 : NodeLastChild sNewNodeLastChild;
1096 1785 : sNewNodeLastChild.psNode = psNode;
1097 1785 : sNewNodeLastChild.psLastChild = psLastChild;
1098 1785 : m_apsXMLNodeStack.push_back(sNewNodeLastChild);
1099 : #ifdef DEBUG_VERBOSE
1100 : CPLDebug("GMLAS", "m_apsXMLNodeStack.push_back()");
1101 : #endif
1102 : }
1103 :
1104 9607 : if (m_osTextContent.size() > m_nMaxContentSize)
1105 : {
1106 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
1107 : "Too much data in a single element");
1108 0 : m_bParsingError = true;
1109 : }
1110 9607 : }
1111 :
1112 : /************************************************************************/
1113 : /* GetLayerByXPath() */
1114 : /************************************************************************/
1115 :
1116 16349 : OGRGMLASLayer *GMLASReader::GetLayerByXPath(const CPLString &osXPath)
1117 : {
1118 331871 : for (const auto &poLayer : *m_apoLayers)
1119 : {
1120 331871 : if (poLayer->GetFeatureClass().GetXPath() == osXPath)
1121 : {
1122 16349 : return poLayer.get();
1123 : }
1124 : }
1125 0 : return nullptr;
1126 : }
1127 :
1128 : /************************************************************************/
1129 : /* PushContext() */
1130 : /************************************************************************/
1131 :
1132 60994 : void GMLASReader::PushContext(const Context &oContext)
1133 : {
1134 60994 : m_aoStackContext.push_back(oContext);
1135 : #ifdef DEBUG_VERBOSE
1136 : CPLDebug("GMLAS", "Pushing new context:");
1137 : oContext.Dump();
1138 : #endif
1139 60994 : }
1140 :
1141 : /************************************************************************/
1142 : /* PopContext() */
1143 : /************************************************************************/
1144 :
1145 60532 : void GMLASReader::PopContext()
1146 : {
1147 : #ifdef DEBUG_VERBOSE
1148 : if (!m_aoStackContext.empty())
1149 : {
1150 : CPLDebug("GMLAS", "Popping up context:");
1151 : m_aoStackContext.back().Dump();
1152 : }
1153 : #endif
1154 60532 : m_aoStackContext.pop_back();
1155 : #ifdef DEBUG_VERBOSE
1156 : if (!m_aoStackContext.empty())
1157 : {
1158 : CPLDebug("GMLAS", "New top of stack is:");
1159 : m_aoStackContext.back().Dump();
1160 : }
1161 : #endif
1162 60532 : }
1163 :
1164 : /************************************************************************/
1165 : /* startElement() */
1166 : /************************************************************************/
1167 :
1168 : /* <xs:group ref="somegroup" maxOccurs="unbounded"/> are particularly hard to
1169 : deal with since we cannot easily know when the corresponding subfeature
1170 : is exactly terminated.
1171 :
1172 : Let's consider:
1173 :
1174 : <xs:group name="somegroup">
1175 : <xs:choice>
1176 : <xs:element name="first_elt_of_group" type="xs:string"/>
1177 : <xs:element name="second_elt_of_group" type="xs:string"/>
1178 : </xs:choice>
1179 : </xs:group>
1180 :
1181 : <xs:group name="another_group">
1182 : <xs:choice>
1183 : <xs:element name="first_elt_of_another_group" type="xs:string"/>
1184 : </xs:choice>
1185 : </xs:group>
1186 :
1187 : There are different cases :
1188 : *
1189 : <first_elt_of_group>...</first_elt_of_group>
              <second_elt_of_group>...</second_elt_of_group>
1191 : <first_elt_of_group> <!-- we are here at startElement() -->
1192 : ...
1193 : </first_elt_of_group>
1194 :
1195 : *
1196 : <first_elt_of_group>...</first_elt_of_group>
1197 : <first_elt_of_group> <!-- we are here at startElement() -->
1198 : ...</first_elt_of_group>
1199 :
1200 : *
1201 : <first_elt_of_group>...</first_elt_of_group>
1202 : <first_elt_of_another_group> <!-- we are here at startElement()
1203 : -->
1204 : ...</first_elt_of_another_group>
1205 :
1206 : *
1207 : <first_elt_of_group>...</first_elt_of_group>
1208 : <some_other_elt> <!-- we are here at startElement() -->
1209 : ...</some_other_elt>
1210 :
1211 : *
1212 : <first_elt>...</first_elt>
1213 : <second_elt><sub>...</sub></second_elt>
1214 : <first_elt> <-- here -->
1215 : ...</first_elt>
1216 : *
1217 : <first_elt_of_group>...</first_elt_of_group>
1218 : </end_of_enclosing_element> <!-- we are here at endElement() -->
1219 : */
1220 162844 : void GMLASReader::startElement(const XMLCh *const uri,
1221 : const XMLCh *const localname,
1222 : const XMLCh *const
1223 : #ifdef DEBUG_VERBOSE
1224 : qname
1225 : #endif
1226 : ,
1227 : const Attributes &attrs)
1228 : {
1229 162844 : m_nEntityCounter = 0;
1230 :
1231 162844 : const CPLString &osLocalname(transcode(localname, m_osLocalname));
1232 162844 : const CPLString &osNSURI(transcode(uri, m_osNSUri));
1233 162844 : const CPLString &osNSPrefix(m_osNSPrefix = m_oMapURIToPrefix[osNSURI]);
1234 162844 : if (osNSPrefix.empty())
1235 5653 : m_osXPath = osLocalname;
1236 : else
1237 : {
1238 157191 : m_osXPath.reserve(osNSPrefix.size() + 1 + osLocalname.size());
1239 157191 : m_osXPath = osNSPrefix;
1240 157191 : m_osXPath += ":";
1241 157191 : m_osXPath += osLocalname;
1242 : }
1243 162844 : const CPLString &osXPath(m_osXPath);
1244 : #ifdef DEBUG_VERBOSE
1245 : CPLDebug("GMLAS", "startElement(%s / %s)", transcode(qname).c_str(),
1246 : osXPath.c_str());
1247 : #endif
1248 162844 : m_anStackXPathLength.push_back(osXPath.size());
1249 162844 : if (!m_osCurXPath.empty())
1250 161605 : m_osCurXPath += "/";
1251 162844 : m_osCurXPath += osXPath;
1252 :
1253 : #if 0
1254 : CPLString osSubXPathBefore(m_osCurSubXPath);
1255 : #endif
1256 162844 : if (!m_osCurSubXPath.empty())
1257 : {
1258 161362 : m_osCurSubXPath += "/";
1259 161362 : m_osCurSubXPath += osXPath;
1260 : }
1261 :
1262 162844 : if (m_bProcessSWEDataArray && m_nSWEDataArrayLevel < 0 &&
1263 100 : m_nSWEDataRecordLevel < 0 && m_nCurGeomFieldIdx < 0)
1264 : {
1265 58 : if (osNSURI == szSWE_URI &&
1266 20 : (osLocalname == "DataArray" || osLocalname == "DataStream"))
1267 : {
1268 12 : if (m_nCurFieldIdx >= 0)
1269 : {
1270 : m_osSWEDataArrayParentField =
1271 12 : m_oCurCtxt.m_poFeature->GetFieldDefnRef(m_nCurFieldIdx)
1272 12 : ->GetNameRef();
1273 : }
1274 : else
1275 : {
1276 0 : m_osSWEDataArrayParentField.clear();
1277 : }
1278 12 : m_nSWEDataArrayLevel = m_nLevel;
1279 : }
1280 : }
1281 :
1282 : // Deal with XML content
1283 162844 : if (m_bIsXMLBlob || m_nSWEDataArrayLevel >= 0 || m_nSWEDataRecordLevel >= 0)
1284 : {
1285 7914 : BuildXMLBlobStartElement(osXPath, attrs);
1286 : }
1287 :
1288 162844 : if (m_bIsXMLBlob)
1289 : {
1290 7832 : m_nLevel++;
1291 7832 : return;
1292 : }
1293 :
1294 155012 : if (m_nLevel == m_nMaxLevel)
1295 : {
1296 0 : CPLError(CE_Failure, CPLE_AppDefined, "Too deeply nested XML content");
1297 0 : m_bParsingError = true;
1298 0 : return;
1299 : }
1300 :
1301 155012 : if (m_bInitialPass)
1302 : {
1303 : // Collect the gml:boundedBy/gml:Envelope@srsDimension attribute
1304 6169 : if (m_bInGMLBoundedByLevel1 && m_nLevel == 2 &&
1305 1 : m_osXPath == "gml:Envelope")
1306 : {
1307 2 : for (unsigned int i = 0; i < attrs.getLength(); i++)
1308 : {
1309 : const CPLString &osAttrLocalname(
1310 1 : transcode(attrs.getLocalName(i), m_osAttrLocalName));
1311 1 : if (osAttrLocalname == "srsDimension")
1312 : {
1313 : const CPLString &osAttrValue(
1314 1 : transcode(attrs.getValue(i), m_osAttrValue));
1315 1 : m_nDefaultSrsDimension = atoi(osAttrValue.c_str());
1316 : }
1317 : }
1318 : }
1319 6168 : m_bInGMLBoundedByLevel1 =
1320 6168 : (m_nLevel == 1 && m_osXPath == "gml:boundedBy");
1321 : }
1322 :
1323 155012 : CPLAssert(m_aoFeaturesReady.empty());
1324 :
1325 : // Look which layer might match the current XPath
1326 155012 : OGRGMLASLayer *poLayer = nullptr;
1327 155012 : bool bIsMatchingGroup = false;
1328 :
1329 : {
1330 : const auto oIter = m_oMapXPathToLayer.find(
1331 155012 : m_osCurSubXPath.empty()
1332 : ?
1333 : // Case where we haven't yet entered the top-level element, which
1334 : // may be in container elements
1335 :
1336 : osXPath
1337 : :
1338 :
1339 : // Case where we are a sub-element of a top-level feature
1340 155012 : m_osCurSubXPath);
1341 :
1342 155012 : if (oIter != m_oMapXPathToLayer.end())
1343 : {
1344 29347 : poLayer = oIter->second;
1345 : }
1346 : }
1347 155012 : if (!poLayer)
1348 : {
1349 125665 : const auto oIter = m_oMapFieldXPathToGroupLayer.find(m_osCurSubXPath);
1350 : // Case where we are a sub-element of a (repeated) group of a
1351 : // top-level feature
1352 125665 : if (oIter != m_oMapFieldXPathToGroupLayer.end())
1353 : {
1354 19416 : poLayer = oIter->second;
1355 19416 : bIsMatchingGroup = true;
1356 : }
1357 : }
1358 155012 : if (!poLayer && m_oCurCtxt.m_poLayer != nullptr)
1359 : {
1360 : const auto oIter = m_oMapXPathToLayerRepeadedSequence.find(
1361 106032 : m_oCurCtxt.m_poLayer->GetFeatureClass().GetXPath());
1362 106032 : if (oIter != m_oMapXPathToLayerRepeadedSequence.end())
1363 : {
1364 : // Needed to handle sequence_1_unbounded_non_simplifiable.subelement
1365 : // case of data/gmlas_test1.xml
1366 228242 : for (auto *poLayerIter : oIter->second)
1367 : {
1368 : const bool bIsMatchingRepeatedSequence =
1369 300148 : m_oCurCtxt.m_poLayer != poLayerIter &&
1370 148021 : poLayerIter->GetOGRFieldIndexFromXPath(m_osCurSubXPath) >=
1371 152127 : 0;
1372 152127 : if (bIsMatchingRepeatedSequence)
1373 : {
1374 3954 : poLayer = poLayerIter;
1375 3954 : break;
1376 : }
1377 : }
1378 : }
1379 : }
1380 257090 : if (!poLayer && !m_osCurSubXPath.empty() &&
1381 102078 : m_oCurCtxt.m_poGroupLayer != nullptr)
1382 : {
1383 5286 : for (auto &poLayerIter : *m_apoLayers)
1384 : {
1385 : const int nTmpIdx =
1386 5286 : poLayerIter->GetOGRFieldIndexFromXPath(m_osCurSubXPath);
1387 5286 : if (nTmpIdx >= 0 || nTmpIdx == IDX_COMPOUND_FOLDED)
1388 : {
1389 : // Case where we go back from a sub-element of a (repeated) group
1390 : // of a top-level feature to a regular sub-element of that top-level
1391 : // feature
1392 1762 : poLayer = poLayerIter.get();
1393 1762 : break;
1394 : }
1395 : }
1396 : }
1397 :
1398 155012 : if (poLayer)
1399 : {
1400 : #ifdef DEBUG_VERBOSE
1401 : CPLDebug("GMLAS", "Matches layer %s (%s)", poLayer->GetName(),
1402 : poLayer->GetFeatureClass().GetXPath().c_str());
1403 : #endif
1404 :
1405 105931 : if (poLayer->GetParent() != nullptr &&
1406 105931 : poLayer->GetParent()->GetFeatureClass().IsRepeatedSequence() &&
1407 3517 : m_oCurCtxt.m_poGroupLayer != poLayer->GetParent())
1408 : {
1409 : // Yuck! Simulate top-level element of a group if we directly
1410 : // jump into a nested class of it !
1411 : /* Something like
1412 : <xs:group name="group">
1413 : <xs:sequence>
1414 : <xs:element name="optional_elt" type="xs:string"
1415 : minOccurs="0"/> <xs:element name="elt"> <xs:complexType>
1416 : <xs:sequence>
1417 : <xs:element name="subelt"
1418 : type="xs:dateTime" maxOccurs="unbounded"/>
1419 : </xs:sequence>
1420 : </xs:complexType>
1421 : </xs:element>
1422 : </xs:sequence>
1423 : </xs:group>
1424 :
1425 : <top_element>
1426 : <elt><subelt>...</subelt></elt>
1427 : </top_element>
1428 : */
1429 1 : m_oCurCtxt.m_poLayer = poLayer->GetParent();
1430 1 : m_oCurCtxt.m_poGroupLayer = m_oCurCtxt.m_poLayer;
1431 1 : m_oCurCtxt.m_nLevel = m_nLevel;
1432 1 : m_oCurCtxt.m_nLastFieldIdxGroupLayer = -1;
1433 1 : CreateNewFeature(m_oCurCtxt.m_poLayer->GetName());
1434 : }
1435 :
1436 54479 : bool bPushNewState = true;
1437 54479 : if (bIsMatchingGroup)
1438 : {
1439 19416 : int nFieldIdx = poLayer->GetOGRFieldIndexFromXPath(m_osCurSubXPath);
1440 19416 : bool bPushNewFeature = false;
1441 19416 : if (m_oCurCtxt.m_poGroupLayer == nullptr)
1442 : {
1443 1791 : m_oCurCtxt.m_poFeature = nullptr;
1444 : }
1445 17625 : else if (nFieldIdx < 0)
1446 : {
1447 6993 : bPushNewState = false;
1448 : }
1449 10632 : else if (m_oCurCtxt.m_nGroupLayerLevel == m_nLevel &&
1450 7107 : m_oCurCtxt.m_poGroupLayer != poLayer)
1451 : {
1452 : #ifdef DEBUG_VERBOSE
1453 : CPLDebug("GMLAS", "new feature: group case 1");
1454 : #endif
1455 : /* Case like:
1456 : <first_elt_of_group>...</first_elt_of_group>
1457 : <first_elt_of_another_group> <!-- we are here at
1458 : startElement() -->
1459 : ...</first_elt_of_group>
1460 : */
1461 1 : bPushNewFeature = true;
1462 : }
1463 28368 : else if (m_oCurCtxt.m_nGroupLayerLevel == m_nLevel &&
1464 7106 : m_oCurCtxt.m_poGroupLayer == poLayer &&
1465 19510 : nFieldIdx == m_oCurCtxt.m_nLastFieldIdxGroupLayer &&
1466 1773 : !IsArrayType(
1467 1773 : m_oCurCtxt.m_poFeature->GetFieldDefnRef(nFieldIdx)
1468 : ->GetType()))
1469 : {
1470 : #ifdef DEBUG_VERBOSE
1471 : CPLDebug("GMLAS", "new feature: group case 2");
1472 : #endif
1473 : /* Case like:
1474 : <first_elt>...</first_elt>
1475 : <first_elt> <-- here -->
1476 : */
1477 873 : bPushNewFeature = true;
1478 : }
1479 9758 : else if (m_oCurCtxt.m_nGroupLayerLevel == m_nLevel &&
1480 6233 : nFieldIdx < m_oCurCtxt.m_nLastFieldIdxGroupLayer)
1481 : {
1482 : #ifdef DEBUG_VERBOSE
1483 : CPLDebug("GMLAS", "new feature: group case nFieldIdx < "
1484 : "m_oCurCtxt.m_nLastFieldIdxGroupLayer");
1485 : #endif
1486 : /* Case like:
1487 : <first_elt_of_group>...</first_elt_of_group>
1488 : <second_elt_of_group>...</first_elt_of_group>
1489 : <first_elt_of_group> <!-- we are here at
1490 : startElement() -->
1491 : ...
1492 : </first_elt_of_group>
1493 : */
1494 2652 : bPushNewFeature = true;
1495 : }
1496 7106 : else if (m_oCurCtxt.m_nGroupLayerLevel == m_nLevel + 1 &&
1497 0 : m_oCurCtxt.m_poGroupLayer == poLayer)
1498 : {
1499 : #ifdef DEBUG_VERBOSE
1500 : CPLDebug("GMLAS", "new feature: group case 3");
1501 : #endif
1502 : /* Case like:
1503 : <first_elt>...</first_elt>
1504 : <second_elt><sub>...</sub></second_elt>
1505 : <first_elt> <-- here -->
1506 : ...</first_elt>
1507 : */
1508 0 : bPushNewFeature = true;
1509 : }
1510 19416 : if (bPushNewFeature)
1511 : {
1512 3526 : CPLAssert(m_oCurCtxt.m_poFeature);
1513 3526 : CPLAssert(m_oCurCtxt.m_poGroupLayer);
1514 : // CPLDebug("GMLAS", "Feature ready");
1515 3526 : PushFeatureReady(
1516 7052 : std::unique_ptr<OGRFeature>(m_oCurCtxt.m_poFeature),
1517 : m_oCurCtxt.m_poGroupLayer);
1518 3526 : m_oCurCtxt.m_poFeature = nullptr;
1519 3526 : m_nCurFieldIdx = -1;
1520 : }
1521 19416 : m_oCurCtxt.m_poLayer = poLayer;
1522 19416 : m_oCurCtxt.m_poGroupLayer = poLayer;
1523 19416 : m_oCurCtxt.m_nGroupLayerLevel = m_nLevel;
1524 19416 : if (nFieldIdx >= 0)
1525 12423 : m_oCurCtxt.m_nLastFieldIdxGroupLayer = nFieldIdx;
1526 : }
1527 : else
1528 : {
1529 36826 : if (m_oCurCtxt.m_nGroupLayerLevel == m_nLevel &&
1530 1763 : poLayer == m_aoStackContext.back().m_poLayer)
1531 : {
1532 : // This is the case where we switch from an element that was
1533 : // in a group to a regular element of the same level
1534 :
1535 : // Push group feature as ready
1536 1762 : CPLAssert(m_oCurCtxt.m_poFeature);
1537 :
1538 : // CPLDebug("GMLAS", "Feature ready");
1539 1762 : PushFeatureReady(
1540 3524 : std::unique_ptr<OGRFeature>(m_oCurCtxt.m_poFeature),
1541 : m_oCurCtxt.m_poGroupLayer);
1542 :
1543 : // Restore "top-level" context
1544 1762 : CPLAssert(!m_aoStackContext.empty());
1545 1762 : m_oCurCtxt = m_aoStackContext.back();
1546 1762 : bPushNewState = false;
1547 : }
1548 : else
1549 : {
1550 33301 : if (m_oCurCtxt.m_poGroupLayer)
1551 : {
1552 7034 : Context oContext;
1553 3517 : oContext = m_oCurCtxt;
1554 3517 : oContext.m_nLevel = -1;
1555 3517 : oContext.Dump();
1556 3517 : PushContext(oContext);
1557 : }
1558 :
1559 33301 : m_oCurCtxt.m_poFeature = nullptr;
1560 33301 : m_oCurCtxt.m_poGroupLayer = nullptr;
1561 33301 : m_oCurCtxt.m_nGroupLayerLevel = -1;
1562 33301 : m_oCurCtxt.m_nLastFieldIdxGroupLayer = -1;
1563 33301 : m_oCurCtxt.m_poLayer = poLayer;
1564 33301 : if (m_aoStackContext.empty())
1565 1265 : m_osCurSubXPath = osXPath;
1566 : }
1567 : }
1568 :
1569 54479 : if (m_oCurCtxt.m_poFeature == nullptr)
1570 : {
1571 38618 : CPLAssert(bPushNewState);
1572 38618 : CreateNewFeature(osLocalname);
1573 : }
1574 :
1575 54479 : if (bPushNewState)
1576 : {
1577 91448 : Context oContext;
1578 45724 : oContext = m_oCurCtxt;
1579 45724 : oContext.m_nLevel = m_nLevel;
1580 45724 : PushContext(oContext);
1581 45724 : m_oCurCtxt.m_oMapCounter.clear();
1582 : }
1583 : }
1584 :
1585 155012 : if (m_oCurCtxt.m_poLayer)
1586 : {
1587 : #ifdef DEBUG_VERBOSE
1588 : CPLDebug("GMLAS", "Current layer: %s", m_oCurCtxt.m_poLayer->GetName());
1589 : #endif
1590 :
1591 154795 : bool bHasProcessedAttributes = false;
1592 :
1593 : // Find if we can match this element with one of our fields
1594 : int idx =
1595 154795 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(m_osCurSubXPath);
1596 309590 : int geom_idx = m_oCurCtxt.m_poLayer->GetOGRGeomFieldIndexFromXPath(
1597 154795 : m_osCurSubXPath);
1598 :
1599 154795 : if (idx < 0 && idx != IDX_COMPOUND_FOLDED)
1600 : {
1601 : /* Special case for a layer that matches everything, as found */
1602 : /* in swe:extension */
1603 20499 : idx = m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
1604 40998 : m_oCurCtxt.m_poLayer->GetFeatureClass().GetXPath() +
1605 : szMATCH_ALL);
1606 22177 : if (idx >= 0 &&
1607 1678 : m_oCurCtxt.m_poLayer->GetFeatureClass().GetFields().size() > 1)
1608 : {
1609 : // But only match this wildcard field if it is the only child
1610 : // of the feature class, otherwise that is going to prevent
1611 : // matching regular fields
1612 : // Practical case the <any processContents="lax" minOccurs="0"
1613 : // maxOccurs="unbounded"> declaratin of
1614 : // http://schemas.earthresourceml.org/earthresourceml-lite/1.0/erml-lite.xsd
1615 : // http://services.ga.gov.au/earthresource/ows?service=wfs&version=2.0.0&request=GetFeature&typenames=erl:CommodityResourceView&count=10
1616 : // FIXME: currently we will thus ignore those extra content
1617 : // See ogr_gmlas_any_field_at_end_of_declaration test case
1618 6 : idx = -1;
1619 : }
1620 : }
1621 154795 : if (idx < 0 && geom_idx < 0 && geom_idx != IDX_COMPOUND_FOLDED)
1622 : {
1623 : /* Special case for a layer that is a made of only a geometry */
1624 34152 : geom_idx = m_oCurCtxt.m_poLayer->GetOGRGeomFieldIndexFromXPath(
1625 68304 : m_oCurCtxt.m_poLayer->GetFeatureClass().GetXPath() +
1626 : szMATCH_ALL);
1627 : }
1628 :
1629 154795 : if (idx >= 0 || geom_idx >= 0)
1630 : {
1631 : // Sanity check. Shouldn't normally happen !
1632 241290 : if (m_oCurCtxt.m_poFeature == nullptr ||
1633 120645 : m_oCurCtxt.m_poLayer->GetLayerDefn() !=
1634 120645 : m_oCurCtxt.m_poFeature->GetDefnRef())
1635 : {
1636 0 : CPLError(CE_Failure, CPLE_AppDefined,
1637 : "Inconsistent m_poLayer / m_poFeature state");
1638 0 : m_bParsingError = true;
1639 0 : return;
1640 : }
1641 :
1642 120645 : bool bPushNewFeature = false;
1643 : const int nFCFieldIdx =
1644 : (idx >= 0)
1645 120645 : ? m_oCurCtxt.m_poLayer->GetFCFieldIndexFromOGRFieldIdx(idx)
1646 368 : : m_oCurCtxt.m_poLayer->GetFCFieldIndexFromOGRGeomFieldIdx(
1647 120645 : geom_idx);
1648 :
1649 : /* For cases like
1650 : <xs:element name="element_compound">
1651 : <xs:complexType>
1652 : <xs:sequence maxOccurs="unbounded">
1653 : <xs:element name="subelement1"
1654 : type="xs:string"/> <xs:element name="subelement2"
1655 : type="xs:string"/>
1656 : </xs:sequence>
1657 : </xs:complexType>
1658 : </xs:element>
1659 :
1660 : <element_compound>
1661 : <subelement1>a</subelement>
1662 : <subelement2>b</subelement>
1663 : <subelement1>c</subelement>
1664 : <subelement2>d</subelement>
1665 : </element_compound>
1666 : */
1667 :
1668 120645 : if (idx >= 0 && idx < m_nCurFieldIdx)
1669 : {
1670 : #ifdef DEBUG_VERBOSE
1671 : CPLDebug("GMLAS", "new feature: idx < m_nCurFieldIdx");
1672 : #endif
1673 2 : bPushNewFeature = true;
1674 : }
1675 :
1676 : /* For cases like
1677 : <xs:element name="element_compound">
1678 : <xs:complexType>
1679 : <xs:sequence maxOccurs="unbounded">
1680 : <xs:element name="subelement"
1681 : type="xs:dateTime"/>
1682 : </xs:sequence>
1683 : </xs:complexType>
1684 : </xs:element>
1685 :
1686 : <element_compound>
1687 : <subelement>2012-01-01T12:34:56Z</subelement>
1688 : <subelement>2012-01-02T12:34:56Z</subelement>
1689 : </element_compound>
1690 : */
1691 120275 : else if (idx >= 0 && idx == m_nCurFieldIdx &&
1692 13385 : !IsArrayType(
1693 13385 : m_oCurCtxt.m_poFeature->GetFieldDefnRef(m_nCurFieldIdx)
1694 240918 : ->GetType()) &&
1695 : // Make sure this isn't a repeated geometry as well
1696 0 : !(geom_idx >= 0 && nFCFieldIdx >= 0 &&
1697 0 : m_oCurCtxt.m_poLayer->GetFeatureClass()
1698 0 : .GetFields()[nFCFieldIdx]
1699 0 : .GetMaxOccurs() > 1))
1700 : {
1701 2055 : bPushNewFeature = true;
1702 : }
1703 :
1704 : // Make sure we are in a repeated sequence, otherwise this is
1705 : // invalid XML
1706 122702 : if (bPushNewFeature &&
1707 120649 : !m_oCurCtxt.m_poLayer->GetFeatureClass().IsRepeatedSequence() &&
1708 : // Case of element within xs:choice
1709 4 : !(idx >= 0 && nFCFieldIdx >= 0 &&
1710 4 : m_oCurCtxt.m_poLayer->GetFeatureClass()
1711 4 : .GetFields()[nFCFieldIdx]
1712 4 : .MayAppearOutOfOrder()))
1713 : {
1714 4 : bPushNewFeature = false;
1715 4 : CPLError(CE_Warning, CPLE_AppDefined, "Unexpected element %s",
1716 : m_osCurSubXPath.c_str());
1717 : }
1718 :
1719 120645 : if (bPushNewFeature)
1720 : {
1721 : // CPLDebug("GMLAS", "Feature ready");
1722 2053 : PushFeatureReady(
1723 4106 : std::unique_ptr<OGRFeature>(m_oCurCtxt.m_poFeature),
1724 : m_oCurCtxt.m_poLayer);
1725 4106 : Context oContext = m_aoStackContext.back();
1726 2053 : m_aoStackContext.pop_back();
1727 2053 : CreateNewFeature(osLocalname);
1728 2053 : oContext.m_poFeature = m_oCurCtxt.m_poFeature;
1729 2053 : m_aoStackContext.push_back(std::move(oContext));
1730 2053 : m_oCurCtxt.m_oMapCounter.clear();
1731 : }
1732 :
1733 120645 : if (m_nCurFieldIdx != idx)
1734 : {
1735 108945 : m_osTextContentList.Clear();
1736 108945 : m_nTextContentListEstimatedSize = 0;
1737 : }
1738 120645 : m_nCurFieldIdx = idx;
1739 120645 : m_nCurGeomFieldIdx = geom_idx;
1740 120645 : m_nCurFieldLevel = m_nLevel + 1;
1741 120645 : m_osTextContent.clear();
1742 120645 : m_bIsXMLBlob = false;
1743 120645 : m_bIsXMLBlobIncludeUpper = false;
1744 :
1745 : #ifdef DEBUG_VERBOSE
1746 : if (idx >= 0)
1747 : {
1748 : CPLDebug("GMLAS", "Matches field %s",
1749 : m_oCurCtxt.m_poLayer->GetLayerDefn()
1750 : ->GetFieldDefn(idx)
1751 : ->GetNameRef());
1752 : }
1753 : if (geom_idx >= 0)
1754 : {
1755 : CPLDebug("GMLAS", "Matches geometry field %s",
1756 : m_oCurCtxt.m_poLayer->GetLayerDefn()
1757 : ->GetGeomFieldDefn(geom_idx)
1758 : ->GetNameRef());
1759 : }
1760 : #endif
1761 120645 : if (nFCFieldIdx >= 0)
1762 : {
1763 120645 : const GMLASField &oField(m_oCurCtxt.m_poLayer->GetFeatureClass()
1764 120645 : .GetFields()[nFCFieldIdx]);
1765 120645 : if (m_nSWEDataArrayLevel < 0 && m_nSWEDataRecordLevel < 0)
1766 : {
1767 235840 : m_bIsXMLBlob = (oField.GetType() == GMLAS_FT_ANYTYPE ||
1768 115248 : m_nCurGeomFieldIdx != -1);
1769 : }
1770 120645 : m_bIsXMLBlobIncludeUpper =
1771 120645 : m_bIsXMLBlob && oField.GetIncludeThisEltInBlob();
1772 120645 : if (m_bIsXMLBlobIncludeUpper)
1773 : {
1774 1688 : BuildXMLBlobStartElement(osXPath, attrs);
1775 1688 : m_nLevel++;
1776 1688 : return;
1777 : }
1778 :
1779 : // Figure out if it is an element that calls for a related
1780 : // top-level feature (but without junction table)
1781 118957 : if (oField.GetCategory() ==
1782 : GMLASField::PATH_TO_CHILD_ELEMENT_WITH_LINK)
1783 : {
1784 : const CPLString &osNestedXPath(
1785 7157 : oField.GetRelatedClassXPath());
1786 7157 : CPLAssert(!osNestedXPath.empty());
1787 7157 : OGRGMLASLayer *poSubLayer = GetLayerByXPath(osNestedXPath);
1788 7157 : if (poSubLayer && m_nCurFieldIdx >= 0)
1789 : {
1790 7157 : int nOldCurFieldIdx = m_nCurFieldIdx;
1791 7157 : OGRFeature *poOldCurFeature = m_oCurCtxt.m_poFeature;
1792 7157 : OGRGMLASLayer *poOldLayer = m_oCurCtxt.m_poLayer;
1793 7157 : m_oCurCtxt.m_poLayer = poSubLayer;
1794 7157 : CreateNewFeature(osLocalname);
1795 :
1796 7157 : m_oCurCtxt.m_poGroupLayer = nullptr;
1797 7157 : m_oCurCtxt.m_nGroupLayerLevel = -1;
1798 7157 : m_oCurCtxt.m_nLastFieldIdxGroupLayer = -1;
1799 :
1800 : // Install new context
1801 14314 : Context oContext;
1802 7157 : oContext = m_oCurCtxt;
1803 7157 : oContext.m_nLevel = m_nLevel;
1804 7157 : oContext.m_osCurSubXPath = m_osCurSubXPath;
1805 7157 : m_osCurSubXPath = osNestedXPath;
1806 : #ifdef DEBUG_VERBOSE
1807 : CPLDebug("GMLAS",
1808 : "Installing new m_osCurSubXPath from %s to %s",
1809 : oContext.m_osCurSubXPath.c_str(),
1810 : m_osCurSubXPath.c_str());
1811 : #endif
1812 7157 : PushContext(oContext);
1813 7157 : m_oCurCtxt.m_oMapCounter.clear();
1814 :
1815 : // Process attributes now because we might need to
1816 : // fetch the child id from them
1817 7157 : ProcessAttributes(attrs);
1818 7157 : bHasProcessedAttributes = true;
1819 :
1820 : CPLString osChildId(
1821 7157 : m_oCurCtxt.m_poFeature->GetFieldAsString(
1822 14314 : m_oCurCtxt.m_poLayer->GetIDFieldIdx()));
1823 7157 : SetField(poOldCurFeature, poOldLayer, nOldCurFieldIdx,
1824 : osChildId);
1825 :
1826 29 : if (m_bProcessSWEDataRecord && !m_bIsXMLBlob &&
1827 29 : m_nSWEDataArrayLevel < 0 &&
1828 7191 : m_nSWEDataRecordLevel < 0 &&
1829 5 : osNestedXPath == "swe:DataRecord")
1830 : {
1831 5 : m_nSWEDataRecordLevel = m_nLevel;
1832 5 : BuildXMLBlobStartElement(osXPath, attrs);
1833 : }
1834 : }
1835 : }
1836 118957 : }
1837 : }
1838 :
1839 : #if 0
1840 : // Case where we have an abstract type and don't know its realizations
1841 : else if ( idx != IDX_COMPOUND_FOLDED &&
1842 : (idx = m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
1843 : osSubXPathBefore + "/" + "*")) >= 0 &&
1844 : m_oCurCtxt.m_poGroupLayer == NULL )
1845 : {
1846 : m_nCurFieldIdx = idx;
1847 : m_nCurFieldLevel = m_nLevel + 1;
1848 : m_osTextContent.clear();
1849 : m_bIsXMLBlob = true;
1850 : m_bIsXMLBlobIncludeUpper = true;
1851 : BuildXMLBlobStartElement(osNSPrefix, osLocalname, attrs);
1852 : m_nLevel ++;
1853 : return;
1854 : }
1855 : #endif
1856 :
1857 34150 : else if (m_nLevel > m_aoStackContext.back().m_nLevel)
1858 : {
1859 : // Figure out if it is an element that calls from a related
1860 : // top-level feature with a junction table
1861 : const std::vector<GMLASField> &aoFields =
1862 20610 : m_oCurCtxt.m_poLayer->GetFeatureClass().GetFields();
1863 1496660 : for (size_t i = 0; i < aoFields.size(); ++i)
1864 : {
1865 1480650 : if (aoFields[i].GetCategory() ==
1866 1531730 : GMLASField::PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE &&
1867 51085 : aoFields[i].GetXPath() == m_osCurSubXPath)
1868 : {
1869 : const CPLString &osAbstractElementXPath(
1870 4596 : aoFields[i].GetAbstractElementXPath());
1871 : const CPLString &osNestedXPath(
1872 4596 : aoFields[i].GetRelatedClassXPath());
1873 4596 : CPLAssert(!osAbstractElementXPath.empty());
1874 4596 : CPLAssert(!osNestedXPath.empty());
1875 :
1876 4596 : OGRGMLASLayer *poJunctionLayer = GetLayerByXPath(
1877 9192 : GMLASSchemaAnalyzer::BuildJunctionTableXPath(
1878 : osAbstractElementXPath, osNestedXPath));
1879 4596 : OGRGMLASLayer *poSubLayer = GetLayerByXPath(osNestedXPath);
1880 :
1881 4596 : if (poSubLayer && poJunctionLayer)
1882 : {
1883 : CPLString osParentId(
1884 4596 : m_oCurCtxt.m_poFeature->GetFieldAsString(
1885 9192 : m_oCurCtxt.m_poLayer->GetIDFieldIdx()));
1886 :
1887 : // Create child feature
1888 4596 : m_oCurCtxt.m_poLayer = poSubLayer;
1889 4596 : CreateNewFeature(osLocalname);
1890 :
1891 4596 : ++m_oMapGlobalCounter[poJunctionLayer];
1892 : const int nGlobalCounter =
1893 4596 : m_oMapGlobalCounter[poJunctionLayer];
1894 :
1895 4596 : ++m_oCurCtxt.m_oMapCounter[poJunctionLayer];
1896 : const int nCounter =
1897 4596 : m_oCurCtxt.m_oMapCounter[poJunctionLayer];
1898 :
1899 4596 : m_oCurCtxt.m_poGroupLayer = nullptr;
1900 4596 : m_oCurCtxt.m_nGroupLayerLevel = -1;
1901 4596 : m_oCurCtxt.m_nLastFieldIdxGroupLayer = -1;
1902 :
1903 : // Install new context
1904 9192 : Context oContext;
1905 4596 : oContext = m_oCurCtxt;
1906 4596 : oContext.m_nLevel = m_nLevel;
1907 4596 : oContext.m_osCurSubXPath = m_osCurSubXPath;
1908 4596 : m_osCurSubXPath = osNestedXPath;
1909 : #ifdef DEBUG_VERBOSE
1910 : CPLDebug("GMLAS",
1911 : "Installing new m_osCurSubXPath from %s to %s",
1912 : oContext.m_osCurSubXPath.c_str(),
1913 : m_osCurSubXPath.c_str());
1914 : #endif
1915 4596 : PushContext(oContext);
1916 4596 : m_oCurCtxt.m_oMapCounter.clear();
1917 :
1918 : // Process attributes now because we might need to
1919 : // fetch the child id from them
1920 4596 : ProcessAttributes(attrs);
1921 4596 : bHasProcessedAttributes = true;
1922 :
1923 : CPLString osChildId(
1924 4596 : m_oCurCtxt.m_poFeature->GetFieldAsString(
1925 9192 : m_oCurCtxt.m_poLayer->GetIDFieldIdx()));
1926 :
1927 : // Create junction feature
1928 : auto poJunctionFeature = std::make_unique<OGRFeature>(
1929 9192 : poJunctionLayer->GetLayerDefn());
1930 4596 : poJunctionFeature->SetFID(nGlobalCounter);
1931 4596 : poJunctionFeature->SetField(szOCCURRENCE, nCounter);
1932 4596 : poJunctionFeature->SetField(szPARENT_PKID, osParentId);
1933 4596 : poJunctionFeature->SetField(szCHILD_PKID, osChildId);
1934 4596 : PushFeatureReady(std::move(poJunctionFeature),
1935 : poJunctionLayer);
1936 : }
1937 4596 : idx = IDX_COMPOUND_FOLDED;
1938 :
1939 4596 : break;
1940 : }
1941 : }
1942 :
1943 20610 : m_nCurFieldIdx = -1;
1944 20610 : m_nCurGeomFieldIdx = -1;
1945 20933 : if (idx != IDX_COMPOUND_FOLDED && m_nLevelSilentIgnoredXPath < 0 &&
1946 :
1947 : // Detect if we are in a situation where elements like
1948 : // <foo xsi:nil="true"/> have no corresponding OGR field
1949 : // because of the use of remove_unused_fields=true
1950 323 : !(m_oCurCtxt.m_poLayer->GetFCFieldIndexFromXPath(
1951 323 : m_osCurSubXPath) >= 0 &&
1952 78 : attrs.getLength() == 1 &&
1953 20610 : m_oMapURIToPrefix[transcode(attrs.getURI(0))] ==
1954 : szXSI_PREFIX &&
1955 20610 : transcode(attrs.getLocalName(0)) == szNIL))
1956 : {
1957 646 : CPLString osMatchedXPath;
1958 323 : if (m_oIgnoredXPathMatcher.MatchesRefXPath(m_osCurSubXPath,
1959 : osMatchedXPath))
1960 : {
1961 0 : if (m_oMapIgnoredXPathToWarn[osMatchedXPath])
1962 : {
1963 0 : CPLError(CE_Warning, CPLE_AppDefined,
1964 : "Element with xpath=%s found in document but "
1965 : "ignored according to configuration",
1966 : m_osCurSubXPath.c_str());
1967 : }
1968 : else
1969 : {
1970 0 : CPLDebug("GMLAS",
1971 : "Element with xpath=%s found in document but "
1972 : "ignored according to configuration",
1973 : m_osCurSubXPath.c_str());
1974 : }
1975 0 : m_nLevelSilentIgnoredXPath = m_nLevel;
1976 : }
1977 : else
1978 : {
1979 323 : if (m_bWarnUnexpected)
1980 : {
1981 9 : CPLError(CE_Warning, CPLE_AppDefined,
1982 : "Unexpected element with xpath=%s "
1983 : "(subxpath=%s) found",
1984 : m_osCurXPath.c_str(), m_osCurSubXPath.c_str());
1985 : }
1986 : else
1987 : {
1988 314 : CPLDebug("GMLAS",
1989 : "Unexpected element with xpath=%s "
1990 : "(subxpath=%s) found",
1991 : m_osCurXPath.c_str(), m_osCurSubXPath.c_str());
1992 : }
1993 : }
1994 : }
1995 : }
1996 : else
1997 : {
1998 13540 : m_nCurFieldIdx = -1;
1999 13540 : m_nCurGeomFieldIdx = -1;
2000 : }
2001 :
2002 153107 : if (!bHasProcessedAttributes && m_nLevelSilentIgnoredXPath < 0)
2003 141354 : ProcessAttributes(attrs);
2004 : }
2005 : else
2006 : {
2007 217 : m_nCurFieldIdx = -1;
2008 217 : m_nCurGeomFieldIdx = -1;
2009 : }
2010 :
2011 153324 : m_nLevel++;
2012 : }
2013 :
2014 : /************************************************************************/
2015 : /* ProcessAttributes() */
2016 : /************************************************************************/
2017 :
2018 153107 : void GMLASReader::ProcessAttributes(const Attributes &attrs)
2019 : {
2020 : // Browse through attributes and match them with one of our fields
2021 153107 : m_osAttrXPath = m_osCurSubXPath;
2022 153107 : m_osAttrXPath += '/';
2023 153107 : m_osAttrXPath += szAT_ANY_ATTR;
2024 : const int nWildcardAttrIdx =
2025 153107 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(m_osAttrXPath);
2026 153107 : json_object *poWildcard = nullptr;
2027 :
2028 179780 : for (unsigned int i = 0; i < attrs.getLength(); i++)
2029 : {
2030 : const CPLString &osAttrNSPrefix(
2031 : m_osAttrNSPrefix =
2032 26673 : m_oMapURIToPrefix[transcode(attrs.getURI(i), m_osAttrNSUri)]);
2033 : const CPLString &osAttrLocalname(
2034 26673 : transcode(attrs.getLocalName(i), m_osAttrLocalName));
2035 : const CPLString &osAttrValue(
2036 26673 : transcode(attrs.getValue(i), m_osAttrValue));
2037 26673 : CPLString &osAttrXPath(m_osAttrXPath);
2038 26673 : if (!osAttrNSPrefix.empty())
2039 : {
2040 8939 : osAttrXPath.reserve(m_osCurSubXPath.size() + 2 +
2041 17878 : osAttrNSPrefix.size() + 1 +
2042 8939 : osAttrLocalname.size());
2043 8939 : osAttrXPath = m_osCurSubXPath;
2044 8939 : osAttrXPath += "/@";
2045 8939 : osAttrXPath += osAttrNSPrefix;
2046 8939 : osAttrXPath += ":";
2047 8939 : osAttrXPath += osAttrLocalname;
2048 : }
2049 : else
2050 : {
2051 35468 : osAttrXPath.reserve(m_osCurSubXPath.size() + 2 +
2052 17734 : osAttrLocalname.size());
2053 17734 : osAttrXPath = m_osCurSubXPath;
2054 17734 : osAttrXPath += "/@";
2055 17734 : osAttrXPath += osAttrLocalname;
2056 : }
2057 :
2058 : // CPLDebug("GMLAS", "Attr %s=%s", osAttrXPath.c_str(),
2059 : // osAttrValue.c_str());
2060 :
2061 : const int nAttrIdx =
2062 26673 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(osAttrXPath);
2063 : int nFCIdx;
2064 26673 : if (nAttrIdx >= 0)
2065 : {
2066 : const OGRFieldType eType(
2067 19162 : m_oCurCtxt.m_poFeature->GetFieldDefnRef(nAttrIdx)->GetType());
2068 19162 : if (osAttrValue.empty() && eType == OFTString)
2069 : {
2070 0 : m_oCurCtxt.m_poFeature->SetField(nAttrIdx, "");
2071 : }
2072 : else
2073 : {
2074 19162 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer, nAttrIdx,
2075 : osAttrValue);
2076 : }
2077 :
2078 19217 : if (osAttrNSPrefix == szXLINK_PREFIX && osAttrLocalname == szHREF &&
2079 55 : !osAttrValue.empty())
2080 : {
2081 55 : ProcessXLinkHref(nAttrIdx, osAttrXPath, osAttrValue);
2082 : }
2083 :
2084 37799 : if (m_oXLinkResolver.GetConf().m_bResolveInternalXLinks &&
2085 18637 : m_bInitialPass)
2086 : {
2087 : nFCIdx =
2088 420 : m_oCurCtxt.m_poLayer->GetFCFieldIndexFromXPath(osAttrXPath);
2089 840 : if (nFCIdx >= 0 && m_oCurCtxt.m_poLayer->GetFeatureClass()
2090 420 : .GetFields()[nFCIdx]
2091 420 : .GetType() == GMLAS_FT_ID)
2092 : {
2093 : // We don't check that there's no existing id in the map
2094 : // This is normally forbidden by the xs:ID rules
2095 : // If not respected by the document, this should not lead to
2096 : // crashes
2097 42 : m_oMapElementIdToLayer[osAttrValue] = m_oCurCtxt.m_poLayer;
2098 :
2099 42 : if (m_oCurCtxt.m_poLayer->IsGeneratedIDField())
2100 : {
2101 19 : m_oMapElementIdToPKID[osAttrValue] =
2102 38 : CPLString(m_oCurCtxt.m_poFeature->GetFieldAsString(
2103 38 : m_oCurCtxt.m_poLayer->GetIDFieldIdx()));
2104 : }
2105 : }
2106 : }
2107 : }
2108 :
2109 7511 : else if (osAttrNSPrefix == szXSI_PREFIX && osAttrLocalname == szNIL)
2110 : {
2111 1161 : if (osAttrValue == "true")
2112 : {
2113 : const int nMainAttrIdx =
2114 2322 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
2115 1161 : m_osCurSubXPath);
2116 1161 : if (nMainAttrIdx >= 0)
2117 : {
2118 1140 : m_oCurCtxt.m_poFeature->SetFieldNull(nMainAttrIdx);
2119 : }
2120 : else
2121 : {
2122 : const int nHrefAttrIdx =
2123 21 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
2124 42 : m_osCurSubXPath + "/@" + szXLINK_PREFIX + ":" +
2125 : szHREF);
2126 21 : if (nHrefAttrIdx >= 0)
2127 : {
2128 0 : m_oCurCtxt.m_poFeature->SetFieldNull(nHrefAttrIdx);
2129 : }
2130 : }
2131 : }
2132 : }
2133 :
2134 6350 : else if (osAttrNSPrefix != szXMLNS_PREFIX &&
2135 3003 : osAttrLocalname != szXMLNS_PREFIX &&
2136 2953 : !(osAttrNSPrefix == szXSI_PREFIX &&
2137 1129 : osAttrLocalname == szSCHEMA_LOCATION) &&
2138 1840 : !(osAttrNSPrefix == szXSI_PREFIX &&
2139 9353 : osAttrLocalname == szNO_NAMESPACE_SCHEMA_LOCATION) &&
2140 : // Do not warn about fixed attributes on geometry properties
2141 1824 : !(m_nCurGeomFieldIdx >= 0 &&
2142 0 : ((osAttrNSPrefix == szXLINK_PREFIX &&
2143 0 : osAttrLocalname == szTYPE) ||
2144 0 : (osAttrNSPrefix == "" && osAttrLocalname == szOWNS))))
2145 : {
2146 3648 : CPLString osMatchedXPath;
2147 1824 : if (nWildcardAttrIdx >= 0)
2148 : {
2149 1812 : if (poWildcard == nullptr)
2150 1812 : poWildcard = json_object_new_object();
2151 3624 : CPLString osKey;
2152 1812 : if (!osAttrNSPrefix.empty())
2153 0 : osKey = osAttrNSPrefix + ":" + osAttrLocalname;
2154 : else
2155 1812 : osKey = osAttrLocalname;
2156 1812 : json_object_object_add(poWildcard, osKey,
2157 : json_object_new_string(osAttrValue));
2158 : }
2159 30 : else if (m_bValidate &&
2160 6 : (nFCIdx = m_oCurCtxt.m_poLayer->GetFCFieldIndexFromXPath(
2161 18 : osAttrXPath)) >= 0 &&
2162 6 : !m_oCurCtxt.m_poLayer->GetFeatureClass()
2163 3 : .GetFields()[nFCIdx]
2164 3 : .GetFixedValue()
2165 3 : .empty())
2166 : {
2167 : // In validation mode, fixed attributes not present in the
2168 : // document are still reported, which cause spurious warnings
2169 : }
2170 27 : else if (m_bValidate &&
2171 5 : (nFCIdx = m_oCurCtxt.m_poLayer->GetFCFieldIndexFromXPath(
2172 2 : osAttrXPath)) >= 0 &&
2173 4 : !m_oCurCtxt.m_poLayer->GetFeatureClass()
2174 2 : .GetFields()[nFCIdx]
2175 2 : .GetDefaultValue()
2176 18 : .empty() &&
2177 2 : m_oCurCtxt.m_poLayer->GetFeatureClass()
2178 2 : .GetFields()[nFCIdx]
2179 4 : .GetDefaultValue() == m_osAttrValue)
2180 : {
2181 : // In validation mode, default attributes not present in the
2182 : // document are still reported, which cause spurious warnings
2183 : }
2184 9 : else if (m_oIgnoredXPathMatcher.MatchesRefXPath(osAttrXPath,
2185 : osMatchedXPath))
2186 : {
2187 2 : if (m_oMapIgnoredXPathToWarn[osMatchedXPath])
2188 : {
2189 1 : CPLError(CE_Warning, CPLE_AppDefined,
2190 : "Attribute with xpath=%s found in document but "
2191 : "ignored according to configuration",
2192 : osAttrXPath.c_str());
2193 : }
2194 : else
2195 : {
2196 1 : CPLDebug("GMLAS",
2197 : "Attribute with xpath=%s found in document but "
2198 : "ignored according to configuration",
2199 : osAttrXPath.c_str());
2200 : }
2201 : }
2202 : else
2203 : {
2204 7 : if (m_bWarnUnexpected)
2205 : {
2206 5 : CPLError(CE_Warning, CPLE_AppDefined,
2207 : "Unexpected attribute with xpath=%s found",
2208 : osAttrXPath.c_str());
2209 : }
2210 : else
2211 : {
2212 : // Emit debug message if unexpected attribute
2213 2 : CPLDebug("GMLAS",
2214 : "Unexpected attribute with xpath=%s found",
2215 : osAttrXPath.c_str());
2216 : }
2217 : }
2218 : }
2219 : }
2220 :
2221 : // Store wildcard attributes
2222 153107 : if (poWildcard != nullptr)
2223 : {
2224 1812 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer, nWildcardAttrIdx,
2225 : json_object_get_string(poWildcard));
2226 1812 : json_object_put(poWildcard);
2227 : }
2228 :
2229 : // Process fixed and default values, except when doing the initial scan
2230 : // so as to avoid the bRemoveUnusedFields logic to be confused
2231 153107 : if (!m_bInitialPass)
2232 : {
2233 147040 : const int nFieldCount = m_oCurCtxt.m_poFeature->GetFieldCount();
2234 : const std::vector<GMLASField> &aoFields =
2235 147040 : m_oCurCtxt.m_poLayer->GetFeatureClass().GetFields();
2236 5399420 : for (int i = 0; i < nFieldCount; i++)
2237 : {
2238 : const int nFCIdx =
2239 5252380 : m_oCurCtxt.m_poLayer->GetFCFieldIndexFromOGRFieldIdx(i);
2240 10362800 : if (nFCIdx >= 0 &&
2241 5110430 : aoFields[nFCIdx].GetXPath().find('@') != std::string::npos)
2242 : {
2243 : // We process fixed as default. In theory, to be XSD compliant,
2244 : // the user shouldn't have put a different value than the fixed
2245 : // one, but just in case he did, then honour it instead of
2246 : // overwriting it.
2247 : CPLString osFixedDefaultValue =
2248 3229330 : aoFields[nFCIdx].GetFixedValue();
2249 1614660 : if (osFixedDefaultValue.empty())
2250 1483380 : osFixedDefaultValue = aoFields[nFCIdx].GetDefaultValue();
2251 1877230 : if (!osFixedDefaultValue.empty() &&
2252 262564 : !m_oCurCtxt.m_poFeature->IsFieldSetAndNotNull(i))
2253 : {
2254 2072 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer, i,
2255 : osFixedDefaultValue);
2256 : }
2257 : }
2258 : }
2259 : }
2260 153107 : }
2261 :
2262 : /************************************************************************/
2263 : /* ProcessXLinkHref() */
2264 : /************************************************************************/
2265 :
2266 55 : void GMLASReader::ProcessXLinkHref(int nAttrIdx, const CPLString &osAttrXPath,
2267 : const CPLString &osAttrValue)
2268 : {
2269 : // If we are a xlink:href attribute, and that the link value is
2270 : // a internal link, then find if we have
2271 : // a field that does a relation to a targetElement
2272 55 : if (osAttrValue[0] == '#')
2273 : {
2274 20 : const int nAttrIdx2 = m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
2275 40 : GMLASField::MakePKIDFieldXPathFromXLinkHrefXPath(osAttrXPath));
2276 20 : if (nAttrIdx2 >= 0)
2277 : {
2278 2 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer, nAttrIdx2,
2279 4 : osAttrValue.substr(1));
2280 : }
2281 18 : else if (m_oXLinkResolver.GetConf().m_bResolveInternalXLinks)
2282 : {
2283 : const CPLString osReferringField(
2284 18 : m_oCurCtxt.m_poLayer->GetLayerDefn()
2285 18 : ->GetFieldDefn(nAttrIdx)
2286 36 : ->GetNameRef());
2287 36 : CPLString osId(osAttrValue.substr(1));
2288 18 : if (m_bInitialPass)
2289 : {
2290 : std::pair<OGRGMLASLayer *, CPLString> oReferringPair(
2291 18 : m_oCurCtxt.m_poLayer, osReferringField);
2292 18 : m_oMapFieldXPathToLinkValue[oReferringPair].push_back(
2293 9 : std::move(osId));
2294 : }
2295 : else
2296 : {
2297 9 : const auto oIter = m_oMapElementIdToLayer.find(osId);
2298 9 : if (oIter != m_oMapElementIdToLayer.end())
2299 : {
2300 5 : OGRGMLASLayer *poTargetLayer = oIter->second;
2301 : const CPLString osLinkFieldXPath =
2302 5 : m_oCurCtxt.m_poLayer
2303 : ->GetXPathOfFieldLinkForAttrToOtherLayer(
2304 : osReferringField,
2305 10 : poTargetLayer->GetFeatureClass().GetXPath());
2306 : const int nLinkFieldOGRId =
2307 5 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
2308 : osLinkFieldXPath);
2309 5 : if (nLinkFieldOGRId >= 0)
2310 : {
2311 5 : const auto oIter2 = m_oMapElementIdToPKID.find(osId);
2312 5 : if (oIter2 != m_oMapElementIdToPKID.end())
2313 : {
2314 3 : m_oCurCtxt.m_poFeature->SetField(nLinkFieldOGRId,
2315 3 : oIter2->second);
2316 : }
2317 : else
2318 : {
2319 2 : m_oCurCtxt.m_poFeature->SetField(nLinkFieldOGRId,
2320 : osId);
2321 : }
2322 : }
2323 : }
2324 : }
2325 : }
2326 : }
2327 : else
2328 : {
2329 : const int nRuleIdx =
2330 35 : m_oXLinkResolver.GetMatchingResolutionRule(osAttrValue);
2331 35 : if (nRuleIdx >= 0)
2332 : {
2333 : const GMLASXLinkResolutionConf::URLSpecificResolution &oRule(
2334 14 : m_oXLinkResolver.GetConf().m_aoURLSpecificRules[nRuleIdx]);
2335 14 : if (m_bInitialPass)
2336 : {
2337 8 : m_oMapXLinkFields[m_oCurCtxt.m_poLayer][osAttrXPath].insert(
2338 8 : nRuleIdx);
2339 : }
2340 6 : else if (oRule.m_eResolutionMode ==
2341 : GMLASXLinkResolutionConf::RawContent)
2342 : {
2343 : const int nAttrIdx2 =
2344 1 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
2345 : GMLASField::
2346 2 : MakeXLinkRawContentFieldXPathFromXLinkHrefXPath(
2347 : osAttrXPath));
2348 1 : CPLAssert(nAttrIdx2 >= 0);
2349 :
2350 : const CPLString osRawContent(
2351 1 : m_oXLinkResolver.GetRawContentForRule(osAttrValue,
2352 2 : nRuleIdx));
2353 1 : if (!osRawContent.empty())
2354 : {
2355 1 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer,
2356 : nAttrIdx2, osRawContent);
2357 : }
2358 : }
2359 5 : else if (oRule.m_eResolutionMode ==
2360 : GMLASXLinkResolutionConf::FieldsFromXPath)
2361 : {
2362 : const CPLString osRawContent(
2363 5 : m_oXLinkResolver.GetRawContentForRule(osAttrValue,
2364 10 : nRuleIdx));
2365 5 : if (!osRawContent.empty())
2366 : {
2367 5 : CPLXMLNode *psNode = CPLParseXMLString(osRawContent);
2368 5 : if (psNode != nullptr)
2369 : {
2370 10 : std::vector<CPLString> aoXPaths;
2371 10 : std::map<CPLString, size_t> oMapFieldXPathToIdx;
2372 23 : for (size_t i = 0; i < oRule.m_aoFields.size(); ++i)
2373 : {
2374 : const CPLString &osXPathRule(
2375 18 : oRule.m_aoFields[i].m_osXPath);
2376 18 : aoXPaths.push_back(osXPathRule);
2377 18 : oMapFieldXPathToIdx[osXPathRule] = i;
2378 : }
2379 10 : GMLASXPathMatcher oMatcher;
2380 5 : oMatcher.SetRefXPaths(std::map<CPLString, CPLString>(),
2381 : aoXPaths);
2382 5 : oMatcher.SetDocumentMapURIToPrefix(
2383 10 : std::map<CPLString, CPLString>());
2384 :
2385 5 : CPLXMLNode *psIter = psNode;
2386 15 : for (; psIter != nullptr; psIter = psIter->psNext)
2387 : {
2388 10 : if (psIter->eType == CXT_Element &&
2389 10 : psIter->pszValue[0] != '?')
2390 : {
2391 5 : ExploreXMLDoc(osAttrXPath, oRule, psIter,
2392 10 : CPLString(), oMatcher,
2393 : oMapFieldXPathToIdx);
2394 : }
2395 : }
2396 : }
2397 5 : CPLDestroyXMLNode(psNode);
2398 : }
2399 : }
2400 : }
2401 21 : else if (m_oXLinkResolver.IsRawContentResolutionEnabled())
2402 : {
2403 : const int nAttrIdx2 =
2404 11 : m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
2405 22 : GMLASField::MakeXLinkRawContentFieldXPathFromXLinkHrefXPath(
2406 : osAttrXPath));
2407 11 : CPLAssert(nAttrIdx2 >= 0);
2408 :
2409 : const CPLString osRawContent(
2410 22 : m_oXLinkResolver.GetRawContent(osAttrValue));
2411 11 : if (!osRawContent.empty())
2412 : {
2413 7 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer,
2414 : nAttrIdx2, osRawContent);
2415 : }
2416 : }
2417 : }
2418 55 : }
2419 :
2420 : /************************************************************************/
2421 : /* ExploreXMLDoc() */
2422 : /************************************************************************/
2423 :
2424 29 : void GMLASReader::ExploreXMLDoc(
2425 : const CPLString &osAttrXPath,
2426 : const GMLASXLinkResolutionConf::URLSpecificResolution &oRule,
2427 : CPLXMLNode *psNode, const CPLString &osParentXPath,
2428 : const GMLASXPathMatcher &oMatcher,
2429 : const std::map<CPLString, size_t> &oMapFieldXPathToIdx)
2430 : {
2431 58 : CPLString osXPath;
2432 29 : if (osParentXPath.empty())
2433 5 : osXPath = psNode->pszValue;
2434 24 : else if (psNode->eType == CXT_Element)
2435 22 : osXPath = osParentXPath + "/" + psNode->pszValue;
2436 : else
2437 : {
2438 2 : CPLAssert(psNode->eType == CXT_Attribute);
2439 2 : osXPath = osParentXPath + "/@" + psNode->pszValue;
2440 : }
2441 :
2442 58 : CPLString osMatchedXPathRule;
2443 29 : if (oMatcher.MatchesRefXPath(osXPath, osMatchedXPathRule))
2444 : {
2445 20 : const auto oIter = oMapFieldXPathToIdx.find(osMatchedXPathRule);
2446 20 : CPLAssert(oIter != oMapFieldXPathToIdx.end());
2447 20 : const size_t nFieldRuleIdx = oIter->second;
2448 : const CPLString osDerivedFieldXPath(
2449 : GMLASField::MakeXLinkDerivedFieldXPathFromXLinkHrefXPath(
2450 40 : osAttrXPath, oRule.m_aoFields[nFieldRuleIdx].m_osName));
2451 20 : const int nAttrIdx = m_oCurCtxt.m_poLayer->GetOGRFieldIndexFromXPath(
2452 : osDerivedFieldXPath);
2453 20 : CPLAssert(nAttrIdx >= 0);
2454 40 : CPLString osVal;
2455 20 : if (psNode->eType == CXT_Element && psNode->psChild != nullptr &&
2456 18 : psNode->psChild->eType == CXT_Text &&
2457 18 : psNode->psChild->psNext == nullptr)
2458 : {
2459 16 : osVal = psNode->psChild->pszValue;
2460 : }
2461 4 : else if (psNode->eType == CXT_Attribute)
2462 : {
2463 2 : osVal = psNode->psChild->pszValue;
2464 : }
2465 : else
2466 : {
2467 2 : char *pszContent = CPLSerializeXMLTree(psNode->psChild);
2468 2 : osVal = pszContent;
2469 2 : CPLFree(pszContent);
2470 : }
2471 22 : if (m_oCurCtxt.m_poFeature->IsFieldSetAndNotNull(nAttrIdx) &&
2472 2 : m_oCurCtxt.m_poFeature->GetFieldDefnRef(nAttrIdx)->GetType() ==
2473 : OFTString)
2474 : {
2475 2 : osVal = m_oCurCtxt.m_poFeature->GetFieldAsString(nAttrIdx) +
2476 4 : CPLString(" ") + osVal;
2477 : }
2478 20 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer, nAttrIdx, osVal);
2479 : }
2480 :
2481 29 : CPLXMLNode *psIter = psNode->psChild;
2482 75 : for (; psIter != nullptr; psIter = psIter->psNext)
2483 : {
2484 46 : if (psIter->eType == CXT_Element || psIter->eType == CXT_Attribute)
2485 : {
2486 24 : ExploreXMLDoc(osAttrXPath, oRule, psIter, osXPath, oMatcher,
2487 : oMapFieldXPathToIdx);
2488 : }
2489 : }
2490 29 : }
2491 :
2492 : /************************************************************************/
2493 : /* endElement() */
2494 : /************************************************************************/
2495 :
2496 162266 : void GMLASReader::endElement(const XMLCh *const uri,
2497 : const XMLCh *const localname,
2498 : const XMLCh *const
2499 : #ifdef DEBUG_VERBOSE
2500 : qname
2501 : #endif
2502 : )
2503 : {
2504 162266 : m_nEntityCounter = 0;
2505 :
2506 162266 : m_nLevel--;
2507 :
2508 : #ifdef DEBUG_VERBOSE
2509 : CPLDebug("GMLAS", "m_nLevel = %d", m_nLevel);
2510 : #endif
2511 :
2512 : #ifdef DEBUG_VERBOSE
2513 : {
2514 : const CPLString &osLocalname(transcode(localname, m_osLocalname));
2515 : const CPLString &osNSPrefix(
2516 : m_osNSPrefix = m_oMapURIToPrefix[transcode(uri, m_osNSUri)]);
2517 : if (osNSPrefix.empty())
2518 : m_osXPath = osLocalname;
2519 : else
2520 : {
2521 : m_osXPath.reserve(osNSPrefix.size() + 1 + osLocalname.size());
2522 : m_osXPath = osNSPrefix;
2523 : m_osXPath += ":";
2524 : m_osXPath += osLocalname;
2525 : }
2526 : }
2527 : CPLDebug("GMLAS", "endElement(%s / %s)", transcode(qname).c_str(),
2528 : m_osXPath.c_str());
2529 : #endif
2530 :
2531 162266 : if (m_nLevelSilentIgnoredXPath == m_nLevel)
2532 : {
2533 0 : m_nLevelSilentIgnoredXPath = -1;
2534 : }
2535 :
2536 : // Make sure to set field only if we are at the expected nesting level
2537 162266 : if (m_nCurFieldIdx >= 0 && m_nLevel == m_nCurFieldLevel - 1)
2538 : {
2539 : const OGRFieldType eType(
2540 113078 : m_oCurCtxt.m_poFeature->GetFieldDefnRef(m_nCurFieldIdx)->GetType());
2541 :
2542 : // Assign XML content to field value
2543 113078 : if (IsArrayType(eType))
2544 : {
2545 : const int nFCFieldIdx =
2546 28886 : m_oCurCtxt.m_poLayer->GetFCFieldIndexFromOGRFieldIdx(
2547 : m_nCurFieldIdx);
2548 57772 : if (nFCFieldIdx >= 0 && m_oCurCtxt.m_poLayer->GetFeatureClass()
2549 28886 : .GetFields()[nFCFieldIdx]
2550 28886 : .IsList())
2551 : {
2552 5300 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer,
2553 5300 : m_nCurFieldIdx, m_osTextContent);
2554 : }
2555 23586 : else if (m_nTextContentListEstimatedSize > m_nMaxContentSize)
2556 : {
2557 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
2558 : "Too much repeated data in a single element");
2559 0 : m_bParsingError = true;
2560 : }
2561 : else
2562 : {
2563 : // Transform boolean values to something that OGR understands
2564 29806 : if (eType == OFTIntegerList &&
2565 6220 : m_oCurCtxt.m_poFeature->GetFieldDefnRef(m_nCurFieldIdx)
2566 6220 : ->GetSubType() == OFSTBoolean)
2567 : {
2568 2140 : if (m_osTextContent == "true")
2569 1052 : m_osTextContent = "1";
2570 : else
2571 1088 : m_osTextContent = "0";
2572 : }
2573 :
2574 23586 : m_osTextContentList.AddString(m_osTextContent);
2575 : // 16 is an arbitrary number for the cost of a new entry in the
2576 : // string list
2577 23586 : m_nTextContentListEstimatedSize += 16 + m_osTextContent.size();
2578 23586 : m_oCurCtxt.m_poFeature->SetField(m_nCurFieldIdx,
2579 23586 : m_osTextContentList.List());
2580 : }
2581 : }
2582 : else
2583 : {
2584 84192 : if (m_bIsXMLBlobIncludeUpper && FillTextContent())
2585 : {
2586 : const CPLString &osLocalname(
2587 1638 : transcode(localname, m_osLocalname));
2588 : const CPLString &osNSPrefix(
2589 1638 : m_oMapURIToPrefix[transcode(uri, m_osNSUri)]);
2590 :
2591 1638 : m_osTextContent += "</";
2592 1638 : if (!osNSPrefix.empty())
2593 : {
2594 2 : m_osTextContent += osNSPrefix;
2595 2 : m_osTextContent += ":";
2596 : }
2597 1638 : m_osTextContent += osLocalname;
2598 1638 : m_osTextContent += ">";
2599 : }
2600 :
2601 84192 : SetField(m_oCurCtxt.m_poFeature, m_oCurCtxt.m_poLayer,
2602 84192 : m_nCurFieldIdx, m_osTextContent);
2603 : }
2604 : }
2605 :
2606 : // Make sure to set field only if we are at the expected nesting level
2607 162266 : if (m_nCurGeomFieldIdx >= 0 && m_nLevel == m_nCurFieldLevel - 1)
2608 : {
2609 444 : if (!m_apsXMLNodeStack.empty())
2610 : {
2611 420 : CPLAssert(m_apsXMLNodeStack.size() == 1);
2612 420 : CPLXMLNode *psRoot = m_apsXMLNodeStack[0].psNode;
2613 420 : ProcessGeometry(psRoot);
2614 420 : CPLDestroyXMLNode(psRoot);
2615 420 : m_apsXMLNodeStack.clear();
2616 : }
2617 : }
2618 :
2619 162266 : if ((m_nCurFieldIdx >= 0 || m_nCurGeomFieldIdx >= 0) &&
2620 148093 : m_nLevel == m_nCurFieldLevel - 1)
2621 : {
2622 113446 : m_bIsXMLBlob = false;
2623 113446 : m_bIsXMLBlobIncludeUpper = false;
2624 : }
2625 :
2626 162266 : if (m_bIsXMLBlob)
2627 : {
2628 7832 : if (m_nCurGeomFieldIdx >= 0)
2629 : {
2630 1478 : if (m_apsXMLNodeStack.size() > 1)
2631 : {
2632 : #ifdef DEBUG_VERBOSE
2633 : CPLDebug("GMLAS", "m_apsXMLNodeStack.pop_back()");
2634 : #endif
2635 1074 : m_apsXMLNodeStack.pop_back();
2636 : }
2637 : }
2638 :
2639 7832 : if (FillTextContent())
2640 : {
2641 4716 : const CPLString &osLocalname(transcode(localname, m_osLocalname));
2642 : const CPLString &osNSPrefix(
2643 4716 : m_oMapURIToPrefix[transcode(uri, m_osNSUri)]);
2644 :
2645 4716 : m_osTextContent += "</";
2646 4716 : if (!osNSPrefix.empty())
2647 : {
2648 1011 : m_osTextContent += osNSPrefix;
2649 1011 : m_osTextContent += ":";
2650 : }
2651 4716 : m_osTextContent += osLocalname;
2652 4716 : m_osTextContent += ">";
2653 :
2654 4716 : if (m_osTextContent.size() > m_nMaxContentSize)
2655 : {
2656 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
2657 : "Too much data in a single element");
2658 0 : m_bParsingError = true;
2659 : }
2660 : }
2661 : }
2662 : else
2663 : {
2664 154434 : m_osTextContent.clear();
2665 : }
2666 :
2667 162266 : if (m_nSWEDataArrayLevel >= 0)
2668 : {
2669 204 : if (m_nLevel > m_nSWEDataArrayLevel)
2670 : {
2671 192 : CPLAssert(m_apsXMLNodeStack.size() > 1);
2672 192 : m_apsXMLNodeStack.pop_back();
2673 : }
2674 : else
2675 : {
2676 12 : CPLAssert(m_apsXMLNodeStack.size() == 1);
2677 12 : CPLXMLNode *psRoot = m_apsXMLNodeStack[0].psNode;
2678 12 : ProcessSWEDataArray(psRoot);
2679 12 : m_nSWEDataArrayLevel = -1;
2680 12 : CPLDestroyXMLNode(psRoot);
2681 12 : m_apsXMLNodeStack.clear();
2682 : }
2683 : }
2684 :
2685 : // The while and not just if is needed when a group is at the end of an
2686 : // element
2687 437372 : while (!m_aoStackContext.empty() &&
2688 218091 : m_aoStackContext.back().m_nLevel >= m_nLevel)
2689 : {
2690 114030 : auto oMapCounter = m_aoStackContext.back().m_oMapCounter;
2691 57015 : if (!m_aoStackContext.back().m_osCurSubXPath.empty())
2692 : {
2693 : #ifdef DEBUG_VERBOSE
2694 : CPLDebug("GMLAS", "Restoring m_osCurSubXPath from %s to %s",
2695 : m_osCurSubXPath.c_str(),
2696 : m_aoStackContext.back().m_osCurSubXPath.c_str());
2697 : #endif
2698 11613 : m_osCurSubXPath = m_aoStackContext.back().m_osCurSubXPath;
2699 : }
2700 :
2701 57015 : if (m_oCurCtxt.m_poGroupLayer == m_oCurCtxt.m_poLayer)
2702 : {
2703 12403 : PopContext();
2704 12403 : CPLAssert(!m_aoStackContext.empty());
2705 12403 : m_oCurCtxt.m_poLayer = m_aoStackContext.back().m_poLayer;
2706 : }
2707 : else
2708 : {
2709 44612 : if (m_oCurCtxt.m_poGroupLayer)
2710 : {
2711 : /* Case like
2712 : <first_elt_of_group>...</first_elt_of_group>
2713 : </end_of_enclosing_element> <!-- we are here at
2714 : endElement() -->
2715 : */
2716 :
2717 : // CPLDebug("GMLAS", "Feature ready");
2718 0 : PushFeatureReady(
2719 0 : std::unique_ptr<OGRFeature>(m_oCurCtxt.m_poFeature),
2720 : m_oCurCtxt.m_poGroupLayer);
2721 : // CPLDebug("GMLAS", "Feature ready");
2722 0 : PushFeatureReady(std::unique_ptr<OGRFeature>(
2723 0 : m_aoStackContext.back().m_poFeature),
2724 0 : m_aoStackContext.back().m_poLayer);
2725 : }
2726 : else
2727 : {
2728 : // CPLDebug("GMLAS", "Feature ready");
2729 44612 : PushFeatureReady(
2730 89224 : std::unique_ptr<OGRFeature>(m_oCurCtxt.m_poFeature),
2731 : m_oCurCtxt.m_poLayer);
2732 : }
2733 44612 : PopContext();
2734 44612 : if (!m_aoStackContext.empty())
2735 : {
2736 43608 : m_oCurCtxt = m_aoStackContext.back();
2737 43608 : m_oCurCtxt.m_osCurSubXPath.clear();
2738 43608 : if (m_oCurCtxt.m_nLevel < 0)
2739 : {
2740 3517 : PopContext();
2741 3517 : CPLAssert(!m_aoStackContext.empty());
2742 3517 : m_oCurCtxt.m_poLayer = m_aoStackContext.back().m_poLayer;
2743 : }
2744 : }
2745 : else
2746 : {
2747 1004 : m_oCurCtxt.m_poFeature = nullptr;
2748 1004 : m_oCurCtxt.m_poLayer = nullptr;
2749 1004 : m_oCurCtxt.m_poGroupLayer = nullptr;
2750 1004 : m_oCurCtxt.m_nGroupLayerLevel = -1;
2751 1004 : m_oCurCtxt.m_nLastFieldIdxGroupLayer = -1;
2752 : }
2753 44612 : m_nCurFieldIdx = -1;
2754 : }
2755 57015 : m_oCurCtxt.m_oMapCounter = std::move(oMapCounter);
2756 :
2757 : #ifdef DEBUG_VERBOSE
2758 : CPLDebug("GMLAS", "m_oCurCtxt = ");
2759 : m_oCurCtxt.Dump();
2760 : #endif
2761 : }
2762 :
2763 162266 : size_t nLastXPathLength = m_anStackXPathLength.back();
2764 162266 : m_anStackXPathLength.pop_back();
2765 162266 : if (m_anStackXPathLength.empty())
2766 956 : m_osCurXPath.clear();
2767 : else
2768 161310 : m_osCurXPath.resize(m_osCurXPath.size() - 1 - nLastXPathLength);
2769 :
2770 162266 : if (m_osCurSubXPath.size() >= 1 + nLastXPathLength)
2771 161076 : m_osCurSubXPath.resize(m_osCurSubXPath.size() - 1 - nLastXPathLength);
2772 1190 : else if (m_osCurSubXPath.size() == nLastXPathLength)
2773 1004 : m_osCurSubXPath.clear();
2774 :
2775 162266 : if (m_nSWEDataRecordLevel >= 0)
2776 : {
2777 87 : if (m_nLevel > m_nSWEDataRecordLevel)
2778 : {
2779 82 : CPLAssert(m_apsXMLNodeStack.size() > 1);
2780 82 : m_apsXMLNodeStack.pop_back();
2781 : }
2782 : else
2783 : {
2784 5 : CPLAssert(m_apsXMLNodeStack.size() == 1);
2785 5 : CPLXMLNode *psRoot = m_apsXMLNodeStack[0].psNode;
2786 5 : ProcessSWEDataRecord(psRoot);
2787 5 : m_nSWEDataRecordLevel = -1;
2788 5 : CPLDestroyXMLNode(psRoot);
2789 5 : m_apsXMLNodeStack.clear();
2790 : }
2791 : }
2792 162266 : }
2793 :
2794 : /************************************************************************/
2795 : /* startEntity() */
2796 : /************************************************************************/
2797 :
2798 2002 : void GMLASReader::startEntity(const XMLCh *const /* name */)
2799 : {
2800 2002 : m_nEntityCounter++;
2801 2002 : if (m_nEntityCounter > 1000 && !m_bParsingError)
2802 : {
2803 : throw SAXNotSupportedException(
2804 2 : "File probably corrupted (million laugh pattern)");
2805 : }
2806 2000 : }
2807 :
2808 : /************************************************************************/
2809 : /* SetSWEValue() */
2810 : /************************************************************************/
2811 :
2812 63 : static void SetSWEValue(OGRFeature *poFeature, int iField, CPLString &osValue)
2813 : {
2814 63 : if (!osValue.empty())
2815 : {
2816 57 : OGRFieldDefn *poFieldDefn = poFeature->GetFieldDefnRef(iField);
2817 57 : OGRFieldType eType(poFieldDefn->GetType());
2818 57 : OGRFieldSubType eSubType(poFieldDefn->GetSubType());
2819 57 : if (eType == OFTReal || eType == OFTInteger)
2820 : {
2821 24 : osValue.Trim();
2822 24 : if (eSubType == OFSTBoolean)
2823 : {
2824 : osValue =
2825 3 : EQUAL(osValue, "1") || EQUAL(osValue, "True") ? "1" : "0";
2826 : }
2827 : }
2828 57 : poFeature->SetField(iField, osValue.c_str());
2829 : }
2830 63 : }
2831 :
2832 : /************************************************************************/
2833 : /* SkipSpace() */
2834 : /************************************************************************/
2835 :
/** Return the index of the first non-whitespace character at or after i.
 *
 * @param pszValues nul-terminated string to scan.
 * @param i         index at which the scan starts (must be <= strlen()).
 * @return index of the first character for which isspace() is false. The
 *         terminating nul is not a space, so the scan stops there at the
 *         latest.
 */
static size_t SkipSpace(const char *pszValues, size_t i)
{
    for (;; ++i)
    {
        // Cast needed since isspace() is undefined for negative values
        const unsigned char chCur = static_cast<unsigned char>(pszValues[i]);
        if (!isspace(chCur))
            return i;
    }
}
2842 :
2843 : /************************************************************************/
2844 : /* ProcessSWEDataArray() */
2845 : /************************************************************************/
2846 :
2847 12 : void GMLASReader::ProcessSWEDataArray(CPLXMLNode *psRoot)
2848 : {
2849 12 : if (m_oCurCtxt.m_poLayer == nullptr)
2850 0 : return;
2851 :
2852 12 : CPLStripXMLNamespace(psRoot, "swe", true);
2853 12 : CPLXMLNode *psElementType = CPLGetXMLNode(psRoot, "elementType");
2854 12 : if (psElementType == nullptr)
2855 0 : return;
2856 12 : CPLXMLNode *psDataRecord = CPLGetXMLNode(psElementType, "DataRecord");
2857 12 : if (psDataRecord == nullptr)
2858 0 : return;
2859 12 : const char *pszValues = CPLGetXMLValue(psRoot, "values", nullptr);
2860 12 : if (pszValues == nullptr)
2861 0 : return;
2862 12 : CPLXMLNode *psTextEncoding = CPLGetXMLNode(psRoot, "encoding.TextEncoding");
2863 12 : if (psTextEncoding == nullptr)
2864 0 : return;
2865 : // CPLString osDecimalSeparator =
2866 : // CPLGetXMLValue(psTextEncoding, "decimalSeparator", ".");
2867 : CPLString osBlockSeparator =
2868 12 : CPLGetXMLValue(psTextEncoding, "blockSeparator", "");
2869 : CPLString osTokenSeparator =
2870 12 : CPLGetXMLValue(psTextEncoding, "tokenSeparator", "");
2871 12 : if (osBlockSeparator.empty() || osTokenSeparator.empty())
2872 0 : return;
2873 :
2874 12 : if (m_bInitialPass)
2875 : {
2876 6 : CPLString osLayerName;
2877 3 : osLayerName.Printf("DataArray_%d", m_nSWEDataArrayLayerIdx + 1);
2878 : const char *pszElementTypeName =
2879 3 : CPLGetXMLValue(psElementType, "name", nullptr);
2880 3 : if (pszElementTypeName != nullptr)
2881 : {
2882 1 : osLayerName += "_";
2883 1 : osLayerName += pszElementTypeName;
2884 : }
2885 3 : osLayerName = osLayerName.tolower();
2886 6 : auto poLayer = std::make_unique<OGRGMLASLayer>(osLayerName);
2887 :
2888 : // Register layer in _ogr_layers_metadata
2889 : {
2890 : OGRFeature oLayerDescFeature(
2891 6 : m_poLayersMetadataLayer->GetLayerDefn());
2892 3 : oLayerDescFeature.SetField(szLAYER_NAME, osLayerName);
2893 3 : oLayerDescFeature.SetField(szLAYER_CATEGORY, szSWE_DATA_ARRAY);
2894 :
2895 3 : CPLString osFieldName(szPARENT_PREFIX);
2896 : osFieldName +=
2897 3 : m_oCurCtxt.m_poLayer->GetLayerDefn()
2898 3 : ->GetFieldDefn(m_oCurCtxt.m_poLayer->GetIDFieldIdx())
2899 3 : ->GetNameRef();
2900 3 : oLayerDescFeature.SetField(szLAYER_PARENT_PKID_NAME,
2901 : osFieldName.c_str());
2902 3 : CPL_IGNORE_RET_VAL(
2903 3 : m_poLayersMetadataLayer->CreateFeature(&oLayerDescFeature));
2904 : }
2905 :
2906 : // Register layer relationship in _ogr_layer_relationships
2907 : {
2908 : OGRFeature oRelationshipsFeature(
2909 3 : m_poRelationshipsLayer->GetLayerDefn());
2910 3 : oRelationshipsFeature.SetField(szPARENT_LAYER,
2911 3 : m_oCurCtxt.m_poLayer->GetName());
2912 3 : oRelationshipsFeature.SetField(
2913 : szPARENT_PKID,
2914 3 : m_oCurCtxt.m_poLayer->GetLayerDefn()
2915 3 : ->GetFieldDefn(m_oCurCtxt.m_poLayer->GetIDFieldIdx())
2916 : ->GetNameRef());
2917 3 : if (!m_osSWEDataArrayParentField.empty())
2918 : {
2919 3 : oRelationshipsFeature.SetField(szPARENT_ELEMENT_NAME,
2920 : m_osSWEDataArrayParentField);
2921 : }
2922 3 : oRelationshipsFeature.SetField(szCHILD_LAYER, osLayerName);
2923 3 : CPL_IGNORE_RET_VAL(
2924 3 : m_poRelationshipsLayer->CreateFeature(&oRelationshipsFeature));
2925 : }
2926 :
2927 3 : poLayer->ProcessDataRecordOfDataArrayCreateFields(
2928 : m_oCurCtxt.m_poLayer, psDataRecord, m_poFieldsMetadataLayer);
2929 3 : m_apoSWEDataArrayLayersOwned.emplace_back(std::move(poLayer));
2930 : }
2931 : else
2932 : {
2933 9 : CPLAssert(m_nSWEDataArrayLayerIdx <
2934 : static_cast<int>(m_apoSWEDataArrayLayersRef.size()));
2935 : OGRGMLASLayer *poLayer =
2936 9 : m_apoSWEDataArrayLayersRef[m_nSWEDataArrayLayerIdx];
2937 : // -1 because first field is parent id
2938 9 : const int nFieldCount = poLayer->GetLayerDefn()->GetFieldCount() - 1;
2939 9 : int nFID = 1;
2940 9 : int iField = 0;
2941 9 : const size_t nLen = strlen(pszValues);
2942 9 : std::unique_ptr<OGRFeature> poFeature;
2943 9 : const bool bSameSep = (osTokenSeparator == osBlockSeparator);
2944 9 : size_t nLastValid = SkipSpace(pszValues, 0);
2945 9 : size_t i = nLastValid;
2946 645 : while (i < nLen)
2947 : {
2948 636 : if (poFeature == nullptr)
2949 : {
2950 : poFeature =
2951 15 : std::make_unique<OGRFeature>(poLayer->GetLayerDefn());
2952 15 : poFeature->SetFID(nFID);
2953 30 : poFeature->SetField(0,
2954 15 : m_oCurCtxt.m_poFeature->GetFieldAsString(
2955 15 : m_oCurCtxt.m_poLayer->GetIDFieldIdx()));
2956 15 : nFID++;
2957 15 : iField = 0;
2958 : }
2959 636 : if (strncmp(pszValues + i, osTokenSeparator,
2960 636 : osTokenSeparator.size()) == 0)
2961 : {
2962 54 : if (bSameSep && iField == nFieldCount)
2963 : {
2964 3 : PushFeatureReady(std::move(poFeature), poLayer);
2965 : poFeature =
2966 3 : std::make_unique<OGRFeature>(poLayer->GetLayerDefn());
2967 3 : poFeature->SetFID(nFID);
2968 6 : poFeature->SetField(
2969 3 : 0, m_oCurCtxt.m_poFeature->GetFieldAsString(
2970 3 : m_oCurCtxt.m_poLayer->GetIDFieldIdx()));
2971 3 : nFID++;
2972 3 : iField = 0;
2973 : }
2974 :
2975 54 : if (iField < nFieldCount)
2976 : {
2977 51 : CPLString osValue(pszValues + nLastValid, i - nLastValid);
2978 : // +1 because first field is parent id
2979 51 : SetSWEValue(poFeature.get(), iField + 1, osValue);
2980 51 : iField++;
2981 : }
2982 54 : nLastValid = i + osTokenSeparator.size();
2983 54 : nLastValid = SkipSpace(pszValues, nLastValid);
2984 54 : i = nLastValid;
2985 : }
2986 582 : else if (strncmp(pszValues + i, osBlockSeparator,
2987 582 : osBlockSeparator.size()) == 0)
2988 : {
2989 9 : if (iField < nFieldCount)
2990 : {
2991 6 : CPLString osValue(pszValues + nLastValid, i - nLastValid);
2992 : // +1 because first field is parent id
2993 6 : SetSWEValue(poFeature.get(), iField + 1, osValue);
2994 6 : iField++;
2995 : }
2996 9 : PushFeatureReady(std::move(poFeature), poLayer);
2997 9 : poFeature.reset();
2998 9 : nLastValid = i + osBlockSeparator.size();
2999 9 : nLastValid = SkipSpace(pszValues, nLastValid);
3000 9 : i = nLastValid;
3001 : }
3002 : else
3003 : {
3004 573 : i++;
3005 : }
3006 : }
3007 : // cppcheck-suppress accessMoved
3008 9 : if (poFeature)
3009 : {
3010 6 : if (iField < nFieldCount)
3011 : {
3012 12 : CPLString osValue(pszValues + nLastValid, nLen - nLastValid);
3013 : // +1 because first field is parent id
3014 6 : SetSWEValue(poFeature.get(), iField + 1, osValue);
3015 : // iField ++;
3016 : }
3017 6 : PushFeatureReady(std::move(poFeature), poLayer);
3018 : }
3019 : }
3020 12 : m_nSWEDataArrayLayerIdx++;
3021 : }
3022 :
3023 : /************************************************************************/
3024 : /* ProcessSWEDataRecord() */
3025 : /************************************************************************/
3026 :
3027 5 : void GMLASReader::ProcessSWEDataRecord(CPLXMLNode *psRoot)
3028 : {
3029 5 : CPLStripXMLNamespace(psRoot, "swe", true);
3030 5 : if (m_bInitialPass)
3031 : {
3032 : // Collect existing live features of this layer, so that we can
3033 : // patch them
3034 8 : std::vector<OGRFeature *> apoFeatures;
3035 4 : apoFeatures.push_back(m_oCurCtxt.m_poFeature);
3036 8 : for (auto &feature : m_aoFeaturesReady)
3037 : {
3038 4 : if (feature.second == m_oCurCtxt.m_poLayer)
3039 0 : apoFeatures.push_back(feature.first.get());
3040 : }
3041 4 : m_oCurCtxt.m_poLayer->ProcessDataRecordCreateFields(
3042 : psRoot, apoFeatures, m_poFieldsMetadataLayer);
3043 : }
3044 : else
3045 : {
3046 1 : m_oCurCtxt.m_poLayer->ProcessDataRecordFillFeature(
3047 : psRoot, m_oCurCtxt.m_poFeature);
3048 : }
3049 5 : }
3050 :
3051 : /************************************************************************/
3052 : /* GMLASGetSRSName() */
3053 : /************************************************************************/
3054 :
3055 420 : static const char *GMLASGetSRSName(CPLXMLNode *psNode)
3056 : {
3057 420 : const char *pszSRSName = CPLGetXMLValue(psNode, szSRS_NAME, nullptr);
3058 420 : if (pszSRSName == nullptr)
3059 : {
3060 : // Case of a gml:Point where the srsName is on the gml:pos
3061 340 : pszSRSName = CPLGetXMLValue(psNode, "gml:pos.srsName", nullptr);
3062 : }
3063 420 : return pszSRSName;
3064 : }
3065 :
3066 : /************************************************************************/
3067 : /* AddMissingSRSDimension() */
3068 : /************************************************************************/
3069 :
3070 1 : static void AddMissingSRSDimension(CPLXMLNode *psRoot, int nDefaultSrsDimension)
3071 : {
3072 3 : for (CPLXMLNode *psIter = psRoot->psChild; psIter; psIter = psIter->psNext)
3073 : {
3074 2 : if (psIter->eType == CXT_Element)
3075 : {
3076 1 : if (CPLGetXMLValue(psIter, "srsDimension", nullptr) == nullptr)
3077 : {
3078 1 : if (strcmp(psIter->pszValue, "gml:posList") == 0)
3079 : {
3080 1 : CPLAddXMLAttributeAndValue(
3081 : psIter, "srsDimension",
3082 : CPLSPrintf("%d", nDefaultSrsDimension));
3083 : }
3084 : else
3085 : {
3086 0 : AddMissingSRSDimension(psIter, nDefaultSrsDimension);
3087 : }
3088 : }
3089 : }
3090 : }
3091 1 : }
3092 :
3093 : /************************************************************************/
3094 : /* ProcessGeometry() */
3095 : /************************************************************************/
3096 :
3097 420 : void GMLASReader::ProcessGeometry(CPLXMLNode *psRoot)
3098 : {
3099 : OGRGeomFieldDefn *poGeomFieldDefn =
3100 420 : m_oCurCtxt.m_poFeature->GetGeomFieldDefnRef(m_nCurGeomFieldIdx);
3101 :
3102 420 : if (m_bInitialPass)
3103 : {
3104 228 : const char *pszSRSName = GMLASGetSRSName(psRoot);
3105 228 : if (pszSRSName != nullptr)
3106 : {
3107 : // If we are doing a first pass, store the SRS of the geometry
3108 : // column
3109 100 : if (!m_oSetGeomFieldsWithUnknownSRS.empty() &&
3110 50 : m_oSetGeomFieldsWithUnknownSRS.find(poGeomFieldDefn) !=
3111 100 : m_oSetGeomFieldsWithUnknownSRS.end())
3112 : {
3113 42 : OGRSpatialReference *poSRS = new OGRSpatialReference();
3114 42 : poSRS->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
3115 :
3116 42 : if (poSRS->SetFromUserInput(
3117 : pszSRSName,
3118 : OGRSpatialReference::
3119 42 : SET_FROM_USER_INPUT_LIMITATIONS_get()) ==
3120 : OGRERR_NONE)
3121 : {
3122 42 : m_oMapGeomFieldDefnToSRSName[poGeomFieldDefn] = pszSRSName;
3123 42 : poGeomFieldDefn->SetSpatialRef(poSRS);
3124 : }
3125 42 : poSRS->Release();
3126 42 : m_oSetGeomFieldsWithUnknownSRS.erase(poGeomFieldDefn);
3127 : }
3128 : }
3129 228 : return;
3130 : }
3131 :
3132 193 : if (m_nDefaultSrsDimension != 0 &&
3133 1 : CPLGetXMLValue(psRoot, "srsDimension", nullptr) == nullptr)
3134 : {
3135 1 : AddMissingSRSDimension(psRoot, m_nDefaultSrsDimension);
3136 : }
3137 :
3138 : #ifdef DEBUG_VERBOSE
3139 : {
3140 : char *pszXML = CPLSerializeXMLTree(psRoot);
3141 : CPLDebug("GML", "geometry = %s", pszXML);
3142 : CPLFree(pszXML);
3143 : }
3144 : #endif
3145 :
3146 : auto poGeom = std::unique_ptr<OGRGeometry>(
3147 384 : OGRGeometry::FromHandle(OGR_G_CreateFromGMLTree(psRoot)));
3148 192 : if (poGeom != nullptr)
3149 : {
3150 192 : const char *pszSRSName = GMLASGetSRSName(psRoot);
3151 :
3152 192 : bool bSwapXY = false;
3153 192 : if (pszSRSName != nullptr)
3154 : {
3155 : // Check if the srsName indicates unusual axis order,
3156 : // and if so swap x and y coordinates.
3157 38 : const auto oIter = m_oMapSRSNameToInvertedAxis.find(pszSRSName);
3158 38 : if (oIter == m_oMapSRSNameToInvertedAxis.end())
3159 : {
3160 38 : OGRSpatialReference oSRS;
3161 38 : oSRS.SetFromUserInput(
3162 : pszSRSName,
3163 : OGRSpatialReference::SET_FROM_USER_INPUT_LIMITATIONS_get());
3164 66 : bSwapXY = !STARTS_WITH_CI(pszSRSName, "EPSG:") &&
3165 28 : (CPL_TO_BOOL(oSRS.EPSGTreatsAsLatLong()) ||
3166 2 : CPL_TO_BOOL(oSRS.EPSGTreatsAsNorthingEasting()));
3167 38 : m_oMapSRSNameToInvertedAxis[pszSRSName] = bSwapXY;
3168 : }
3169 : else
3170 : {
3171 0 : bSwapXY = oIter->second;
3172 : }
3173 : }
3174 192 : if ((bSwapXY && m_eSwapCoordinates == GMLAS_SWAP_AUTO) ||
3175 172 : m_eSwapCoordinates == GMLAS_SWAP_YES)
3176 : {
3177 36 : poGeom->swapXY();
3178 : }
3179 :
3180 : // Do we need to do reprojection ?
3181 230 : if (pszSRSName != nullptr &&
3182 230 : poGeomFieldDefn->GetSpatialRef() != nullptr &&
3183 38 : m_oMapGeomFieldDefnToSRSName[poGeomFieldDefn] != pszSRSName)
3184 : {
3185 2 : bool bReprojectionOK = false;
3186 4 : OGRSpatialReference oSRS;
3187 2 : if (oSRS.SetFromUserInput(
3188 : pszSRSName,
3189 : OGRSpatialReference::
3190 2 : SET_FROM_USER_INPUT_LIMITATIONS_get()) == OGRERR_NONE)
3191 : {
3192 : auto poCT = std::unique_ptr<OGRCoordinateTransformation>(
3193 : OGRCreateCoordinateTransformation(
3194 2 : &oSRS, poGeomFieldDefn->GetSpatialRef()));
3195 1 : if (poCT != nullptr)
3196 : {
3197 1 : bReprojectionOK =
3198 1 : (poGeom->transform(poCT.get()) == OGRERR_NONE);
3199 : }
3200 : }
3201 2 : if (!bReprojectionOK)
3202 : {
3203 1 : CPLError(CE_Warning, CPLE_AppDefined,
3204 : "Reprojection from %s to %s failed", pszSRSName,
3205 1 : m_oMapGeomFieldDefnToSRSName[poGeomFieldDefn].c_str());
3206 1 : poGeom.reset();
3207 : }
3208 : #ifdef DEBUG_VERBOSE
3209 : else
3210 : {
3211 : CPLDebug("GMLAS", "Reprojected geometry from %s to %s",
3212 : pszSRSName,
3213 : m_oMapGeomFieldDefnToSRSName[poGeomFieldDefn].c_str());
3214 : }
3215 : #endif
3216 : }
3217 :
3218 192 : if (poGeom != nullptr)
3219 : {
3220 : // Deal with possibly repeated geometries by building
3221 : // a geometry collection. We could also create a
3222 : // nested table, but that would probably be less
3223 : // convenient to use.
3224 : auto poPrevGeom = std::unique_ptr<OGRGeometry>(
3225 382 : m_oCurCtxt.m_poFeature->StealGeometry(m_nCurGeomFieldIdx));
3226 191 : if (poPrevGeom != nullptr)
3227 : {
3228 25 : if (poPrevGeom->getGeometryType() == wkbGeometryCollection)
3229 : {
3230 24 : poPrevGeom->toGeometryCollection()->addGeometryDirectly(
3231 12 : poGeom.release());
3232 12 : poGeom = std::move(poPrevGeom);
3233 : }
3234 : else
3235 : {
3236 26 : auto poGC = std::make_unique<OGRGeometryCollection>();
3237 13 : poGC->addGeometryDirectly(poPrevGeom.release());
3238 13 : poGC->addGeometryDirectly(poGeom.release());
3239 13 : poGeom = std::move(poGC);
3240 : }
3241 : }
3242 191 : poGeom->assignSpatialReference(poGeomFieldDefn->GetSpatialRef());
3243 191 : m_oCurCtxt.m_poFeature->SetGeomFieldDirectly(m_nCurGeomFieldIdx,
3244 : poGeom.release());
3245 : }
3246 : }
3247 : else
3248 : {
3249 0 : char *pszXML = CPLSerializeXMLTree(psRoot);
3250 0 : CPLDebug("GMLAS", "Non-recognized geometry: %s", pszXML);
3251 0 : CPLFree(pszXML);
3252 : }
3253 : }
3254 :
3255 : /************************************************************************/
3256 : /* characters() */
3257 : /************************************************************************/
3258 :
3259 290157 : void GMLASReader::characters(const XMLCh *const chars, const XMLSize_t length)
3260 : {
3261 290157 : bool bTextMemberUpdated = false;
3262 5693 : if (((m_bIsXMLBlob && m_nCurGeomFieldIdx >= 0 && !m_bInitialPass) ||
3263 296307 : m_nSWEDataArrayLevel >= 0 || m_nSWEDataRecordLevel >= 0) &&
3264 : // Check the stack is not empty in case of space chars before the
3265 : // starting node
3266 711 : !m_apsXMLNodeStack.empty())
3267 : {
3268 699 : bTextMemberUpdated = true;
3269 : const CPLString &osText(
3270 699 : transcode(chars, m_osText, static_cast<int>(length)));
3271 :
3272 : // Merge content in current text node if it exists
3273 699 : NodeLastChild &sNodeLastChild = m_apsXMLNodeStack.back();
3274 699 : if (sNodeLastChild.psLastChild != nullptr &&
3275 374 : sNodeLastChild.psLastChild->eType == CXT_Text)
3276 : {
3277 20 : CPLXMLNode *psNode = sNodeLastChild.psLastChild;
3278 20 : const size_t nOldLength = strlen(psNode->pszValue);
3279 : char *pszNewValue = reinterpret_cast<char *>(
3280 20 : VSIRealloc(psNode->pszValue, nOldLength + osText.size() + 1));
3281 20 : if (pszNewValue)
3282 : {
3283 20 : psNode->pszValue = pszNewValue;
3284 20 : memcpy(pszNewValue + nOldLength, osText.c_str(),
3285 20 : osText.size() + 1);
3286 : }
3287 : else
3288 : {
3289 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "Out of memory");
3290 0 : m_bParsingError = true;
3291 20 : }
3292 : }
3293 : // Otherwise create a new text node
3294 : else
3295 : {
3296 : CPLXMLNode *psNode =
3297 679 : reinterpret_cast<CPLXMLNode *>(CPLMalloc(sizeof(CPLXMLNode)));
3298 679 : psNode->eType = CXT_Text;
3299 679 : psNode->pszValue =
3300 679 : reinterpret_cast<char *>(CPLMalloc(osText.size() + 1));
3301 679 : memcpy(psNode->pszValue, osText.c_str(), osText.size() + 1);
3302 679 : psNode->psNext = nullptr;
3303 679 : psNode->psChild = nullptr;
3304 679 : AttachAsLastChild(psNode);
3305 : }
3306 : }
3307 :
3308 290157 : if (!FillTextContent())
3309 : {
3310 90940 : m_osTextContent = "1"; // dummy
3311 90940 : return;
3312 : }
3313 :
3314 199217 : if (m_bIsXMLBlob)
3315 : {
3316 3894 : if (m_nCurFieldIdx >= 0)
3317 : {
3318 : const CPLString &osText(
3319 : bTextMemberUpdated
3320 3894 : ? m_osText
3321 3592 : : transcode(chars, m_osText, static_cast<int>(length)));
3322 :
3323 3894 : char *pszEscaped = CPLEscapeString(
3324 3894 : osText.c_str(), static_cast<int>(osText.size()), CPLES_XML);
3325 : try
3326 : {
3327 3894 : m_osTextContent += pszEscaped;
3328 : }
3329 0 : catch (const std::bad_alloc &)
3330 : {
3331 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "Out of memory");
3332 0 : m_bParsingError = true;
3333 : }
3334 3894 : CPLFree(pszEscaped);
3335 : }
3336 : }
3337 : // Make sure to set content only if we are at the expected nesting level
3338 195323 : else if (m_nLevel == m_nCurFieldLevel)
3339 : {
3340 : const CPLString &osText(
3341 100795 : transcode(chars, m_osText, static_cast<int>(length)));
3342 : try
3343 : {
3344 100795 : m_osTextContent += osText;
3345 : }
3346 0 : catch (const std::bad_alloc &)
3347 : {
3348 0 : CPLError(CE_Failure, CPLE_OutOfMemory, "Out of memory");
3349 0 : m_bParsingError = true;
3350 : }
3351 : }
3352 :
3353 199217 : if (m_osTextContent.size() > m_nMaxContentSize)
3354 : {
3355 0 : CPLError(CE_Failure, CPLE_OutOfMemory,
3356 : "Too much data in a single element");
3357 0 : m_bParsingError = true;
3358 : }
3359 : }
3360 :
3361 : /************************************************************************/
3362 : /* GetNextFeature() */
3363 : /************************************************************************/
3364 :
3365 5417 : OGRFeature *GMLASReader::GetNextFeature(OGRGMLASLayer **ppoBelongingLayer,
3366 : GDALProgressFunc pfnProgress,
3367 : void *pProgressData)
3368 : {
3369 5417 : while (!m_aoFeaturesReady.empty())
3370 : {
3371 3 : auto poFeatureReady = std::move(m_aoFeaturesReady.front().first);
3372 3 : OGRGMLASLayer *poFeatureReadyLayer = m_aoFeaturesReady.front().second;
3373 3 : m_aoFeaturesReady.erase(m_aoFeaturesReady.begin());
3374 :
3375 3 : if (m_poLayerOfInterest == nullptr ||
3376 3 : m_poLayerOfInterest == poFeatureReadyLayer)
3377 : {
3378 3 : if (ppoBelongingLayer)
3379 0 : *ppoBelongingLayer = poFeatureReadyLayer;
3380 3 : return poFeatureReady.release();
3381 : }
3382 : }
3383 :
3384 5414 : if (m_bEOF)
3385 0 : return nullptr;
3386 :
3387 : try
3388 : {
3389 5414 : if (m_bFirstIteration)
3390 : {
3391 1239 : m_bFirstIteration = false;
3392 2478 : if (!m_poSAXReader->parseFirst(*(m_GMLInputSource.get()),
3393 1239 : m_oToFill))
3394 : {
3395 0 : m_bParsingError = true;
3396 0 : m_bEOF = true;
3397 0 : return nullptr;
3398 : }
3399 : }
3400 :
3401 5414 : vsi_l_offset nLastOffset = m_fp->Tell();
3402 617650 : while (m_poSAXReader->parseNext(m_oToFill))
3403 : {
3404 616717 : if (pfnProgress && m_fp->Tell() - nLastOffset > 100 * 1024)
3405 : {
3406 0 : nLastOffset = m_fp->Tell();
3407 0 : double dfPct = -1;
3408 0 : if (m_nFileSize)
3409 0 : dfPct = 1.0 * nLastOffset / m_nFileSize;
3410 0 : if (!pfnProgress(dfPct, "", pProgressData))
3411 : {
3412 0 : m_bInterrupted = true;
3413 0 : break;
3414 : }
3415 : }
3416 616717 : if (m_bParsingError)
3417 0 : break;
3418 :
3419 668800 : while (!m_aoFeaturesReady.empty())
3420 : {
3421 : auto poFeatureReady =
3422 56564 : std::move(m_aoFeaturesReady.front().first);
3423 : OGRGMLASLayer *poFeatureReadyLayer =
3424 56564 : m_aoFeaturesReady.front().second;
3425 56564 : m_aoFeaturesReady.erase(m_aoFeaturesReady.begin());
3426 :
3427 56564 : if (m_poLayerOfInterest == nullptr ||
3428 54126 : m_poLayerOfInterest == poFeatureReadyLayer)
3429 : {
3430 4481 : if (ppoBelongingLayer)
3431 2438 : *ppoBelongingLayer = poFeatureReadyLayer;
3432 :
3433 4481 : if (pfnProgress)
3434 : {
3435 0 : nLastOffset = m_fp->Tell();
3436 0 : double dfPct = -1;
3437 0 : if (m_nFileSize)
3438 0 : dfPct = 1.0 * nLastOffset / m_nFileSize;
3439 0 : if (!pfnProgress(dfPct, "", pProgressData))
3440 : {
3441 0 : m_bInterrupted = true;
3442 0 : m_bEOF = true;
3443 0 : return nullptr;
3444 : }
3445 : }
3446 :
3447 4481 : return poFeatureReady.release();
3448 : }
3449 : }
3450 : }
3451 :
3452 931 : m_bEOF = true;
3453 : }
3454 0 : catch (const XMLException &toCatch)
3455 : {
3456 0 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
3457 0 : transcode(toCatch.getMessage()).c_str());
3458 0 : m_bParsingError = true;
3459 0 : m_bEOF = true;
3460 : }
3461 2 : catch (const SAXException &toCatch)
3462 : {
3463 2 : CPLError(CE_Failure, CPLE_AppDefined, "%s",
3464 4 : transcode(toCatch.getMessage()).c_str());
3465 2 : m_bParsingError = true;
3466 2 : m_bEOF = true;
3467 : }
3468 :
3469 933 : return nullptr;
3470 : }
3471 :
3472 : /************************************************************************/
3473 : /* RunFirstPass() */
3474 : /************************************************************************/
3475 :
3476 112 : bool GMLASReader::RunFirstPass(
3477 : GDALProgressFunc pfnProgress, void *pProgressData, bool bRemoveUnusedLayers,
3478 : bool bRemoveUnusedFields, bool bProcessSWEDataArray,
3479 : OGRLayer *poFieldsMetadataLayer, OGRLayer *poLayersMetadataLayer,
3480 : OGRLayer *poRelationshipsLayer, std::set<CPLString> &aoSetRemovedLayerNames)
3481 : {
3482 112 : m_bInitialPass = true;
3483 112 : m_bProcessSWEDataArray = bProcessSWEDataArray;
3484 112 : m_poFieldsMetadataLayer = poFieldsMetadataLayer;
3485 112 : m_poLayersMetadataLayer = poLayersMetadataLayer;
3486 112 : m_poRelationshipsLayer = poRelationshipsLayer;
3487 :
3488 : // Store in m_oSetGeomFieldsWithUnknownSRS the geometry fields
3489 224 : std::set<OGRGMLASLayer *> oSetUnreferencedLayers;
3490 112 : std::map<OGRGMLASLayer *, std::set<CPLString>> oMapUnusedFields;
3491 14304 : for (auto &poLayer : *m_apoLayers)
3492 : {
3493 14192 : OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();
3494 14192 : oSetUnreferencedLayers.insert(poLayer.get());
3495 14416 : for (int j = 0; j < poFDefn->GetGeomFieldCount(); j++)
3496 : {
3497 224 : m_oSetGeomFieldsWithUnknownSRS.insert(poFDefn->GetGeomFieldDefn(j));
3498 : }
3499 214618 : for (int j = 0; j < poFDefn->GetFieldCount(); j++)
3500 : {
3501 200426 : oMapUnusedFields[poLayer.get()].insert(
3502 200426 : poFDefn->GetFieldDefn(j)->GetNameRef());
3503 : }
3504 : }
3505 :
3506 112 : CPLDebug("GMLAS", "Start of first pass");
3507 :
3508 : // Do we need to do a full scan of the file ?
3509 : const bool bHasURLSpecificRules =
3510 112 : !m_oXLinkResolver.GetConf().m_aoURLSpecificRules.empty();
3511 : const bool bDoFullPass =
3512 62 : (m_bValidate || bRemoveUnusedLayers || bRemoveUnusedFields ||
3513 230 : bHasURLSpecificRules || bProcessSWEDataArray ||
3514 168 : m_oXLinkResolver.GetConf().m_bResolveInternalXLinks);
3515 :
3516 : // Loop on features until we have determined the SRS of all geometry
3517 : // columns, or potentially on the whole file for the above reasons
3518 : OGRGMLASLayer *poLayerFeature;
3519 1770 : while (bDoFullPass || !m_oSetGeomFieldsWithUnknownSRS.empty())
3520 : {
3521 : auto poFeature = std::unique_ptr<OGRFeature>(
3522 1770 : GetNextFeature(&poLayerFeature, pfnProgress, pProgressData));
3523 1770 : if (!poFeature)
3524 112 : break;
3525 1658 : if (bRemoveUnusedLayers)
3526 1 : oSetUnreferencedLayers.erase(poLayerFeature);
3527 1658 : if (bRemoveUnusedFields)
3528 : {
3529 : std::set<CPLString> &oSetUnusedFields =
3530 1 : oMapUnusedFields[poLayerFeature];
3531 1 : OGRFeatureDefn *poFDefn = poLayerFeature->GetLayerDefn();
3532 1 : int nFieldCount = poFDefn->GetFieldCount();
3533 9 : for (int j = 0; j < nFieldCount; j++)
3534 : {
3535 8 : if (poFeature->IsFieldSetAndNotNull(j))
3536 4 : oSetUnusedFields.erase(
3537 4 : poFDefn->GetFieldDefn(j)->GetNameRef());
3538 : }
3539 : }
3540 : }
3541 :
3542 112 : CPLDebug("GMLAS", "End of first pass");
3543 :
3544 112 : ProcessInternalXLinkFirstPass(bRemoveUnusedFields, oMapUnusedFields);
3545 :
3546 112 : if (bRemoveUnusedLayers)
3547 : {
3548 2 : std::vector<std::unique_ptr<OGRGMLASLayer>> apoNewLayers;
3549 5 : for (auto &poLayer : *m_apoLayers)
3550 : {
3551 4 : if (oSetUnreferencedLayers.find(poLayer.get()) ==
3552 8 : oSetUnreferencedLayers.end())
3553 : {
3554 1 : apoNewLayers.emplace_back(std::move(poLayer));
3555 : }
3556 : else
3557 : {
3558 3 : aoSetRemovedLayerNames.insert(poLayer->GetName());
3559 : }
3560 : }
3561 1 : *m_apoLayers = std::move(apoNewLayers);
3562 : }
3563 112 : if (bRemoveUnusedFields)
3564 : {
3565 2 : for (auto &poLayer : *m_apoLayers)
3566 : {
3567 5 : for (const auto &oIter : oMapUnusedFields[poLayer.get()])
3568 : {
3569 8 : poLayer->RemoveField(
3570 4 : poLayer->GetLayerDefn()->GetFieldIndex(oIter));
3571 : }
3572 :
3573 : // We need to run this again since we may have delete the
3574 : // element that holds attributes, like in
3575 : // <foo xsi:nil="true" nilReason="unknown"/> where foo will be
3576 : // eliminated, but foo_nilReason kept.
3577 1 : poLayer->CreateCompoundFoldedMappings();
3578 : }
3579 : }
3580 :
3581 : // Add fields coming from matching URL specific rules
3582 112 : if (bHasURLSpecificRules)
3583 : {
3584 3 : CreateFieldsForURLSpecificRules();
3585 : }
3586 :
3587 : // Clear the set even if we didn't manage to determine all the SRS
3588 112 : m_oSetGeomFieldsWithUnknownSRS.clear();
3589 :
3590 224 : return !m_bInterrupted;
3591 : }
3592 :
3593 : /************************************************************************/
3594 : /* ProcessInternalXLinkFirstPass() */
3595 : /************************************************************************/
3596 :
3597 112 : void GMLASReader::ProcessInternalXLinkFirstPass(
3598 : bool bRemoveUnusedFields,
3599 : std::map<OGRGMLASLayer *, std::set<CPLString>> &oMapUnusedFields)
3600 : {
3601 118 : for (const auto &oIter : m_oMapFieldXPathToLinkValue)
3602 : {
3603 6 : OGRGMLASLayer *poReferringLayer = oIter.first.first;
3604 6 : const CPLString &osReferringField = oIter.first.second;
3605 6 : const std::vector<CPLString> &aosLinks = oIter.second;
3606 12 : std::set<OGRGMLASLayer *> oSetTargetLayers;
3607 15 : for (size_t i = 0; i < aosLinks.size(); i++)
3608 : {
3609 9 : const auto oIter2 = m_oMapElementIdToLayer.find(aosLinks[i]);
3610 9 : if (oIter2 == m_oMapElementIdToLayer.end())
3611 : {
3612 4 : CPLError(CE_Warning, CPLE_AppDefined,
3613 : "%s:%s = '#%s' has no corresponding target "
3614 : "element in this document",
3615 : poReferringLayer->GetName(), osReferringField.c_str(),
3616 4 : aosLinks[i].c_str());
3617 : }
3618 5 : else if (oSetTargetLayers.find(oIter2->second) ==
3619 10 : oSetTargetLayers.end())
3620 : {
3621 4 : OGRGMLASLayer *poTargetLayer = oIter2->second;
3622 4 : oSetTargetLayers.insert(poTargetLayer);
3623 : CPLString osLinkFieldName =
3624 : poReferringLayer->CreateLinkForAttrToOtherLayer(
3625 : osReferringField,
3626 8 : poTargetLayer->GetFeatureClass().GetXPath());
3627 4 : if (bRemoveUnusedFields)
3628 : {
3629 0 : oMapUnusedFields[poReferringLayer].erase(osLinkFieldName);
3630 : }
3631 : }
3632 : }
3633 : }
3634 112 : }
3635 :
3636 : /************************************************************************/
3637 : /* CreateFieldsForURLSpecificRules() */
3638 : /************************************************************************/
3639 :
3640 3 : void GMLASReader::CreateFieldsForURLSpecificRules()
3641 : {
3642 6 : for (const auto &oIter : m_oMapXLinkFields)
3643 : {
3644 3 : OGRGMLASLayer *poLayer = oIter.first;
3645 3 : const auto &oMap2 = oIter.second;
3646 8 : for (const auto &oIter2 : oMap2)
3647 : {
3648 5 : const CPLString &osFieldXPath(oIter2.first);
3649 : // Note that CreateFieldsForURLSpecificRule() running on a previous
3650 : // iteration will have inserted new OGR fields, so we really need
3651 : // to compute that index now.
3652 : const int nFieldIdx =
3653 5 : poLayer->GetOGRFieldIndexFromXPath(osFieldXPath);
3654 5 : CPLAssert(nFieldIdx >= 0);
3655 5 : int nInsertFieldIdx = nFieldIdx + 1;
3656 5 : const auto &oSetRuleIndex = oIter2.second;
3657 12 : for (const auto &nRuleIdx : oSetRuleIndex)
3658 : {
3659 : const GMLASXLinkResolutionConf::URLSpecificResolution &oRule =
3660 7 : m_oXLinkResolver.GetConf().m_aoURLSpecificRules[nRuleIdx];
3661 7 : CreateFieldsForURLSpecificRule(poLayer, nFieldIdx, osFieldXPath,
3662 : nInsertFieldIdx, oRule);
3663 : }
3664 : }
3665 : }
3666 3 : }
3667 :
3668 : /************************************************************************/
3669 : /* CreateFieldsForURLSpecificRule() */
3670 : /************************************************************************/
3671 :
3672 7 : void GMLASReader::CreateFieldsForURLSpecificRule(
3673 : OGRGMLASLayer *poLayer, int nFieldIdx, const CPLString &osFieldXPath,
3674 : int &nInsertFieldIdx,
3675 : const GMLASXLinkResolutionConf::URLSpecificResolution &oRule)
3676 : {
3677 7 : if (oRule.m_eResolutionMode == GMLASXLinkResolutionConf::RawContent)
3678 : {
3679 : const CPLString osRawContentXPath(
3680 : GMLASField::MakeXLinkRawContentFieldXPathFromXLinkHrefXPath(
3681 2 : osFieldXPath));
3682 1 : if (poLayer->GetOGRFieldIndexFromXPath(osRawContentXPath) < 0)
3683 : {
3684 : CPLString osRawContentFieldname(
3685 2 : poLayer->GetLayerDefn()->GetFieldDefn(nFieldIdx)->GetNameRef());
3686 1 : size_t nPos = osRawContentFieldname.find("_href");
3687 1 : if (nPos != std::string::npos)
3688 1 : osRawContentFieldname.resize(nPos);
3689 1 : osRawContentFieldname += "_rawcontent";
3690 1 : OGRFieldDefn oFieldDefnRaw(osRawContentFieldname, OFTString);
3691 1 : poLayer->InsertNewField(nInsertFieldIdx, oFieldDefnRaw,
3692 : osRawContentXPath);
3693 1 : nInsertFieldIdx++;
3694 : }
3695 : }
3696 6 : else if (oRule.m_eResolutionMode ==
3697 : GMLASXLinkResolutionConf::FieldsFromXPath)
3698 : {
3699 26 : for (size_t i = 0; i < oRule.m_aoFields.size(); ++i)
3700 : {
3701 : const CPLString osDerivedFieldXPath(
3702 : GMLASField::MakeXLinkDerivedFieldXPathFromXLinkHrefXPath(
3703 40 : osFieldXPath, oRule.m_aoFields[i].m_osName));
3704 20 : if (poLayer->GetOGRFieldIndexFromXPath(osDerivedFieldXPath) < 0)
3705 : {
3706 18 : CPLString osNewFieldname(poLayer->GetLayerDefn()
3707 18 : ->GetFieldDefn(nFieldIdx)
3708 36 : ->GetNameRef());
3709 18 : size_t nPos = osNewFieldname.find("_href");
3710 18 : if (nPos != std::string::npos)
3711 18 : osNewFieldname.resize(nPos);
3712 18 : osNewFieldname += "_" + oRule.m_aoFields[i].m_osName;
3713 :
3714 18 : OGRFieldType eType = OFTString;
3715 18 : const CPLString &osType(oRule.m_aoFields[i].m_osType);
3716 18 : if (osType == "integer")
3717 6 : eType = OFTInteger;
3718 12 : else if (osType == "long")
3719 2 : eType = OFTInteger64;
3720 10 : else if (osType == "double")
3721 2 : eType = OFTReal;
3722 8 : else if (osType == "dateTime")
3723 2 : eType = OFTDateTime;
3724 :
3725 18 : OGRFieldDefn oFieldDefnRaw(osNewFieldname, eType);
3726 18 : poLayer->InsertNewField(nInsertFieldIdx, oFieldDefnRaw,
3727 : osDerivedFieldXPath);
3728 18 : nInsertFieldIdx++;
3729 : }
3730 : }
3731 : }
3732 7 : }
|