Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: OpenGIS Simple Features Reference Implementation
4 : * Purpose: Implementation of OGC Features and Geometries JSON (JSON-FG)
5 : * Author: Even Rouault <even.rouault at spatialys.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2023, Even Rouault <even.rouault at spatialys.com>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #ifndef OGR_JSONFG_H_INCLUDED
14 : #define OGR_JSONFG_H_INCLUDED
15 :
16 : #include "cpl_vsi_virtual.h"
17 :
18 : #include "gdal_priv.h"
19 : #include "ogrsf_frmts.h"
20 : #include "ogrgeojsonutils.h"
21 : #include "ogrgeojsonwriter.h"
22 : #include "ogrjsoncollectionstreamingparser.h"
23 : #include "ogr_mem.h"
24 : #include "directedacyclicgraph.hpp"
25 :
26 : #include <map>
27 : #include <set>
28 : #include <utility>
29 :
30 : /************************************************************************/
31 : /* OGRJSONFGMemLayer */
32 : /************************************************************************/
33 :
34 : /** Layer with all features ingested into memory. */
35 100 : class OGRJSONFGMemLayer final : public OGRMemLayer
36 : {
37 : public:
38 : OGRJSONFGMemLayer(GDALDataset *poDS, const char *pszName,
39 : OGRSpatialReference *poSRS, OGRwkbGeometryType eGType);
40 : ~OGRJSONFGMemLayer();
41 :
42 1 : const char *GetFIDColumn() override
43 : {
44 1 : return osFIDColumn_.c_str();
45 : }
46 :
47 : int TestCapability(const char *pszCap) override;
48 :
49 0 : void SetFIDColumn(const char *pszName)
50 : {
51 0 : osFIDColumn_ = pszName;
52 0 : }
53 :
54 : void AddFeature(std::unique_ptr<OGRFeature> poFeature);
55 :
56 23 : GDALDataset *GetDataset() override
57 : {
58 23 : return m_poDS;
59 : }
60 :
61 : private:
62 : GDALDataset *m_poDS = nullptr;
63 : std::string osFIDColumn_{};
64 : bool bOriginalIdModified_ = false;
65 :
66 : CPL_DISALLOW_COPY_ASSIGN(OGRJSONFGMemLayer)
67 : };
68 :
69 : /************************************************************************/
70 : /* OGRJSONFGStreamedLayer */
71 : /************************************************************************/
72 :
73 : class OGRJSONFGStreamingParser;
74 :
75 : /** Layer with features being acquired progressively through a streaming
76 : parser.
77 :
78 : Only applies for FeatureCollection read through a file
79 : */
80 : class OGRJSONFGStreamedLayer final
81 : : public OGRLayer,
82 : public OGRGetNextFeatureThroughRaw<OGRJSONFGStreamedLayer>
83 : {
84 : public:
85 : OGRJSONFGStreamedLayer(GDALDataset *poDS, const char *pszName,
86 : OGRSpatialReference *poSRS,
87 : OGRwkbGeometryType eGType);
88 : ~OGRJSONFGStreamedLayer();
89 :
90 : // BEGIN specific public API
91 :
92 : //! Set the FID column name
93 0 : void SetFIDColumn(const char *pszName)
94 : {
95 0 : osFIDColumn_ = pszName;
96 0 : }
97 :
98 : //! Set the total feature count
99 96 : void SetFeatureCount(GIntBig nCount)
100 : {
101 96 : nFeatureCount_ = nCount;
102 96 : }
103 :
104 : /** Set the file handle.
105 :
106 : Must be called before GetNextFeature() is called
107 : */
108 : void SetFile(VSIVirtualHandleUniquePtr &&poFile);
109 :
110 : /** Set the streaming parser
111 :
112 : Must be called before GetNextFeature() is called
113 : */
114 : void SetStreamingParser(
115 : std::unique_ptr<OGRJSONFGStreamingParser> &&poStreamingParser);
116 :
117 : // END specific public API
118 :
119 522 : const char *GetFIDColumn() override
120 : {
121 522 : return osFIDColumn_.c_str();
122 : }
123 :
124 2208 : OGRFeatureDefn *GetLayerDefn() override
125 : {
126 2208 : return poFeatureDefn_;
127 : }
128 :
129 : int TestCapability(const char *pszCap) override;
130 :
131 : GIntBig GetFeatureCount(int bForce) override;
132 :
133 : void ResetReading() override;
134 :
135 310 : DEFINE_GET_NEXT_FEATURE_THROUGH_RAW(OGRJSONFGStreamedLayer)
136 :
137 16 : GDALDataset *GetDataset() override
138 : {
139 16 : return m_poDS;
140 : }
141 :
142 : private:
143 : GDALDataset *m_poDS = nullptr;
144 : OGRFeatureDefn *poFeatureDefn_ = nullptr;
145 : std::string osFIDColumn_{};
146 :
147 : /** Total number of features. */
148 : GIntBig nFeatureCount_ = -1;
149 :
150 : VSIVirtualHandleUniquePtr poFile_{};
151 :
152 : std::unique_ptr<OGRJSONFGStreamingParser> poStreamingParser_{};
153 :
154 : /** Whether a warning has been emitted about feature IDs having been
155 : * modified */
156 : bool bOriginalIdModified_ = false;
157 : /** Set of feature IDs read/allocated up to that point */
158 : std::set<GIntBig> oSetUsedFIDs_{};
159 :
160 : /** Ensure the FID of the feature is unique */
161 : OGRFeature *EnsureUniqueFID(OGRFeature *poFeat);
162 :
163 : /** Return next feature (without filter) */
164 : OGRFeature *GetNextRawFeature();
165 :
166 : CPL_DISALLOW_COPY_ASSIGN(OGRJSONFGStreamedLayer)
167 : };
168 :
169 : /************************************************************************/
170 : /* OGRJSONFGWriteLayer */
171 : /************************************************************************/
172 :
173 : class OGRJSONFGDataset;
174 :
175 : class OGRJSONFGWriteLayer final : public OGRLayer
176 : {
177 : public:
178 : OGRJSONFGWriteLayer(
179 : const char *pszName, const OGRSpatialReference *poSRS,
180 : std::unique_ptr<OGRCoordinateTransformation> &&poCTToWGS84,
181 : const std::string &osCoordRefSys, OGRwkbGeometryType eGType,
182 : CSLConstList papszOptions, OGRJSONFGDataset *poDS);
183 : ~OGRJSONFGWriteLayer();
184 :
185 : //
186 : // OGRLayer Interface
187 : //
188 687 : OGRFeatureDefn *GetLayerDefn() override
189 : {
190 687 : return poFeatureDefn_;
191 : }
192 :
193 1 : OGRSpatialReference *GetSpatialRef() override
194 : {
195 1 : return nullptr;
196 : }
197 :
198 16 : void ResetReading() override
199 : {
200 16 : }
201 :
202 16 : OGRFeature *GetNextFeature() override
203 : {
204 16 : return nullptr;
205 : }
206 :
207 : OGRErr ICreateFeature(OGRFeature *poFeature) override;
208 : OGRErr CreateField(const OGRFieldDefn *poField, int bApproxOK) override;
209 : int TestCapability(const char *pszCap) override;
210 :
211 : OGRErr SyncToDisk() override;
212 :
213 : GDALDataset *GetDataset() override;
214 :
215 : private:
216 : OGRJSONFGDataset *poDS_{};
217 : OGRFeatureDefn *poFeatureDefn_ = nullptr;
218 : std::unique_ptr<OGRCoordinateTransformation> poCTToWGS84_;
219 : bool bIsWGS84CRS_ = false;
220 : bool m_bMustSwapForPlace = false;
221 : int nOutCounter_ = 0;
222 : std::string osCoordRefSys_{};
223 :
224 : OGRGeoJSONWriteOptions oWriteOptions_{};
225 : OGRGeoJSONWriteOptions oWriteOptionsPlace_{};
226 : bool bWriteFallbackGeometry_ = true;
227 :
228 : CPL_DISALLOW_COPY_ASSIGN(OGRJSONFGWriteLayer)
229 : };
230 :
231 : /************************************************************************/
232 : /* OGRJSONFGDataset */
233 : /************************************************************************/
234 :
235 : class OGRJSONFGReader;
236 :
237 : class OGRJSONFGDataset final : public GDALDataset
238 : {
239 : public:
240 224 : OGRJSONFGDataset() = default;
241 : ~OGRJSONFGDataset();
242 :
243 : bool Open(GDALOpenInfo *poOpenInfo, GeoJSONSourceType nSrcType);
244 : bool Create(const char *pszName, CSLConstList papszOptions);
245 :
246 43 : int GetLayerCount() override
247 : {
248 43 : return static_cast<int>(apoLayers_.size());
249 : }
250 :
251 : OGRLayer *GetLayer(int i) override;
252 :
253 : //! Return the output file handle. Used by OGRJSONFGWriteLayer
254 230 : VSILFILE *GetOutputFile() const
255 : {
256 230 : return fpOut_;
257 : }
258 :
259 : /** Return whether there is a single output layer.
260 : * Used by OGRJSONFGWriteLayer
261 : */
262 240 : bool IsSingleOutputLayer() const
263 : {
264 240 : return bSingleOutputLayer_;
265 : }
266 :
267 : //! Return whether the output file is seekable
268 2 : bool GetFpOutputIsSeekable() const
269 : {
270 2 : return bFpOutputIsSeekable_;
271 : }
272 :
273 : void BeforeCreateFeature();
274 :
275 : OGRLayer *ICreateLayer(const char *pszName,
276 : const OGRGeomFieldDefn *poGeomFieldDefn,
277 : CSLConstList papszOptions) override;
278 :
279 : int TestCapability(const char *pszCap) override;
280 :
281 : OGRErr SyncToDiskInternal();
282 :
283 : protected:
284 : friend class OGRJSONFGReader;
285 : OGRJSONFGMemLayer *AddLayer(std::unique_ptr<OGRJSONFGMemLayer> &&poLayer);
286 : OGRJSONFGStreamedLayer *
287 : AddLayer(std::unique_ptr<OGRJSONFGStreamedLayer> &&poLayer);
288 :
289 : private:
290 : char *pszGeoData_ = nullptr;
291 : size_t nGeoDataLen_ = 0;
292 : std::vector<std::unique_ptr<OGRLayer>> apoLayers_{};
293 : std::unique_ptr<OGRJSONFGReader> poReader_{};
294 :
295 : // Write side
296 : VSILFILE *fpOut_ = nullptr;
297 : bool bSingleOutputLayer_ = false;
298 : bool bHasEmittedFeatures_ = false;
299 : bool bFpOutputIsSeekable_ = false;
300 :
301 : /** Offset at which the '] }' terminating sequence has already been
302 : * written by SyncToDisk(). 0 if it has not been written.
303 : */
304 : vsi_l_offset m_nPositionBeforeFCClosed = 0;
305 :
306 : bool ReadFromFile(GDALOpenInfo *poOpenInfo, const char *pszUnprefixed);
307 : bool ReadFromService(GDALOpenInfo *poOpenInfo, const char *pszSource);
308 :
309 : void FinishWriting();
310 :
311 : bool EmitStartFeaturesIfNeededAndReturnIfFirstFeature();
312 :
313 : CPL_DISALLOW_COPY_ASSIGN(OGRJSONFGDataset)
314 : };
315 :
316 : /************************************************************************/
317 : /* OGRJSONFGReader */
318 : /************************************************************************/
319 :
320 : class OGRJSONFGReader
321 : {
322 : public:
323 147 : OGRJSONFGReader() = default;
324 : ~OGRJSONFGReader();
325 :
326 : /** Load all features from the passed in JSON text in OGRJSONFGMemLayer(s)
327 : *
328 : * This method should only be called once, and is exclusive with
329 : * AnalyzeWithStreamingParser()
330 : */
331 : bool Load(OGRJSONFGDataset *poDS, const char *pszText,
332 : const std::string &osDefaultLayerName);
333 :
334 : /** Do a first pass analysis of the content of the passed file to create
335 : * OGRJSONFGStreamedLayer's
336 : *
337 : * It is the responsibility of the caller to call
338 : * SetFile() and SetStreamingParser() on the created layers afterwards
339 : *
340 : * This method should only be called once, and is exclusive with
341 : * Load()
342 : */
343 : bool AnalyzeWithStreamingParser(OGRJSONFGDataset *poDS, VSILFILE *fp,
344 : const std::string &osDefaultLayerName,
345 : bool &bCanTryWithNonStreamingParserOut);
346 :
347 : /** Geometry element we are interested in. */
348 : enum class GeometryElement
349 : {
350 : /** Use "place" when possible, fallback to "geometry" otherwise. */
351 : AUTO,
352 : /** Only use "place" */
353 : PLACE,
354 : /** Only use "geometry" */
355 : GEOMETRY,
356 : };
357 :
358 : /** Sets the geometry element we are interested in. */
359 6 : void SetGeometryElement(GeometryElement elt)
360 : {
361 6 : eGeometryElement_ = elt;
362 6 : }
363 :
364 : /** Returns a OGRFeature built from the passed in JSON object.
365 : *
366 : * @param poObj JSON feature
367 : * @param pszRequestedLayer name of the layer of interest, or nullptr if
368 : * no filtering needed on the layer name. If the feature does not belong
369 : * to the requested layer, nullptr is returned.
370 : * @param pOutMemLayer Pointer to the OGRJSONFGMemLayer* layer to which
371 : * the returned feature belongs to. May be nullptr. Only applies when
372 : * the Load() method has been used.
373 : * @param pOutStreamedLayer Pointer to the OGRJSONFGStreamedLayer* layer to
374 : * which the returned feature belongs to. May be nullptr. Only applies when
375 : * the AnalyzeWithStreamingParser() method has been used.
376 : */
377 : std::unique_ptr<OGRFeature>
378 : ReadFeature(json_object *poObj, const char *pszRequestedLayer,
379 : OGRJSONFGMemLayer **pOutMemLayer,
380 : OGRJSONFGStreamedLayer **pOutStreamedLayer);
381 :
382 : protected:
383 : friend class OGRJSONFGStreamingParser;
384 :
385 : bool GenerateLayerDefnFromFeature(json_object *poObj);
386 :
387 : private:
388 : GeometryElement eGeometryElement_ = GeometryElement::AUTO;
389 :
390 : OGRJSONFGDataset *poDS_ = nullptr;
391 : std::string osDefaultLayerName_{};
392 : json_object *poObject_ = nullptr;
393 :
394 : bool bFlattenNestedAttributes_ = false;
395 : char chNestedAttributeSeparator_ = 0;
396 : bool bArrayAsString_ = false;
397 : bool bDateAsString_ = false;
398 :
399 : /** Layer building context, specific to one layer. */
400 158 : struct LayerDefnBuildContext
401 : {
402 : //! Maps a field name to its index in apoFieldDefn[]
403 : std::map<std::string, int> oMapFieldNameToIdx{};
404 :
405 : //! Vector of OGRFieldDefn
406 : std::vector<std::unique_ptr<OGRFieldDefn>> apoFieldDefn{};
407 :
408 : //! Directed acyclic graph used to build the order of fields.
409 : gdal::DirectedAcyclicGraph<int, std::string> dag{};
410 :
411 : /** Set of indices of apoFieldDefn[] for which no type information is
412 : * known yet. */
413 : std::set<int> aoSetUndeterminedTypeFields{};
414 :
415 : //! Whether at least one feature has a "coordRefSys" member.
416 : bool bHasCoordRefSysAtFeatureLevel = false;
417 :
418 : /** CRS object corresponding to "coordRefsys" member at feature level.
419 : * Only set if homogeneous among features.
420 : */
421 : std::unique_ptr<OGRSpatialReference> poCRSAtFeatureLevel{};
422 :
423 : /** Serialized JSON value of "coordRefsys" member at feature level.
424 : * Only set if homogeneous among features.
425 : */
426 : std::string osCoordRefSysAtFeatureLevel{};
427 :
428 : /** Whether to switch X/Y ordinates in geometries appearing in "place"
429 : * element. Only applies to CRS at layer level.
430 : */
431 : bool bSwapPlacesXY = false;
432 :
433 : //! Whether the layer CRS is WGS 84.
434 : bool bLayerCRSIsWGS84 = false;
435 :
436 : //! Coordinate transformation from WGS 84 to layer CRS (might be null)
437 : std::unique_ptr<OGRCoordinateTransformation> poCTWGS84ToLayerCRS{};
438 :
439 : /** Feature count */
440 : GIntBig nFeatureCount = 0;
441 :
442 : //! Whether the Feature.id should be mapped to a OGR field.
443 : bool bFeatureLevelIdAsAttribute = false;
444 :
445 : //! Whether the Feature.id should be mapped to a OGR FID.
446 : bool bFeatureLevelIdAsFID = false;
447 :
448 : //! Whether 64-bit integers are needed for OGR FID.
449 : bool bNeedFID64 = false;
450 :
451 : //! Whether detection of layer geometry type is still needed.
452 : bool bDetectLayerGeomType = true;
453 :
454 : //! Whether no geometry has been analyzed yet.
455 : bool bFirstGeometry = true;
456 :
457 : //! Layer geometry type.
458 : OGRwkbGeometryType eLayerGeomType = wkbUnknown;
459 :
460 : //! Whether a Feature.time.date element has been found.
461 : bool bHasTimeDate = false;
462 :
463 : //! Whether a Feature.time.timestamp element has been found.
464 : bool bHasTimeTimestamp = false;
465 :
466 : /** Whether a Feature.time.interval[0] element of type timestamp has
467 : * been found */
468 : bool bHasTimeIntervalStartTimestamp = false;
469 :
470 : /** Whether a Feature.time.interval[0] element of type date has
471 : * been found */
472 : bool bHasTimeIntervalStartDate = false;
473 :
474 : /** Whether a Feature.time.interval[1] element of type timestamp has
475 : * been found */
476 : bool bHasTimeIntervalEndTimestamp = false;
477 :
478 : /** Whether a Feature.time.interval[1] element of type date has
479 : * been found */
480 : bool bHasTimeIntervalEndDate = false;
481 :
482 : //! Index of OGR field "time" / "jsonfg_time"
483 : int nIdxFieldTime = -1;
484 :
485 : //! Index of OGR field "time_start" / "jsonfg_time_start"
486 : int nIdxFieldTimeStart = -1;
487 :
488 : //! Index of OGR field "time_end" / "jsonfg_time_end"
489 : int nIdxFieldTimeEnd = -1;
490 :
491 : //! Corresponding OGRJSONFGMemLayer (only for Load() ingestion mode)
492 : OGRJSONFGMemLayer *poMemLayer = nullptr;
493 :
494 : /** Corresponding OGRJSONFGStreamedLayer(only for
495 : * AnalyzeWithStreamingParser() mode) */
496 : OGRJSONFGStreamedLayer *poStreamedLayer = nullptr;
497 :
498 304 : LayerDefnBuildContext() = default;
499 12 : LayerDefnBuildContext(LayerDefnBuildContext &&) = default;
500 : LayerDefnBuildContext &operator=(LayerDefnBuildContext &&) = default;
501 :
502 : private:
503 : CPL_DISALLOW_COPY_ASSIGN(LayerDefnBuildContext)
504 : };
505 :
506 : //! Maps a layer name to its build context
507 : std::map<std::string, LayerDefnBuildContext> oMapBuildContext_{};
508 :
509 : //
510 : // Copy operations not supported.
511 : //
512 : CPL_DISALLOW_COPY_ASSIGN(OGRJSONFGReader)
513 :
514 : const char *GetLayerNameForFeature(json_object *poObj) const;
515 : bool GenerateLayerDefns();
516 : bool FinalizeGenerateLayerDefns(bool bStreamedLayer);
517 : void FinalizeBuildContext(LayerDefnBuildContext &oBuildContext,
518 : const char *pszLayerName, bool bStreamedLayer,
519 : bool bInvalidCRS, bool bSwapPlacesXYTopLevel,
520 : OGRSpatialReference *poSRSTopLevel);
521 : };
522 :
523 : /************************************************************************/
524 : /* OGRJSONFGStreamingParser */
525 : /************************************************************************/
526 :
527 : /** FeatureCollection streaming parser. */
528 490 : class OGRJSONFGStreamingParser final : public OGRJSONCollectionStreamingParser
529 : {
530 : OGRJSONFGReader &m_oReader;
531 : std::string m_osRequestedLayer{};
532 :
533 : std::vector<std::pair<std::unique_ptr<OGRFeature>, OGRLayer *>>
534 : m_apoFeatures{};
535 : size_t m_nCurFeatureIdx = 0;
536 :
537 : CPL_DISALLOW_COPY_ASSIGN(OGRJSONFGStreamingParser)
538 :
539 : protected:
540 : void GotFeature(json_object *poObj, bool bFirstPass,
541 : const std::string &osJson) override;
542 : void TooComplex() override;
543 :
544 : public:
545 : OGRJSONFGStreamingParser(OGRJSONFGReader &oReader, bool bFirstPass);
546 : ~OGRJSONFGStreamingParser();
547 :
548 96 : void SetRequestedLayer(const char *pszRequestedLayer)
549 : {
550 96 : m_osRequestedLayer = pszRequestedLayer;
551 96 : }
552 :
553 : std::unique_ptr<OGRJSONFGStreamingParser> Clone();
554 :
555 : std::pair<std::unique_ptr<OGRFeature>, OGRLayer *> GetNextFeature();
556 : };
557 :
558 : bool OGRJSONFGMustSwapXY(const OGRSpatialReference *poSRS);
559 :
560 : #endif // OGR_JSONFG_H_INCLUDED
|