Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL
4 : * Purpose: gdal "vector create" subcommand
5 : * Author: Alessandro Pasotti <elpaso at itopen dot it>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2026, Alessandro Pasotti <elpaso at itopen dot it>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include <regex>
14 : #include "gdalalg_vector_create.h"
15 : #include "gdal_utils.h"
16 : #include "ogr_schema_override.h"
17 :
18 : //! @cond Doxygen_Suppress
19 :
20 : #ifndef _
21 : #define _(x) (x)
22 : #endif
23 :
24 : /************************************************************************/
25 : /* GDALVectorCreateAlgorithm::GDALVectorCreateAlgorithm() */
26 : /************************************************************************/
27 :
28 120 : GDALVectorCreateAlgorithm::GDALVectorCreateAlgorithm(bool standaloneStep)
29 : : GDALVectorPipelineStepAlgorithm(
30 : NAME, DESCRIPTION, HELP_URL,
31 0 : ConstructorOptions()
32 120 : .SetStandaloneStep(standaloneStep)
33 240 : .SetOutputFormatCreateCapability(GDAL_DCAP_CREATE)
34 :
35 : // Remove defaults because input is the optional template
36 120 : .SetAddDefaultArguments(false)
37 :
38 : // For --like input template
39 120 : .SetAutoOpenInputDatasets(true)
40 240 : .SetInputDatasetAlias("like")
41 120 : .SetInputDatasetRequired(false)
42 120 : .SetInputDatasetPositional(false)
43 120 : .SetInputDatasetMaxCount(1)
44 240 : .SetInputDatasetMetaVar("TEMPLATE-DATASET")
45 :
46 : // Remove arguments that don't make sense in a create context
47 : // Note: this is required despite SetAddDefaultArguments(false)
48 120 : .SetAddUpsertArgument(false)
49 120 : .SetAddSkipErrorsArgument(false)
50 360 : .SetAddAppendLayerArgument(false))
51 : {
52 :
53 120 : AddVectorInputArgs(false);
54 120 : AddVectorOutputArgs(/* hiddenForCLI = */ false,
55 : /* shortNameOutputLayerAllowed=*/false);
56 120 : AddGeometryTypeArg(&m_geometryType, _("Layer geometry type"));
57 :
58 : // Add optional geometry field name argument, not all drivers support it, and if not specified, the default "geom" name will be used.
59 : auto &geomFieldNameArg =
60 : AddArg("geometry-field", 0,
61 : _("Name of the geometry field to create (if supported by the "
62 : "output format)"),
63 240 : &m_geometryFieldName)
64 240 : .SetMetaVar("GEOMETRY-FIELD")
65 120 : .SetDefault(m_geometryFieldName);
66 :
67 240 : AddArg("crs", 0, _("Set CRS"), &m_crs)
68 240 : .AddHiddenAlias("srs")
69 120 : .SetIsCRSArg(/*noneAllowed=*/false);
70 :
71 120 : AddArg("fid", 0, _("FID column name"), &m_fidColumnName);
72 :
73 120 : constexpr auto inputMutexGroup = "like-schema-field";
74 :
75 : // Apply mutex to GDAL_ARG_NAME_INPUT
76 : // This is hackish and I really don't like const_cast but I couldn't find another way.
77 : const_cast<GDALAlgorithmArgDecl &>(
78 240 : GetArg(GDAL_ARG_NAME_INPUT)->GetDeclaration())
79 120 : .SetMutualExclusionGroup(inputMutexGroup);
80 :
81 : // Add --schema argument to read OGR_SCHEMA and populate field definitions from it. It is mutually exclusive with --like and --field arguments.
82 : AddArg("schema", 0,
83 : _("Read OGR_SCHEMA and populate field definitions from it"),
84 240 : &m_schemaJsonOrPath)
85 240 : .SetMetaVar("SCHEMA_JSON")
86 120 : .SetRepeatedArgAllowed(false)
87 120 : .SetMutualExclusionGroup(inputMutexGroup);
88 :
89 : // Add field definition argument
90 : AddFieldDefinitionArg(&m_fieldStrDefinitions, &m_fieldDefinitions,
91 120 : _("Add a field definition to the output layer"))
92 240 : .SetMetaVar("<NAME>:<TYPE>[(,<WIDTH>[,<PRECISION>])]")
93 120 : .SetPackedValuesAllowed(false)
94 120 : .SetRepeatedArgAllowed(true)
95 120 : .SetMutualExclusionGroup(inputMutexGroup);
96 :
97 120 : AddValidationAction(
98 175 : [this, &geomFieldNameArg]()
99 : {
100 63 : if ((!m_schemaJsonOrPath.empty() || !m_inputDataset.empty()) &&
101 32 : ((!m_geometryFieldName.empty() &&
102 16 : geomFieldNameArg.IsExplicitlySet()) ||
103 15 : !m_geometryType.empty() || !m_fieldDefinitions.empty() ||
104 13 : !m_crs.empty() || !m_fidColumnName.empty()))
105 : {
106 5 : ReportError(CE_Failure, CPLE_AppDefined,
107 : "When --schema or --like is specified, "
108 : "--geometry-field, --geometry-type, --field, "
109 : "--crs and --fid options must not be specified.");
110 5 : return false;
111 : }
112 42 : return true;
113 : });
114 120 : }
115 :
116 : /************************************************************************/
117 : /* GDALVectorCreateAlgorithm::RunStep() */
118 : /************************************************************************/
119 :
120 38 : bool GDALVectorCreateAlgorithm::RunStep(GDALPipelineStepRunContext &)
121 : {
122 :
123 38 : const std::string &datasetName = m_outputDataset.GetName();
124 : const std::string outputLayerName =
125 38 : m_outputLayerName.empty() ? CPLGetBasenameSafe(datasetName.c_str())
126 76 : : m_outputLayerName;
127 :
128 38 : std::unique_ptr<GDALDataset> poDstDS;
129 38 : poDstDS.reset(GDALDataset::Open(datasetName.c_str(),
130 : GDAL_OF_VECTOR | GDAL_OF_UPDATE, nullptr,
131 : nullptr, nullptr));
132 :
133 38 : if (poDstDS && !m_update)
134 : {
135 0 : ReportError(CE_Failure, CPLE_AppDefined,
136 : "Dataset %s already exists. Specify the "
137 : "--%s option to open it in update mode.",
138 : datasetName.c_str(), GDAL_ARG_NAME_UPDATE);
139 0 : return false;
140 : }
141 :
142 38 : GDALDataset *poSrcDS = m_inputDataset.empty()
143 38 : ? nullptr
144 6 : : m_inputDataset.front().GetDatasetRef();
145 :
146 76 : OGRSchemaOverride oSchemaOverride;
147 :
148 10 : const auto loadJSON = [this,
149 10 : &oSchemaOverride](const std::string &source) -> bool
150 : {
151 : // This error count is necessary because LoadFromJSON tries to load
152 : // the content as a file first (and set an error it if fails) then tries
153 : // to load as a JSON string but even if it succeeds an error is still
154 : // set and not cleared.
155 10 : const auto nErrorCount = CPLGetErrorCounter();
156 10 : if (!oSchemaOverride.LoadFromJSON(source,
157 : /* allowGeometryFields */ true))
158 : {
159 : // Get the last error message and report it, since LoadFromJSON doesn't do it itself.
160 0 : if (nErrorCount != CPLGetErrorCounter())
161 : {
162 0 : const std::string lastErrorMsg = CPLGetLastErrorMsg();
163 0 : CPLErrorReset();
164 0 : ReportError(CE_Failure, CPLE_AppDefined,
165 : "Cannot parse OGR_SCHEMA: %s.",
166 : lastErrorMsg.c_str());
167 : }
168 : else
169 : {
170 0 : ReportError(CE_Failure, CPLE_AppDefined,
171 : "Cannot parse OGR_SCHEMA (unknown error).");
172 : }
173 0 : return false;
174 : }
175 10 : else if (nErrorCount != CPLGetErrorCounter())
176 : {
177 10 : CPLErrorReset();
178 : }
179 10 : return true;
180 38 : };
181 :
182 : // Use the input dataset as to create an OGR_SCHEMA
183 38 : if (poSrcDS)
184 : {
185 : // Export the schema using GDALVectorInfo
186 6 : CPLStringList aosOptions;
187 :
188 6 : aosOptions.AddString("-schema");
189 :
190 : // Must be last, as positional
191 6 : aosOptions.AddString("dummy");
192 6 : aosOptions.AddString("-al");
193 :
194 : GDALVectorInfoOptions *psInfo =
195 6 : GDALVectorInfoOptionsNew(aosOptions.List(), nullptr);
196 :
197 6 : char *ret = GDALVectorInfo(GDALDataset::ToHandle(poSrcDS), psInfo);
198 6 : GDALVectorInfoOptionsFree(psInfo);
199 6 : if (!ret)
200 0 : return false;
201 :
202 6 : if (!loadJSON(ret))
203 : {
204 0 : CPLFree(ret);
205 0 : return false;
206 : }
207 6 : CPLFree(ret);
208 : }
209 32 : else if (!m_schemaJsonOrPath.empty() && !loadJSON(m_schemaJsonOrPath))
210 : {
211 0 : return false;
212 : }
213 :
214 38 : if (m_standaloneStep)
215 : {
216 38 : if (m_format.empty())
217 : {
218 : const auto aosFormats =
219 : CPLStringList(GDALGetOutputDriversForDatasetName(
220 30 : m_outputDataset.GetName().c_str(), GDAL_OF_VECTOR,
221 : /* bSingleMatch = */ true,
222 30 : /* bWarn = */ true));
223 30 : if (aosFormats.size() != 1)
224 : {
225 0 : ReportError(CE_Failure, CPLE_AppDefined,
226 : "Cannot guess driver for %s",
227 0 : m_outputDataset.GetName().c_str());
228 0 : return false;
229 : }
230 30 : m_format = aosFormats[0];
231 : }
232 : }
233 : else
234 : {
235 0 : m_format = "MEM";
236 : }
237 :
238 : auto poDstDriver =
239 38 : GetGDALDriverManager()->GetDriverByName(m_format.c_str());
240 38 : if (!poDstDriver)
241 : {
242 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot find driver %s.",
243 : m_format.c_str());
244 0 : return false;
245 : }
246 :
247 38 : if (!poDstDS)
248 26 : poDstDS.reset(poDstDriver->Create(datasetName.c_str(), 0, 0, 0,
249 : GDT_Unknown,
250 52 : CPLStringList(m_creationOptions)));
251 :
252 38 : if (!poDstDS)
253 : {
254 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot create dataset %s.",
255 : datasetName.c_str());
256 0 : return false;
257 : }
258 :
259 : // An OGR_SCHEMA has been provided
260 38 : if (!oSchemaOverride.GetLayerOverrides().empty())
261 : {
262 : // Checks if input layer names were specified and the layers exists in the schema
263 10 : if (!m_inputLayerNames.empty())
264 : {
265 12 : for (const auto &inputLayerName : m_inputLayerNames)
266 : {
267 7 : if (!oSchemaOverride.GetLayerOverride(inputLayerName).IsValid())
268 : {
269 0 : ReportError(CE_Failure, CPLE_AppDefined,
270 : "The specified input layer name '%s' doesn't "
271 : "exist in the provided template or schema.",
272 : inputLayerName.c_str());
273 0 : return false;
274 : }
275 : }
276 : }
277 :
278 : // If there are multiple layers check if the destination format supports
279 : // multiple layers, and if not, error out.
280 10 : if (oSchemaOverride.GetLayerOverrides().size() > 1 &&
281 6 : !GDALGetMetadataItem(poDstDriver, GDAL_DCAP_MULTIPLE_VECTOR_LAYERS,
282 16 : nullptr) &&
283 2 : m_inputLayerNames.size() != 1)
284 : {
285 1 : ReportError(CE_Failure, CPLE_AppDefined,
286 : "The output format %s doesn't support multiple layers.",
287 1 : poDstDriver->GetDescription());
288 1 : return false;
289 : }
290 :
291 : // If output layer name was specified and there is more than one layer in the schema,
292 : // error out since we won't know which layer to apply it to
293 11 : if (!m_outputLayerName.empty() &&
294 11 : oSchemaOverride.GetLayerOverrides().size() > 1 &&
295 2 : m_inputLayerNames.size() != 1)
296 : {
297 1 : ReportError(CE_Failure, CPLE_AppDefined,
298 : "Output layer name should not be specified when there "
299 : "are multiple layers in the schema.");
300 1 : return false;
301 : }
302 :
303 8 : std::vector<std::string> layersToBeCreated;
304 23 : for (const auto &oLayerOverride : oSchemaOverride.GetLayerOverrides())
305 : {
306 :
307 26 : if (!m_inputLayerNames.empty() &&
308 0 : std::find(m_inputLayerNames.begin(), m_inputLayerNames.end(),
309 11 : oLayerOverride.GetLayerName()) ==
310 26 : m_inputLayerNames.end())
311 : {
312 : // This layer is not in the list of input layers to consider, so skip it
313 6 : continue;
314 : }
315 9 : layersToBeCreated.push_back(oLayerOverride.GetLayerName());
316 : }
317 :
318 : // Loop over layers in the OGR_SCHEMA and create them
319 17 : for (const auto &layerToCreate : layersToBeCreated)
320 : {
321 : const auto &oLayerOverride =
322 9 : oSchemaOverride.GetLayerOverride(layerToCreate);
323 9 : if (!oLayerOverride.IsValid())
324 : {
325 0 : ReportError(CE_Failure, CPLE_AppDefined,
326 : "Invalid layer override for layer '%s'.",
327 : layerToCreate.c_str());
328 0 : return false;
329 : }
330 :
331 : // We can use the defined layer name only if there is a single layer to be created
332 : const std::string userSpecifiedNewName =
333 9 : !m_outputLayerName.empty() ? m_outputLayerName
334 9 : : oLayerOverride.GetLayerName();
335 : const std::string outputLayerNewName =
336 11 : layersToBeCreated.size() > 1 ? oLayerOverride.GetLayerName()
337 11 : : userSpecifiedNewName;
338 :
339 9 : if (!CreateLayer(poDstDS.get(), outputLayerNewName,
340 : oLayerOverride.GetFIDColumnName(),
341 18 : oLayerOverride.GetFieldDefinitions(),
342 18 : oLayerOverride.GetGeomFieldDefinitions()))
343 : {
344 0 : ReportError(CE_Failure, CPLE_AppDefined,
345 : "Cannot create layer '%s'",
346 0 : oLayerOverride.GetLayerName().c_str());
347 0 : return false;
348 : }
349 : }
350 : }
351 : else
352 : {
353 28 : std::vector<OGRGeomFieldDefn> geometryFieldDefinitions;
354 28 : if (!m_geometryType.empty())
355 : {
356 : const OGRwkbGeometryType eDstType =
357 26 : OGRFromOGCGeomType(m_geometryType.c_str());
358 27 : if (eDstType == wkbUnknown &&
359 1 : !STARTS_WITH_CI(m_geometryType.c_str(), "GEOMETRY"))
360 : {
361 0 : ReportError(CE_Failure, CPLE_AppDefined,
362 : "Unsupported geometry type: '%s'.",
363 : m_geometryType.c_str());
364 0 : return false;
365 : }
366 : else
367 : {
368 : OGRGeomFieldDefn oGeomFieldDefn(m_geometryFieldName.c_str(),
369 26 : eDstType);
370 : std::unique_ptr<OGRSpatialReference,
371 : OGRSpatialReferenceReleaser>
372 0 : poSRS;
373 26 : poSRS.reset(std::make_unique<OGRSpatialReference>().release());
374 26 : if (!m_crs.empty())
375 : {
376 25 : if (poSRS->SetFromUserInput(m_crs.c_str()) != OGRERR_NONE)
377 : {
378 0 : ReportError(CE_Failure, CPLE_AppDefined,
379 : "Cannot parse CRS definition: '%s'.",
380 : m_crs.c_str());
381 0 : return false;
382 : }
383 : else
384 : {
385 25 : oGeomFieldDefn.SetSpatialRef(poSRS.get());
386 : }
387 : }
388 26 : geometryFieldDefinitions.push_back(std::move(oGeomFieldDefn));
389 : }
390 : }
391 :
392 28 : if (!CreateLayer(poDstDS.get(), outputLayerName, m_fidColumnName,
393 56 : GetOutputFields(), geometryFieldDefinitions))
394 : {
395 1 : ReportError(CE_Failure, CPLE_AppDefined,
396 : "Cannot create layer '%s'.", outputLayerName.c_str());
397 1 : return false;
398 : }
399 : }
400 :
401 35 : m_outputDataset.Set(std::move(poDstDS));
402 35 : return true;
403 : }
404 :
405 : /************************************************************************/
406 : /* GDALVectorCreateAlgorithm::RunImpl() */
407 : /************************************************************************/
408 38 : bool GDALVectorCreateAlgorithm::RunImpl(GDALProgressFunc pfnProgress,
409 : void *pProgressData)
410 : {
411 38 : GDALPipelineStepRunContext stepCtxt;
412 38 : stepCtxt.m_pfnProgress = pfnProgress;
413 38 : stepCtxt.m_pProgressData = pProgressData;
414 38 : return RunPreStepPipelineValidations() && RunStep(stepCtxt);
415 : }
416 :
417 : /************************************************************************/
418 : /* GDALVectorCreateAlgorithm::GetOutputFields() */
419 : /************************************************************************/
420 28 : std::vector<OGRFieldDefn> GDALVectorCreateAlgorithm::GetOutputFields() const
421 : {
422 : // This is where we will eventually implement override logic to modify field
423 : // definitions based on input dataset and/or OGR_SCHEMA, but for now we just
424 : // return the field definitions as specified by the user through --field arguments.
425 28 : return m_fieldDefinitions;
426 : }
427 :
428 : /************************************************************************/
429 : /* GDALVectorCreateAlgorithm::CreateLayer() */
430 : /************************************************************************/
431 37 : bool GDALVectorCreateAlgorithm::CreateLayer(
432 : GDALDataset *poDstDS, const std::string &layerName,
433 : const std::string &fidColumnName,
434 : const std::vector<OGRFieldDefn> &fieldDefinitions,
435 : const std::vector<OGRGeomFieldDefn> &geometryFieldDefinitions) const
436 : {
437 :
438 37 : auto poDstLayer = poDstDS->GetLayerByName(layerName.c_str());
439 :
440 37 : if (poDstLayer)
441 : {
442 1 : if (GetOverwriteLayer())
443 : {
444 1 : int iLayer = -1;
445 1 : const int nLayerCount = poDstDS->GetLayerCount();
446 2 : for (iLayer = 0; iLayer < nLayerCount; iLayer++)
447 : {
448 2 : if (poDstDS->GetLayer(iLayer) == poDstLayer)
449 1 : break;
450 : }
451 :
452 1 : if (iLayer < nLayerCount)
453 : {
454 1 : if (poDstDS->DeleteLayer(iLayer) != OGRERR_NONE)
455 : {
456 0 : ReportError(CE_Failure, CPLE_AppDefined,
457 : "Cannot delete layer '%s'.", layerName.c_str());
458 0 : return false;
459 : }
460 : }
461 1 : poDstLayer = nullptr;
462 : }
463 : else
464 : {
465 0 : ReportError(CE_Failure, CPLE_AppDefined,
466 : "Layer '%s' already exists. Specify the "
467 : "--%s option to overwrite it.",
468 : layerName.c_str(), GDAL_ARG_NAME_OVERWRITE_LAYER);
469 0 : return false;
470 : }
471 : }
472 36 : else if (GetOverwriteLayer())
473 : {
474 1 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot find layer '%s'.",
475 : layerName.c_str());
476 1 : return false;
477 : }
478 :
479 : // Get the geometry field definition, if any
480 36 : std::unique_ptr<OGRGeomFieldDefn> poGeomFieldDefn;
481 36 : if (!geometryFieldDefinitions.empty())
482 : {
483 32 : if (geometryFieldDefinitions.size() > 1)
484 : {
485 : // NOTE: this limitation may eventually be removed,
486 : // but for now we don't want to deal with the complexity
487 : // of creating multiple geometry fields with various drivers that
488 : // may or may not support it
489 0 : ReportError(CE_Failure, CPLE_AppDefined,
490 : "Multiple geometry fields are not supported.");
491 0 : return false;
492 : }
493 : poGeomFieldDefn =
494 32 : std::make_unique<OGRGeomFieldDefn>(geometryFieldDefinitions[0]);
495 : }
496 :
497 36 : if (!poDstLayer)
498 : {
499 36 : CPLStringList aosCreationOptions(GetLayerCreationOptions());
500 72 : if (aosCreationOptions.FetchNameValue("FID") == nullptr &&
501 36 : !fidColumnName.empty())
502 : {
503 7 : auto poDstDriver = poDstDS->GetDriver();
504 7 : if (poDstDriver && poDstDriver->HasLayerCreationOption("FID"))
505 : {
506 7 : aosCreationOptions.SetNameValue("FID", fidColumnName.c_str());
507 : }
508 : }
509 : poDstLayer =
510 36 : poDstDS->CreateLayer(layerName.c_str(), poGeomFieldDefn.get(),
511 36 : aosCreationOptions.List());
512 : }
513 :
514 36 : if (!poDstLayer)
515 : {
516 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot create layer '%s'.",
517 : layerName.c_str());
518 0 : return false;
519 : }
520 :
521 70 : for (const auto &oFieldDefn : fieldDefinitions)
522 : {
523 34 : if (poDstLayer->CreateField(&oFieldDefn) != OGRERR_NONE)
524 : {
525 0 : ReportError(CE_Failure, CPLE_AppDefined,
526 : "Cannot create field '%s' in layer '%s'.",
527 : oFieldDefn.GetNameRef(), layerName.c_str());
528 0 : return false;
529 : }
530 : }
531 :
532 36 : return true;
533 : }
534 :
535 : /************************************************************************/
536 : /* ~GDALVectorCreateAlgorithmStandalone() */
537 : /************************************************************************/
538 : GDALVectorCreateAlgorithmStandalone::~GDALVectorCreateAlgorithmStandalone() =
539 : default;
540 :
541 : //! @endcond
|