Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL
4 : * Purpose: gdal "vector create" subcommand
5 : * Author: Alessandro Pasotti <elpaso at itopen dot it>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2026, Alessandro Pasotti <elpaso at itopen dot it>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include <regex>
14 : #include "gdalalg_vector_create.h"
15 : #include "gdal_utils.h"
16 : #include "ogr_schema_override.h"
17 :
18 : //! @cond Doxygen_Suppress
19 :
20 : #ifndef _
21 : #define _(x) (x)
22 : #endif
23 :
24 : /************************************************************************/
25 : /* GDALVectorCreateAlgorithm::GDALVectorCreateAlgorithm() */
26 : /************************************************************************/
27 :
28 129 : GDALVectorCreateAlgorithm::GDALVectorCreateAlgorithm(bool standaloneStep)
29 : : GDALVectorPipelineStepAlgorithm(
30 : NAME, DESCRIPTION, HELP_URL,
31 0 : ConstructorOptions()
32 129 : .SetStandaloneStep(standaloneStep)
33 258 : .SetOutputFormatCreateCapability(GDAL_DCAP_CREATE)
34 :
35 : // Remove defaults because input is the optional template
36 129 : .SetAddDefaultArguments(false)
37 :
38 : // For --like input template
39 129 : .SetAutoOpenInputDatasets(true)
40 258 : .SetInputDatasetHelpMsg(_("Template vector dataset"))
41 258 : .SetInputDatasetAlias("like")
42 129 : .SetInputDatasetRequired(false)
43 129 : .SetInputDatasetPositional(false)
44 129 : .SetInputDatasetMaxCount(1)
45 258 : .SetInputDatasetMetaVar("TEMPLATE-DATASET")
46 :
47 : // Remove arguments that don't make sense in a create context
48 : // Note: this is required despite SetAddDefaultArguments(false)
49 129 : .SetAddUpsertArgument(false)
50 129 : .SetAddSkipErrorsArgument(false)
51 387 : .SetAddAppendLayerArgument(false))
52 : {
53 :
54 129 : AddVectorInputArgs(false);
55 129 : AddVectorOutputArgs(/* hiddenForCLI = */ false,
56 : /* shortNameOutputLayerAllowed=*/false);
57 129 : AddGeometryTypeArg(&m_geometryType, _("Layer geometry type"));
58 :
59 : // Add optional geometry field name argument, not all drivers support it, and if not specified, the default "geom" name will be used.
60 : auto &geomFieldNameArg =
61 : AddArg("geometry-field", 0,
62 : _("Name of the geometry field to create (if supported by the "
63 : "output format)"),
64 258 : &m_geometryFieldName)
65 258 : .SetMetaVar("GEOMETRY-FIELD")
66 129 : .SetDefault(m_geometryFieldName);
67 :
68 258 : AddArg("crs", 0, _("Set CRS"), &m_crs)
69 258 : .AddHiddenAlias("srs")
70 129 : .SetIsCRSArg(/*noneAllowed=*/false);
71 :
72 129 : AddArg("fid", 0, _("FID column name"), &m_fidColumnName);
73 :
74 129 : constexpr auto inputMutexGroup = "like-schema-field";
75 :
76 : // Apply mutex to GDAL_ARG_NAME_INPUT
77 : // This is hackish and I really don't like const_cast but I couldn't find another way.
78 : const_cast<GDALAlgorithmArgDecl &>(
79 258 : GetArg(GDAL_ARG_NAME_INPUT)->GetDeclaration())
80 129 : .SetMutualExclusionGroup(inputMutexGroup);
81 :
82 : // Add --schema argument to read OGR_SCHEMA and populate field definitions from it. It is mutually exclusive with --like and --field arguments.
83 : AddArg("schema", 0,
84 : _("Read OGR_SCHEMA and populate field definitions from it"),
85 258 : &m_schemaJsonOrPath)
86 258 : .SetMetaVar("SCHEMA_JSON")
87 129 : .SetRepeatedArgAllowed(false)
88 129 : .SetMutualExclusionGroup(inputMutexGroup);
89 :
90 : // Add field definition argument
91 : AddFieldDefinitionArg(&m_fieldStrDefinitions, &m_fieldDefinitions,
92 129 : _("Add a field definition to the output layer"))
93 258 : .SetMetaVar("<NAME>:<TYPE>[(,<WIDTH>[,<PRECISION>])]")
94 129 : .SetPackedValuesAllowed(false)
95 129 : .SetRepeatedArgAllowed(true)
96 129 : .SetMutualExclusionGroup(inputMutexGroup);
97 :
98 129 : AddValidationAction(
99 182 : [this, &geomFieldNameArg]()
100 : {
101 65 : if ((!m_schemaJsonOrPath.empty() || !m_inputDataset.empty()) &&
102 34 : ((!m_geometryFieldName.empty() &&
103 17 : geomFieldNameArg.IsExplicitlySet()) ||
104 16 : !m_geometryType.empty() || !m_fieldDefinitions.empty() ||
105 14 : !m_crs.empty() || !m_fidColumnName.empty()))
106 : {
107 5 : ReportError(CE_Failure, CPLE_AppDefined,
108 : "When --schema or --like is specified, "
109 : "--geometry-field, --geometry-type, --field, "
110 : "--crs and --fid options must not be specified.");
111 5 : return false;
112 : }
113 43 : return true;
114 : });
115 129 : }
116 :
117 : /************************************************************************/
118 : /* GDALVectorCreateAlgorithm::RunStep() */
119 : /************************************************************************/
120 :
121 39 : bool GDALVectorCreateAlgorithm::RunStep(GDALPipelineStepRunContext &)
122 : {
123 :
124 39 : const std::string &datasetName = m_outputDataset.GetName();
125 : const std::string outputLayerName =
126 39 : m_outputLayerName.empty() ? CPLGetBasenameSafe(datasetName.c_str())
127 78 : : m_outputLayerName;
128 :
129 39 : std::unique_ptr<GDALDataset> poDstDS;
130 39 : poDstDS.reset(GDALDataset::Open(datasetName.c_str(),
131 : GDAL_OF_VECTOR | GDAL_OF_UPDATE, nullptr,
132 : nullptr, nullptr));
133 :
134 39 : if (poDstDS && !m_update)
135 : {
136 0 : ReportError(CE_Failure, CPLE_AppDefined,
137 : "Dataset %s already exists. Specify the "
138 : "--%s option to open it in update mode.",
139 : datasetName.c_str(), GDAL_ARG_NAME_UPDATE);
140 0 : return false;
141 : }
142 :
143 39 : GDALDataset *poSrcDS = m_inputDataset.empty()
144 39 : ? nullptr
145 6 : : m_inputDataset.front().GetDatasetRef();
146 :
147 78 : OGRSchemaOverride oSchemaOverride;
148 :
149 11 : const auto loadJSON = [this,
150 11 : &oSchemaOverride](const std::string &source) -> bool
151 : {
152 : // This error count is necessary because LoadFromJSON tries to load
153 : // the content as a file first (and set an error it if fails) then tries
154 : // to load as a JSON string but even if it succeeds an error is still
155 : // set and not cleared.
156 11 : const auto nErrorCount = CPLGetErrorCounter();
157 11 : if (!oSchemaOverride.LoadFromJSON(source,
158 : /* allowGeometryFields */ true))
159 : {
160 : // Get the last error message and report it, since LoadFromJSON doesn't do it itself.
161 0 : if (nErrorCount != CPLGetErrorCounter())
162 : {
163 0 : const std::string lastErrorMsg = CPLGetLastErrorMsg();
164 0 : CPLErrorReset();
165 0 : ReportError(CE_Failure, CPLE_AppDefined,
166 : "Cannot parse OGR_SCHEMA: %s.",
167 : lastErrorMsg.c_str());
168 : }
169 : else
170 : {
171 0 : ReportError(CE_Failure, CPLE_AppDefined,
172 : "Cannot parse OGR_SCHEMA (unknown error).");
173 : }
174 0 : return false;
175 : }
176 11 : else if (nErrorCount != CPLGetErrorCounter())
177 : {
178 11 : CPLErrorReset();
179 : }
180 11 : return true;
181 39 : };
182 :
183 : // Use the input dataset as to create an OGR_SCHEMA
184 39 : if (poSrcDS)
185 : {
186 : // Export the schema using GDALVectorInfo
187 6 : CPLStringList aosOptions;
188 :
189 6 : aosOptions.AddString("-schema");
190 :
191 : // Must be last, as positional
192 6 : aosOptions.AddString("dummy");
193 6 : aosOptions.AddString("-al");
194 :
195 : GDALVectorInfoOptions *psInfo =
196 6 : GDALVectorInfoOptionsNew(aosOptions.List(), nullptr);
197 :
198 6 : char *ret = GDALVectorInfo(GDALDataset::ToHandle(poSrcDS), psInfo);
199 6 : GDALVectorInfoOptionsFree(psInfo);
200 6 : if (!ret)
201 0 : return false;
202 :
203 6 : if (!loadJSON(ret))
204 : {
205 0 : CPLFree(ret);
206 0 : return false;
207 : }
208 6 : CPLFree(ret);
209 : }
210 33 : else if (!m_schemaJsonOrPath.empty() && !loadJSON(m_schemaJsonOrPath))
211 : {
212 0 : return false;
213 : }
214 :
215 39 : if (m_standaloneStep)
216 : {
217 39 : if (m_format.empty())
218 : {
219 : const auto aosFormats =
220 : CPLStringList(GDALGetOutputDriversForDatasetName(
221 39 : m_outputDataset.GetName().c_str(), GDAL_OF_VECTOR,
222 : /* bSingleMatch = */ true,
223 39 : /* bWarn = */ true));
224 39 : if (aosFormats.size() != 1)
225 : {
226 0 : ReportError(CE_Failure, CPLE_AppDefined,
227 : "Cannot guess driver for %s",
228 0 : m_outputDataset.GetName().c_str());
229 0 : return false;
230 : }
231 39 : m_format = aosFormats[0];
232 : }
233 : }
234 : else
235 : {
236 0 : m_format = "MEM";
237 : }
238 :
239 : auto poDstDriver =
240 39 : GetGDALDriverManager()->GetDriverByName(m_format.c_str());
241 39 : if (!poDstDriver)
242 : {
243 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot find driver %s.",
244 : m_format.c_str());
245 0 : return false;
246 : }
247 :
248 39 : if (!poDstDS)
249 27 : poDstDS.reset(poDstDriver->Create(datasetName.c_str(), 0, 0, 0,
250 : GDT_Unknown,
251 54 : CPLStringList(m_creationOptions)));
252 :
253 39 : if (!poDstDS)
254 : {
255 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot create dataset %s.",
256 : datasetName.c_str());
257 0 : return false;
258 : }
259 :
260 : // An OGR_SCHEMA has been provided
261 39 : if (!oSchemaOverride.GetLayerOverrides().empty())
262 : {
263 : // Checks if input layer names were specified and the layers exists in the schema
264 11 : if (!m_inputLayerNames.empty())
265 : {
266 12 : for (const auto &inputLayerName : m_inputLayerNames)
267 : {
268 7 : if (!oSchemaOverride.GetLayerOverride(inputLayerName).IsValid())
269 : {
270 0 : ReportError(CE_Failure, CPLE_AppDefined,
271 : "The specified input layer name '%s' doesn't "
272 : "exist in the provided template or schema.",
273 : inputLayerName.c_str());
274 0 : return false;
275 : }
276 : }
277 : }
278 :
279 : // If there are multiple layers check if the destination format supports
280 : // multiple layers, and if not, error out.
281 11 : if (oSchemaOverride.GetLayerOverrides().size() > 1 &&
282 6 : !GDALGetMetadataItem(poDstDriver, GDAL_DCAP_MULTIPLE_VECTOR_LAYERS,
283 17 : nullptr) &&
284 2 : m_inputLayerNames.size() != 1)
285 : {
286 1 : ReportError(CE_Failure, CPLE_AppDefined,
287 : "The output format %s doesn't support multiple layers.",
288 1 : poDstDriver->GetDescription());
289 1 : return false;
290 : }
291 :
292 : // If output layer name was specified and there is more than one layer in the schema,
293 : // error out since we won't know which layer to apply it to
294 12 : if (!m_outputLayerName.empty() &&
295 12 : oSchemaOverride.GetLayerOverrides().size() > 1 &&
296 2 : m_inputLayerNames.size() != 1)
297 : {
298 1 : ReportError(CE_Failure, CPLE_AppDefined,
299 : "Output layer name should not be specified when there "
300 : "are multiple layers in the schema.");
301 1 : return false;
302 : }
303 :
304 9 : std::vector<std::string> layersToBeCreated;
305 25 : for (const auto &oLayerOverride : oSchemaOverride.GetLayerOverrides())
306 : {
307 :
308 27 : if (!m_inputLayerNames.empty() &&
309 0 : std::find(m_inputLayerNames.begin(), m_inputLayerNames.end(),
310 11 : oLayerOverride.GetLayerName()) ==
311 27 : m_inputLayerNames.end())
312 : {
313 : // This layer is not in the list of input layers to consider, so skip it
314 6 : continue;
315 : }
316 10 : layersToBeCreated.push_back(oLayerOverride.GetLayerName());
317 : }
318 :
319 : // Loop over layers in the OGR_SCHEMA and create them
320 19 : for (const auto &layerToCreate : layersToBeCreated)
321 : {
322 : const auto &oLayerOverride =
323 10 : oSchemaOverride.GetLayerOverride(layerToCreate);
324 10 : if (!oLayerOverride.IsValid())
325 : {
326 0 : ReportError(CE_Failure, CPLE_AppDefined,
327 : "Invalid layer override for layer '%s'.",
328 : layerToCreate.c_str());
329 0 : return false;
330 : }
331 :
332 : // We can use the defined layer name only if there is a single layer to be created
333 : const std::string userSpecifiedNewName =
334 10 : !m_outputLayerName.empty() ? m_outputLayerName
335 10 : : oLayerOverride.GetLayerName();
336 : const std::string outputLayerNewName =
337 12 : layersToBeCreated.size() > 1 ? oLayerOverride.GetLayerName()
338 12 : : userSpecifiedNewName;
339 :
340 10 : if (!CreateLayer(poDstDS.get(), outputLayerNewName,
341 : oLayerOverride.GetFIDColumnName(),
342 20 : oLayerOverride.GetFieldDefinitions(),
343 20 : oLayerOverride.GetGeomFieldDefinitions()))
344 : {
345 0 : ReportError(CE_Failure, CPLE_AppDefined,
346 : "Cannot create layer '%s'",
347 0 : oLayerOverride.GetLayerName().c_str());
348 0 : return false;
349 : }
350 : }
351 : }
352 : else
353 : {
354 28 : std::vector<OGRGeomFieldDefn> geometryFieldDefinitions;
355 28 : if (!m_geometryType.empty())
356 : {
357 : const OGRwkbGeometryType eDstType =
358 26 : OGRFromOGCGeomType(m_geometryType.c_str());
359 27 : if (eDstType == wkbUnknown &&
360 1 : !STARTS_WITH_CI(m_geometryType.c_str(), "GEOMETRY"))
361 : {
362 0 : ReportError(CE_Failure, CPLE_AppDefined,
363 : "Unsupported geometry type: '%s'.",
364 : m_geometryType.c_str());
365 0 : return false;
366 : }
367 : else
368 : {
369 : OGRGeomFieldDefn oGeomFieldDefn(m_geometryFieldName.c_str(),
370 26 : eDstType);
371 26 : if (!m_crs.empty())
372 : {
373 : auto poSRS =
374 25 : OGRSpatialReferenceRefCountedPtr::makeInstance();
375 25 : if (poSRS->SetFromUserInput(m_crs.c_str()) != OGRERR_NONE)
376 : {
377 0 : ReportError(CE_Failure, CPLE_AppDefined,
378 : "Cannot parse CRS definition: '%s'.",
379 : m_crs.c_str());
380 0 : return false;
381 : }
382 : else
383 : {
384 25 : oGeomFieldDefn.SetSpatialRef(poSRS.get());
385 : }
386 : }
387 26 : geometryFieldDefinitions.push_back(std::move(oGeomFieldDefn));
388 : }
389 : }
390 :
391 28 : if (!CreateLayer(poDstDS.get(), outputLayerName, m_fidColumnName,
392 56 : GetOutputFields(), geometryFieldDefinitions))
393 : {
394 1 : ReportError(CE_Failure, CPLE_AppDefined,
395 : "Cannot create layer '%s'.", outputLayerName.c_str());
396 1 : return false;
397 : }
398 : }
399 :
400 36 : m_outputDataset.Set(std::move(poDstDS));
401 36 : return true;
402 : }
403 :
404 : /************************************************************************/
405 : /* GDALVectorCreateAlgorithm::RunImpl() */
406 : /************************************************************************/
407 39 : bool GDALVectorCreateAlgorithm::RunImpl(GDALProgressFunc pfnProgress,
408 : void *pProgressData)
409 : {
410 39 : GDALPipelineStepRunContext stepCtxt;
411 39 : stepCtxt.m_pfnProgress = pfnProgress;
412 39 : stepCtxt.m_pProgressData = pProgressData;
413 39 : return RunPreStepPipelineValidations() && RunStep(stepCtxt);
414 : }
415 :
416 : /************************************************************************/
417 : /* GDALVectorCreateAlgorithm::GetOutputFields() */
418 : /************************************************************************/
419 28 : std::vector<OGRFieldDefn> GDALVectorCreateAlgorithm::GetOutputFields() const
420 : {
421 : // This is where we will eventually implement override logic to modify field
422 : // definitions based on input dataset and/or OGR_SCHEMA, but for now we just
423 : // return the field definitions as specified by the user through --field arguments.
424 28 : return m_fieldDefinitions;
425 : }
426 :
427 : /************************************************************************/
428 : /* GDALVectorCreateAlgorithm::CreateLayer() */
429 : /************************************************************************/
430 38 : bool GDALVectorCreateAlgorithm::CreateLayer(
431 : GDALDataset *poDstDS, const std::string &layerName,
432 : const std::string &fidColumnName,
433 : const std::vector<OGRFieldDefn> &fieldDefinitions,
434 : const std::vector<OGRGeomFieldDefn> &geometryFieldDefinitions) const
435 : {
436 :
437 38 : auto poDstLayer = poDstDS->GetLayerByName(layerName.c_str());
438 :
439 38 : if (poDstLayer)
440 : {
441 1 : if (GetOverwriteLayer())
442 : {
443 1 : int iLayer = -1;
444 1 : const int nLayerCount = poDstDS->GetLayerCount();
445 2 : for (iLayer = 0; iLayer < nLayerCount; iLayer++)
446 : {
447 2 : if (poDstDS->GetLayer(iLayer) == poDstLayer)
448 1 : break;
449 : }
450 :
451 1 : if (iLayer < nLayerCount)
452 : {
453 1 : if (poDstDS->DeleteLayer(iLayer) != OGRERR_NONE)
454 : {
455 0 : ReportError(CE_Failure, CPLE_AppDefined,
456 : "Cannot delete layer '%s'.", layerName.c_str());
457 0 : return false;
458 : }
459 : }
460 1 : poDstLayer = nullptr;
461 : }
462 : else
463 : {
464 0 : ReportError(CE_Failure, CPLE_AppDefined,
465 : "Layer '%s' already exists. Specify the "
466 : "--%s option to overwrite it.",
467 : layerName.c_str(), GDAL_ARG_NAME_OVERWRITE_LAYER);
468 0 : return false;
469 : }
470 : }
471 37 : else if (GetOverwriteLayer())
472 : {
473 1 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot find layer '%s'.",
474 : layerName.c_str());
475 1 : return false;
476 : }
477 :
478 : // Get the geometry field definition, if any
479 37 : std::unique_ptr<OGRGeomFieldDefn> poGeomFieldDefn;
480 37 : if (!geometryFieldDefinitions.empty())
481 : {
482 33 : if (geometryFieldDefinitions.size() > 1)
483 : {
484 : // NOTE: this limitation may eventually be removed,
485 : // but for now we don't want to deal with the complexity
486 : // of creating multiple geometry fields with various drivers that
487 : // may or may not support it
488 0 : ReportError(CE_Failure, CPLE_AppDefined,
489 : "Multiple geometry fields are not supported.");
490 0 : return false;
491 : }
492 : poGeomFieldDefn =
493 33 : std::make_unique<OGRGeomFieldDefn>(geometryFieldDefinitions[0]);
494 : }
495 :
496 37 : if (!poDstLayer)
497 : {
498 37 : CPLStringList aosCreationOptions(GetLayerCreationOptions());
499 74 : if (aosCreationOptions.FetchNameValue("FID") == nullptr &&
500 37 : !fidColumnName.empty())
501 : {
502 7 : auto poDstDriver = poDstDS->GetDriver();
503 7 : if (poDstDriver && poDstDriver->HasLayerCreationOption("FID"))
504 : {
505 7 : aosCreationOptions.SetNameValue("FID", fidColumnName.c_str());
506 : }
507 : }
508 : poDstLayer =
509 37 : poDstDS->CreateLayer(layerName.c_str(), poGeomFieldDefn.get(),
510 37 : aosCreationOptions.List());
511 : }
512 :
513 37 : if (!poDstLayer)
514 : {
515 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot create layer '%s'.",
516 : layerName.c_str());
517 0 : return false;
518 : }
519 :
520 72 : for (const auto &oFieldDefn : fieldDefinitions)
521 : {
522 35 : if (poDstLayer->CreateField(&oFieldDefn) != OGRERR_NONE)
523 : {
524 0 : ReportError(CE_Failure, CPLE_AppDefined,
525 : "Cannot create field '%s' in layer '%s'.",
526 : oFieldDefn.GetNameRef(), layerName.c_str());
527 0 : return false;
528 : }
529 : }
530 :
531 37 : return true;
532 : }
533 :
534 : /************************************************************************/
535 : /* ~GDALVectorCreateAlgorithmStandalone() */
536 : /************************************************************************/
537 : GDALVectorCreateAlgorithmStandalone::~GDALVectorCreateAlgorithmStandalone() =
538 : default;
539 :
540 : //! @endcond
|