Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL
4 : * Purpose: gdal "vector create" subcommand
5 : * Author: Alessandro Pasotti <elpaso at itopen dot it>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2026, Alessandro Pasotti <elpaso at itopen dot it>
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #include <regex>
14 : #include "gdalalg_vector_create.h"
15 : #include "gdal_utils.h"
16 : #include "ogr_schema_override.h"
17 :
18 : //! @cond Doxygen_Suppress
19 :
// Identity fallback for the translation marker: when no `_` macro has been
// provided by an earlier header, `_("text")` simply expands to `("text")`.
#if !defined(_)
#define _(x) (x)
#endif
23 :
24 : /************************************************************************/
25 : /* GDALVectorCreateAlgorithm::GDALVectorCreateAlgorithm() */
26 : /************************************************************************/
27 :
28 131 : GDALVectorCreateAlgorithm::GDALVectorCreateAlgorithm(bool standaloneStep)
29 : : GDALVectorPipelineStepAlgorithm(
30 : NAME, DESCRIPTION, HELP_URL,
31 0 : ConstructorOptions()
32 131 : .SetStandaloneStep(standaloneStep)
33 262 : .SetOutputFormatCreateCapability(GDAL_DCAP_CREATE)
34 :
35 : // Remove defaults because input is the optional template
36 131 : .SetAddDefaultArguments(false)
37 :
38 : // For --like input template
39 131 : .SetAutoOpenInputDatasets(true)
40 262 : .SetInputDatasetHelpMsg(_("Template vector dataset"))
41 262 : .SetInputDatasetAlias("like")
42 131 : .SetInputDatasetRequired(false)
43 131 : .SetInputDatasetPositional(false)
44 131 : .SetInputDatasetMaxCount(1)
45 262 : .SetInputDatasetMetaVar("TEMPLATE-DATASET")
46 :
47 : // Remove arguments that don't make sense in a create context
48 : // Note: this is required despite SetAddDefaultArguments(false)
49 131 : .SetAddUpsertArgument(false)
50 131 : .SetAddSkipErrorsArgument(false)
51 393 : .SetAddAppendLayerArgument(false))
52 : {
53 :
54 131 : AddVectorInputArgs(false);
55 131 : AddVectorOutputArgs(/* hiddenForCLI = */ false,
56 : /* shortNameOutputLayerAllowed=*/false);
57 131 : AddGeometryTypeArg(&m_geometryType, _("Layer geometry type"));
58 :
59 : // Add optional geometry field name argument, not all drivers support it, and if not specified, the default "geom" name will be used.
60 : auto &geomFieldNameArg =
61 : AddArg("geometry-field", 0,
62 : _("Name of the geometry field to create (if supported by the "
63 : "output format)"),
64 262 : &m_geometryFieldName)
65 262 : .SetMetaVar("GEOMETRY-FIELD")
66 131 : .SetDefault(m_geometryFieldName);
67 :
68 262 : AddArg("crs", 0, _("Set CRS"), &m_crs)
69 262 : .AddHiddenAlias("srs")
70 131 : .SetIsCRSArg(/*noneAllowed=*/false);
71 :
72 131 : AddArg("fid", 0, _("FID column name"), &m_fidColumnName);
73 :
74 131 : constexpr auto inputMutexGroup = "like-schema-field";
75 :
76 : // Apply mutex to GDAL_ARG_NAME_INPUT
77 : // This is hackish and I really don't like const_cast but I couldn't find another way.
78 : const_cast<GDALAlgorithmArgDecl &>(
79 262 : GetArg(GDAL_ARG_NAME_INPUT)->GetDeclaration())
80 131 : .SetMutualExclusionGroup(inputMutexGroup);
81 :
82 : // Add --schema argument to read OGR_SCHEMA and populate field definitions from it. It is mutually exclusive with --like and --field arguments.
83 : AddArg("schema", 0,
84 : _("Read OGR_SCHEMA and populate field definitions from it"),
85 262 : &m_schemaJsonOrPath)
86 262 : .SetMetaVar("SCHEMA_JSON")
87 131 : .SetRepeatedArgAllowed(false)
88 131 : .SetMutualExclusionGroup(inputMutexGroup);
89 :
90 : // Add field definition argument
91 : AddFieldDefinitionArg(&m_fieldStrDefinitions, &m_fieldDefinitions,
92 131 : _("Add a field definition to the output layer"))
93 262 : .SetMetaVar("<NAME>:<TYPE>[(,<WIDTH>[,<PRECISION>])]")
94 131 : .SetPackedValuesAllowed(false)
95 131 : .SetRepeatedArgAllowed(true)
96 131 : .SetMutualExclusionGroup(inputMutexGroup);
97 :
98 131 : AddValidationAction(
99 182 : [this, &geomFieldNameArg]()
100 : {
101 65 : if ((!m_schemaJsonOrPath.empty() || !m_inputDataset.empty()) &&
102 34 : ((!m_geometryFieldName.empty() &&
103 17 : geomFieldNameArg.IsExplicitlySet()) ||
104 16 : !m_geometryType.empty() || !m_fieldDefinitions.empty() ||
105 14 : !m_crs.empty() || !m_fidColumnName.empty()))
106 : {
107 5 : ReportError(CE_Failure, CPLE_AppDefined,
108 : "When --schema or --like is specified, "
109 : "--geometry-field, --geometry-type, --field, "
110 : "--crs and --fid options must not be specified.");
111 5 : return false;
112 : }
113 43 : return true;
114 : });
115 131 : }
116 :
117 : /************************************************************************/
118 : /* GDALVectorCreateAlgorithm::RunStep() */
119 : /************************************************************************/
120 :
121 39 : bool GDALVectorCreateAlgorithm::RunStep(GDALPipelineStepRunContext &)
122 : {
123 :
124 39 : const std::string &datasetName = m_outputDataset.GetName();
125 : const std::string outputLayerName =
126 39 : m_outputLayerName.empty() ? CPLGetBasenameSafe(datasetName.c_str())
127 78 : : m_outputLayerName;
128 :
129 39 : std::unique_ptr<GDALDataset> poDstDS;
130 39 : poDstDS.reset(GDALDataset::Open(datasetName.c_str(),
131 : GDAL_OF_VECTOR | GDAL_OF_UPDATE, nullptr,
132 : nullptr, nullptr));
133 :
134 39 : if (poDstDS && !m_update)
135 : {
136 0 : ReportError(CE_Failure, CPLE_AppDefined,
137 : "Dataset %s already exists. Specify the "
138 : "--%s option to open it in update mode.",
139 : datasetName.c_str(), GDAL_ARG_NAME_UPDATE);
140 0 : return false;
141 : }
142 :
143 39 : GDALDataset *poSrcDS = m_inputDataset.empty()
144 39 : ? nullptr
145 6 : : m_inputDataset.front().GetDatasetRef();
146 :
147 78 : OGRSchemaOverride oSchemaOverride;
148 :
149 11 : const auto loadJSON = [this,
150 11 : &oSchemaOverride](const std::string &source) -> bool
151 : {
152 : // This error count is necessary because LoadFromJSON tries to load
153 : // the content as a file first (and set an error it if fails) then tries
154 : // to load as a JSON string but even if it succeeds an error is still
155 : // set and not cleared.
156 11 : const auto nErrorCount = CPLGetErrorCounter();
157 11 : if (!oSchemaOverride.LoadFromJSON(source,
158 : /* allowGeometryFields */ true))
159 : {
160 : // Get the last error message and report it, since LoadFromJSON doesn't do it itself.
161 0 : if (nErrorCount != CPLGetErrorCounter())
162 : {
163 0 : const std::string lastErrorMsg = CPLGetLastErrorMsg();
164 0 : CPLErrorReset();
165 0 : ReportError(CE_Failure, CPLE_AppDefined,
166 : "Cannot parse OGR_SCHEMA: %s.",
167 : lastErrorMsg.c_str());
168 : }
169 : else
170 : {
171 0 : ReportError(CE_Failure, CPLE_AppDefined,
172 : "Cannot parse OGR_SCHEMA (unknown error).");
173 : }
174 0 : return false;
175 : }
176 11 : else if (nErrorCount != CPLGetErrorCounter())
177 : {
178 11 : CPLErrorReset();
179 : }
180 11 : return true;
181 39 : };
182 :
183 : // Use the input dataset as to create an OGR_SCHEMA
184 39 : if (poSrcDS)
185 : {
186 : // Export the schema using GDALVectorInfo
187 6 : CPLStringList aosOptions;
188 :
189 6 : aosOptions.AddString("-schema");
190 :
191 : // Must be last, as positional
192 6 : aosOptions.AddString("dummy");
193 6 : aosOptions.AddString("-al");
194 :
195 : GDALVectorInfoOptions *psInfo =
196 6 : GDALVectorInfoOptionsNew(aosOptions.List(), nullptr);
197 :
198 6 : char *ret = GDALVectorInfo(GDALDataset::ToHandle(poSrcDS), psInfo);
199 6 : GDALVectorInfoOptionsFree(psInfo);
200 6 : if (!ret)
201 0 : return false;
202 :
203 6 : if (!loadJSON(ret))
204 : {
205 0 : CPLFree(ret);
206 0 : return false;
207 : }
208 6 : CPLFree(ret);
209 : }
210 33 : else if (!m_schemaJsonOrPath.empty() && !loadJSON(m_schemaJsonOrPath))
211 : {
212 0 : return false;
213 : }
214 :
215 39 : if (m_standaloneStep)
216 : {
217 39 : if (m_format.empty())
218 : {
219 : const auto aosFormats =
220 : CPLStringList(GDALGetOutputDriversForDatasetName(
221 39 : m_outputDataset.GetName().c_str(), GDAL_OF_VECTOR,
222 : /* bSingleMatch = */ true,
223 39 : /* bWarn = */ true));
224 39 : if (aosFormats.size() != 1)
225 : {
226 0 : ReportError(CE_Failure, CPLE_AppDefined,
227 : "Cannot guess driver for %s",
228 0 : m_outputDataset.GetName().c_str());
229 0 : return false;
230 : }
231 39 : m_format = aosFormats[0];
232 : }
233 : }
234 : else
235 : {
236 0 : m_format = "MEM";
237 : }
238 :
239 : auto poDstDriver =
240 39 : GetGDALDriverManager()->GetDriverByName(m_format.c_str());
241 39 : if (!poDstDriver)
242 : {
243 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot find driver %s.",
244 : m_format.c_str());
245 0 : return false;
246 : }
247 :
248 39 : if (!poDstDS)
249 27 : poDstDS.reset(poDstDriver->Create(datasetName.c_str(), 0, 0, 0,
250 : GDT_Unknown,
251 54 : CPLStringList(m_creationOptions)));
252 :
253 39 : if (!poDstDS)
254 : {
255 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot create dataset %s.",
256 : datasetName.c_str());
257 0 : return false;
258 : }
259 :
260 : // An OGR_SCHEMA has been provided
261 39 : if (!oSchemaOverride.GetLayerOverrides().empty())
262 : {
263 : // Checks if input layer names were specified and the layers exists in the schema
264 11 : if (!m_inputLayerNames.empty())
265 : {
266 12 : for (const auto &inputLayerName : m_inputLayerNames)
267 : {
268 7 : if (!oSchemaOverride.GetLayerOverride(inputLayerName).IsValid())
269 : {
270 0 : ReportError(CE_Failure, CPLE_AppDefined,
271 : "The specified input layer name '%s' doesn't "
272 : "exist in the provided template or schema.",
273 : inputLayerName.c_str());
274 0 : return false;
275 : }
276 : }
277 : }
278 :
279 : // If there are multiple layers check if the destination format supports
280 : // multiple layers, and if not, error out.
281 11 : if (oSchemaOverride.GetLayerOverrides().size() > 1 &&
282 6 : !GDALGetMetadataItem(poDstDriver, GDAL_DCAP_MULTIPLE_VECTOR_LAYERS,
283 17 : nullptr) &&
284 2 : m_inputLayerNames.size() != 1)
285 : {
286 1 : ReportError(CE_Failure, CPLE_AppDefined,
287 : "The output format %s doesn't support multiple layers.",
288 1 : poDstDriver->GetDescription());
289 1 : return false;
290 : }
291 :
292 : // If output layer name was specified and there is more than one layer in the schema,
293 : // error out since we won't know which layer to apply it to
294 12 : if (!m_outputLayerName.empty() &&
295 12 : oSchemaOverride.GetLayerOverrides().size() > 1 &&
296 2 : m_inputLayerNames.size() != 1)
297 : {
298 1 : ReportError(CE_Failure, CPLE_AppDefined,
299 : "Output layer name should not be specified when there "
300 : "are multiple layers in the schema.");
301 1 : return false;
302 : }
303 :
304 9 : std::vector<std::string> layersToBeCreated;
305 25 : for (const auto &oLayerOverride : oSchemaOverride.GetLayerOverrides())
306 : {
307 :
308 27 : if (!m_inputLayerNames.empty() &&
309 0 : std::find(m_inputLayerNames.begin(), m_inputLayerNames.end(),
310 11 : oLayerOverride.GetLayerName()) ==
311 27 : m_inputLayerNames.end())
312 : {
313 : // This layer is not in the list of input layers to consider, so skip it
314 6 : continue;
315 : }
316 10 : layersToBeCreated.push_back(oLayerOverride.GetLayerName());
317 : }
318 :
319 : // Loop over layers in the OGR_SCHEMA and create them
320 19 : for (const auto &layerToCreate : layersToBeCreated)
321 : {
322 : const auto &oLayerOverride =
323 10 : oSchemaOverride.GetLayerOverride(layerToCreate);
324 10 : if (!oLayerOverride.IsValid())
325 : {
326 0 : ReportError(CE_Failure, CPLE_AppDefined,
327 : "Invalid layer override for layer '%s'.",
328 : layerToCreate.c_str());
329 0 : return false;
330 : }
331 :
332 : // We can use the defined layer name only if there is a single layer to be created
333 : const std::string userSpecifiedNewName =
334 10 : !m_outputLayerName.empty() ? m_outputLayerName
335 10 : : oLayerOverride.GetLayerName();
336 : const std::string outputLayerNewName =
337 12 : layersToBeCreated.size() > 1 ? oLayerOverride.GetLayerName()
338 12 : : userSpecifiedNewName;
339 :
340 10 : if (!CreateLayer(poDstDS.get(), outputLayerNewName,
341 : oLayerOverride.GetFIDColumnName(),
342 20 : oLayerOverride.GetFieldDefinitions(),
343 20 : oLayerOverride.GetGeomFieldDefinitions()))
344 : {
345 0 : ReportError(CE_Failure, CPLE_AppDefined,
346 : "Cannot create layer '%s'",
347 0 : oLayerOverride.GetLayerName().c_str());
348 0 : return false;
349 : }
350 : }
351 : }
352 : else
353 : {
354 28 : std::vector<OGRGeomFieldDefn> geometryFieldDefinitions;
355 28 : if (!m_geometryType.empty())
356 : {
357 : const OGRwkbGeometryType eDstType =
358 26 : OGRFromOGCGeomType(m_geometryType.c_str());
359 27 : if (eDstType == wkbUnknown &&
360 1 : !STARTS_WITH_CI(m_geometryType.c_str(), "GEOMETRY"))
361 : {
362 0 : ReportError(CE_Failure, CPLE_AppDefined,
363 : "Unsupported geometry type: '%s'.",
364 : m_geometryType.c_str());
365 0 : return false;
366 : }
367 : else
368 : {
369 : OGRGeomFieldDefn oGeomFieldDefn(m_geometryFieldName.c_str(),
370 26 : eDstType);
371 26 : if (!m_crs.empty())
372 : {
373 : auto poSRS =
374 25 : OGRSpatialReferenceRefCountedPtr::makeInstance();
375 25 : if (poSRS->SetFromUserInput(m_crs.c_str()) != OGRERR_NONE)
376 : {
377 0 : ReportError(CE_Failure, CPLE_AppDefined,
378 : "Cannot parse CRS definition: '%s'.",
379 : m_crs.c_str());
380 0 : return false;
381 : }
382 : else
383 : {
384 25 : oGeomFieldDefn.SetSpatialRef(poSRS.get());
385 : }
386 : }
387 26 : geometryFieldDefinitions.push_back(std::move(oGeomFieldDefn));
388 : }
389 : }
390 :
391 28 : if (!CreateLayer(poDstDS.get(), outputLayerName, m_fidColumnName,
392 56 : GetOutputFields(), geometryFieldDefinitions))
393 : {
394 1 : ReportError(CE_Failure, CPLE_AppDefined,
395 : "Cannot create layer '%s'.", outputLayerName.c_str());
396 1 : return false;
397 : }
398 : }
399 :
400 36 : m_outputDataset.Set(std::move(poDstDS));
401 36 : return true;
402 : }
403 :
404 : /************************************************************************/
405 : /* GDALVectorCreateAlgorithm::RunImpl() */
406 : /************************************************************************/
407 39 : bool GDALVectorCreateAlgorithm::RunImpl(GDALProgressFunc pfnProgress,
408 : void *pProgressData)
409 : {
410 39 : GDALPipelineStepRunContext stepCtxt;
411 39 : stepCtxt.m_pfnProgress = pfnProgress;
412 39 : stepCtxt.m_pProgressData = pProgressData;
413 39 : return RunPreStepPipelineValidations() && RunStep(stepCtxt);
414 : }
415 :
416 : /************************************************************************/
417 : /* GDALVectorCreateAlgorithm::GetOutputFields() */
418 : /************************************************************************/
419 28 : std::vector<OGRFieldDefn> GDALVectorCreateAlgorithm::GetOutputFields() const
420 : {
421 : // This is where we will eventually implement override logic to modify field
422 : // definitions based on input dataset and/or OGR_SCHEMA, but for now we just
423 : // return the field definitions as specified by the user through --field arguments.
424 28 : return m_fieldDefinitions;
425 : }
426 :
427 : /************************************************************************/
428 : /* GDALVectorCreateAlgorithm::CreateLayer() */
429 : /************************************************************************/
430 38 : bool GDALVectorCreateAlgorithm::CreateLayer(
431 : GDALDataset *poDstDS, const std::string &layerName,
432 : const std::string &fidColumnName,
433 : const std::vector<OGRFieldDefn> &fieldDefinitions,
434 : const std::vector<OGRGeomFieldDefn> &geometryFieldDefinitions) const
435 : {
436 38 : if (auto poExistingDstLayer = poDstDS->GetLayerByName(layerName.c_str()))
437 : {
438 1 : if (GetOverwriteLayer())
439 : {
440 1 : int iLayer = -1;
441 1 : const int nLayerCount = poDstDS->GetLayerCount();
442 2 : for (iLayer = 0; iLayer < nLayerCount; iLayer++)
443 : {
444 2 : if (poDstDS->GetLayer(iLayer) == poExistingDstLayer)
445 1 : break;
446 : }
447 :
448 1 : if (iLayer < nLayerCount)
449 : {
450 1 : if (poDstDS->DeleteLayer(iLayer) != OGRERR_NONE)
451 : {
452 0 : ReportError(CE_Failure, CPLE_AppDefined,
453 : "Cannot delete layer '%s'.", layerName.c_str());
454 0 : return false;
455 : }
456 : }
457 : }
458 : else
459 : {
460 0 : ReportError(CE_Failure, CPLE_AppDefined,
461 : "Layer '%s' already exists. Specify the "
462 : "--%s option to overwrite it.",
463 : layerName.c_str(), GDAL_ARG_NAME_OVERWRITE_LAYER);
464 0 : return false;
465 : }
466 : }
467 37 : else if (GetOverwriteLayer())
468 : {
469 1 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot find layer '%s'.",
470 : layerName.c_str());
471 1 : return false;
472 : }
473 :
474 : // Get the geometry field definition, if any
475 37 : std::unique_ptr<OGRGeomFieldDefn> poGeomFieldDefn;
476 37 : if (!geometryFieldDefinitions.empty())
477 : {
478 33 : if (geometryFieldDefinitions.size() > 1)
479 : {
480 : // NOTE: this limitation may eventually be removed,
481 : // but for now we don't want to deal with the complexity
482 : // of creating multiple geometry fields with various drivers that
483 : // may or may not support it
484 0 : ReportError(CE_Failure, CPLE_AppDefined,
485 : "Multiple geometry fields are not supported.");
486 0 : return false;
487 : }
488 : poGeomFieldDefn =
489 33 : std::make_unique<OGRGeomFieldDefn>(geometryFieldDefinitions[0]);
490 : }
491 :
492 74 : CPLStringList aosCreationOptions(GetLayerCreationOptions());
493 74 : if (aosCreationOptions.FetchNameValue("FID") == nullptr &&
494 37 : !fidColumnName.empty())
495 : {
496 7 : auto poDstDriver = poDstDS->GetDriver();
497 7 : if (poDstDriver && poDstDriver->HasLayerCreationOption("FID"))
498 : {
499 7 : aosCreationOptions.SetNameValue("FID", fidColumnName.c_str());
500 : }
501 : }
502 37 : auto poDstLayer = poDstDS->CreateLayer(
503 37 : layerName.c_str(), poGeomFieldDefn.get(), aosCreationOptions.List());
504 37 : if (!poDstLayer)
505 : {
506 0 : ReportError(CE_Failure, CPLE_AppDefined, "Cannot create layer '%s'.",
507 : layerName.c_str());
508 0 : return false;
509 : }
510 :
511 72 : for (const auto &oFieldDefn : fieldDefinitions)
512 : {
513 35 : if (poDstLayer->CreateField(&oFieldDefn) != OGRERR_NONE)
514 : {
515 0 : ReportError(CE_Failure, CPLE_AppDefined,
516 : "Cannot create field '%s' in layer '%s'.",
517 : oFieldDefn.GetNameRef(), layerName.c_str());
518 0 : return false;
519 : }
520 : }
521 :
522 37 : return true;
523 : }
524 :
525 : /************************************************************************/
526 : /* ~GDALVectorCreateAlgorithmStandalone() */
527 : /************************************************************************/
528 : GDALVectorCreateAlgorithmStandalone::~GDALVectorCreateAlgorithmStandalone() =
529 : default;
530 :
531 : //! @endcond
|