Line data Source code
1 : /******************************************************************************
2 : *
3 : * Component: OGDI Driver Support Library
4 : * Purpose: Generic SQL WHERE Expression Evaluator Declarations.
5 : * Author: Frank Warmerdam <warmerdam@pobox.com>
6 : *
7 : ******************************************************************************
8 : * Copyright (C) 2001 Information Interoperability Institute (3i)
9 : * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10 : * Permission to use, copy, modify and distribute this software and
11 : * its documentation for any purpose and without fee is hereby granted,
12 : * provided that the above copyright notice appear in all copies, that
13 : * both the copyright notice and this permission notice appear in
14 : * supporting documentation, and that the name of 3i not be used
15 : * in advertising or publicity pertaining to distribution of the software
16 : * without specific, written prior permission. 3i makes no
17 : * representations about the suitability of this software for any purpose.
18 : * It is provided "as is" without express or implied warranty.
19 : ****************************************************************************/
20 :
21 : #ifndef SWQ_H_INCLUDED_
22 : #define SWQ_H_INCLUDED_
23 :
24 : #ifndef DOXYGEN_SKIP
25 :
26 : #include "cpl_conv.h"
27 : #include "cpl_string.h"
28 : #include "ogr_core.h"
29 :
30 : #include <list>
31 : #include <map>
32 : #include <vector>
33 : #include <set>
34 :
35 : #if defined(_WIN32) && !defined(strcasecmp)
36 : #define strcasecmp stricmp
37 : #endif
38 :
39 : // Used for swq_summary.oSetDistinctValues and oVectorDistinctValues
40 : #define SZ_OGR_NULL "__OGR_NULL__"
41 :
// Operation codes stored in swq_expr_node::nOperation.
// The enumerators are implicitly numbered from 0, so their order is part of
// the contract and must not be rearranged.
enum swq_op
{
    // Logical connectives.
    SWQ_OR,
    SWQ_AND,
    SWQ_NOT,

    // Comparison and membership tests.
    SWQ_EQ,
    SWQ_NE,
    SWQ_GE,
    SWQ_LE,
    SWQ_LT,
    SWQ_GT,
    SWQ_LIKE,
    SWQ_ILIKE,
    SWQ_ISNULL,
    SWQ_IN,
    SWQ_BETWEEN,

    // Arithmetic.
    SWQ_ADD,
    SWQ_SUBTRACT,
    SWQ_MULTIPLY,
    SWQ_DIVIDE,
    SWQ_MODULUS,

    // String / hstore helpers.
    SWQ_CONCAT,
    SWQ_SUBSTR,
    SWQ_HSTORE_GET_VALUE,

    // Aggregates: every code in [SWQ_AGGREGATE_BEGIN, SWQ_AGGREGATE_END]
    // lies between SWQ_AVG and SWQ_STDDEV_SAMP inclusive.
    SWQ_AVG,
    SWQ_AGGREGATE_BEGIN = SWQ_AVG,
    SWQ_MIN,
    SWQ_MAX,
    SWQ_COUNT,
    SWQ_SUM,
    SWQ_STDDEV_POP,
    SWQ_STDDEV_SAMP,
    SWQ_AGGREGATE_END = SWQ_STDDEV_SAMP,

    SWQ_CAST,
    // Only if parsing is done in bAcceptCustomFuncs mode.
    SWQ_CUSTOM_FUNC,
    // Temporary value only set during parsing and replaced by something else
    // at the end.
    SWQ_ARGUMENT_LIST
};
82 :
// Value / column types known to the expression evaluator.
// Enumerators are implicitly numbered from 0; keep the order stable.
enum swq_field_type
{
    SWQ_INTEGER,
    SWQ_INTEGER64,
    SWQ_FLOAT,
    SWQ_STRING,
    SWQ_BOOLEAN,    // represented as an integer
    SWQ_DATE,       // represented as a string
    SWQ_TIME,       // represented as a string
    SWQ_TIMESTAMP,  // represented as a string
    SWQ_GEOMETRY,
    SWQ_NULL,
    SWQ_OTHER,
    SWQ_ERROR
};
98 :
// True when x is one of the two integer field types (SWQ_INTEGER or
// SWQ_INTEGER64). Note that the argument is evaluated twice.
#define SWQ_IS_INTEGER(x) (((x) == SWQ_INTEGER) || ((x) == SWQ_INTEGER64))
100 :
// Discriminator stored in swq_expr_node::eNodeType.
enum swq_node_type
{
    SNT_CONSTANT,  // literal value
    SNT_COLUMN,    // column reference
    SNT_OPERATION  // operation applied to sub-expressions
};
107 :
108 : class swq_field_list;
109 : class swq_expr_node;
110 : class swq_select;
111 : class OGRGeometry;
112 :
113 : struct CPL_UNSTABLE_API swq_evaluation_context
114 : {
115 : bool bUTF8Strings = false;
116 : };
117 :
118 : typedef swq_expr_node *(*swq_field_fetcher)(swq_expr_node *op,
119 : void *record_handle);
120 : typedef swq_expr_node *(*swq_op_evaluator)(
121 : swq_expr_node *op, swq_expr_node **sub_field_values,
122 : const swq_evaluation_context &sContext);
123 : typedef swq_field_type (*swq_op_checker)(
124 : swq_expr_node *op, int bAllowMismatchTypeOnFieldComparison);
125 :
126 : class swq_custom_func_registrar;
127 :
128 2349 : class CPL_UNSTABLE_API swq_expr_node
129 : {
130 : swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
131 : const swq_evaluation_context &sContext,
132 : int nRecLevel);
133 : void reset();
134 :
135 : public:
136 : swq_expr_node();
137 : swq_expr_node(const swq_expr_node &);
138 : swq_expr_node(swq_expr_node &&);
139 :
140 : swq_expr_node &operator=(const swq_expr_node &);
141 : swq_expr_node &operator=(swq_expr_node &&);
142 :
143 : bool operator==(const swq_expr_node &) const;
144 :
145 : explicit swq_expr_node(const char *);
146 : explicit swq_expr_node(int);
147 : explicit swq_expr_node(GIntBig);
148 : explicit swq_expr_node(double);
149 : explicit swq_expr_node(OGRGeometry *);
150 : explicit swq_expr_node(swq_op);
151 :
152 : ~swq_expr_node();
153 :
154 : void MarkAsTimestamp();
155 : CPLString UnparseOperationFromUnparsedSubExpr(char **apszSubExpr);
156 : char *Unparse(swq_field_list *, char chColumnQuote);
157 : void Dump(FILE *fp, int depth);
158 : swq_field_type Check(swq_field_list *, int bAllowFieldsInSecondaryTables,
159 : int bAllowMismatchTypeOnFieldComparison,
160 : swq_custom_func_registrar *poCustomFuncRegistrar);
161 : swq_expr_node *Evaluate(swq_field_fetcher pfnFetcher, void *record,
162 : const swq_evaluation_context &sContext);
163 : swq_expr_node *Clone();
164 :
165 : void ReplaceBetweenByGEAndLERecurse();
166 : void ReplaceInByOrRecurse();
167 : void PushNotOperationDownToStack();
168 :
169 : void RebalanceAndOr();
170 :
171 : bool HasReachedMaxDepth() const;
172 :
173 : swq_node_type eNodeType = SNT_CONSTANT;
174 : swq_field_type field_type = SWQ_INTEGER;
175 :
176 : /* only for SNT_OPERATION */
177 : void PushSubExpression(swq_expr_node *);
178 : void ReverseSubExpressions();
179 : swq_op nOperation = SWQ_OR;
180 : int nSubExprCount = 0;
181 : swq_expr_node **papoSubExpr = nullptr;
182 :
183 : /* only for SNT_COLUMN */
184 : int field_index = 0;
185 : int table_index = 0;
186 : char *table_name = nullptr;
187 :
188 : /* only for SNT_CONSTANT */
189 : int is_null = false;
190 : int64_t int_value = 0;
191 : double float_value = 0.0;
192 : OGRGeometry *geometry_value = nullptr;
193 :
194 : /* shared by SNT_COLUMN, SNT_CONSTANT and also possibly SNT_OPERATION when
195 : */
196 : /* nOperation == SWQ_CUSTOM_FUNC */
197 : char *string_value = nullptr; /* column name when SNT_COLUMN */
198 :
199 : // May be transiently used by swq_parser.h, but should not be relied upon
200 : // after parsing. swq_col_def.bHidden captures it afterwards.
201 : bool bHidden = false;
202 :
203 : // Recursive depth of this expression, taking into account papoSubExpr.
204 : int nDepth = 1;
205 :
206 : static CPLString QuoteIfNecessary(const CPLString &, char chQuote = '\'');
207 : static CPLString Quote(const CPLString &, char chQuote = '\'');
208 : };
209 :
210 : typedef struct
211 : {
212 : const char *pszName;
213 : swq_op eOperation;
214 : swq_op_evaluator pfnEvaluator;
215 : swq_op_checker pfnChecker;
216 : } swq_operation;
217 :
218 : class CPL_UNSTABLE_API swq_op_registrar
219 : {
220 : public:
221 : static const swq_operation *GetOperator(const char *);
222 : static const swq_operation *GetOperator(swq_op eOperation);
223 : };
224 :
225 : class CPL_UNSTABLE_API swq_custom_func_registrar
226 : {
227 : public:
228 1 : virtual ~swq_custom_func_registrar()
229 1 : {
230 1 : }
231 :
232 : virtual const swq_operation *GetOperator(const char *) = 0;
233 : };
234 :
// Names identifying one table: its data source, its name and its alias.
// Plain C-style record; this header says nothing about who owns the strings.
struct swq_table_def
{
    char *data_source;
    char *table_name;
    char *table_alias;
};
241 :
242 : class CPL_UNSTABLE_API swq_field_list
243 : {
244 : public:
245 : int count;
246 : char **names;
247 : swq_field_type *types;
248 : int *table_ids;
249 : int *ids;
250 :
251 : int table_count;
252 : swq_table_def *table_defs;
253 : };
254 :
255 : class CPL_UNSTABLE_API swq_parse_context
256 : {
257 : public:
258 8052 : swq_parse_context()
259 8052 : : nStartToken(0), pszInput(nullptr), pszNext(nullptr),
260 : pszLastValid(nullptr), bAcceptCustomFuncs(FALSE), poRoot(nullptr),
261 8052 : poCurSelect(nullptr)
262 : {
263 8052 : }
264 :
265 : int nStartToken;
266 : const char *pszInput;
267 : const char *pszNext;
268 : const char *pszLastValid;
269 : int bAcceptCustomFuncs;
270 :
271 : swq_expr_node *poRoot;
272 :
273 : swq_select *poCurSelect;
274 : };
275 :
276 : /* Compile an SQL WHERE clause into an internal form. The field_list is
277 : ** the list of fields in the target 'table', used to render where into
278 : ** field numbers instead of names.
279 : */
280 : int CPL_UNSTABLE_API swqparse(swq_parse_context *context);
281 : int CPL_UNSTABLE_API swqlex(swq_expr_node **ppNode, swq_parse_context *context);
282 : void CPL_UNSTABLE_API swqerror(swq_parse_context *context, const char *msg);
283 :
284 : int CPL_UNSTABLE_API swq_identify_field(const char *table_name,
285 : const char *token,
286 : swq_field_list *field_list,
287 : swq_field_type *this_type,
288 : int *table_id);
289 :
290 : CPLErr CPL_UNSTABLE_API
291 : swq_expr_compile(const char *where_clause, int field_count, char **field_list,
292 : swq_field_type *field_types, int bCheck,
293 : swq_custom_func_registrar *poCustomFuncRegistrar,
294 : swq_expr_node **expr_root);
295 :
296 : CPLErr CPL_UNSTABLE_API
297 : swq_expr_compile2(const char *where_clause, swq_field_list *field_list,
298 : int bCheck, swq_custom_func_registrar *poCustomFuncRegistrar,
299 : swq_expr_node **expr_root);
300 :
301 : /*
302 : ** Evaluation related.
303 : */
304 : int CPL_UNSTABLE_API swq_test_like(const char *input, const char *pattern);
305 :
306 : swq_expr_node CPL_UNSTABLE_API *
307 : SWQGeneralEvaluator(swq_expr_node *, swq_expr_node **,
308 : const swq_evaluation_context &sContext);
309 : swq_field_type CPL_UNSTABLE_API
310 : SWQGeneralChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
311 : swq_expr_node CPL_UNSTABLE_API *
312 : SWQCastEvaluator(swq_expr_node *, swq_expr_node **,
313 : const swq_evaluation_context &sContext);
314 : swq_field_type CPL_UNSTABLE_API
315 : SWQCastChecker(swq_expr_node *node, int bAllowMismatchTypeOnFieldComparison);
316 : const char CPL_UNSTABLE_API *SWQFieldTypeToString(swq_field_type field_type);
317 :
318 : /****************************************************************************/
319 :
// Parsing flag.
#define SWQP_ALLOW_UNDEFINED_COL_FUNCS 0x01

// Query mode values.
// NOTE(review): presumably the values stored in swq_select::query_mode —
// confirm.
#define SWQM_SUMMARY_RECORD 1
#define SWQM_RECORDSET 2
#define SWQM_DISTINCT_LIST 3
325 :
326 : typedef enum
327 : {
328 : SWQCF_NONE = 0,
329 : SWQCF_AVG = SWQ_AVG,
330 : SWQCF_MIN = SWQ_MIN,
331 : SWQCF_MAX = SWQ_MAX,
332 : SWQCF_COUNT = SWQ_COUNT,
333 : SWQCF_SUM = SWQ_SUM,
334 : SWQCF_STDDEV_POP = SWQ_STDDEV_POP,
335 : SWQCF_STDDEV_SAMP = SWQ_STDDEV_SAMP,
336 : SWQCF_CUSTOM
337 : } swq_col_func;
338 :
339 : typedef struct
340 : {
341 : swq_col_func col_func;
342 : char *table_name;
343 : char *field_name;
344 : char *field_alias;
345 : int table_index;
346 : int field_index;
347 : swq_field_type field_type;
348 : swq_field_type target_type;
349 : OGRFieldSubType target_subtype;
350 : int field_length;
351 : int field_precision;
352 : int distinct_flag;
353 : bool bHidden;
354 : OGRwkbGeometryType eGeomType;
355 : int nSRID;
356 : swq_expr_node *expr;
357 : } swq_col_def;
358 :
359 : class CPL_UNSTABLE_API swq_summary
360 : {
361 : public:
362 : struct Comparator
363 : {
364 : bool bSortAsc;
365 : swq_field_type eType;
366 :
367 113 : Comparator() : bSortAsc(true), eType(SWQ_STRING)
368 : {
369 113 : }
370 :
371 : bool operator()(const CPLString &, const CPLString &) const;
372 : };
373 :
374 : //! Return the sum, using Kahan-Babuska-Neumaier algorithm.
375 : // Cf cf KahanBabushkaNeumaierSum of https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements
376 12 : double sum() const
377 : {
378 12 : return sum_only_finite_terms ? sum_acc + sum_correction : sum_acc;
379 : }
380 :
381 : GIntBig count = 0;
382 :
383 : std::vector<CPLString> oVectorDistinctValues{};
384 : std::set<CPLString, Comparator> oSetDistinctValues{};
385 : bool sum_only_finite_terms = true;
386 : // Sum accumulator. To get the accurate sum, use the sum() method
387 : double sum_acc = 0.0;
388 : // Sum correction term.
389 : double sum_correction = 0.0;
390 : double min = 0.0;
391 : double max = 0.0;
392 :
393 : // Welford's online algorithm for variance:
394 : // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
395 : double mean_for_variance = 0.0;
396 : double sq_dist_from_mean_acc = 0.0; // "M2"
397 :
398 : CPLString osMin{};
399 : CPLString osMax{};
400 : };
401 :
// One ORDER BY entry: the field it refers to (by name and by index) and the
// sort direction flag.
struct swq_order_def
{
    char *table_name;
    char *field_name;
    int table_index;
    int field_index;
    int ascending_flag;
};
410 :
411 : typedef struct
412 : {
413 : int secondary_table;
414 : swq_expr_node *poExpr;
415 : } swq_join_def;
416 :
417 : class CPL_UNSTABLE_API swq_select_parse_options
418 : {
419 : public:
420 : swq_custom_func_registrar *poCustomFuncRegistrar;
421 : int bAllowFieldsInSecondaryTablesInWhere;
422 : int bAddSecondaryTablesGeometryFields;
423 : int bAlwaysPrefixWithTableName;
424 : int bAllowDistinctOnGeometryField;
425 : int bAllowDistinctOnMultipleFields;
426 :
427 60 : swq_select_parse_options()
428 60 : : poCustomFuncRegistrar(nullptr),
429 : bAllowFieldsInSecondaryTablesInWhere(FALSE),
430 : bAddSecondaryTablesGeometryFields(FALSE),
431 : bAlwaysPrefixWithTableName(FALSE),
432 : bAllowDistinctOnGeometryField(FALSE),
433 60 : bAllowDistinctOnMultipleFields(FALSE)
434 : {
435 60 : }
436 : };
437 :
438 2956 : class CPL_UNSTABLE_API swq_select
439 : {
440 : void postpreparse();
441 :
442 : CPL_DISALLOW_COPY_ASSIGN(swq_select)
443 :
444 : public:
445 : swq_select();
446 : ~swq_select();
447 :
448 : int query_mode = 0;
449 :
450 : char *raw_select = nullptr;
451 :
452 : int PushField(swq_expr_node *poExpr, const char *pszAlias,
453 : bool distinct_flag, bool bHidden);
454 :
455 : int PushExcludeField(swq_expr_node *poExpr);
456 :
457 128378 : int result_columns() const
458 : {
459 128378 : return static_cast<int>(column_defs.size());
460 : }
461 :
462 : std::vector<swq_col_def> column_defs{};
463 : std::vector<swq_summary> column_summary{};
464 :
465 : int PushTableDef(const char *pszDataSource, const char *pszTableName,
466 : const char *pszAlias);
467 : int table_count = 0;
468 : swq_table_def *table_defs = nullptr;
469 :
470 : void PushJoin(int iSecondaryTable, swq_expr_node *poExpr);
471 : int join_count = 0;
472 : swq_join_def *join_defs = nullptr;
473 :
474 : swq_expr_node *where_expr = nullptr;
475 :
476 : void PushOrderBy(const char *pszTableName, const char *pszFieldName,
477 : int bAscending);
478 : int order_specs = 0;
479 : swq_order_def *order_defs = nullptr;
480 :
481 : void SetLimit(GIntBig nLimit);
482 : GIntBig limit = -1;
483 :
484 : void SetOffset(GIntBig nOffset);
485 : GIntBig offset = 0;
486 :
487 : swq_select *poOtherSelect = nullptr;
488 : void PushUnionAll(swq_select *poOtherSelectIn);
489 :
490 : CPLErr preparse(const char *select_statement,
491 : int bAcceptCustomFuncs = FALSE);
492 : CPLErr expand_wildcard(swq_field_list *field_list,
493 : int bAlwaysPrefixWithTableName);
494 : CPLErr parse(swq_field_list *field_list,
495 : swq_select_parse_options *poParseOptions);
496 :
497 : char *Unparse();
498 :
499 : bool bExcludedGeometry = false;
500 :
501 : private:
502 : bool IsFieldExcluded(int src_index, const char *table, const char *field);
503 :
504 : // map of EXCLUDE columns keyed according to the index of the
505 : // asterisk with which it should be associated. key of -1 is
506 : // used for column lists that have not yet been associated with
507 : // an asterisk.
508 : std::map<int, std::list<swq_col_def>> m_exclude_fields{};
509 : };
510 :
511 : /* This method should generally be invoked with pszValue set, except when
512 : * called on a non-DISTINCT column definition of numeric type (SWQ_BOOLEAN,
513 : * SWQ_INTEGER, SWQ_INTEGER64, SWQ_FLOAT), in which case pdfValue should
514 : * rather be set.
515 : */
516 : const char CPL_UNSTABLE_API *swq_select_summarize(swq_select *select_info,
517 : int dest_column,
518 : const char *pszValue,
519 : const double *pdfValue);
520 :
521 : int CPL_UNSTABLE_API swq_is_reserved_keyword(const char *pszStr);
522 :
523 : char CPL_UNSTABLE_API *OGRHStoreGetValue(const char *pszHStore,
524 : const char *pszSearchedKey);
525 :
// Declarations only visible when GDAL_COMPILATION is defined. Unlike the
// functions above, they do not carry the CPL_UNSTABLE_API marker.
#ifdef GDAL_COMPILATION

void swq_fixup(swq_parse_context *psParseContext);

// Builds a node from an operation code and its left / right operands.
// NOTE(review): the name suggests `op` is SWQ_AND or SWQ_OR — confirm.
swq_expr_node *swq_create_and_or_or(swq_op op, swq_expr_node *left,
                                    swq_expr_node *right);

// Five-argument overload of swq_test_like(), adding an escape character, a
// case-insensitivity switch and a UTF-8 switch.
int swq_test_like(const char *input, const char *pattern, char chEscape,
                  bool insensitive, bool bUTF8Strings);

#endif
533 :
534 : #endif /* #ifndef DOXYGEN_SKIP */
535 :
536 : #endif /* def SWQ_H_INCLUDED_ */
|