Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: GDAL
4 : * Purpose: GDALZonalStats implementation
5 : * Author: Dan Baston
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2018-2025, ISciences LLC
9 : *
10 : * SPDX-License-Identifier: MIT
11 : ****************************************************************************/
12 :
13 : #pragma once
14 :
15 : #include <algorithm>
16 : #include <cmath>
17 : #include <limits>
18 : #include <optional>
19 : #include <unordered_map>
20 :
21 : //! @cond Doxygen_Suppress
22 :
23 : namespace gdal
24 : {
25 : struct RasterStatsOptions
26 : {
27 : static constexpr float min_coverage_fraction_default =
28 : std::numeric_limits<float>::min(); // ~1e-38
29 :
30 : float min_coverage_fraction = min_coverage_fraction_default;
31 : bool calc_variance = false;
32 : bool store_histogram = false;
33 : bool store_values = false;
34 : bool store_weights = false;
35 : bool store_coverage_fraction = false;
36 : bool store_xy = false;
37 : bool include_nodata = false;
38 : double default_weight = std::numeric_limits<double>::quiet_NaN();
39 :
40 : bool operator==(const RasterStatsOptions &other) const
41 : {
42 : return min_coverage_fraction == other.min_coverage_fraction &&
43 : calc_variance == other.calc_variance &&
44 : store_histogram == other.store_histogram &&
45 : store_values == other.store_values &&
46 : store_weights == other.store_weights &&
47 : store_coverage_fraction == other.store_coverage_fraction &&
48 : store_xy == other.store_xy &&
49 : include_nodata == other.include_nodata &&
50 : (default_weight == other.default_weight ||
51 : (std::isnan(default_weight) &&
52 : std::isnan(other.default_weight)));
53 : }
54 :
55 : bool operator!=(const RasterStatsOptions &other) const
56 : {
57 : return !(*this == other);
58 : }
59 : };
60 :
61 : class WestVariance
62 : {
63 : /** \brief Implements an incremental algorithm for weighted standard
64 : * deviation, variance, and coefficient of variation, as described in
65 : * formula WV2 of West, D.H.D. (1979) "Updating Mean and Variance
66 : * Estimates: An Improved Method". Communications of the ACM 22(9).
67 : */
68 :
69 : private:
70 : double sum_w = 0;
71 : double mean = 0;
72 : double t = 0;
73 :
74 : public:
75 : /** \brief Update variance estimate with another value
76 : *
77 : * @param x value to add
78 : * @param w weight of `x`
79 : */
80 248 : void process(double x, double w)
81 : {
82 248 : if (w == 0)
83 : {
84 1 : return;
85 : }
86 :
87 247 : double mean_old = mean;
88 :
89 247 : sum_w += w;
90 247 : mean += (w / sum_w) * (x - mean_old);
91 247 : t += w * (x - mean_old) * (x - mean);
92 : }
93 :
94 : /** \brief Return the population variance.
95 : */
96 18 : constexpr double variance() const
97 : {
98 18 : return t / sum_w;
99 : }
100 :
101 : /** \brief Return the population standard deviation
102 : */
103 9 : double stdev() const
104 : {
105 9 : return std::sqrt(variance());
106 : }
107 :
108 : /** \brief Return the population coefficient of variation
109 : */
110 : double coefficent_of_variation() const
111 : {
112 : return stdev() / mean;
113 : }
114 : };
115 :
116 : template <typename ValueType> class RasterStats
117 : {
118 : public:
119 : /**
120 : * Compute raster statistics from a Raster representing intersection percentages,
121 : * a Raster representing data values, and (optionally) a Raster representing weights.
122 : * and a set of raster values.
123 : */
124 11738 : explicit RasterStats(const RasterStatsOptions &options)
125 11738 : : m_min{std::numeric_limits<ValueType>::max()},
126 11738 : m_max{std::numeric_limits<ValueType>::lowest()}, m_sum_ciwi{0},
127 11738 : m_sum_ci{0}, m_sum_xici{0}, m_sum_xiciwi{0}, m_options{options}
128 : {
129 11738 : }
130 :
131 : // All pixels covered 100%
132 11600 : void process(const ValueType *pValues, const GByte *pabyMask,
133 : const double *padfWeights, const GByte *pabyWeightsMask,
134 : const double *padfX, const double *padfY, size_t nX, size_t nY)
135 : {
136 23200 : for (size_t i = 0; i < nX * nY; i++)
137 : {
138 11600 : if (pabyMask[i] == 255)
139 : {
140 11464 : if (padfX && padfY)
141 : {
142 10000 : process_location(padfX[i % nX], padfY[i / nX]);
143 : }
144 22264 : const double dfWeight =
145 : pabyWeightsMask
146 10800 : ? (pabyWeightsMask[i] == 255
147 : ? padfWeights[i]
148 0 : : std::numeric_limits<double>::quiet_NaN())
149 : : 1.0;
150 11464 : process_value(pValues[i], 1.0, dfWeight);
151 : }
152 : }
153 11600 : }
154 :
155 : // Pixels covered 0% or 100%
156 528 : void process(const ValueType *pValues, const GByte *pabyMask,
157 : const double *padfWeights, const GByte *pabyWeightsMask,
158 : const GByte *pabyCov, const double *pdfX, const double *pdfY,
159 : size_t nX, size_t nY)
160 : {
161 10631 : for (size_t i = 0; i < nX * nY; i++)
162 : {
163 10103 : if (pabyMask[i] == 255 && pabyCov[i])
164 : {
165 5531 : if (pdfX && pdfY)
166 : {
167 1218 : process_location(pdfX[i % nX], pdfY[i / nX]);
168 : }
169 5680 : const double dfWeight =
170 : pabyWeightsMask
171 149 : ? (pabyWeightsMask[i] == 255
172 : ? padfWeights[i]
173 1 : : std::numeric_limits<double>::quiet_NaN())
174 : : 1.0;
175 5531 : process_value(pValues[i], 1.0, dfWeight);
176 : }
177 : }
178 528 : }
179 :
180 : // Pixels fractionally covered
181 312 : void process(const ValueType *pValues, const GByte *pabyMask,
182 : const double *padfWeights, const GByte *pabyWeightsMask,
183 : const float *pfCov, const double *pdfX, const double *pdfY,
184 : size_t nX, size_t nY)
185 : {
186 4864 : for (size_t i = 0; i < nX * nY; i++)
187 : {
188 4552 : if (pabyMask[i] == 255 &&
189 4504 : pfCov[i] >= m_options.min_coverage_fraction)
190 : {
191 3080 : if (pdfX && pdfY)
192 : {
193 764 : process_location(pdfX[i % nX], pdfY[i / nX]);
194 : }
195 3488 : const double dfWeight =
196 : pabyWeightsMask
197 408 : ? (pabyWeightsMask[i] == 255
198 : ? padfWeights[i]
199 0 : : std::numeric_limits<double>::quiet_NaN())
200 : : 1.0;
201 3080 : process_value(pValues[i], pfCov[i], dfWeight);
202 : }
203 : }
204 312 : }
205 :
206 11982 : void process_location(double x, double y)
207 : {
208 11982 : if (m_options.store_xy)
209 : {
210 11982 : m_cell_x.push_back(x);
211 11982 : m_cell_y.push_back(y);
212 : }
213 11982 : }
214 :
215 20075 : void process_value(const ValueType &val, float coverage, double weight)
216 : {
217 20075 : if (m_options.store_coverage_fraction)
218 : {
219 16 : m_cell_cov.push_back(coverage);
220 : }
221 :
222 20075 : m_sum_ci += static_cast<double>(coverage);
223 20075 : m_sum_xici += static_cast<double>(val) * static_cast<double>(coverage);
224 :
225 20075 : double ciwi = static_cast<double>(coverage) * weight;
226 20075 : m_sum_ciwi += ciwi;
227 20075 : m_sum_xiciwi += static_cast<double>(val) * ciwi;
228 :
229 20075 : if (m_options.calc_variance)
230 : {
231 124 : m_variance.process(static_cast<double>(val),
232 : static_cast<double>(coverage));
233 124 : m_weighted_variance.process(static_cast<double>(val), ciwi);
234 : }
235 :
236 20075 : if (val < m_min)
237 : {
238 519 : m_min = val;
239 519 : if (m_options.store_xy)
240 : {
241 158 : m_min_xy = {m_cell_x.back(), m_cell_y.back()};
242 : }
243 : }
244 :
245 20075 : if (val > m_max)
246 : {
247 1553 : m_max = val;
248 1553 : if (m_options.store_xy)
249 : {
250 345 : m_max_xy = {m_cell_x.back(), m_cell_y.back()};
251 : }
252 : }
253 :
254 20075 : if (m_options.store_histogram)
255 : {
256 164 : auto &entry = m_freq[val];
257 164 : entry.m_sum_ci += static_cast<double>(coverage);
258 164 : entry.m_sum_ciwi += ciwi;
259 : }
260 :
261 20075 : if (m_options.store_values)
262 : {
263 16 : m_cell_values.push_back(val);
264 : }
265 :
266 20075 : if (m_options.store_weights)
267 : {
268 36 : m_cell_weights.push_back(weight);
269 : }
270 20075 : }
271 :
272 : /**
273 : * The mean value of cells covered by this polygon, weighted
274 : * by the percent of the cell that is covered.
275 : */
276 94 : double mean() const
277 : {
278 94 : if (count() > 0)
279 : {
280 92 : return sum() / count();
281 : }
282 : else
283 : {
284 2 : return std::numeric_limits<double>::quiet_NaN();
285 : }
286 : }
287 :
288 : /**
289 : * The mean value of cells covered by this polygon, weighted
290 : * by the percent of the cell that is covered and a secondary
291 : * weighting raster.
292 : *
293 : * If any weights are undefined, will return NAN. If this is undesirable,
294 : * caller should replace undefined weights with a suitable default
295 : * before computing statistics.
296 : */
297 43 : double weighted_mean() const
298 : {
299 43 : if (weighted_count() > 0)
300 : {
301 28 : return weighted_sum() / weighted_count();
302 : }
303 : else
304 : {
305 15 : return std::numeric_limits<double>::quiet_NaN();
306 : }
307 : }
308 :
309 : /** The fraction of weighted cells to unweighted cells.
310 : * Meaningful only when the values of the weighting
311 : * raster are between 0 and 1.
312 : */
313 : double weighted_fraction() const
314 : {
315 : return weighted_sum() / sum();
316 : }
317 :
318 : /**
319 : * The raster value occupying the greatest number of cells
320 : * or partial cells within the polygon. When multiple values
321 : * cover the same number of cells, the greatest value will
322 : * be returned. Weights are not taken into account.
323 : */
324 18 : std::optional<ValueType> mode() const
325 : {
326 18 : auto it = std::max_element(
327 : m_freq.cbegin(), m_freq.cend(),
328 14 : [](const auto &a, const auto &b)
329 : {
330 24 : return a.second.m_sum_ci < b.second.m_sum_ci ||
331 10 : (a.second.m_sum_ci == b.second.m_sum_ci &&
332 18 : a.first < b.first);
333 : });
334 18 : if (it == m_freq.end())
335 : {
336 11 : return std::nullopt;
337 : }
338 7 : return it->first;
339 : }
340 :
341 : /**
342 : * The minimum value in any raster cell wholly or partially covered
343 : * by the polygon. Weights are not taken into account.
344 : */
345 2 : std::optional<ValueType> min() const
346 : {
347 2 : if (m_sum_ci == 0)
348 : {
349 0 : return std::nullopt;
350 : }
351 2 : return m_min;
352 : }
353 :
354 : /// XY values corresponding to the center of the cell whose value
355 : /// is returned by min()
356 4 : std::optional<std::pair<double, double>> min_xy() const
357 : {
358 4 : if (m_sum_ci == 0)
359 : {
360 0 : return std::nullopt;
361 : }
362 4 : return m_min_xy;
363 : }
364 :
365 : /**
366 : * The maximum value in any raster cell wholly or partially covered
367 : * by the polygon. Weights are not taken into account.
368 : */
369 34 : std::optional<ValueType> max() const
370 : {
371 34 : if (m_sum_ci == 0)
372 : {
373 0 : return std::nullopt;
374 : }
375 34 : return m_max;
376 : }
377 :
378 : /// XY values corresponding to the center of the cell whose value
379 : /// is returned by max()
380 68 : std::optional<std::pair<double, double>> max_xy() const
381 : {
382 68 : if (m_sum_ci == 0)
383 : {
384 0 : return std::nullopt;
385 : }
386 68 : return m_max_xy;
387 : }
388 :
389 : /**
390 : * The sum of raster cells covered by the polygon, with each raster
391 : * value weighted by its coverage fraction.
392 : */
393 236 : double sum() const
394 : {
395 236 : return m_sum_xici;
396 : }
397 :
398 : /**
399 : * The sum of raster cells covered by the polygon, with each raster
400 : * value weighted by its coverage fraction and weighting raster value.
401 : *
402 : * If any weights are undefined, will return NAN. If this is undesirable,
403 : * caller should replace undefined weights with a suitable default
404 : * before computing statistics.
405 : */
406 52 : double weighted_sum() const
407 : {
408 52 : return m_sum_xiciwi;
409 : }
410 :
411 : /**
412 : * The number of raster cells with any defined value
413 : * covered by the polygon. Weights are not taken
414 : * into account.
415 : */
416 200 : double count() const
417 : {
418 200 : return m_sum_ci;
419 : }
420 :
421 : /**
422 : * The number of raster cells with a specific value
423 : * covered by the polygon. Weights are not taken
424 : * into account.
425 : */
426 : std::optional<double> count(const ValueType &value) const
427 : {
428 : const auto &entry = m_freq.find(value);
429 :
430 : if (entry == m_freq.end())
431 : {
432 : return std::nullopt;
433 : }
434 :
435 : return entry->second.m_sum_ci;
436 : }
437 :
438 : /**
439 : * The fraction of defined raster cells covered by the polygon with
440 : * a value that equals the specified value.
441 : * Weights are not taken into account.
442 : */
443 : std::optional<double> frac(const ValueType &value) const
444 : {
445 : auto count_for_value = count(value);
446 :
447 : if (!count_for_value.has_value())
448 : {
449 : return count_for_value;
450 : }
451 :
452 : return count_for_value.value() / count();
453 : }
454 :
455 : /**
456 : * The weighted fraction of defined raster cells covered by the polygon with
457 : * a value that equals the specified value.
458 : */
459 : std::optional<double> weighted_frac(const ValueType &value) const
460 : {
461 : auto count_for_value = weighted_count(value);
462 :
463 : if (!count_for_value.has_value())
464 : {
465 : return count_for_value;
466 : }
467 :
468 : return count_for_value.value() / weighted_count();
469 : }
470 :
471 : /**
472 : * The population variance of raster cells touched
473 : * by the polygon. Cell coverage fractions are taken
474 : * into account; values of a weighting raster are not.
475 : */
476 2 : double variance() const
477 : {
478 2 : return m_variance.variance();
479 : }
480 :
481 : /**
482 : * The population variance of raster cells touched
483 : * by the polygon, taking into account cell coverage
484 : * fractions and values of a weighting raster.
485 : */
486 7 : double weighted_variance() const
487 : {
488 7 : return m_weighted_variance.variance();
489 : }
490 :
491 : /**
492 : * The population standard deviation of raster cells
493 : * touched by the polygon. Cell coverage fractions
494 : * are taken into account; values of a weighting
495 : * raster are not.
496 : */
497 2 : double stdev() const
498 : {
499 2 : return m_variance.stdev();
500 : }
501 :
502 : /**
503 : * The population standard deviation of raster cells
504 : * touched by the polygon, taking into account cell
505 : * coverage fractions and values of a weighting raster.
506 : */
507 7 : double weighted_stdev() const
508 : {
509 7 : return m_weighted_variance.stdev();
510 : }
511 :
512 : /**
513 : * The sum of weights for each cell covered by the
514 : * polygon, with each weight multiplied by the coverage
515 : * fraction of each cell.
516 : *
517 : * If any weights are undefined, will return NAN. If this is undesirable,
518 : * caller should replace undefined weights with a suitable default
519 : * before computing statistics.
520 : */
521 71 : double weighted_count() const
522 : {
523 71 : return m_sum_ciwi;
524 : }
525 :
526 : /**
527 : * The sum of weights for each cell of a specific value covered by the
528 : * polygon, with each weight multiplied by the coverage fraction
529 : * of each cell.
530 : *
531 : * If any weights are undefined, will return NAN. If this is undesirable,
532 : * caller should replace undefined weights with a suitable default
533 : * before computing statistics.
534 : */
535 : std::optional<double> weighted_count(const ValueType &value) const
536 : {
537 : const auto &entry = m_freq.find(value);
538 :
539 : if (entry == m_freq.end())
540 : {
541 : return std::nullopt;
542 : }
543 :
544 : return entry->second.m_sum_ciwi;
545 : }
546 :
547 : /**
548 : * The raster value occupying the least number of cells
549 : * or partial cells within the polygon. When multiple values
550 : * cover the same number of cells, the lowest value will
551 : * be returned.
552 : *
553 : * Cell weights are not taken into account.
554 : */
555 2 : std::optional<ValueType> minority() const
556 : {
557 2 : auto it = std::min_element(
558 : m_freq.cbegin(), m_freq.cend(),
559 14 : [](const auto &a, const auto &b)
560 : {
561 28 : return a.second.m_sum_ci < b.second.m_sum_ci ||
562 14 : (a.second.m_sum_ci == b.second.m_sum_ci &&
563 20 : a.first < b.first);
564 : });
565 2 : if (it == m_freq.end())
566 : {
567 0 : return std::nullopt;
568 : }
569 2 : return it->first;
570 : }
571 :
572 : /**
573 : * The number of distinct defined raster values in cells wholly
574 : * or partially covered by the polygon.
575 : */
576 2 : std::uint64_t variety() const
577 : {
578 2 : return m_freq.size();
579 : }
580 :
581 2 : const std::vector<ValueType> &values() const
582 : {
583 2 : return m_cell_values;
584 : }
585 :
586 : const std::vector<bool> &values_defined() const
587 : {
588 : return m_cell_values_defined;
589 : }
590 :
591 2 : const std::vector<float> &coverage_fractions() const
592 : {
593 2 : return m_cell_cov;
594 : }
595 :
596 5 : const std::vector<double> &weights() const
597 : {
598 5 : return m_cell_weights;
599 : }
600 :
601 : const std::vector<bool> &weights_defined() const
602 : {
603 : return m_cell_weights_defined;
604 : }
605 :
606 2 : const std::vector<double> ¢er_x() const
607 : {
608 2 : return m_cell_x;
609 : }
610 :
611 2 : const std::vector<double> ¢er_y() const
612 : {
613 2 : return m_cell_y;
614 : }
615 :
616 4 : const auto &freq() const
617 : {
618 4 : return m_freq;
619 : }
620 :
621 : private:
622 : ValueType m_min{};
623 : ValueType m_max{};
624 : std::pair<double, double> m_min_xy{
625 : std::numeric_limits<double>::quiet_NaN(),
626 : std::numeric_limits<double>::quiet_NaN()};
627 : std::pair<double, double> m_max_xy{
628 : std::numeric_limits<double>::quiet_NaN(),
629 : std::numeric_limits<double>::quiet_NaN()};
630 :
631 : // ci: coverage fraction of pixel i
632 : // wi: weight of pixel i
633 : // xi: value of pixel i
634 : double m_sum_ciwi{0};
635 : double m_sum_ci{0};
636 : double m_sum_xici{0};
637 : double m_sum_xiciwi{0};
638 :
639 : WestVariance m_variance{};
640 : WestVariance m_weighted_variance{};
641 :
642 : struct ValueFreqEntry
643 : {
644 : double m_sum_ci = 0;
645 : double m_sum_ciwi = 0;
646 : };
647 :
648 : std::unordered_map<ValueType, ValueFreqEntry> m_freq{};
649 :
650 : std::vector<float> m_cell_cov{};
651 : std::vector<ValueType> m_cell_values{};
652 : std::vector<double> m_cell_weights{};
653 : std::vector<double> m_cell_x{};
654 : std::vector<double> m_cell_y{};
655 : std::vector<bool> m_cell_values_defined{};
656 : std::vector<bool> m_cell_weights_defined{};
657 :
658 : RasterStatsOptions m_options;
659 : };
660 :
661 : template <typename T>
662 : std::ostream &operator<<(std::ostream &os, const RasterStats<T> &stats)
663 : {
664 : os << "{" << std::endl;
665 : os << " \"count\" : " << stats.count() << "," << std::endl;
666 :
667 : os << " \"min\" : ";
668 : if (stats.min().has_value())
669 : {
670 : os << stats.min().value();
671 : }
672 : else
673 : {
674 : os << "null";
675 : }
676 : os << "," << std::endl;
677 :
678 : os << " \"max\" : ";
679 : if (stats.max().has_value())
680 : {
681 : os << stats.max().value();
682 : }
683 : else
684 : {
685 : os << "null";
686 : }
687 : os << "," << std::endl;
688 :
689 : os << " \"mean\" : " << stats.mean() << "," << std::endl;
690 : os << " \"sum\" : " << stats.sum() << "," << std::endl;
691 : os << " \"weighted_mean\" : " << stats.weighted_mean() << "," << std::endl;
692 : os << " \"weighted_sum\" : " << stats.weighted_sum();
693 : if (stats.stores_values())
694 : {
695 : os << "," << std::endl;
696 : os << " \"mode\" : ";
697 : if (stats.mode().has_value())
698 : {
699 : os << stats.mode().value();
700 : }
701 : else
702 : {
703 : os << "null";
704 : }
705 : os << "," << std::endl;
706 :
707 : os << " \"minority\" : ";
708 : if (stats.minority().has_value())
709 : {
710 : os << stats.minority().value();
711 : }
712 : else
713 : {
714 : os << "null";
715 : }
716 : os << "," << std::endl;
717 :
718 : os << " \"variety\" : " << stats.variety() << std::endl;
719 : }
720 : else
721 : {
722 : os << std::endl;
723 : }
724 : os << "}" << std::endl;
725 : return os;
726 : }
727 :
728 : } // namespace gdal
729 :
730 : //! @endcond
|