Line data Source code
1 : /******************************************************************************
2 : *
3 : * Project: FlatGeobuf
4 : * Purpose: Packed RTree management
5 : * Author: Björn Harrtell <bjorn at wololo dot org>
6 : *
7 : ******************************************************************************
8 : * Copyright (c) 2018-2020, Björn Harrtell <bjorn at wololo dot org>
9 : *
10 : * Permission is hereby granted, free of charge, to any person obtaining a
11 : * copy of this software and associated documentation files (the "Software"),
12 : * to deal in the Software without restriction, including without limitation
13 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 : * and/or sell copies of the Software, and to permit persons to whom the
15 : * Software is furnished to do so, subject to the following conditions:
16 : *
17 : * The above copyright notice and this permission notice shall be included
18 : * in all copies or substantial portions of the Software.
19 : *
20 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 : * DEALINGS IN THE SOFTWARE.
27 : ****************************************************************************/
28 :
29 : // NOTE: The upstream of this file is in
30 : // https://github.com/bjornharrtell/flatgeobuf/tree/master/src/cpp
31 :
32 : #ifdef GDAL_COMPILATION
33 : #include "cpl_port.h"
34 : #else
35 : #define CPL_IS_LSB 1
36 : #endif
37 :
38 : #include "packedrtree.h"
39 :
40 : #include <algorithm>
41 : #include <limits>
42 : #include <map>
43 : #include <unordered_map>
44 : #include <iostream>
45 :
46 : namespace FlatGeobuf
47 : {
48 :
49 477 : const NodeItem &NodeItem::expand(const NodeItem &r)
50 : {
51 477 : if (r.minX < minX)
52 372 : minX = r.minX;
53 477 : if (r.minY < minY)
54 355 : minY = r.minY;
55 477 : if (r.maxX > maxX)
56 375 : maxX = r.maxX;
57 477 : if (r.maxY > maxY)
58 366 : maxY = r.maxY;
59 477 : return *this;
60 : }
61 :
62 345 : NodeItem NodeItem::create(uint64_t offset)
63 : {
64 : return {std::numeric_limits<double>::infinity(),
65 : std::numeric_limits<double>::infinity(),
66 : -1 * std::numeric_limits<double>::infinity(),
67 345 : -1 * std::numeric_limits<double>::infinity(), offset};
68 : }
69 :
70 206 : bool NodeItem::intersects(const NodeItem &r) const
71 : {
72 206 : if (maxX < r.minX)
73 19 : return false;
74 187 : if (maxY < r.minY)
75 15 : return false;
76 172 : if (minX > r.maxX)
77 21 : return false;
78 151 : if (minY > r.maxY)
79 0 : return false;
80 151 : return true;
81 : }
82 :
83 115 : std::vector<double> NodeItem::toVector()
84 : {
85 115 : return std::vector<double>{minX, minY, maxX, maxY};
86 : }
87 :
88 : // Based on public domain code at
89 : // https://github.com/rawrunprotected/hilbert_curves
90 238 : uint32_t hilbert(uint32_t x, uint32_t y)
91 : {
92 238 : uint32_t a = x ^ y;
93 238 : uint32_t b = 0xFFFF ^ a;
94 238 : uint32_t c = 0xFFFF ^ (x | y);
95 238 : uint32_t d = x & (y ^ 0xFFFF);
96 :
97 238 : uint32_t A = a | (b >> 1);
98 238 : uint32_t B = (a >> 1) ^ a;
99 238 : uint32_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c;
100 238 : uint32_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d;
101 :
102 238 : a = A;
103 238 : b = B;
104 238 : c = C;
105 238 : d = D;
106 238 : A = ((a & (a >> 2)) ^ (b & (b >> 2)));
107 238 : B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2)));
108 238 : C ^= ((a & (c >> 2)) ^ (b & (d >> 2)));
109 238 : D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2)));
110 :
111 238 : a = A;
112 238 : b = B;
113 238 : c = C;
114 238 : d = D;
115 238 : A = ((a & (a >> 4)) ^ (b & (b >> 4)));
116 238 : B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4)));
117 238 : C ^= ((a & (c >> 4)) ^ (b & (d >> 4)));
118 238 : D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4)));
119 :
120 238 : a = A;
121 238 : b = B;
122 238 : c = C;
123 238 : d = D;
124 238 : C ^= ((a & (c >> 8)) ^ (b & (d >> 8)));
125 238 : D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8)));
126 :
127 238 : a = C ^ (C >> 1);
128 238 : b = D ^ (D >> 1);
129 :
130 238 : uint32_t i0 = x ^ y;
131 238 : uint32_t i1 = b | (0xFFFF ^ (i0 | a));
132 :
133 238 : i0 = (i0 | (i0 << 8)) & 0x00FF00FF;
134 238 : i0 = (i0 | (i0 << 4)) & 0x0F0F0F0F;
135 238 : i0 = (i0 | (i0 << 2)) & 0x33333333;
136 238 : i0 = (i0 | (i0 << 1)) & 0x55555555;
137 :
138 238 : i1 = (i1 | (i1 << 8)) & 0x00FF00FF;
139 238 : i1 = (i1 | (i1 << 4)) & 0x0F0F0F0F;
140 238 : i1 = (i1 | (i1 << 2)) & 0x33333333;
141 238 : i1 = (i1 | (i1 << 1)) & 0x55555555;
142 :
143 238 : uint32_t value = ((i1 << 1) | i0);
144 :
145 238 : return value;
146 : }
147 :
148 238 : uint32_t hilbert(const NodeItem &r, uint32_t hilbertMax, const double minX,
149 : const double minY, const double width, const double height)
150 : {
151 238 : uint32_t x = 0;
152 238 : uint32_t y = 0;
153 : uint32_t v;
154 238 : if (width != 0.0)
155 230 : x = static_cast<uint32_t>(
156 230 : floor(hilbertMax * ((r.minX + r.maxX) / 2 - minX) / width));
157 238 : if (height != 0.0)
158 226 : y = static_cast<uint32_t>(
159 226 : floor(hilbertMax * ((r.minY + r.maxY) / 2 - minY) / height));
160 238 : v = hilbert(x, y);
161 238 : return v;
162 : }
163 :
164 0 : void hilbertSort(std::vector<std::shared_ptr<Item>> &items)
165 : {
166 0 : NodeItem extent = calcExtent(items);
167 0 : const double minX = extent.minX;
168 0 : const double minY = extent.minY;
169 0 : const double width = extent.width();
170 0 : const double height = extent.height();
171 0 : std::sort(items.begin(), items.end(),
172 0 : [minX, minY, width, height](std::shared_ptr<Item> a,
173 0 : std::shared_ptr<Item> b)
174 : {
175 0 : uint32_t ha = hilbert(a->nodeItem, HILBERT_MAX, minX, minY,
176 : width, height);
177 0 : uint32_t hb = hilbert(b->nodeItem, HILBERT_MAX, minX, minY,
178 : width, height);
179 0 : return ha > hb;
180 : });
181 0 : }
182 :
183 0 : void hilbertSort(std::vector<NodeItem> &items)
184 : {
185 0 : NodeItem extent = calcExtent(items);
186 0 : const double minX = extent.minX;
187 0 : const double minY = extent.minY;
188 0 : const double width = extent.width();
189 0 : const double height = extent.height();
190 0 : std::sort(items.begin(), items.end(),
191 0 : [minX, minY, width, height](const NodeItem &a, const NodeItem &b)
192 : {
193 : uint32_t ha =
194 0 : hilbert(a, HILBERT_MAX, minX, minY, width, height);
195 : uint32_t hb =
196 0 : hilbert(b, HILBERT_MAX, minX, minY, width, height);
197 0 : return ha > hb;
198 : });
199 0 : }
200 :
201 0 : NodeItem calcExtent(const std::vector<std::shared_ptr<Item>> &items)
202 : {
203 : return std::accumulate(
204 : items.begin(), items.end(), NodeItem::create(0),
205 0 : [](NodeItem a, const std::shared_ptr<Item> &b) -> NodeItem
206 0 : { return a.expand(b->nodeItem); });
207 : }
208 :
209 0 : NodeItem calcExtent(const std::vector<NodeItem> &nodes)
210 : {
211 : return std::accumulate(nodes.begin(), nodes.end(), NodeItem::create(0),
212 0 : [](NodeItem a, const NodeItem &b) -> NodeItem
213 0 : { return a.expand(b); });
214 : }
215 :
216 115 : void PackedRTree::init(const uint16_t nodeSize)
217 : {
218 115 : if (nodeSize < 2)
219 0 : throw std::invalid_argument("Node size must be at least 2");
220 115 : if (_numItems == 0)
221 0 : throw std::invalid_argument("Cannot create empty tree");
222 230 : _nodeSize = std::min(std::max(nodeSize, static_cast<uint16_t>(2)),
223 115 : static_cast<uint16_t>(65535));
224 115 : _levelBounds = generateLevelBounds(_numItems, _nodeSize);
225 115 : _numNodes = _levelBounds.front().second;
226 115 : _nodeItems = new NodeItem[static_cast<size_t>(_numNodes)];
227 115 : }
228 :
229 : std::vector<std::pair<uint64_t, uint64_t>>
230 156 : PackedRTree::generateLevelBounds(const uint64_t numItems,
231 : const uint16_t nodeSize)
232 : {
233 156 : if (nodeSize < 2)
234 0 : throw std::invalid_argument("Node size must be at least 2");
235 156 : if (numItems == 0)
236 0 : throw std::invalid_argument("Number of items must be greater than 0");
237 156 : if (numItems >
238 156 : std::numeric_limits<uint64_t>::max() - ((numItems / nodeSize) * 2))
239 0 : throw std::overflow_error("Number of items too large");
240 :
241 : // number of nodes per level in bottom-up order
242 312 : std::vector<uint64_t> levelNumNodes;
243 156 : uint64_t n = numItems;
244 156 : uint64_t numNodes = n;
245 156 : levelNumNodes.push_back(n);
246 0 : do
247 : {
248 156 : n = (n + nodeSize - 1) / nodeSize;
249 156 : numNodes += n;
250 156 : levelNumNodes.push_back(n);
251 156 : } while (n != 1);
252 :
253 : // bounds per level in reversed storage order (top-down)
254 312 : std::vector<uint64_t> levelOffsets;
255 156 : n = numNodes;
256 468 : for (auto size : levelNumNodes)
257 312 : levelOffsets.push_back(n -= size);
258 156 : std::vector<std::pair<uint64_t, uint64_t>> levelBounds;
259 468 : for (size_t i = 0; i < levelNumNodes.size(); i++)
260 312 : levelBounds.push_back(std::pair<uint64_t, uint64_t>(
261 624 : levelOffsets[i], levelOffsets[i] + levelNumNodes[i]));
262 312 : return levelBounds;
263 : }
264 :
265 115 : void PackedRTree::generateNodes()
266 : {
267 230 : for (uint32_t i = 0; i < _levelBounds.size() - 1; i++)
268 : {
269 115 : auto pos = _levelBounds[i].first;
270 115 : auto end = _levelBounds[i].second;
271 115 : auto newpos = _levelBounds[i + 1].first;
272 230 : while (pos < end)
273 : {
274 115 : NodeItem node = NodeItem::create(pos);
275 274 : for (uint32_t j = 0; j < _nodeSize && pos < end; j++)
276 159 : node.expand(_nodeItems[pos++]);
277 115 : _nodeItems[newpos++] = node;
278 : }
279 : }
280 115 : }
281 :
282 0 : void PackedRTree::fromData(const void *data)
283 : {
284 0 : auto buf = reinterpret_cast<const uint8_t *>(data);
285 0 : const NodeItem *pn = reinterpret_cast<const NodeItem *>(buf);
286 0 : for (uint64_t i = 0; i < _numNodes; i++)
287 : {
288 0 : NodeItem n = *pn++;
289 0 : _nodeItems[i] = n;
290 0 : _extent.expand(n);
291 : }
292 0 : }
293 :
294 0 : PackedRTree::PackedRTree(const std::vector<std::shared_ptr<Item>> &items,
295 0 : const NodeItem &extent, const uint16_t nodeSize)
296 0 : : _extent(extent), _numItems(items.size())
297 : {
298 0 : init(nodeSize);
299 0 : for (size_t i = 0; i < _numItems; i++)
300 0 : _nodeItems[_numNodes - _numItems + i] = items[i]->nodeItem;
301 0 : generateNodes();
302 0 : }
303 :
304 0 : PackedRTree::PackedRTree(const std::vector<NodeItem> &nodes,
305 0 : const NodeItem &extent, const uint16_t nodeSize)
306 0 : : _extent(extent), _numItems(nodes.size())
307 : {
308 0 : init(nodeSize);
309 0 : for (size_t i = 0; i < _numItems; i++)
310 0 : _nodeItems[_numNodes - _numItems + i] = nodes[i];
311 0 : generateNodes();
312 0 : }
313 :
314 0 : PackedRTree::PackedRTree(const void *data, const uint64_t numItems,
315 0 : const uint16_t nodeSize)
316 0 : : _extent(NodeItem::create(0)), _numItems(numItems)
317 : {
318 0 : init(nodeSize);
319 0 : fromData(data);
320 0 : }
321 :
322 115 : PackedRTree::PackedRTree(std::function<void(NodeItem *)> fillNodeItems,
323 : const uint64_t numItems, const NodeItem &extent,
324 115 : const uint16_t nodeSize)
325 115 : : _extent(extent), _numItems(numItems)
326 : {
327 115 : init(nodeSize);
328 115 : fillNodeItems(_nodeItems + _numNodes - _numItems);
329 115 : generateNodes();
330 115 : }
331 :
332 : std::vector<SearchResultItem>
333 0 : PackedRTree::search(double minX, double minY, double maxX, double maxY) const
334 : {
335 0 : uint64_t leafNodesOffset = _levelBounds.front().first;
336 0 : NodeItem n{minX, minY, maxX, maxY, 0};
337 0 : std::vector<SearchResultItem> results;
338 0 : std::unordered_map<uint64_t, uint64_t> queue;
339 0 : queue.insert(std::pair<uint64_t, uint64_t>(0, _levelBounds.size() - 1));
340 0 : while (queue.size() != 0)
341 : {
342 0 : auto next = queue.begin();
343 0 : uint64_t nodeIndex = next->first;
344 0 : uint64_t level = next->second;
345 0 : queue.erase(next);
346 0 : bool isLeafNode = nodeIndex >= _numNodes - _numItems;
347 : // find the end index of the node
348 : uint64_t end =
349 0 : std::min(static_cast<uint64_t>(nodeIndex + _nodeSize),
350 0 : _levelBounds[static_cast<size_t>(level)].second);
351 : // search through child nodes
352 0 : for (uint64_t pos = nodeIndex; pos < end; pos++)
353 : {
354 0 : const auto &nodeItem = _nodeItems[static_cast<size_t>(pos)];
355 0 : if (!n.intersects(nodeItem))
356 0 : continue;
357 0 : if (isLeafNode)
358 0 : results.push_back({nodeItem.offset, pos - leafNodesOffset});
359 : else
360 : queue.insert(
361 0 : std::pair<uint64_t, uint64_t>(nodeItem.offset, level - 1));
362 : }
363 : }
364 0 : return results;
365 : }
366 :
367 32 : std::vector<SearchResultItem> PackedRTree::streamSearch(
368 : const uint64_t numItems, const uint16_t nodeSize, const NodeItem &item,
369 : const std::function<void(uint8_t *, size_t, size_t)> &readNode)
370 : {
371 64 : auto levelBounds = generateLevelBounds(numItems, nodeSize);
372 32 : uint64_t leafNodesOffset = levelBounds.front().first;
373 32 : uint64_t numNodes = levelBounds.front().second;
374 64 : auto nodeItems = std::vector<NodeItem>(nodeSize);
375 32 : uint8_t *nodesBuf = reinterpret_cast<uint8_t *>(nodeItems.data());
376 : // use ordered search queue to make index traversal in sequential order
377 64 : std::map<uint64_t, uint64_t> queue;
378 32 : std::vector<SearchResultItem> results;
379 32 : queue.insert(std::pair<uint64_t, uint64_t>(0, levelBounds.size() - 1));
380 91 : while (queue.size() != 0)
381 : {
382 59 : auto next = queue.begin();
383 59 : uint64_t nodeIndex = next->first;
384 59 : uint64_t level = next->second;
385 59 : queue.erase(next);
386 59 : bool isLeafNode = nodeIndex >= numNodes - numItems;
387 : // find the end index of the node
388 118 : uint64_t end = std::min(static_cast<uint64_t>(nodeIndex + nodeSize),
389 59 : levelBounds[static_cast<size_t>(level)].second);
390 59 : uint64_t length = end - nodeIndex;
391 59 : readNode(nodesBuf, static_cast<size_t>(nodeIndex * sizeof(NodeItem)),
392 59 : static_cast<size_t>(length * sizeof(NodeItem)));
393 : #if !CPL_IS_LSB
394 : for (size_t i = 0; i < static_cast<size_t>(length); i++)
395 : {
396 : CPL_LSBPTR64(&nodeItems[i].minX);
397 : CPL_LSBPTR64(&nodeItems[i].minY);
398 : CPL_LSBPTR64(&nodeItems[i].maxX);
399 : CPL_LSBPTR64(&nodeItems[i].maxY);
400 : CPL_LSBPTR64(&nodeItems[i].offset);
401 : }
402 : #endif
403 : // search through child nodes
404 265 : for (uint64_t pos = nodeIndex; pos < end; pos++)
405 : {
406 206 : uint64_t nodePos = pos - nodeIndex;
407 206 : const auto &nodeItem = nodeItems[static_cast<size_t>(nodePos)];
408 206 : if (!item.intersects(nodeItem))
409 55 : continue;
410 151 : if (isLeafNode)
411 124 : results.push_back({nodeItem.offset, pos - leafNodesOffset});
412 : else
413 : queue.insert(
414 27 : std::pair<uint64_t, uint64_t>(nodeItem.offset, level - 1));
415 : }
416 : }
417 64 : return results;
418 : }
419 :
420 0 : uint64_t PackedRTree::size() const
421 : {
422 0 : return _numNodes * sizeof(NodeItem);
423 : }
424 :
425 162 : uint64_t PackedRTree::size(const uint64_t numItems, const uint16_t nodeSize)
426 : {
427 162 : if (nodeSize < 2)
428 0 : throw std::invalid_argument("Node size must be at least 2");
429 162 : if (numItems == 0)
430 0 : throw std::invalid_argument("Number of items must be greater than 0");
431 : const uint16_t nodeSizeMin =
432 324 : std::min(std::max(nodeSize, static_cast<uint16_t>(2)),
433 162 : static_cast<uint16_t>(65535));
434 : // limit so that resulting size in bytes can be represented by uint64_t
435 162 : if (numItems > static_cast<uint64_t>(1) << 56)
436 0 : throw std::overflow_error("Number of items must be less than 2^56");
437 162 : uint64_t n = numItems;
438 162 : uint64_t numNodes = n;
439 0 : do
440 : {
441 162 : n = (n + nodeSizeMin - 1) / nodeSizeMin;
442 162 : numNodes += n;
443 162 : } while (n != 1);
444 162 : return numNodes * sizeof(NodeItem);
445 : }
446 :
447 115 : void PackedRTree::streamWrite(
448 : const std::function<void(uint8_t *, size_t)> &writeData)
449 : {
450 : #if !CPL_IS_LSB
451 : for (size_t i = 0; i < static_cast<size_t>(_numNodes); i++)
452 : {
453 : CPL_LSBPTR64(&_nodeItems[i].minX);
454 : CPL_LSBPTR64(&_nodeItems[i].minY);
455 : CPL_LSBPTR64(&_nodeItems[i].maxX);
456 : CPL_LSBPTR64(&_nodeItems[i].maxY);
457 : CPL_LSBPTR64(&_nodeItems[i].offset);
458 : }
459 : #endif
460 115 : writeData(reinterpret_cast<uint8_t *>(_nodeItems),
461 115 : static_cast<size_t>(_numNodes * sizeof(NodeItem)));
462 : #if !CPL_IS_LSB
463 : for (size_t i = 0; i < static_cast<size_t>(_numNodes); i++)
464 : {
465 : CPL_LSBPTR64(&_nodeItems[i].minX);
466 : CPL_LSBPTR64(&_nodeItems[i].minY);
467 : CPL_LSBPTR64(&_nodeItems[i].maxX);
468 : CPL_LSBPTR64(&_nodeItems[i].maxY);
469 : CPL_LSBPTR64(&_nodeItems[i].offset);
470 : }
471 : #endif
472 115 : }
473 :
474 0 : NodeItem PackedRTree::getExtent() const
475 : {
476 0 : return _extent;
477 : }
478 :
479 : } // namespace FlatGeobuf
|