Line data Source code
1 : /**********************************************************************
2 : *
3 : * Name: cpl_hash_set.cpp
4 : * Project: CPL - Common Portability Library
5 : * Purpose: Hash set functions.
6 : * Author: Even Rouault, <even dot rouault at spatialys.com>
7 : *
8 : **********************************************************************
9 : * Copyright (c) 2008-2009, Even Rouault <even dot rouault at spatialys.com>
10 : *
11 : * Permission is hereby granted, free of charge, to any person obtaining a
12 : * copy of this software and associated documentation files (the "Software"),
13 : * to deal in the Software without restriction, including without limitation
14 : * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 : * and/or sell copies of the Software, and to permit persons to whom the
16 : * Software is furnished to do so, subject to the following conditions:
17 : *
18 : * The above copyright notice and this permission notice shall be included
19 : * in all copies or substantial portions of the Software.
20 : *
21 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 : * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 : * DEALINGS IN THE SOFTWARE.
28 : ****************************************************************************/
29 :
30 : #include "cpl_hash_set.h"
31 :
32 : #include <cstring>
33 :
34 : #include "cpl_conv.h"
35 : #include "cpl_error.h"
36 : #include "cpl_list.h"
37 :
38 : struct _CPLHashSet
39 : {
40 : CPLHashSetHashFunc fnHashFunc;
41 : CPLHashSetEqualFunc fnEqualFunc;
42 : CPLHashSetFreeEltFunc fnFreeEltFunc;
43 : CPLList **tabList;
44 : int nSize;
45 : int nIndiceAllocatedSize;
46 : int nAllocatedSize;
47 : CPLList *psRecyclingList;
48 : int nRecyclingListSize;
49 : bool bRehash;
50 : #ifdef HASH_DEBUG
51 : int nCollisions;
52 : #endif
53 : };
54 :
55 : constexpr int anPrimes[] = {
56 : 53, 97, 193, 389, 769, 1543, 3079,
57 : 6151, 12289, 24593, 49157, 98317, 196613, 393241,
58 : 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653,
59 : 100663319, 201326611, 402653189, 805306457, 1610612741};
60 :
61 : /************************************************************************/
62 : /* CPLHashSetNew() */
63 : /************************************************************************/
64 :
65 : /**
66 : * Creates a new hash set
67 : *
68 : * The hash function must return a hash value for the elements to insert.
69 : * If fnHashFunc is NULL, CPLHashSetHashPointer will be used.
70 : *
71 : * The equal function must return if two elements are equal.
72 : * If fnEqualFunc is NULL, CPLHashSetEqualPointer will be used.
73 : *
74 : * The free function is used to free elements inserted in the hash set,
75 : * when the hash set is destroyed, when elements are removed or replaced.
76 : * If fnFreeEltFunc is NULL, elements inserted into the hash set will not be
77 : * freed.
78 : *
79 : * @param fnHashFunc hash function. May be NULL.
80 : * @param fnEqualFunc equal function. May be NULL.
81 : * @param fnFreeEltFunc element free function. May be NULL.
82 : *
83 : * @return a new hash set
84 : */
85 :
86 4821 : CPLHashSet *CPLHashSetNew(CPLHashSetHashFunc fnHashFunc,
87 : CPLHashSetEqualFunc fnEqualFunc,
88 : CPLHashSetFreeEltFunc fnFreeEltFunc)
89 : {
90 4821 : CPLHashSet *set = static_cast<CPLHashSet *>(CPLMalloc(sizeof(CPLHashSet)));
91 4821 : set->fnHashFunc = fnHashFunc ? fnHashFunc : CPLHashSetHashPointer;
92 4821 : set->fnEqualFunc = fnEqualFunc ? fnEqualFunc : CPLHashSetEqualPointer;
93 4821 : set->fnFreeEltFunc = fnFreeEltFunc;
94 4821 : set->nSize = 0;
95 4821 : set->tabList = static_cast<CPLList **>(CPLCalloc(sizeof(CPLList *), 53));
96 4821 : set->nIndiceAllocatedSize = 0;
97 4821 : set->nAllocatedSize = 53;
98 4821 : set->psRecyclingList = nullptr;
99 4821 : set->nRecyclingListSize = 0;
100 4821 : set->bRehash = false;
101 : #ifdef HASH_DEBUG
102 : set->nCollisions = 0;
103 : #endif
104 4821 : return set;
105 : }
106 :
107 : /************************************************************************/
108 : /* CPLHashSetSize() */
109 : /************************************************************************/
110 :
111 : /**
112 : * Returns the number of elements inserted in the hash set
113 : *
114 : * Note: this is not the internal size of the hash set
115 : *
116 : * @param set the hash set
117 : *
118 : * @return the number of elements in the hash set
119 : */
120 :
121 7 : int CPLHashSetSize(const CPLHashSet *set)
122 : {
123 7 : CPLAssert(set != nullptr);
124 7 : return set->nSize;
125 : }
126 :
127 : /************************************************************************/
128 : /* CPLHashSetGetNewListElt() */
129 : /************************************************************************/
130 :
131 13281 : static CPLList *CPLHashSetGetNewListElt(CPLHashSet *set)
132 : {
133 13281 : if (set->psRecyclingList)
134 : {
135 20 : CPLList *psRet = set->psRecyclingList;
136 20 : psRet->pData = nullptr;
137 20 : set->nRecyclingListSize--;
138 20 : set->psRecyclingList = psRet->psNext;
139 20 : return psRet;
140 : }
141 :
142 13261 : return static_cast<CPLList *>(CPLMalloc(sizeof(CPLList)));
143 : }
144 :
145 : /************************************************************************/
146 : /* CPLHashSetReturnListElt() */
147 : /************************************************************************/
148 :
149 1383 : static void CPLHashSetReturnListElt(CPLHashSet *set, CPLList *psList)
150 : {
151 1383 : if (set->nRecyclingListSize < 128)
152 : {
153 511 : psList->psNext = set->psRecyclingList;
154 511 : set->psRecyclingList = psList;
155 511 : set->nRecyclingListSize++;
156 : }
157 : else
158 : {
159 872 : CPLFree(psList);
160 : }
161 1383 : }
162 :
163 : /************************************************************************/
164 : /* CPLHashSetClearInternal() */
165 : /************************************************************************/
166 :
167 4821 : static void CPLHashSetClearInternal(CPLHashSet *set, bool bFinalize)
168 : {
169 4821 : CPLAssert(set != nullptr);
170 262158 : for (int i = 0; i < set->nAllocatedSize; i++)
171 : {
172 257337 : CPLList *cur = set->tabList[i];
173 269235 : while (cur)
174 : {
175 11898 : if (set->fnFreeEltFunc)
176 1157 : set->fnFreeEltFunc(cur->pData);
177 11898 : CPLList *psNext = cur->psNext;
178 11898 : if (bFinalize)
179 11898 : CPLFree(cur);
180 : else
181 0 : CPLHashSetReturnListElt(set, cur);
182 11898 : cur = psNext;
183 : }
184 257337 : set->tabList[i] = nullptr;
185 : }
186 4821 : set->bRehash = false;
187 4821 : }
188 :
189 : /************************************************************************/
190 : /* CPLHashSetDestroy() */
191 : /************************************************************************/
192 :
193 : /**
194 : * Destroys an allocated hash set.
195 : *
196 : * This function also frees the elements if a free function was
197 : * provided at the creation of the hash set.
198 : *
199 : * @param set the hash set
200 : */
201 :
202 4821 : void CPLHashSetDestroy(CPLHashSet *set)
203 : {
204 4821 : CPLHashSetClearInternal(set, true);
205 4821 : CPLFree(set->tabList);
206 4821 : CPLListDestroy(set->psRecyclingList);
207 4821 : CPLFree(set);
208 4821 : }
209 :
210 : /************************************************************************/
211 : /* CPLHashSetClear() */
212 : /************************************************************************/
213 :
214 : /**
215 : * Clear all elements from a hash set.
216 : *
217 : * This function also frees the elements if a free function was
218 : * provided at the creation of the hash set.
219 : *
220 : * @param set the hash set
221 : * @since GDAL 2.1
222 : */
223 :
224 0 : void CPLHashSetClear(CPLHashSet *set)
225 : {
226 0 : CPLHashSetClearInternal(set, false);
227 0 : set->tabList = static_cast<CPLList **>(
228 0 : CPLRealloc(set->tabList, sizeof(CPLList *) * 53));
229 0 : set->nIndiceAllocatedSize = 0;
230 0 : set->nAllocatedSize = 53;
231 : #ifdef HASH_DEBUG
232 : set->nCollisions = 0;
233 : #endif
234 0 : set->nSize = 0;
235 0 : }
236 :
237 : /************************************************************************/
238 : /* CPLHashSetForeach() */
239 : /************************************************************************/
240 :
241 : /**
242 : * Walk through the hash set and runs the provided function on all the
243 : * elements
244 : *
245 : * This function is provided the user_data argument of CPLHashSetForeach.
246 : * It must return TRUE to go on the walk through the hash set, or FALSE to
247 : * make it stop.
248 : *
249 : * Note : the structure of the hash set must *NOT* be modified during the
250 : * walk.
251 : *
252 : * @param set the hash set.
253 : * @param fnIterFunc the function called on each element.
254 : * @param user_data the user data provided to the function.
255 : */
256 :
257 1 : void CPLHashSetForeach(CPLHashSet *set, CPLHashSetIterEltFunc fnIterFunc,
258 : void *user_data)
259 : {
260 1 : CPLAssert(set != nullptr);
261 1 : if (!fnIterFunc)
262 0 : return;
263 :
264 1544 : for (int i = 0; i < set->nAllocatedSize; i++)
265 : {
266 1543 : CPLList *cur = set->tabList[i];
267 2543 : while (cur)
268 : {
269 1000 : if (!fnIterFunc(cur->pData, user_data))
270 0 : return;
271 :
272 1000 : cur = cur->psNext;
273 : }
274 : }
275 : }
276 :
277 : /************************************************************************/
278 : /* CPLHashSetRehash() */
279 : /************************************************************************/
280 :
281 42 : static void CPLHashSetRehash(CPLHashSet *set)
282 : {
283 42 : int nNewAllocatedSize = anPrimes[set->nIndiceAllocatedSize];
284 : CPLList **newTabList = static_cast<CPLList **>(
285 42 : CPLCalloc(sizeof(CPLList *), nNewAllocatedSize));
286 : #ifdef HASH_DEBUG
287 : CPLDebug("CPLHASH",
288 : "hashSet=%p, nSize=%d, nCollisions=%d, "
289 : "fCollisionRate=%.02f",
290 : set, set->nSize, set->nCollisions,
291 : set->nCollisions * 100.0 / set->nSize);
292 : set->nCollisions = 0;
293 : #endif
294 6582 : for (int i = 0; i < set->nAllocatedSize; i++)
295 : {
296 6540 : CPLList *cur = set->tabList[i];
297 10383 : while (cur)
298 : {
299 : const unsigned long nNewHashVal =
300 3843 : set->fnHashFunc(cur->pData) % nNewAllocatedSize;
301 : #ifdef HASH_DEBUG
302 : if (newTabList[nNewHashVal])
303 : set->nCollisions++;
304 : #endif
305 3843 : CPLList *psNext = cur->psNext;
306 3843 : cur->psNext = newTabList[nNewHashVal];
307 3843 : newTabList[nNewHashVal] = cur;
308 3843 : cur = psNext;
309 : }
310 : }
311 42 : CPLFree(set->tabList);
312 42 : set->tabList = newTabList;
313 42 : set->nAllocatedSize = nNewAllocatedSize;
314 42 : set->bRehash = false;
315 42 : }
316 :
317 : /************************************************************************/
318 : /* CPLHashSetFindPtr() */
319 : /************************************************************************/
320 :
321 40308 : static void **CPLHashSetFindPtr(CPLHashSet *set, const void *elt)
322 : {
323 40308 : const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize;
324 40308 : CPLList *cur = set->tabList[nHashVal];
325 42458 : while (cur)
326 : {
327 22986 : if (set->fnEqualFunc(cur->pData, elt))
328 20836 : return &cur->pData;
329 2150 : cur = cur->psNext;
330 : }
331 19472 : return nullptr;
332 : }
333 :
334 : /************************************************************************/
335 : /* CPLHashSetInsert() */
336 : /************************************************************************/
337 :
338 : /**
339 : * Inserts an element into a hash set.
340 : *
341 : * If the element was already inserted in the hash set, the previous
342 : * element is replaced by the new element. If a free function was provided,
343 : * it is used to free the previously inserted element
344 : *
345 : * @param set the hash set
346 : * @param elt the new element to insert in the hash set
347 : *
348 : * @return TRUE if the element was not already in the hash set
349 : */
350 :
351 15554 : int CPLHashSetInsert(CPLHashSet *set, void *elt)
352 : {
353 15554 : CPLAssert(set != nullptr);
354 15554 : void **pElt = CPLHashSetFindPtr(set, elt);
355 15554 : if (pElt)
356 : {
357 2273 : if (set->fnFreeEltFunc)
358 25 : set->fnFreeEltFunc(*pElt);
359 :
360 2273 : *pElt = elt;
361 2273 : return FALSE;
362 : }
363 :
364 13281 : if (set->nSize >= 2 * set->nAllocatedSize / 3 ||
365 13244 : (set->bRehash && set->nIndiceAllocatedSize > 0 &&
366 0 : set->nSize <= set->nAllocatedSize / 2))
367 : {
368 37 : set->nIndiceAllocatedSize++;
369 37 : CPLHashSetRehash(set);
370 : }
371 :
372 13281 : const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize;
373 : #ifdef HASH_DEBUG
374 : if (set->tabList[nHashVal])
375 : set->nCollisions++;
376 : #endif
377 :
378 13281 : CPLList *new_elt = CPLHashSetGetNewListElt(set);
379 13281 : new_elt->pData = elt;
380 13281 : new_elt->psNext = set->tabList[nHashVal];
381 13281 : set->tabList[nHashVal] = new_elt;
382 13281 : set->nSize++;
383 :
384 13281 : return TRUE;
385 : }
386 :
387 : /************************************************************************/
388 : /* CPLHashSetLookup() */
389 : /************************************************************************/
390 :
391 : /**
392 : * Returns the element found in the hash set corresponding to the element to
393 : * look up The element must not be modified.
394 : *
395 : * @param set the hash set
396 : * @param elt the element to look up in the hash set
397 : *
398 : * @return the element found in the hash set or NULL
399 : */
400 :
401 24754 : void *CPLHashSetLookup(CPLHashSet *set, const void *elt)
402 : {
403 24754 : CPLAssert(set != nullptr);
404 24754 : void **pElt = CPLHashSetFindPtr(set, elt);
405 24754 : if (pElt)
406 18563 : return *pElt;
407 :
408 6191 : return nullptr;
409 : }
410 :
411 : /************************************************************************/
412 : /* CPLHashSetRemoveInternal() */
413 : /************************************************************************/
414 :
415 1384 : static bool CPLHashSetRemoveInternal(CPLHashSet *set, const void *elt,
416 : bool bDeferRehash)
417 : {
418 1384 : CPLAssert(set != nullptr);
419 1384 : if (set->nIndiceAllocatedSize > 0 && set->nSize <= set->nAllocatedSize / 2)
420 : {
421 5 : set->nIndiceAllocatedSize--;
422 5 : if (bDeferRehash)
423 0 : set->bRehash = true;
424 : else
425 5 : CPLHashSetRehash(set);
426 : }
427 :
428 1384 : int nHashVal = static_cast<int>(set->fnHashFunc(elt) % set->nAllocatedSize);
429 1384 : CPLList *cur = set->tabList[nHashVal];
430 1384 : CPLList *prev = nullptr;
431 1389 : while (cur)
432 : {
433 1388 : if (set->fnEqualFunc(cur->pData, elt))
434 : {
435 1383 : if (prev)
436 5 : prev->psNext = cur->psNext;
437 : else
438 1378 : set->tabList[nHashVal] = cur->psNext;
439 :
440 1383 : if (set->fnFreeEltFunc)
441 383 : set->fnFreeEltFunc(cur->pData);
442 :
443 1383 : CPLHashSetReturnListElt(set, cur);
444 : #ifdef HASH_DEBUG
445 : if (set->tabList[nHashVal])
446 : set->nCollisions--;
447 : #endif
448 1383 : set->nSize--;
449 1383 : return true;
450 : }
451 5 : prev = cur;
452 5 : cur = cur->psNext;
453 : }
454 1 : return false;
455 : }
456 :
457 : /************************************************************************/
458 : /* CPLHashSetRemove() */
459 : /************************************************************************/
460 :
461 : /**
462 : * Removes an element from a hash set
463 : *
464 : * @param set the hash set
465 : * @param elt the new element to remove from the hash set
466 : *
467 : * @return TRUE if the element was in the hash set
468 : */
469 :
470 1384 : int CPLHashSetRemove(CPLHashSet *set, const void *elt)
471 : {
472 1384 : return CPLHashSetRemoveInternal(set, elt, false);
473 : }
474 :
475 : /************************************************************************/
476 : /* CPLHashSetRemoveDeferRehash() */
477 : /************************************************************************/
478 :
479 : /**
480 : * Removes an element from a hash set.
481 : *
482 : * This will defer potential rehashing of the set to later calls to
483 : * CPLHashSetInsert() or CPLHashSetRemove().
484 : *
485 : * @param set the hash set
486 : * @param elt the new element to remove from the hash set
487 : *
488 : * @return TRUE if the element was in the hash set
489 : * @since GDAL 2.1
490 : */
491 :
492 0 : int CPLHashSetRemoveDeferRehash(CPLHashSet *set, const void *elt)
493 : {
494 0 : return CPLHashSetRemoveInternal(set, elt, true);
495 : }
496 :
497 : /************************************************************************/
498 : /* CPLHashSetHashPointer() */
499 : /************************************************************************/
500 :
501 : /**
502 : * Hash function for an arbitrary pointer
503 : *
504 : * @param elt the arbitrary pointer to hash
505 : *
506 : * @return the hash value of the pointer
507 : */
508 :
509 45681 : unsigned long CPLHashSetHashPointer(const void *elt)
510 : {
511 : return static_cast<unsigned long>(
512 45681 : reinterpret_cast<GUIntptr_t>(const_cast<void *>(elt)));
513 : }
514 :
515 : /************************************************************************/
516 : /* CPLHashSetEqualPointer() */
517 : /************************************************************************/
518 :
519 : /**
520 : * Equality function for arbitrary pointers
521 : *
522 : * @param elt1 the first arbitrary pointer to compare
523 : * @param elt2 the second arbitrary pointer to compare
524 : *
525 : * @return TRUE if the pointers are equal
526 : */
527 :
528 16998 : int CPLHashSetEqualPointer(const void *elt1, const void *elt2)
529 : {
530 16998 : return elt1 == elt2;
531 : }
532 :
533 : /************************************************************************/
534 : /* CPLHashSetHashStr() */
535 : /************************************************************************/
536 :
537 : /**
538 : * Hash function for a zero-terminated string
539 : *
540 : * @param elt the string to hash. May be NULL.
541 : *
542 : * @return the hash value of the string
543 : */
544 :
545 : CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
546 48655 : unsigned long CPLHashSetHashStr(const void *elt)
547 : {
548 48655 : if (elt == nullptr)
549 10 : return 0;
550 :
551 48645 : const unsigned char *pszStr = static_cast<const unsigned char *>(elt);
552 48645 : unsigned long hash = 0;
553 :
554 48645 : int c = 0;
555 951545 : while ((c = *pszStr++) != '\0')
556 902900 : hash = c + (hash << 6) + (hash << 16) - hash;
557 :
558 48645 : return hash;
559 : }
560 :
561 : /************************************************************************/
562 : /* CPLHashSetEqualStr() */
563 : /************************************************************************/
564 :
565 : /**
566 : * Equality function for strings
567 : *
568 : * @param elt1 the first string to compare. May be NULL.
569 : * @param elt2 the second string to compare. May be NULL.
570 : *
571 : * @return TRUE if the strings are equal
572 : */
573 :
574 98 : int CPLHashSetEqualStr(const void *elt1, const void *elt2)
575 : {
576 98 : const char *pszStr1 = static_cast<const char *>(elt1);
577 98 : const char *pszStr2 = static_cast<const char *>(elt2);
578 :
579 98 : if (pszStr1 == nullptr && pszStr2 != nullptr)
580 0 : return FALSE;
581 :
582 98 : if (pszStr1 != nullptr && pszStr2 == nullptr)
583 0 : return FALSE;
584 :
585 98 : if (pszStr1 == nullptr && pszStr2 == nullptr)
586 2 : return TRUE;
587 :
588 96 : return strcmp(pszStr1, pszStr2) == 0;
589 : }
|