LCOV - code coverage report
Current view: top level - port - cpl_hash_set.cpp (source / functions) Hit Total Coverage
Test: gdal_filtered.info Lines: 153 170 90.0 %
Date: 2024-04-28 18:08:58 Functions: 17 19 89.5 %

          Line data    Source code
       1             : /**********************************************************************
       2             :  *
       3             :  * Name:     cpl_hash_set.cpp
       4             :  * Project:  CPL - Common Portability Library
       5             :  * Purpose:  Hash set functions.
       6             :  * Author:   Even Rouault, <even dot rouault at spatialys.com>
       7             :  *
       8             :  **********************************************************************
       9             :  * Copyright (c) 2008-2009, Even Rouault <even dot rouault at spatialys.com>
      10             :  *
      11             :  * Permission is hereby granted, free of charge, to any person obtaining a
      12             :  * copy of this software and associated documentation files (the "Software"),
      13             :  * to deal in the Software without restriction, including without limitation
      14             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      15             :  * and/or sell copies of the Software, and to permit persons to whom the
      16             :  * Software is furnished to do so, subject to the following conditions:
      17             :  *
      18             :  * The above copyright notice and this permission notice shall be included
      19             :  * in all copies or substantial portions of the Software.
      20             :  *
      21             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      22             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      23             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
      24             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      25             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      26             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      27             :  * DEALINGS IN THE SOFTWARE.
      28             :  ****************************************************************************/
      29             : 
      30             : #include "cpl_hash_set.h"
      31             : 
      32             : #include <cstring>
      33             : 
      34             : #include "cpl_conv.h"
      35             : #include "cpl_error.h"
      36             : #include "cpl_list.h"
      37             : 
      38             : struct _CPLHashSet
      39             : {
      40             :     CPLHashSetHashFunc fnHashFunc;
      41             :     CPLHashSetEqualFunc fnEqualFunc;
      42             :     CPLHashSetFreeEltFunc fnFreeEltFunc;
      43             :     CPLList **tabList;
      44             :     int nSize;
      45             :     int nIndiceAllocatedSize;
      46             :     int nAllocatedSize;
      47             :     CPLList *psRecyclingList;
      48             :     int nRecyclingListSize;
      49             :     bool bRehash;
      50             : #ifdef HASH_DEBUG
      51             :     int nCollisions;
      52             : #endif
      53             : };
      54             : 
      55             : constexpr int anPrimes[] = {
      56             :     53,        97,        193,       389,       769,       1543,     3079,
      57             :     6151,      12289,     24593,     49157,     98317,     196613,   393241,
      58             :     786433,    1572869,   3145739,   6291469,   12582917,  25165843, 50331653,
      59             :     100663319, 201326611, 402653189, 805306457, 1610612741};
      60             : 
      61             : /************************************************************************/
      62             : /*                          CPLHashSetNew()                             */
      63             : /************************************************************************/
      64             : 
      65             : /**
      66             :  * Creates a new hash set
      67             :  *
      68             :  * The hash function must return a hash value for the elements to insert.
      69             :  * If fnHashFunc is NULL, CPLHashSetHashPointer will be used.
      70             :  *
      71             :  * The equal function must return if two elements are equal.
      72             :  * If fnEqualFunc is NULL, CPLHashSetEqualPointer will be used.
      73             :  *
      74             :  * The free function is used to free elements inserted in the hash set,
      75             :  * when the hash set is destroyed, when elements are removed or replaced.
      76             :  * If fnFreeEltFunc is NULL, elements inserted into the hash set will not be
      77             :  * freed.
      78             :  *
      79             :  * @param fnHashFunc hash function. May be NULL.
      80             :  * @param fnEqualFunc equal function. May be NULL.
      81             :  * @param fnFreeEltFunc element free function. May be NULL.
      82             :  *
      83             :  * @return a new hash set
      84             :  */
      85             : 
      86        4821 : CPLHashSet *CPLHashSetNew(CPLHashSetHashFunc fnHashFunc,
      87             :                           CPLHashSetEqualFunc fnEqualFunc,
      88             :                           CPLHashSetFreeEltFunc fnFreeEltFunc)
      89             : {
      90        4821 :     CPLHashSet *set = static_cast<CPLHashSet *>(CPLMalloc(sizeof(CPLHashSet)));
      91        4821 :     set->fnHashFunc = fnHashFunc ? fnHashFunc : CPLHashSetHashPointer;
      92        4821 :     set->fnEqualFunc = fnEqualFunc ? fnEqualFunc : CPLHashSetEqualPointer;
      93        4821 :     set->fnFreeEltFunc = fnFreeEltFunc;
      94        4821 :     set->nSize = 0;
      95        4821 :     set->tabList = static_cast<CPLList **>(CPLCalloc(sizeof(CPLList *), 53));
      96        4821 :     set->nIndiceAllocatedSize = 0;
      97        4821 :     set->nAllocatedSize = 53;
      98        4821 :     set->psRecyclingList = nullptr;
      99        4821 :     set->nRecyclingListSize = 0;
     100        4821 :     set->bRehash = false;
     101             : #ifdef HASH_DEBUG
     102             :     set->nCollisions = 0;
     103             : #endif
     104        4821 :     return set;
     105             : }
     106             : 
     107             : /************************************************************************/
     108             : /*                          CPLHashSetSize()                            */
     109             : /************************************************************************/
     110             : 
     111             : /**
     112             :  * Returns the number of elements inserted in the hash set
     113             :  *
     114             :  * Note: this is not the internal size of the hash set
     115             :  *
     116             :  * @param set the hash set
     117             :  *
     118             :  * @return the number of elements in the hash set
     119             :  */
     120             : 
     121           7 : int CPLHashSetSize(const CPLHashSet *set)
     122             : {
     123           7 :     CPLAssert(set != nullptr);
     124           7 :     return set->nSize;
     125             : }
     126             : 
     127             : /************************************************************************/
     128             : /*                       CPLHashSetGetNewListElt()                      */
     129             : /************************************************************************/
     130             : 
     131       13281 : static CPLList *CPLHashSetGetNewListElt(CPLHashSet *set)
     132             : {
     133       13281 :     if (set->psRecyclingList)
     134             :     {
     135          20 :         CPLList *psRet = set->psRecyclingList;
     136          20 :         psRet->pData = nullptr;
     137          20 :         set->nRecyclingListSize--;
     138          20 :         set->psRecyclingList = psRet->psNext;
     139          20 :         return psRet;
     140             :     }
     141             : 
     142       13261 :     return static_cast<CPLList *>(CPLMalloc(sizeof(CPLList)));
     143             : }
     144             : 
     145             : /************************************************************************/
     146             : /*                       CPLHashSetReturnListElt()                      */
     147             : /************************************************************************/
     148             : 
     149        1383 : static void CPLHashSetReturnListElt(CPLHashSet *set, CPLList *psList)
     150             : {
     151        1383 :     if (set->nRecyclingListSize < 128)
     152             :     {
     153         511 :         psList->psNext = set->psRecyclingList;
     154         511 :         set->psRecyclingList = psList;
     155         511 :         set->nRecyclingListSize++;
     156             :     }
     157             :     else
     158             :     {
     159         872 :         CPLFree(psList);
     160             :     }
     161        1383 : }
     162             : 
     163             : /************************************************************************/
     164             : /*                   CPLHashSetClearInternal()                          */
     165             : /************************************************************************/
     166             : 
     167        4821 : static void CPLHashSetClearInternal(CPLHashSet *set, bool bFinalize)
     168             : {
     169        4821 :     CPLAssert(set != nullptr);
     170      262158 :     for (int i = 0; i < set->nAllocatedSize; i++)
     171             :     {
     172      257337 :         CPLList *cur = set->tabList[i];
     173      269235 :         while (cur)
     174             :         {
     175       11898 :             if (set->fnFreeEltFunc)
     176        1157 :                 set->fnFreeEltFunc(cur->pData);
     177       11898 :             CPLList *psNext = cur->psNext;
     178       11898 :             if (bFinalize)
     179       11898 :                 CPLFree(cur);
     180             :             else
     181           0 :                 CPLHashSetReturnListElt(set, cur);
     182       11898 :             cur = psNext;
     183             :         }
     184      257337 :         set->tabList[i] = nullptr;
     185             :     }
     186        4821 :     set->bRehash = false;
     187        4821 : }
     188             : 
     189             : /************************************************************************/
     190             : /*                        CPLHashSetDestroy()                           */
     191             : /************************************************************************/
     192             : 
     193             : /**
     194             :  * Destroys an allocated hash set.
     195             :  *
     196             :  * This function also frees the elements if a free function was
     197             :  * provided at the creation of the hash set.
     198             :  *
     199             :  * @param set the hash set
     200             :  */
     201             : 
     202        4821 : void CPLHashSetDestroy(CPLHashSet *set)
     203             : {
     204        4821 :     CPLHashSetClearInternal(set, true);
     205        4821 :     CPLFree(set->tabList);
     206        4821 :     CPLListDestroy(set->psRecyclingList);
     207        4821 :     CPLFree(set);
     208        4821 : }
     209             : 
     210             : /************************************************************************/
     211             : /*                        CPLHashSetClear()                             */
     212             : /************************************************************************/
     213             : 
     214             : /**
     215             :  * Clear all elements from a hash set.
     216             :  *
     217             :  * This function also frees the elements if a free function was
     218             :  * provided at the creation of the hash set.
     219             :  *
     220             :  * @param set the hash set
     221             :  * @since GDAL 2.1
     222             :  */
     223             : 
     224           0 : void CPLHashSetClear(CPLHashSet *set)
     225             : {
     226           0 :     CPLHashSetClearInternal(set, false);
     227           0 :     set->tabList = static_cast<CPLList **>(
     228           0 :         CPLRealloc(set->tabList, sizeof(CPLList *) * 53));
     229           0 :     set->nIndiceAllocatedSize = 0;
     230           0 :     set->nAllocatedSize = 53;
     231             : #ifdef HASH_DEBUG
     232             :     set->nCollisions = 0;
     233             : #endif
     234           0 :     set->nSize = 0;
     235           0 : }
     236             : 
     237             : /************************************************************************/
     238             : /*                       CPLHashSetForeach()                            */
     239             : /************************************************************************/
     240             : 
     241             : /**
     242             :  * Walk through the hash set and runs the provided function on all the
     243             :  * elements
     244             :  *
     245             :  * This function is provided the user_data argument of CPLHashSetForeach.
     246             :  * It must return TRUE to go on the walk through the hash set, or FALSE to
     247             :  * make it stop.
     248             :  *
     249             :  * Note : the structure of the hash set must *NOT* be modified during the
     250             :  * walk.
     251             :  *
     252             :  * @param set the hash set.
     253             :  * @param fnIterFunc the function called on each element.
     254             :  * @param user_data the user data provided to the function.
     255             :  */
     256             : 
     257           1 : void CPLHashSetForeach(CPLHashSet *set, CPLHashSetIterEltFunc fnIterFunc,
     258             :                        void *user_data)
     259             : {
     260           1 :     CPLAssert(set != nullptr);
     261           1 :     if (!fnIterFunc)
     262           0 :         return;
     263             : 
     264        1544 :     for (int i = 0; i < set->nAllocatedSize; i++)
     265             :     {
     266        1543 :         CPLList *cur = set->tabList[i];
     267        2543 :         while (cur)
     268             :         {
     269        1000 :             if (!fnIterFunc(cur->pData, user_data))
     270           0 :                 return;
     271             : 
     272        1000 :             cur = cur->psNext;
     273             :         }
     274             :     }
     275             : }
     276             : 
     277             : /************************************************************************/
     278             : /*                        CPLHashSetRehash()                            */
     279             : /************************************************************************/
     280             : 
     281          42 : static void CPLHashSetRehash(CPLHashSet *set)
     282             : {
     283          42 :     int nNewAllocatedSize = anPrimes[set->nIndiceAllocatedSize];
     284             :     CPLList **newTabList = static_cast<CPLList **>(
     285          42 :         CPLCalloc(sizeof(CPLList *), nNewAllocatedSize));
     286             : #ifdef HASH_DEBUG
     287             :     CPLDebug("CPLHASH",
     288             :              "hashSet=%p, nSize=%d, nCollisions=%d, "
     289             :              "fCollisionRate=%.02f",
     290             :              set, set->nSize, set->nCollisions,
     291             :              set->nCollisions * 100.0 / set->nSize);
     292             :     set->nCollisions = 0;
     293             : #endif
     294        6582 :     for (int i = 0; i < set->nAllocatedSize; i++)
     295             :     {
     296        6540 :         CPLList *cur = set->tabList[i];
     297       10383 :         while (cur)
     298             :         {
     299             :             const unsigned long nNewHashVal =
     300        3843 :                 set->fnHashFunc(cur->pData) % nNewAllocatedSize;
     301             : #ifdef HASH_DEBUG
     302             :             if (newTabList[nNewHashVal])
     303             :                 set->nCollisions++;
     304             : #endif
     305        3843 :             CPLList *psNext = cur->psNext;
     306        3843 :             cur->psNext = newTabList[nNewHashVal];
     307        3843 :             newTabList[nNewHashVal] = cur;
     308        3843 :             cur = psNext;
     309             :         }
     310             :     }
     311          42 :     CPLFree(set->tabList);
     312          42 :     set->tabList = newTabList;
     313          42 :     set->nAllocatedSize = nNewAllocatedSize;
     314          42 :     set->bRehash = false;
     315          42 : }
     316             : 
     317             : /************************************************************************/
     318             : /*                        CPLHashSetFindPtr()                           */
     319             : /************************************************************************/
     320             : 
     321       40308 : static void **CPLHashSetFindPtr(CPLHashSet *set, const void *elt)
     322             : {
     323       40308 :     const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize;
     324       40308 :     CPLList *cur = set->tabList[nHashVal];
     325       42458 :     while (cur)
     326             :     {
     327       22986 :         if (set->fnEqualFunc(cur->pData, elt))
     328       20836 :             return &cur->pData;
     329        2150 :         cur = cur->psNext;
     330             :     }
     331       19472 :     return nullptr;
     332             : }
     333             : 
     334             : /************************************************************************/
     335             : /*                         CPLHashSetInsert()                           */
     336             : /************************************************************************/
     337             : 
     338             : /**
     339             :  * Inserts an element into a hash set.
     340             :  *
     341             :  * If the element was already inserted in the hash set, the previous
     342             :  * element is replaced by the new element. If a free function was provided,
     343             :  * it is used to free the previously inserted element
     344             :  *
     345             :  * @param set the hash set
     346             :  * @param elt the new element to insert in the hash set
     347             :  *
     348             :  * @return TRUE if the element was not already in the hash set
     349             :  */
     350             : 
     351       15554 : int CPLHashSetInsert(CPLHashSet *set, void *elt)
     352             : {
     353       15554 :     CPLAssert(set != nullptr);
     354       15554 :     void **pElt = CPLHashSetFindPtr(set, elt);
     355       15554 :     if (pElt)
     356             :     {
     357        2273 :         if (set->fnFreeEltFunc)
     358          25 :             set->fnFreeEltFunc(*pElt);
     359             : 
     360        2273 :         *pElt = elt;
     361        2273 :         return FALSE;
     362             :     }
     363             : 
     364       13281 :     if (set->nSize >= 2 * set->nAllocatedSize / 3 ||
     365       13244 :         (set->bRehash && set->nIndiceAllocatedSize > 0 &&
     366           0 :          set->nSize <= set->nAllocatedSize / 2))
     367             :     {
     368          37 :         set->nIndiceAllocatedSize++;
     369          37 :         CPLHashSetRehash(set);
     370             :     }
     371             : 
     372       13281 :     const unsigned long nHashVal = set->fnHashFunc(elt) % set->nAllocatedSize;
     373             : #ifdef HASH_DEBUG
     374             :     if (set->tabList[nHashVal])
     375             :         set->nCollisions++;
     376             : #endif
     377             : 
     378       13281 :     CPLList *new_elt = CPLHashSetGetNewListElt(set);
     379       13281 :     new_elt->pData = elt;
     380       13281 :     new_elt->psNext = set->tabList[nHashVal];
     381       13281 :     set->tabList[nHashVal] = new_elt;
     382       13281 :     set->nSize++;
     383             : 
     384       13281 :     return TRUE;
     385             : }
     386             : 
     387             : /************************************************************************/
     388             : /*                        CPLHashSetLookup()                            */
     389             : /************************************************************************/
     390             : 
     391             : /**
     392             :  * Returns the element found in the hash set corresponding to the element to
     393             :  * look up The element must not be modified.
     394             :  *
     395             :  * @param set the hash set
     396             :  * @param elt the element to look up in the hash set
     397             :  *
     398             :  * @return the element found in the hash set or NULL
     399             :  */
     400             : 
     401       24754 : void *CPLHashSetLookup(CPLHashSet *set, const void *elt)
     402             : {
     403       24754 :     CPLAssert(set != nullptr);
     404       24754 :     void **pElt = CPLHashSetFindPtr(set, elt);
     405       24754 :     if (pElt)
     406       18563 :         return *pElt;
     407             : 
     408        6191 :     return nullptr;
     409             : }
     410             : 
     411             : /************************************************************************/
     412             : /*                     CPLHashSetRemoveInternal()                       */
     413             : /************************************************************************/
     414             : 
     415        1384 : static bool CPLHashSetRemoveInternal(CPLHashSet *set, const void *elt,
     416             :                                      bool bDeferRehash)
     417             : {
     418        1384 :     CPLAssert(set != nullptr);
     419        1384 :     if (set->nIndiceAllocatedSize > 0 && set->nSize <= set->nAllocatedSize / 2)
     420             :     {
     421           5 :         set->nIndiceAllocatedSize--;
     422           5 :         if (bDeferRehash)
     423           0 :             set->bRehash = true;
     424             :         else
     425           5 :             CPLHashSetRehash(set);
     426             :     }
     427             : 
     428        1384 :     int nHashVal = static_cast<int>(set->fnHashFunc(elt) % set->nAllocatedSize);
     429        1384 :     CPLList *cur = set->tabList[nHashVal];
     430        1384 :     CPLList *prev = nullptr;
     431        1389 :     while (cur)
     432             :     {
     433        1388 :         if (set->fnEqualFunc(cur->pData, elt))
     434             :         {
     435        1383 :             if (prev)
     436           5 :                 prev->psNext = cur->psNext;
     437             :             else
     438        1378 :                 set->tabList[nHashVal] = cur->psNext;
     439             : 
     440        1383 :             if (set->fnFreeEltFunc)
     441         383 :                 set->fnFreeEltFunc(cur->pData);
     442             : 
     443        1383 :             CPLHashSetReturnListElt(set, cur);
     444             : #ifdef HASH_DEBUG
     445             :             if (set->tabList[nHashVal])
     446             :                 set->nCollisions--;
     447             : #endif
     448        1383 :             set->nSize--;
     449        1383 :             return true;
     450             :         }
     451           5 :         prev = cur;
     452           5 :         cur = cur->psNext;
     453             :     }
     454           1 :     return false;
     455             : }
     456             : 
     457             : /************************************************************************/
     458             : /*                         CPLHashSetRemove()                           */
     459             : /************************************************************************/
     460             : 
     461             : /**
     462             :  * Removes an element from a hash set
     463             :  *
     464             :  * @param set the hash set
     465             :  * @param elt the new element to remove from the hash set
     466             :  *
     467             :  * @return TRUE if the element was in the hash set
     468             :  */
     469             : 
     470        1384 : int CPLHashSetRemove(CPLHashSet *set, const void *elt)
     471             : {
     472        1384 :     return CPLHashSetRemoveInternal(set, elt, false);
     473             : }
     474             : 
     475             : /************************************************************************/
     476             : /*                     CPLHashSetRemoveDeferRehash()                    */
     477             : /************************************************************************/
     478             : 
     479             : /**
     480             :  * Removes an element from a hash set.
     481             :  *
     482             :  * This will defer potential rehashing of the set to later calls to
     483             :  * CPLHashSetInsert() or CPLHashSetRemove().
     484             :  *
     485             :  * @param set the hash set
     486             :  * @param elt the new element to remove from the hash set
     487             :  *
     488             :  * @return TRUE if the element was in the hash set
     489             :  * @since GDAL 2.1
     490             :  */
     491             : 
     492           0 : int CPLHashSetRemoveDeferRehash(CPLHashSet *set, const void *elt)
     493             : {
     494           0 :     return CPLHashSetRemoveInternal(set, elt, true);
     495             : }
     496             : 
     497             : /************************************************************************/
     498             : /*                    CPLHashSetHashPointer()                           */
     499             : /************************************************************************/
     500             : 
     501             : /**
     502             :  * Hash function for an arbitrary pointer
     503             :  *
     504             :  * @param elt the arbitrary pointer to hash
     505             :  *
     506             :  * @return the hash value of the pointer
     507             :  */
     508             : 
     509       45681 : unsigned long CPLHashSetHashPointer(const void *elt)
     510             : {
     511             :     return static_cast<unsigned long>(
     512       45681 :         reinterpret_cast<GUIntptr_t>(const_cast<void *>(elt)));
     513             : }
     514             : 
     515             : /************************************************************************/
     516             : /*                   CPLHashSetEqualPointer()                           */
     517             : /************************************************************************/
     518             : 
     519             : /**
     520             :  * Equality function for arbitrary pointers
     521             :  *
     522             :  * @param elt1 the first arbitrary pointer to compare
     523             :  * @param elt2 the second arbitrary pointer to compare
     524             :  *
     525             :  * @return TRUE if the pointers are equal
     526             :  */
     527             : 
     528       16998 : int CPLHashSetEqualPointer(const void *elt1, const void *elt2)
     529             : {
     530       16998 :     return elt1 == elt2;
     531             : }
     532             : 
     533             : /************************************************************************/
     534             : /*                        CPLHashSetHashStr()                           */
     535             : /************************************************************************/
     536             : 
     537             : /**
     538             :  * Hash function for a zero-terminated string
     539             :  *
     540             :  * @param elt the string to hash. May be NULL.
     541             :  *
     542             :  * @return the hash value of the string
     543             :  */
     544             : 
     545             : CPL_NOSANITIZE_UNSIGNED_INT_OVERFLOW
     546       48655 : unsigned long CPLHashSetHashStr(const void *elt)
     547             : {
     548       48655 :     if (elt == nullptr)
     549          10 :         return 0;
     550             : 
     551       48645 :     const unsigned char *pszStr = static_cast<const unsigned char *>(elt);
     552       48645 :     unsigned long hash = 0;
     553             : 
     554       48645 :     int c = 0;
     555      951545 :     while ((c = *pszStr++) != '\0')
     556      902900 :         hash = c + (hash << 6) + (hash << 16) - hash;
     557             : 
     558       48645 :     return hash;
     559             : }
     560             : 
     561             : /************************************************************************/
     562             : /*                     CPLHashSetEqualStr()                             */
     563             : /************************************************************************/
     564             : 
     565             : /**
     566             :  * Equality function for strings
     567             :  *
     568             :  * @param elt1 the first string to compare. May be NULL.
     569             :  * @param elt2 the second string to compare. May be NULL.
     570             :  *
     571             :  * @return TRUE if the strings are equal
     572             :  */
     573             : 
     574          98 : int CPLHashSetEqualStr(const void *elt1, const void *elt2)
     575             : {
     576          98 :     const char *pszStr1 = static_cast<const char *>(elt1);
     577          98 :     const char *pszStr2 = static_cast<const char *>(elt2);
     578             : 
     579          98 :     if (pszStr1 == nullptr && pszStr2 != nullptr)
     580           0 :         return FALSE;
     581             : 
     582          98 :     if (pszStr1 != nullptr && pszStr2 == nullptr)
     583           0 :         return FALSE;
     584             : 
     585          98 :     if (pszStr1 == nullptr && pszStr2 == nullptr)
     586           2 :         return TRUE;
     587             : 
     588          96 :     return strcmp(pszStr1, pszStr2) == 0;
     589             : }

Generated by: LCOV version 1.14