Line data Source code
1 : /****************************************************************************** 2 : * Project: OGR 3 : * Purpose: OGRGMLASDriver implementation 4 : * Author: Even Rouault, <even dot rouault at spatialys dot com> 5 : * 6 : * Initial development funded by the European Earth observation programme 7 : * Copernicus 8 : * 9 : ****************************************************************************** 10 : * Copyright (c) 2016, Even Rouault, <even dot rouault at spatialys dot com> 11 : * 12 : * Permission is hereby granted, free of charge, to any person obtaining a 13 : * copy of this software and associated documentation files (the "Software"), 14 : * to deal in the Software without restriction, including without limitation 15 : * the rights to use, copy, modify, merge, publish, distribute, sublicense, 16 : * and/or sell copies of the Software, and to permit persons to whom the 17 : * Software is furnished to do so, subject to the following conditions: 18 : * 19 : * The above copyright notice and this permission notice shall be included 20 : * in all copies or substantial portions of the Software. 21 : * 22 : * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 23 : * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 : * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 25 : * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 : * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 : * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 : * DEALINGS IN THE SOFTWARE. 29 : ****************************************************************************/ 30 : 31 : #include "ogr_gmlas.h" 32 : 33 : #include <map> 34 : #include <set> 35 : 36 : /************************************************************************/ 37 : /* OGRGMLASTruncateIdentifier() */ 38 : /************************************************************************/ 39 : 40 3778 : CPLString OGRGMLASTruncateIdentifier(const CPLString &osName, 41 : int nIdentMaxLength) 42 : { 43 3778 : int nExtra = static_cast<int>(osName.size()) - nIdentMaxLength; 44 3778 : CPLAssert(nExtra > 0); 45 : 46 : // Decompose in tokens 47 3778 : char **papszTokens = CSLTokenizeString2(osName, "_", CSLT_ALLOWEMPTYTOKENS); 48 7556 : std::vector<char> achDelimiters; 49 7556 : std::vector<CPLString> aosTokens; 50 14448 : for (int j = 0; papszTokens[j] != nullptr; ++j) 51 : { 52 10670 : const char *pszToken = papszTokens[j]; 53 10670 : bool bIsCamelCase = false; 54 : // Split parts like camelCase or CamelCase into several tokens 55 10670 : if (pszToken[0] != '\0' && pszToken[1] >= 'a' && pszToken[1] <= 'z') 56 : { 57 9457 : bIsCamelCase = true; 58 9457 : bool bLastIsLower = true; 59 18914 : std::vector<CPLString> aoParts; 60 18914 : CPLString osCurrentPart; 61 9457 : osCurrentPart += pszToken[0]; 62 9457 : osCurrentPart += pszToken[1]; 63 98445 : for (int k = 2; pszToken[k]; ++k) 64 : { 65 89179 : if (pszToken[k] >= 'A' && pszToken[k] <= 'Z') 66 : { 67 7554 : if (!bLastIsLower) 68 : { 69 191 : bIsCamelCase = false; 70 191 : break; 71 : } 72 7363 : aoParts.push_back(osCurrentPart); 73 7363 : osCurrentPart.clear(); 74 7363 : bLastIsLower = false; 75 : } 76 : else 77 : { 78 81625 : bLastIsLower = true; 79 : } 80 88988 : osCurrentPart += pszToken[k]; 81 : } 82 9457 : if (bIsCamelCase) 83 : { 84 9266 : if (!osCurrentPart.empty()) 85 9266 : aoParts.push_back(osCurrentPart); 86 25697 : for (size_t k = 0; k < aoParts.size(); ++k) 87 : { 88 16431 : achDelimiters.push_back((j > 0 && k == 0) ? '_' : '\0'); 89 16431 : aosTokens.push_back(aoParts[k]); 90 : } 91 : } 92 : } 93 10670 : if (!bIsCamelCase) 94 : { 95 1404 : achDelimiters.push_back((j > 0) ? '_' : '\0'); 96 1404 : aosTokens.push_back(pszToken); 97 : } 98 : } 99 3778 : CSLDestroy(papszTokens); 100 : 101 : // Truncate identifier by removing last character of longest part 102 7556 : std::map<int, std::set<size_t>> oMapLengthToIdx; 103 : // Ignore last token in map creation 104 17835 : for (size_t j = 0; j + 1 < aosTokens.size(); ++j) 105 : { 106 14057 : const int nTokenLen = static_cast<int>(aosTokens[j].size()); 107 14057 : oMapLengthToIdx[nTokenLen].insert(j); 108 : } 109 3778 : int nLastTokenSize = static_cast<int>(aosTokens.back().size()); 110 3778 : if (oMapLengthToIdx.empty()) 111 : { 112 30 : if (nLastTokenSize > nExtra) 113 : { 114 30 : aosTokens[aosTokens.size() - 1].resize(nLastTokenSize - nExtra); 115 30 : nExtra = 0; 116 : } 117 : } 118 : else 119 : { 120 3748 : bool bHasDoneSomething = true; 121 78671 : while (nExtra > 0 && bHasDoneSomething) 122 : { 123 74923 : bHasDoneSomething = false; 124 74923 : auto iter = oMapLengthToIdx.end(); 125 74923 : --iter; 126 : // Avoid truncating last token unless it is excessively longer 127 : // than previous ones. 128 74923 : if (nLastTokenSize > 2 * iter->first) 129 : { 130 6028 : aosTokens[aosTokens.size() - 1].resize(nLastTokenSize - 1); 131 6028 : nLastTokenSize--; 132 6028 : bHasDoneSomething = true; 133 6028 : nExtra--; 134 : } 135 68895 : else if (iter->first > 1) 136 : { 137 : // Reduce one token by one character 138 68138 : const size_t j = *iter->second.begin(); 139 68138 : aosTokens[j].resize(iter->first - 1); 140 : 141 : // Move it to a new bucket 142 68138 : iter->second.erase(iter->second.begin()); 143 68138 : oMapLengthToIdx[iter->first - 1].insert(j); 144 : 145 : // Remove this bucket if is empty 146 68138 : if (iter->second.empty()) 147 : { 148 26605 : oMapLengthToIdx.erase(iter); 149 : } 150 : 151 68138 : nExtra--; 152 68138 : bHasDoneSomething = true; 153 : } 154 : } 155 : } 156 : 157 : // Reassemble truncated parts 158 3778 : CPLString osNewName; 159 21613 : for (size_t j = 0; j < aosTokens.size(); ++j) 160 : { 161 17835 : if (achDelimiters[j]) 162 6892 : osNewName += achDelimiters[j]; 163 17835 : osNewName += aosTokens[j]; 164 : } 165 : 166 : // If we are still longer than max allowed, truncate beginning of name 167 3778 : if (nExtra > 0) 168 : { 169 757 : osNewName = osNewName.substr(nExtra); 170 : } 171 3778 : CPLAssert(static_cast<int>(osNewName.size()) == nIdentMaxLength); 172 7556 : return osNewName; 173 : } 174 : 175 : /************************************************************************/ 176 : /* OGRGMLASAddSerialNumber() */ 177 : /************************************************************************/ 178 : 179 40274 : CPLString OGRGMLASAddSerialNumber(const CPLString &osNameIn, int iOccurrence, 180 : size_t nOccurrences, int nIdentMaxLength) 181 : { 182 40274 : CPLString osName(osNameIn); 183 74338 : const int nDigitsSize = (nOccurrences < 10) ? 1 184 34064 : : (nOccurrences < 100) ? 2 185 : : 3; 186 : char szDigits[4]; 187 40274 : snprintf(szDigits, sizeof(szDigits), "%0*d", nDigitsSize, iOccurrence); 188 40274 : if (nIdentMaxLength >= MIN_VALUE_OF_MAX_IDENTIFIER_LENGTH) 189 : { 190 80 : if (static_cast<int>(osName.size()) < nIdentMaxLength) 191 : { 192 2 : if (static_cast<int>(osName.size()) + nDigitsSize < nIdentMaxLength) 193 : { 194 2 : osName += szDigits; 195 : } 196 : else 197 : { 198 0 : osName.resize(nIdentMaxLength - nDigitsSize); 199 0 : osName += szDigits; 200 : } 201 : } 202 : else 203 : { 204 78 : osName.resize(osName.size() - nDigitsSize); 205 78 : osName += szDigits; 206 : } 207 : } 208 : else 209 : { 210 40194 : osName += szDigits; 211 : } 212 80548 : return osName; 213 : }