GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: RangeToken.cpp 901107 2010-01-20 08:45:02Z borisk $ 00020 */ 00021 00022 // --------------------------------------------------------------------------- 00023 // Includes 00024 // --------------------------------------------------------------------------- 00025 #if HAVE_CONFIG_H 00026 # include <config.h> 00027 #endif 00028 00029 #include <assert.h> 00030 00031 #include <xercesc/util/regx/RangeToken.hpp> 00032 #include <xercesc/util/regx/TokenFactory.hpp> 00033 #include <xercesc/util/IllegalArgumentException.hpp> 00034 #include <xercesc/util/XMLUniDefs.hpp> 00035 00036 #if XERCES_USE_TRANSCODER_ICU 00037 #include <unicode/uchar.h> 00038 00039 #if (U_ICU_VERSION_MAJOR_NUM > 2) || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >=4) 00040 #include <unicode/uset.h> 00041 #include <xercesc/util/XMLString.hpp> 00042 #include <xercesc/util/Janitor.hpp> 00043 #endif 00044 #endif 00045 00046 XERCES_CPP_NAMESPACE_BEGIN 00047 00048 // --------------------------------------------------------------------------- 00049 // Static member data initialization 00050 // --------------------------------------------------------------------------- 00051 const int RangeToken::MAPSIZE = 256; 00052 const unsigned int RangeToken::INITIALSIZE = 16; 00053 00054 // --------------------------------------------------------------------------- 00055 // RangeToken: Constructors and Destructors 00056 // --------------------------------------------------------------------------- 00057 RangeToken::RangeToken(const Token::tokType tkType, 00058 MemoryManager* const manager) 00059 : Token(tkType, manager) 00060 , fSorted(false) 00061 , fCompacted(false) 00062 , fNonMapIndex(0) 00063 , fElemCount(0) 00064 , fMaxCount(INITIALSIZE) 00065 , fMap(0) 00066 , fRanges(0) 00067 , fCaseIToken(0) 00068 , fMemoryManager(manager) 00069 { 00070 00071 } 00072 00073 RangeToken::~RangeToken() { 00074 00075 // TODO(dbertoni) This is a temporary hack until we can change the ABI. 00076 // See Jira issue XERCESC-1866 for more details. 00077 if (fCaseIToken && fCaseIToken->fCaseIToken == this) 00078 { 00079 fCaseIToken->fCaseIToken = 0; 00080 } 00081 fMemoryManager->deallocate(fMap);//delete [] fMap; 00082 fMemoryManager->deallocate(fRanges);//delete[] fRanges; 00083 } 00084 00085 00086 // This is a struct that defines a mapping for 00087 // case-insensitive matching. The first character 00088 // is the character we try to match in the range. 00089 // The second is the character we add to the range, 00090 // because it maps to the first when we're folding 00091 // case. 00092 struct ExceptionCharsStruct 00093 { 00094 XMLInt32 baseChar; 00095 00096 XMLInt32 matchingChar; 00097 }; 00098 00099 00100 // This is an array of character mappings that we will 00101 // add to ranges for case-insensitive matching. 00102 static const ExceptionCharsStruct s_exceptions[] = 00103 { 00104 { 0x49, 0x130 }, 00105 { 0x49, 0x131 }, 00106 { 0x4b, 0x212a }, 00107 { 0x53, 0x17f }, 00108 { 0x69, 0x130 }, 00109 { 0x69, 0x131 }, 00110 { 0x6b, 0x212a }, 00111 { 0x73, 0x17f }, 00112 { 0xc5, 0x212b }, 00113 { 0xe5, 0x212b }, 00114 { 0x1c4, 0x1c5 }, 00115 { 0x1c6, 0x1c5 }, 00116 { 0x1c7, 0x1c8 }, 00117 { 0x1c9, 0x1c8 }, 00118 { 0x1ca, 0x1cb }, 00119 { 0x1cc, 0x1cb }, 00120 { 0x1f1, 0x1f2 }, 00121 { 0x1f3, 0x1f2 }, 00122 { 0x392, 0x3d0 }, 00123 { 0x395, 0x3f5 }, 00124 { 0x398, 0x3d1 }, 00125 { 0x398, 0x3f4 }, 00126 { 0x399, 0x345 }, 00127 { 0x399, 0x1fbe }, 00128 { 0x39a, 0x3f0 }, 00129 { 0x39c, 0xb5 }, 00130 { 0x3a0, 0x3d6 }, 00131 { 0x3a1, 0x3f1 }, 00132 { 0x3a3, 0x3c2 }, 00133 { 0x3a6, 0x3d5 }, 00134 { 0x3a9, 0x2126 }, 00135 { 0x3b2, 0x3d0 }, 00136 { 0x3b5, 0x3f5 }, 00137 { 0x3b8, 0x3d1 }, 00138 { 0x3b8, 0x3f4 }, 00139 { 0x3b9, 0x345 }, 00140 { 0x3b9, 0x1fbe }, 00141 { 0x3ba, 0x3f0 }, 00142 { 0x3bc, 0xb5 }, 00143 { 0x3c0, 0x3d6 }, 00144 { 0x3c1, 0x3f1 }, 00145 { 0x3c3, 0x3c2 }, 00146 { 0x3c6, 0x3d5 }, 00147 { 0x3c9, 0x2126 }, 00148 { 0x1e60, 0x1e9b }, 00149 { 0x1e61, 0x1e9b } 00150 }; 00151 00152 // --------------------------------------------------------------------------- 00153 // RangeToken: Getter methods 00154 // --------------------------------------------------------------------------- 00155 RangeToken* RangeToken::getCaseInsensitiveToken(TokenFactory* const tokFactory) { 00156 00157 if (fCaseIToken == 0 && tokFactory && fRanges) { 00158 00159 bool isNRange = (getTokenType() == T_NRANGE) ? true : false; 00160 RangeToken* lwrToken = tokFactory->createRange(isNRange); 00161 00162 #if XERCES_USE_TRANSCODER_ICU && ((U_ICU_VERSION_MAJOR_NUM > 2) || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >=4)) 00163 UChar* rangeStr=(UChar*)fMemoryManager->allocate(40*fElemCount*sizeof(UChar)); 00164 ArrayJanitor<UChar> janRange(rangeStr, fMemoryManager); 00165 int c=0; 00166 rangeStr[c++] = chOpenSquare; 00167 for (unsigned int i = 0; i < fElemCount - 1; i += 2) { 00168 XMLCh buffer[10]; 00169 XMLSize_t len, j; 00170 00171 rangeStr[c++] = chBackSlash; 00172 rangeStr[c++] = chLatin_U; 00173 XMLString::binToText(fRanges[i], buffer, 10, 16, fMemoryManager); 00174 len = XMLString::stringLen(buffer); 00175 for(j=0;j<(8-len);j++) 00176 rangeStr[c++] = chDigit_0; 00177 XMLCh* p=buffer; 00178 while(*p) 00179 rangeStr[c++] = *p++; 00180 if(fRanges[i+1]!=fRanges[i]) 00181 { 00182 rangeStr[c++] = chDash; 00183 rangeStr[c++] = chBackSlash; 00184 rangeStr[c++] = chLatin_U; 00185 XMLString::binToText(fRanges[i+1], buffer, 10, 16, fMemoryManager); 00186 len = XMLString::stringLen(buffer); 00187 for(j=0;j<(8-len);j++) 00188 rangeStr[c++] = chDigit_0; 00189 p=buffer; 00190 while(*p) 00191 rangeStr[c++] = *p++; 00192 } 00193 } 00194 rangeStr[c++] = chCloseSquare; 00195 rangeStr[c++] = chNull; 00196 UErrorCode ec=U_ZERO_ERROR; 00197 USet* range=uset_openPatternOptions(rangeStr, -1, USET_CASE_INSENSITIVE, &ec); 00198 if(range) 00199 { 00200 ec = U_ZERO_ERROR; 00201 uint32_t cbCount=uset_serialize(range, NULL, 0, &ec); 00202 uint16_t* buffer=(uint16_t*)fMemoryManager->allocate(cbCount*sizeof(uint16_t)); 00203 ArrayJanitor<uint16_t> janSet(buffer, fMemoryManager); 00204 ec = U_ZERO_ERROR; 00205 uset_serialize(range, buffer, cbCount, &ec); 00206 USerializedSet serializedSet; 00207 uset_getSerializedSet(&serializedSet, buffer, cbCount); 00208 int32_t nSets=uset_getSerializedRangeCount(&serializedSet); 00209 for(int32_t i=0; i<nSets; i++) 00210 { 00211 UChar32 start, end; 00212 uset_getSerializedRange(&serializedSet, i, &start, &end); 00213 lwrToken->addRange(start, end); 00214 } 00215 // does this release the memory allocated by the set? 00216 uset_setSerializedToOne(&serializedSet, 32); 00217 uset_close(range); 00218 } 00219 #else 00220 unsigned int exceptIndex = 0; 00221 00222 for (unsigned int i = 0; i < fElemCount - 1; i += 2) { 00223 for (XMLInt32 ch = fRanges[i]; ch <= fRanges[i + 1]; ++ch) { 00224 #if XERCES_USE_TRANSCODER_ICU 00225 const XMLInt32 upperCh = u_toupper(ch); 00226 00227 if (upperCh != ch) 00228 { 00229 lwrToken->addRange(upperCh, upperCh); 00230 } 00231 00232 const XMLInt32 lowerCh = u_tolower(ch); 00233 00234 if (lowerCh != ch) 00235 { 00236 lwrToken->addRange(lowerCh, lowerCh); 00237 } 00238 00239 const XMLInt32 titleCh = u_totitle(ch); 00240 00241 if (titleCh != ch && titleCh != upperCh) 00242 { 00243 lwrToken->addRange(titleCh, titleCh); 00244 } 00245 #else 00246 if (ch >= chLatin_A && ch <= chLatin_Z) 00247 { 00248 ch += chLatin_a - chLatin_A; 00249 00250 lwrToken->addRange(ch, ch); 00251 } 00252 else if (ch >= chLatin_a && ch <= chLatin_z) 00253 { 00254 ch -= chLatin_a - chLatin_A; 00255 00256 lwrToken->addRange(ch, ch); 00257 } 00258 #endif 00259 00260 const unsigned int exceptionsSize = 00261 sizeof(s_exceptions) / sizeof(s_exceptions[0]); 00262 00263 // Add any exception chars. These are characters where the the 00264 // case mapping is not symmetric. (Unicode case mappings are not isomorphic...) 00265 while (exceptIndex < exceptionsSize) 00266 { 00267 if (s_exceptions[exceptIndex].baseChar < ch) 00268 { 00269 ++exceptIndex; 00270 } 00271 else if (s_exceptions[exceptIndex].baseChar == ch) 00272 { 00273 const XMLInt32 matchingChar = 00274 s_exceptions[exceptIndex].matchingChar; 00275 00276 lwrToken->addRange( 00277 matchingChar, 00278 matchingChar); 00279 00280 ++exceptIndex; 00281 } 00282 else 00283 { 00284 break; 00285 } 00286 } 00287 } 00288 } 00289 00290 lwrToken->mergeRanges(this); 00291 #endif 00292 lwrToken->compactRanges(); 00293 lwrToken->createMap(); 00294 00295 fCaseIToken = lwrToken; 00296 // TODO(dbertoni) This is a temporary hack until we can change the ABI. 00297 // See Jira issue XERCESC-1866 for more details. 00298 // Overload the fCaseIToken data member to be the case-insensitive token 00299 // that's caching the case-insensitive one. We need this because tokens 00300 // have varying lifetimes. 00301 fCaseIToken->setCaseInsensitiveToken(this); 00302 } 00303 00304 return fCaseIToken; 00305 } 00306 00307 // --------------------------------------------------------------------------- 00308 // RangeToken: Setter methods 00309 // --------------------------------------------------------------------------- 00310 void RangeToken::setRangeValues(XMLInt32* const rangeValues, const unsigned int count) 00311 { 00312 if (fRanges) { 00313 00314 if (fMap) { 00315 fMemoryManager->deallocate(fMap);//delete [] fMap; 00316 fMap = 0; 00317 } 00318 00319 fElemCount = 0; 00320 fMemoryManager->deallocate(fRanges);//delete [] fRanges; 00321 fRanges = 0; 00322 } 00323 00324 fElemCount = fMaxCount = count; 00325 fRanges = rangeValues; 00326 } 00327 00328 // --------------------------------------------------------------------------- 00329 // RangeToken: Range manipulation methods 00330 // --------------------------------------------------------------------------- 00331 void RangeToken::addRange(const XMLInt32 start, const XMLInt32 end) { 00332 00333 XMLInt32 val1, val2; 00334 00335 fCaseIToken = 0; 00336 00337 if (start <= end) { 00338 00339 val1 = start; 00340 val2 = end; 00341 } 00342 else { 00343 00344 val1 = end; 00345 val2 = start; 00346 } 00347 00348 if (fRanges == 0) { 00349 00350 fRanges = (XMLInt32*) fMemoryManager->allocate 00351 ( 00352 fMaxCount * sizeof(XMLInt32) 00353 );//new XMLInt32[fMaxCount]; 00354 fRanges[0] = val1; 00355 fRanges[1] = val2; 00356 fElemCount = 2; 00357 fSorted = true; 00358 } 00359 else { 00360 00361 if (fRanges[fElemCount-1] + 1 == val1) { 00362 00363 fRanges[fElemCount-1] = val2; 00364 return; 00365 } 00366 00367 if (fElemCount + 2 >= fMaxCount) { 00368 expand(2); 00369 } 00370 00371 if(fSorted && fRanges[fElemCount-1] >= val1) 00372 { 00373 for (int i = 0; i < (int)fElemCount; i +=2) 00374 { 00375 // check if this range is already part of this one 00376 if (fRanges[i] <= val1 && fRanges[i+1] >= val2) 00377 break; 00378 // or if the new one extends the old one 00379 else if(fRanges[i]==val1 && fRanges[i+1] < val2) 00380 { 00381 fRanges[i+1]=val2; 00382 break; 00383 } 00384 else if (fRanges[i] > val1 || 00385 (fRanges[i]==val1 && fRanges[i+1] > val2)) 00386 { 00387 for(int j=fElemCount-1;j>=i;j--) 00388 fRanges[j+2]=fRanges[j]; 00389 fRanges[i] = val1; 00390 fRanges[i+1] = val2; 00391 fElemCount += 2; 00392 break; 00393 } 00394 } 00395 } 00396 else 00397 { 00398 if (fRanges[fElemCount-1] >= val1) 00399 fSorted = false; 00400 00401 fRanges[fElemCount++] = val1; 00402 fRanges[fElemCount++] = val2; 00403 00404 if (!fSorted) { 00405 sortRanges(); 00406 } 00407 } 00408 } 00409 } 00410 00411 void RangeToken::sortRanges() { 00412 00413 if (fSorted || fRanges == 0) 00414 return; 00415 00416 for (int i = fElemCount - 4; i >= 0; i -= 2) { 00417 00418 for (int j = 0; j <= i; j +=2) { 00419 00420 if (fRanges[j] > fRanges[j + 2] 00421 || (fRanges[j]==fRanges[j+2] && fRanges[j+1] > fRanges[j+3])) { 00422 00423 XMLInt32 tmpVal = fRanges[j+2]; 00424 fRanges[j+2] = fRanges[j]; 00425 fRanges[j] = tmpVal; 00426 tmpVal = fRanges[j+3]; 00427 fRanges[j+3] = fRanges[j+1]; 00428 fRanges[j+1] = tmpVal; 00429 } 00430 } 00431 } 00432 00433 fSorted = true; 00434 } 00435 00436 void RangeToken::compactRanges() { 00437 00438 if (fCompacted || fRanges == 0 || fElemCount <= 2) 00439 return; 00440 00441 unsigned int base = 0; 00442 unsigned int target = 0; 00443 00444 while (target < fElemCount) { 00445 00446 if (base != target) { 00447 00448 fRanges[base] = fRanges[target++]; 00449 fRanges[base+1] = fRanges[target++]; 00450 } 00451 else 00452 target += 2; 00453 00454 XMLInt32 baseEnd = fRanges[base + 1]; 00455 00456 while (target < fElemCount) { 00457 00458 XMLInt32 startRange = fRanges[target]; 00459 00460 if (baseEnd + 1 < startRange) 00461 break; 00462 00463 XMLInt32 endRange = fRanges[target + 1]; 00464 00465 if (baseEnd + 1 == startRange || baseEnd < endRange) { 00466 00467 baseEnd = endRange; 00468 fRanges[base+1] = baseEnd; 00469 target += 2; 00470 } 00471 else if (baseEnd >= endRange) { 00472 target += 2; 00473 } 00474 else { 00475 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_CompactRangesError, fMemoryManager); 00476 } 00477 } // inner while 00478 00479 base += 2; 00480 } 00481 00482 fElemCount = base; 00483 fCompacted = true; 00484 } 00485 00486 void RangeToken::mergeRanges(const Token *const tok) { 00487 00488 00489 if (tok->getTokenType() != this->getTokenType()) 00490 ThrowXMLwithMemMgr(IllegalArgumentException, XMLExcepts::Regex_MergeRangesTypeMismatch, fMemoryManager); 00491 00492 RangeToken* rangeTok = (RangeToken *) tok; 00493 00494 if (rangeTok->fRanges == 0) 00495 return; 00496 00497 fCaseIToken = 0; 00498 sortRanges(); 00499 rangeTok->sortRanges(); 00500 00501 if (fRanges == 0) { 00502 00503 fMaxCount = rangeTok->fMaxCount; 00504 fRanges = (XMLInt32*) fMemoryManager->allocate 00505 ( 00506 fMaxCount * sizeof(XMLInt32) 00507 );//new XMLInt32[fMaxCount]; 00508 for (unsigned int index = 0; index < rangeTok->fElemCount; index++) { 00509 fRanges[index] = rangeTok->fRanges[index]; 00510 } 00511 00512 fElemCount = rangeTok->fElemCount; 00513 fSorted = true; 00514 return; 00515 } 00516 00517 unsigned int newMaxCount = (fElemCount + rangeTok->fElemCount >= fMaxCount) 00518 ? fMaxCount + rangeTok->fMaxCount : fMaxCount; 00519 XMLInt32* result = (XMLInt32*) fMemoryManager->allocate 00520 ( 00521 newMaxCount * sizeof(XMLInt32) 00522 );//new XMLInt32[newMaxCount]; 00523 00524 for (unsigned int i=0, j=0, k=0; i < fElemCount || j < rangeTok->fElemCount;) { 00525 00526 if (i >= fElemCount) { 00527 00528 for (int count = 0; count < 2; count++) { 00529 result[k++] = rangeTok->fRanges[j++]; 00530 } 00531 } 00532 else if (j >= rangeTok->fElemCount) { 00533 00534 for (int count = 0; count < 2; count++) { 00535 result[k++] = fRanges[i++]; 00536 } 00537 } 00538 else if (rangeTok->fRanges[j] < fRanges[i] 00539 || (rangeTok->fRanges[j] == fRanges[i] 00540 && rangeTok->fRanges[j+1] < fRanges[i+1])) { 00541 00542 for (int count = 0; count < 2; count++) { 00543 result[k++] = rangeTok->fRanges[j++]; 00544 } 00545 } 00546 else { 00547 00548 for (int count = 0; count < 2; count++) { 00549 00550 result[k++] = fRanges[i++]; 00551 } 00552 } 00553 } 00554 00555 fMemoryManager->deallocate(fRanges);//delete [] fRanges; 00556 fElemCount += rangeTok->fElemCount; 00557 fRanges = result; 00558 fMaxCount = newMaxCount; 00559 } 00560 00561 void RangeToken::subtractRanges(RangeToken* const tok) { 00562 00563 if (fRanges == 0 || tok->fRanges == 0) 00564 return; 00565 00566 if (tok->getTokenType() == T_NRANGE) { 00567 00568 intersectRanges(tok); 00569 return; 00570 } 00571 00572 fCaseIToken = 0; 00573 sortRanges(); 00574 compactRanges(); 00575 tok->sortRanges(); 00576 tok->compactRanges(); 00577 00578 unsigned int newMax = (fElemCount + tok->fElemCount >= fMaxCount) 00579 ? fMaxCount + tok->fMaxCount : fMaxCount; 00580 XMLInt32* result = (XMLInt32*) fMemoryManager->allocate 00581 ( 00582 newMax * sizeof(XMLInt32) 00583 );//new XMLInt32[newMax]; 00584 unsigned int newElemCount = 0; 00585 unsigned int srcCount = 0; 00586 unsigned int subCount = 0; 00587 00588 while (srcCount < fElemCount && subCount < tok->fElemCount) { 00589 00590 XMLInt32 srcBegin = fRanges[srcCount]; 00591 XMLInt32 srcEnd = fRanges[srcCount + 1]; 00592 XMLInt32 subBegin = tok->fRanges[subCount]; 00593 XMLInt32 subEnd = tok->fRanges[subCount + 1]; 00594 00595 if (srcEnd < subBegin) { // no overlap 00596 00597 result[newElemCount++] = fRanges[srcCount++]; 00598 result[newElemCount++] = fRanges[srcCount++]; 00599 } 00600 else if (srcEnd >= subBegin && srcBegin <= subEnd) { 00601 00602 if (subBegin <= srcBegin && srcEnd <= subEnd) { 00603 srcCount += 2; 00604 } 00605 else if (subBegin <= srcBegin) { 00606 00607 fRanges[srcCount] = subEnd + 1; 00608 subCount += 2; 00609 } 00610 else if (srcEnd <= subEnd) { 00611 00612 result[newElemCount++] = srcBegin; 00613 result[newElemCount++] = subBegin - 1; 00614 srcCount += 2; 00615 } 00616 else { 00617 00618 result[newElemCount++] = srcBegin; 00619 result[newElemCount++] = subBegin - 1; 00620 fRanges[srcCount] = subEnd + 1; 00621 subCount += 2; 00622 } 00623 } 00624 else if (subEnd < srcBegin) { 00625 subCount += 2; 00626 } 00627 else { 00628 fMemoryManager->deallocate(result);//delete [] result; 00629 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_SubtractRangesError, fMemoryManager); 00630 } 00631 } //end while 00632 00633 while (srcCount < fElemCount) { 00634 00635 result[newElemCount++] = fRanges[srcCount++]; 00636 result[newElemCount++] = fRanges[srcCount++]; 00637 } 00638 00639 fMemoryManager->deallocate(fRanges);//delete [] fRanges; 00640 fRanges = result; 00641 fElemCount = newElemCount; 00642 fMaxCount = newMax; 00643 } 00644 00648 void RangeToken::intersectRanges(RangeToken* const tok) { 00649 00650 if (fRanges == 0 || tok->fRanges == 0) 00651 return; 00652 00653 fCaseIToken = 0; 00654 sortRanges(); 00655 compactRanges(); 00656 tok->sortRanges(); 00657 tok->compactRanges(); 00658 00659 unsigned int newMax = (fElemCount + tok->fElemCount >= fMaxCount) 00660 ? fMaxCount + tok->fMaxCount : fMaxCount; 00661 XMLInt32* result = (XMLInt32*) fMemoryManager->allocate 00662 ( 00663 newMax * sizeof(XMLInt32) 00664 );//new XMLInt32[newMax]; 00665 unsigned int newElemCount = 0; 00666 unsigned int srcCount = 0; 00667 unsigned int tokCount = 0; 00668 00669 while (srcCount < fElemCount && tokCount < tok->fElemCount) { 00670 00671 XMLInt32 srcBegin = fRanges[srcCount]; 00672 XMLInt32 srcEnd = fRanges[srcCount + 1]; 00673 XMLInt32 tokBegin = tok->fRanges[tokCount]; 00674 XMLInt32 tokEnd = tok->fRanges[tokCount + 1]; 00675 00676 if (srcEnd < tokBegin) { 00677 srcCount += 2; 00678 } 00679 else if (srcEnd >= tokBegin && srcBegin <= tokEnd) { 00680 00681 if (tokBegin <= srcBegin && srcEnd <= tokEnd) { 00682 00683 result[newElemCount++] = srcBegin; 00684 result[newElemCount++] = srcEnd; 00685 srcCount += 2; 00686 } 00687 else if (tokBegin <= srcBegin) { 00688 00689 result[newElemCount++] = srcBegin; 00690 result[newElemCount++] = tokEnd; 00691 tokCount += 2; 00692 00693 if (tokCount < tok->fElemCount) 00694 fRanges[srcCount] = tokEnd + 1; 00695 else 00696 srcCount += 2; 00697 } 00698 else if (srcEnd <= tokEnd) { 00699 00700 result[newElemCount++] = tokBegin; 00701 result[newElemCount++] = srcEnd; 00702 srcCount += 2; 00703 } 00704 else { 00705 00706 result[newElemCount++] = tokBegin; 00707 result[newElemCount++] = tokEnd; 00708 tokCount += 2; 00709 00710 if (tokCount < tok->fElemCount) 00711 fRanges[srcCount] = tokEnd + 1; 00712 else 00713 srcCount += 2; 00714 } 00715 } 00716 else if (tokEnd < srcBegin) { 00717 tokCount += 2; 00718 00719 if (tokCount >= tok->fElemCount) 00720 srcCount += 2; 00721 } 00722 else { 00723 00724 fMemoryManager->deallocate(result);//delete [] result; 00725 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_IntersectRangesError, fMemoryManager); 00726 } 00727 } //end while 00728 00729 fMemoryManager->deallocate(fRanges);//delete [] fRanges; 00730 fRanges = result; 00731 fElemCount = newElemCount; 00732 fMaxCount = newMax; 00733 } 00734 00739 RangeToken* RangeToken::complementRanges(RangeToken* const tok, 00740 TokenFactory* const tokFactory, 00741 MemoryManager* const manager) { 00742 00743 if (tok->getTokenType() != T_RANGE && tok->getTokenType() != T_NRANGE) 00744 ThrowXMLwithMemMgr(IllegalArgumentException, XMLExcepts::Regex_ComplementRangesInvalidArg, manager); 00745 00746 tok->sortRanges(); 00747 tok->compactRanges(); 00748 00749 XMLInt32 lastElem = tok->fRanges[tok->fElemCount - 1]; 00750 RangeToken* rangeTok = tokFactory->createRange(); 00751 00752 if (tok->fRanges[0] > 0) { 00753 rangeTok->addRange(0, tok->fRanges[0] - 1); 00754 } 00755 00756 for (unsigned int i= 1; i< tok->fElemCount - 2; i += 2) { 00757 rangeTok->addRange(tok->fRanges[i] + 1, tok->fRanges[i+1] - 1); 00758 } 00759 00760 if (lastElem != UTF16_MAX) { 00761 rangeTok->addRange(lastElem + 1, UTF16_MAX); 00762 } 00763 00764 rangeTok->fCompacted = true; 00765 00766 return rangeTok; 00767 } 00768 00769 00770 // --------------------------------------------------------------------------- 00771 // RangeToken: Match methods 00772 // --------------------------------------------------------------------------- 00773 bool RangeToken::match(const XMLInt32 ch) { 00774 00775 createMap(); 00776 00777 bool ret; 00778 00779 if (getTokenType() == T_RANGE) { 00780 00781 if (ch < MAPSIZE) 00782 return ((fMap[ch/32] & (1<<(ch&0x1f))) != 0); 00783 00784 ret = false; 00785 00786 for (unsigned int i= fNonMapIndex; i< fElemCount; i +=2) { 00787 00788 if (fRanges[i] <= ch && ch <= fRanges[i+1]) 00789 return true; 00790 } 00791 } 00792 else { 00793 00794 if (ch < MAPSIZE) 00795 return ((fMap[ch/32] & (1<<(ch&0x1f))) == 0); 00796 00797 ret = true; 00798 00799 for (unsigned int i= fNonMapIndex; i< fElemCount; i += 2) { 00800 00801 if (fRanges[i] <= ch && ch <= fRanges[i+1]) 00802 return false; 00803 } 00804 } 00805 00806 return ret; 00807 } 00808 00809 // --------------------------------------------------------------------------- 00810 // RangeToken: Private helpers methods 00811 // --------------------------------------------------------------------------- 00812 void RangeToken::expand(const unsigned int length) { 00813 00814 unsigned int newMax = fElemCount + length; 00815 00816 // Avoid too many reallocations by expanding by a percentage 00817 unsigned int minNewMax = (unsigned int)((double)fElemCount * 1.25); 00818 if (newMax < minNewMax) 00819 newMax = minNewMax; 00820 00821 XMLInt32* newList = (XMLInt32*) fMemoryManager->allocate 00822 ( 00823 newMax * sizeof(XMLInt32) 00824 );//new XMLInt32[newMax]; 00825 for (unsigned int index = 0; index < fElemCount; index++) 00826 newList[index] = fRanges[index]; 00827 00828 fMemoryManager->deallocate(fRanges);//delete [] fRanges; 00829 fRanges = newList; 00830 fMaxCount = newMax; 00831 } 00832 00833 void RangeToken::doCreateMap() { 00834 00835 assert(!fMap); 00836 00837 int asize = MAPSIZE/32; 00838 fMap = (int*) fMemoryManager->allocate(asize * sizeof(int));//new int[asize]; 00839 fNonMapIndex = fElemCount; 00840 00841 for (int i = 0; i < asize; i++) { 00842 fMap[i] = 0; 00843 } 00844 00845 for (unsigned int j= 0; j < fElemCount; j += 2) { 00846 00847 XMLInt32 begin = fRanges[j]; 00848 XMLInt32 end = fRanges[j+1]; 00849 00850 if (begin < MAPSIZE) { 00851 00852 for (int k = begin; k <= end && k < MAPSIZE; k++) { 00853 fMap[k/32] |= 1<<(k&0x1F); 00854 } 00855 } 00856 else { 00857 00858 fNonMapIndex = j; 00859 break; 00860 } 00861 00862 if (end >= MAPSIZE) { 00863 00864 fNonMapIndex = j; 00865 break; 00866 } 00867 } 00868 } 00869 00870 XERCES_CPP_NAMESPACE_END 00871