GME  13
RangeToken.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: RangeToken.cpp 901107 2010-01-20 08:45:02Z borisk $
00020  */
00021 
00022 // ---------------------------------------------------------------------------
00023 //  Includes
00024 // ---------------------------------------------------------------------------
00025 #if HAVE_CONFIG_H
00026 #    include <config.h>
00027 #endif
00028 
00029 #include <assert.h>
00030 
00031 #include <xercesc/util/regx/RangeToken.hpp>
00032 #include <xercesc/util/regx/TokenFactory.hpp>
00033 #include <xercesc/util/IllegalArgumentException.hpp>
00034 #include <xercesc/util/XMLUniDefs.hpp>
00035 
00036 #if XERCES_USE_TRANSCODER_ICU
00037   #include <unicode/uchar.h>
00038 
00039 #if (U_ICU_VERSION_MAJOR_NUM > 2) || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >=4)
00040   #include <unicode/uset.h>
00041   #include <xercesc/util/XMLString.hpp>
00042   #include <xercesc/util/Janitor.hpp>
00043 #endif
00044 #endif
00045 
00046 XERCES_CPP_NAMESPACE_BEGIN
00047 
00048 // ---------------------------------------------------------------------------
00049 //  Static member data initialization
00050 // ---------------------------------------------------------------------------
00051 const int RangeToken::MAPSIZE = 256;
00052 const unsigned int RangeToken::INITIALSIZE = 16;
00053 
00054 // ---------------------------------------------------------------------------
00055 //  RangeToken: Constructors and Destructors
00056 // ---------------------------------------------------------------------------
00057 RangeToken::RangeToken(const Token::tokType tkType,
00058                        MemoryManager* const manager)
00059     : Token(tkType, manager)
00060     , fSorted(false)
00061     , fCompacted(false)
00062     , fNonMapIndex(0)
00063     , fElemCount(0)
00064     , fMaxCount(INITIALSIZE)
00065     , fMap(0)
00066     , fRanges(0)
00067     , fCaseIToken(0)
00068     , fMemoryManager(manager)
00069 {
00070 
00071 }
00072 
00073 RangeToken::~RangeToken() {
00074 
00075     // TODO(dbertoni) This is a temporary hack until we can change the ABI.
00076     // See Jira issue XERCESC-1866 for more details.
00077     if (fCaseIToken && fCaseIToken->fCaseIToken == this)
00078     {
00079         fCaseIToken->fCaseIToken = 0;
00080     }
00081     fMemoryManager->deallocate(fMap);//delete [] fMap;
00082     fMemoryManager->deallocate(fRanges);//delete[] fRanges;
00083 }
00084 
00085 
00086 // This is a struct that defines a mapping for
00087 // case-insensitive matching.  The first character
00088 // is the character we try to match in the range.
00089 // The second is the character we add to the range,
00090 // because it maps to the first when we're folding
00091 // case.
00092 struct ExceptionCharsStruct
00093 {
00094     XMLInt32    baseChar;
00095 
00096     XMLInt32    matchingChar;
00097 };
00098 
00099 
00100 // This is an array of character mappings that we will
00101 // add to ranges for case-insensitive matching.
00102 static const ExceptionCharsStruct   s_exceptions[] =
00103 {
00104     { 0x49, 0x130 },
00105     { 0x49, 0x131 },
00106     { 0x4b, 0x212a },
00107     { 0x53, 0x17f },
00108     { 0x69, 0x130 },
00109     { 0x69, 0x131 },
00110     { 0x6b, 0x212a },
00111     { 0x73, 0x17f },
00112     { 0xc5, 0x212b },
00113     { 0xe5, 0x212b },
00114     { 0x1c4, 0x1c5 },
00115     { 0x1c6, 0x1c5 },
00116     { 0x1c7, 0x1c8 },
00117     { 0x1c9, 0x1c8 },
00118     { 0x1ca, 0x1cb },
00119     { 0x1cc, 0x1cb },
00120     { 0x1f1, 0x1f2 },
00121     { 0x1f3, 0x1f2 },
00122     { 0x392, 0x3d0 },
00123     { 0x395, 0x3f5 },
00124     { 0x398, 0x3d1 },
00125     { 0x398, 0x3f4 },
00126     { 0x399, 0x345 },
00127     { 0x399, 0x1fbe },
00128     { 0x39a, 0x3f0 },
00129     { 0x39c, 0xb5 },
00130     { 0x3a0, 0x3d6 },
00131     { 0x3a1, 0x3f1 },
00132     { 0x3a3, 0x3c2 },
00133     { 0x3a6, 0x3d5 },
00134     { 0x3a9, 0x2126 },
00135     { 0x3b2, 0x3d0 },
00136     { 0x3b5, 0x3f5 },
00137     { 0x3b8, 0x3d1 },
00138     { 0x3b8, 0x3f4 },
00139     { 0x3b9, 0x345 },
00140     { 0x3b9, 0x1fbe },
00141     { 0x3ba, 0x3f0 },
00142     { 0x3bc, 0xb5 },
00143     { 0x3c0, 0x3d6 },
00144     { 0x3c1, 0x3f1 },
00145     { 0x3c3, 0x3c2 },
00146     { 0x3c6, 0x3d5 },
00147     { 0x3c9, 0x2126 },
00148     { 0x1e60, 0x1e9b },
00149     { 0x1e61, 0x1e9b }
00150 };
00151 
00152 // ---------------------------------------------------------------------------
00153 //  RangeToken: Getter methods
00154 // ---------------------------------------------------------------------------
00155 RangeToken* RangeToken::getCaseInsensitiveToken(TokenFactory* const tokFactory) {
00156 
00157     if (fCaseIToken == 0 && tokFactory && fRanges) {
00158 
00159         bool isNRange = (getTokenType() == T_NRANGE) ? true : false;
00160         RangeToken* lwrToken = tokFactory->createRange(isNRange);
00161 
00162 #if XERCES_USE_TRANSCODER_ICU && ((U_ICU_VERSION_MAJOR_NUM > 2) || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >=4))
00163         UChar* rangeStr=(UChar*)fMemoryManager->allocate(40*fElemCount*sizeof(UChar));
00164         ArrayJanitor<UChar> janRange(rangeStr, fMemoryManager);
00165         int c=0;
00166         rangeStr[c++] = chOpenSquare;
00167         for (unsigned int i = 0;  i < fElemCount - 1;  i += 2) {
00168             XMLCh buffer[10];
00169             XMLSize_t len, j;
00170 
00171             rangeStr[c++] = chBackSlash;
00172             rangeStr[c++] = chLatin_U;
00173             XMLString::binToText(fRanges[i], buffer, 10, 16, fMemoryManager);
00174             len = XMLString::stringLen(buffer);
00175             for(j=0;j<(8-len);j++)
00176                 rangeStr[c++] = chDigit_0;
00177             XMLCh* p=buffer;
00178             while(*p)
00179                 rangeStr[c++] = *p++;
00180             if(fRanges[i+1]!=fRanges[i])
00181             {
00182                 rangeStr[c++] = chDash;
00183                 rangeStr[c++] = chBackSlash;
00184                 rangeStr[c++] = chLatin_U;
00185                 XMLString::binToText(fRanges[i+1], buffer, 10, 16, fMemoryManager);
00186                 len = XMLString::stringLen(buffer);
00187                 for(j=0;j<(8-len);j++)
00188                     rangeStr[c++] = chDigit_0;
00189                 p=buffer;
00190                 while(*p)
00191                     rangeStr[c++] = *p++;
00192             }
00193         }
00194         rangeStr[c++] = chCloseSquare;
00195         rangeStr[c++] = chNull;
00196         UErrorCode ec=U_ZERO_ERROR;
00197         USet* range=uset_openPatternOptions(rangeStr, -1, USET_CASE_INSENSITIVE, &ec);
00198         if(range)
00199         {
00200             ec = U_ZERO_ERROR;
00201             uint32_t cbCount=uset_serialize(range, NULL, 0, &ec);
00202             uint16_t* buffer=(uint16_t*)fMemoryManager->allocate(cbCount*sizeof(uint16_t));
00203             ArrayJanitor<uint16_t> janSet(buffer, fMemoryManager);
00204             ec = U_ZERO_ERROR;
00205             uset_serialize(range, buffer, cbCount, &ec);
00206             USerializedSet serializedSet;
00207             uset_getSerializedSet(&serializedSet, buffer, cbCount);
00208             int32_t nSets=uset_getSerializedRangeCount(&serializedSet);
00209             for(int32_t i=0; i<nSets; i++)
00210             {
00211                 UChar32 start, end;
00212                 uset_getSerializedRange(&serializedSet, i, &start, &end);
00213                 lwrToken->addRange(start, end);
00214             }
00215             // does this release the memory allocated by the set?
00216             uset_setSerializedToOne(&serializedSet, 32);
00217             uset_close(range);
00218         }
00219 #else
00220         unsigned int exceptIndex = 0;
00221 
00222         for (unsigned int i = 0;  i < fElemCount - 1;  i += 2) {
00223             for (XMLInt32 ch = fRanges[i];  ch <= fRanges[i + 1];  ++ch) {
00224 #if XERCES_USE_TRANSCODER_ICU
00225                 const XMLInt32  upperCh = u_toupper(ch);
00226 
00227                 if (upperCh != ch)
00228                 {
00229                     lwrToken->addRange(upperCh, upperCh);
00230                 }
00231 
00232                 const XMLInt32  lowerCh = u_tolower(ch);
00233 
00234                 if (lowerCh != ch)
00235                 {
00236                     lwrToken->addRange(lowerCh, lowerCh);
00237                 }
00238 
00239                 const XMLInt32  titleCh = u_totitle(ch);
00240 
00241                 if (titleCh != ch && titleCh != upperCh)
00242                 {
00243                     lwrToken->addRange(titleCh, titleCh);
00244                 }
00245 #else
00246                 if (ch >= chLatin_A && ch <= chLatin_Z)
00247                 {
00248                     ch += chLatin_a - chLatin_A;
00249 
00250                     lwrToken->addRange(ch, ch);
00251                 }
00252                 else if (ch >= chLatin_a && ch <= chLatin_z)
00253                 {
00254                     ch -= chLatin_a - chLatin_A;
00255 
00256                     lwrToken->addRange(ch, ch);
00257                 }
00258 #endif
00259 
00260                 const unsigned int  exceptionsSize =
00261                     sizeof(s_exceptions) / sizeof(s_exceptions[0]);
00262 
00263                 // Add any exception chars.  These are characters where the the
00264                 // case mapping is not symmetric.  (Unicode case mappings are not isomorphic...)
00265                 while (exceptIndex < exceptionsSize)
00266                 {
00267                     if (s_exceptions[exceptIndex].baseChar < ch)
00268                     {
00269                         ++exceptIndex;
00270                     }
00271                     else if (s_exceptions[exceptIndex].baseChar == ch)
00272                     {
00273                         const XMLInt32  matchingChar =
00274                             s_exceptions[exceptIndex].matchingChar;
00275 
00276                         lwrToken->addRange(
00277                             matchingChar,
00278                             matchingChar);
00279 
00280                         ++exceptIndex;
00281                     }
00282                     else
00283                     {
00284                         break;
00285                     }
00286                 }
00287             }
00288         }
00289 
00290         lwrToken->mergeRanges(this);
00291 #endif
00292         lwrToken->compactRanges();
00293         lwrToken->createMap();
00294 
00295         fCaseIToken = lwrToken;
00296         // TODO(dbertoni) This is a temporary hack until we can change the ABI.
00297         // See Jira issue XERCESC-1866 for more details.
00298         // Overload the fCaseIToken data member to be the case-insensitive token
00299         // that's caching the case-insensitive one.  We need this because tokens
00300         // have varying lifetimes.
00301         fCaseIToken->setCaseInsensitiveToken(this);
00302     }
00303 
00304     return fCaseIToken;
00305 }
00306 
00307 // ---------------------------------------------------------------------------
00308 //  RangeToken: Setter methods
00309 // ---------------------------------------------------------------------------
00310 void RangeToken::setRangeValues(XMLInt32* const rangeValues, const unsigned int count)
00311 {
00312     if (fRanges) {
00313 
00314         if (fMap) {
00315             fMemoryManager->deallocate(fMap);//delete [] fMap;
00316             fMap = 0;
00317         }
00318 
00319         fElemCount = 0;
00320         fMemoryManager->deallocate(fRanges);//delete [] fRanges;
00321         fRanges = 0;
00322     }
00323 
00324     fElemCount = fMaxCount = count;
00325     fRanges = rangeValues;
00326 }
00327 
00328 // ---------------------------------------------------------------------------
00329 //  RangeToken: Range manipulation methods
00330 // ---------------------------------------------------------------------------
00331 void RangeToken::addRange(const XMLInt32 start, const XMLInt32 end) {
00332 
00333     XMLInt32 val1, val2;
00334 
00335     fCaseIToken = 0;
00336 
00337     if (start <= end) {
00338 
00339         val1 = start;
00340         val2 = end;
00341     }
00342     else {
00343 
00344         val1 = end;
00345         val2 = start;
00346     }
00347 
00348     if (fRanges == 0) {
00349 
00350         fRanges = (XMLInt32*) fMemoryManager->allocate
00351         (
00352             fMaxCount * sizeof(XMLInt32)
00353         );//new XMLInt32[fMaxCount];
00354         fRanges[0] = val1;
00355         fRanges[1] = val2;
00356         fElemCount = 2;
00357         fSorted = true;
00358     }
00359     else {
00360 
00361         if (fRanges[fElemCount-1] + 1 == val1) {
00362 
00363             fRanges[fElemCount-1] = val2;
00364             return;
00365         }
00366 
00367         if (fElemCount + 2 >= fMaxCount) {
00368             expand(2);
00369         }
00370 
00371         if(fSorted && fRanges[fElemCount-1] >= val1)
00372         {
00373             for (int i = 0; i < (int)fElemCount; i +=2)
00374             {
00375                 // check if this range is already part of this one
00376                 if (fRanges[i] <= val1 && fRanges[i+1] >= val2)
00377                     break;
00378                 // or if the new one extends the old one
00379                 else if(fRanges[i]==val1 && fRanges[i+1] < val2)
00380                 {
00381                     fRanges[i+1]=val2;
00382                     break;
00383                 }
00384                 else if (fRanges[i] > val1 ||
00385                           (fRanges[i]==val1 && fRanges[i+1] > val2))
00386                 {
00387                     for(int j=fElemCount-1;j>=i;j--)
00388                         fRanges[j+2]=fRanges[j];
00389                     fRanges[i]   = val1;
00390                     fRanges[i+1] = val2;
00391                     fElemCount  += 2;
00392                     break;
00393                 }
00394             }
00395         }
00396         else
00397         {
00398             if (fRanges[fElemCount-1] >= val1)
00399                 fSorted = false;
00400 
00401             fRanges[fElemCount++] = val1;
00402             fRanges[fElemCount++] = val2;
00403 
00404             if (!fSorted) {
00405                 sortRanges();
00406             }
00407         }
00408     }
00409 }
00410 
00411 void RangeToken::sortRanges() {
00412 
00413     if (fSorted || fRanges == 0)
00414         return;
00415 
00416     for (int i = fElemCount - 4; i >= 0; i -= 2) {
00417 
00418         for (int j = 0; j <= i; j +=2) {
00419 
00420             if (fRanges[j] > fRanges[j + 2]
00421                 || (fRanges[j]==fRanges[j+2] && fRanges[j+1] > fRanges[j+3])) {
00422 
00423                 XMLInt32 tmpVal = fRanges[j+2];
00424                 fRanges[j+2] = fRanges[j];
00425                 fRanges[j] = tmpVal;
00426                 tmpVal = fRanges[j+3];
00427                 fRanges[j+3] = fRanges[j+1];
00428                 fRanges[j+1] = tmpVal;
00429             }
00430         }
00431     }
00432 
00433     fSorted = true;
00434 }
00435 
00436 void RangeToken::compactRanges() {
00437 
00438     if (fCompacted || fRanges == 0 || fElemCount <= 2)
00439         return;
00440 
00441     unsigned int base = 0;
00442     unsigned int target = 0;
00443 
00444     while (target < fElemCount) {
00445 
00446         if (base != target) {
00447 
00448             fRanges[base] = fRanges[target++];
00449             fRanges[base+1] = fRanges[target++];
00450         }
00451         else
00452             target += 2;
00453 
00454         XMLInt32 baseEnd = fRanges[base + 1];
00455 
00456         while (target < fElemCount) {
00457 
00458             XMLInt32 startRange = fRanges[target];
00459 
00460             if (baseEnd + 1 < startRange)
00461                 break;
00462 
00463             XMLInt32 endRange = fRanges[target + 1];
00464 
00465             if (baseEnd + 1 == startRange || baseEnd < endRange) {
00466 
00467                 baseEnd = endRange;
00468                 fRanges[base+1] = baseEnd;
00469                 target += 2;
00470             }
00471             else if (baseEnd >= endRange) {
00472                 target += 2;
00473             }
00474             else {
00475                 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_CompactRangesError, fMemoryManager);
00476             }
00477         } // inner while
00478 
00479         base += 2;
00480     }
00481 
00482     fElemCount = base;
00483     fCompacted = true;
00484 }
00485 
00486 void RangeToken::mergeRanges(const Token *const tok) {
00487 
00488 
00489     if (tok->getTokenType() != this->getTokenType())
00490         ThrowXMLwithMemMgr(IllegalArgumentException, XMLExcepts::Regex_MergeRangesTypeMismatch, fMemoryManager);
00491 
00492     RangeToken* rangeTok = (RangeToken *) tok;
00493 
00494     if (rangeTok->fRanges == 0)
00495         return;
00496 
00497     fCaseIToken = 0;
00498     sortRanges();
00499     rangeTok->sortRanges();
00500 
00501     if (fRanges == 0) {
00502 
00503         fMaxCount = rangeTok->fMaxCount;
00504         fRanges = (XMLInt32*) fMemoryManager->allocate
00505         (
00506             fMaxCount * sizeof(XMLInt32)
00507         );//new XMLInt32[fMaxCount];
00508         for (unsigned int index = 0; index < rangeTok->fElemCount; index++) {
00509             fRanges[index] = rangeTok->fRanges[index];
00510         }
00511 
00512         fElemCount = rangeTok->fElemCount;
00513         fSorted = true;
00514         return;
00515     }
00516 
00517     unsigned int newMaxCount = (fElemCount + rangeTok->fElemCount >= fMaxCount)
00518                                  ? fMaxCount + rangeTok->fMaxCount : fMaxCount;
00519     XMLInt32* result = (XMLInt32*) fMemoryManager->allocate
00520     (
00521         newMaxCount * sizeof(XMLInt32)
00522     );//new XMLInt32[newMaxCount];
00523 
00524     for (unsigned int i=0, j=0, k=0; i < fElemCount || j < rangeTok->fElemCount;) {
00525 
00526         if (i >= fElemCount) {
00527 
00528             for (int count = 0; count < 2; count++) {
00529                 result[k++] = rangeTok->fRanges[j++];
00530             }
00531         }
00532         else if (j >= rangeTok->fElemCount) {
00533 
00534             for (int count = 0; count < 2; count++) {
00535                 result[k++] = fRanges[i++];
00536             }
00537         }
00538         else if (rangeTok->fRanges[j] < fRanges[i]
00539                  || (rangeTok->fRanges[j] == fRanges[i]
00540                      && rangeTok->fRanges[j+1] < fRanges[i+1])) {
00541 
00542             for (int count = 0; count < 2; count++) {
00543                 result[k++] = rangeTok->fRanges[j++];
00544             }
00545         }
00546         else {
00547 
00548             for (int count = 0; count < 2; count++) {
00549 
00550                 result[k++] = fRanges[i++];
00551             }
00552         }
00553     }
00554 
00555     fMemoryManager->deallocate(fRanges);//delete [] fRanges;
00556     fElemCount += rangeTok->fElemCount;
00557     fRanges = result;
00558     fMaxCount = newMaxCount;
00559 }
00560 
00561 void RangeToken::subtractRanges(RangeToken* const tok) {
00562 
00563     if (fRanges == 0 || tok->fRanges == 0)
00564         return;
00565 
00566     if (tok->getTokenType() == T_NRANGE) {
00567 
00568         intersectRanges(tok);
00569         return;
00570     }
00571 
00572     fCaseIToken = 0;
00573     sortRanges();
00574     compactRanges();
00575     tok->sortRanges();
00576     tok->compactRanges();
00577 
00578     unsigned int newMax = (fElemCount + tok->fElemCount >= fMaxCount)
00579                              ? fMaxCount + tok->fMaxCount : fMaxCount;
00580     XMLInt32* result = (XMLInt32*) fMemoryManager->allocate
00581     (
00582         newMax * sizeof(XMLInt32)
00583     );//new XMLInt32[newMax];
00584     unsigned int newElemCount = 0;
00585     unsigned int srcCount = 0;
00586     unsigned int subCount = 0;
00587 
00588     while (srcCount < fElemCount && subCount < tok->fElemCount) {
00589 
00590         XMLInt32 srcBegin = fRanges[srcCount];
00591         XMLInt32 srcEnd = fRanges[srcCount + 1];
00592         XMLInt32 subBegin = tok->fRanges[subCount];
00593         XMLInt32 subEnd = tok->fRanges[subCount + 1];
00594 
00595         if (srcEnd < subBegin) { // no overlap
00596 
00597             result[newElemCount++] = fRanges[srcCount++];
00598             result[newElemCount++] = fRanges[srcCount++];
00599         }
00600         else if (srcEnd >= subBegin && srcBegin <= subEnd) {
00601 
00602             if (subBegin <= srcBegin && srcEnd <= subEnd) {
00603                 srcCount += 2;
00604             }
00605             else if (subBegin <= srcBegin) {
00606 
00607                 fRanges[srcCount] = subEnd + 1;
00608                 subCount += 2;
00609             }
00610             else if (srcEnd <= subEnd) {
00611 
00612                 result[newElemCount++] = srcBegin;
00613                 result[newElemCount++] = subBegin - 1;
00614                 srcCount += 2;
00615             }
00616             else {
00617 
00618                 result[newElemCount++] = srcBegin;
00619                 result[newElemCount++] = subBegin - 1;
00620                 fRanges[srcCount] = subEnd + 1;
00621                 subCount += 2;
00622             }
00623         }
00624         else if (subEnd < srcBegin) {
00625             subCount += 2;
00626         }
00627         else {
00628             fMemoryManager->deallocate(result);//delete [] result;
00629             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_SubtractRangesError, fMemoryManager);
00630         }
00631     } //end while
00632 
00633     while (srcCount < fElemCount) {
00634 
00635         result[newElemCount++] = fRanges[srcCount++];
00636         result[newElemCount++] = fRanges[srcCount++];
00637     }
00638 
00639     fMemoryManager->deallocate(fRanges);//delete [] fRanges;
00640     fRanges = result;
00641     fElemCount = newElemCount;
00642     fMaxCount = newMax;
00643 }
00644 
00648 void RangeToken::intersectRanges(RangeToken* const tok) {
00649 
00650     if (fRanges == 0 || tok->fRanges == 0)
00651         return;
00652 
00653     fCaseIToken = 0;
00654     sortRanges();
00655     compactRanges();
00656     tok->sortRanges();
00657     tok->compactRanges();
00658 
00659     unsigned int newMax = (fElemCount + tok->fElemCount >= fMaxCount)
00660                              ? fMaxCount + tok->fMaxCount : fMaxCount;
00661     XMLInt32* result = (XMLInt32*) fMemoryManager->allocate
00662     (
00663         newMax * sizeof(XMLInt32)
00664     );//new XMLInt32[newMax];
00665     unsigned int newElemCount = 0;
00666     unsigned int srcCount = 0;
00667     unsigned int tokCount = 0;
00668 
00669     while (srcCount < fElemCount && tokCount < tok->fElemCount) {
00670 
00671         XMLInt32 srcBegin = fRanges[srcCount];
00672         XMLInt32 srcEnd = fRanges[srcCount + 1];
00673         XMLInt32 tokBegin = tok->fRanges[tokCount];
00674         XMLInt32 tokEnd = tok->fRanges[tokCount + 1];
00675 
00676         if (srcEnd < tokBegin) {
00677             srcCount += 2;
00678         }
00679         else if (srcEnd >= tokBegin && srcBegin <= tokEnd) {
00680 
00681             if (tokBegin <= srcBegin && srcEnd <= tokEnd) {
00682 
00683                 result[newElemCount++] = srcBegin;
00684                 result[newElemCount++] = srcEnd;
00685                 srcCount += 2;
00686             }
00687             else if (tokBegin <= srcBegin) {
00688 
00689                 result[newElemCount++] = srcBegin;
00690                 result[newElemCount++] = tokEnd;
00691                 tokCount += 2;
00692 
00693                 if (tokCount < tok->fElemCount)
00694                     fRanges[srcCount] = tokEnd + 1;
00695                 else
00696                     srcCount += 2;
00697             }
00698             else if (srcEnd <= tokEnd) {
00699 
00700                 result[newElemCount++] = tokBegin;
00701                 result[newElemCount++] = srcEnd;
00702                 srcCount += 2;
00703             }
00704             else {
00705 
00706                 result[newElemCount++] = tokBegin;
00707                 result[newElemCount++] = tokEnd;
00708                 tokCount += 2;
00709 
00710                 if (tokCount < tok->fElemCount)
00711                     fRanges[srcCount] = tokEnd + 1;
00712                 else
00713                     srcCount += 2;
00714             }
00715         }
00716         else if (tokEnd < srcBegin) {
00717             tokCount += 2;
00718 
00719             if (tokCount >= tok->fElemCount)
00720                 srcCount += 2;
00721         }
00722         else {
00723 
00724             fMemoryManager->deallocate(result);//delete [] result;
00725             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_IntersectRangesError, fMemoryManager);
00726         }
00727     } //end while
00728 
00729     fMemoryManager->deallocate(fRanges);//delete [] fRanges;
00730     fRanges = result;
00731     fElemCount = newElemCount;
00732     fMaxCount = newMax;
00733 }
00734 
00739 RangeToken* RangeToken::complementRanges(RangeToken* const tok,
00740                                     TokenFactory* const tokFactory,
00741                                     MemoryManager* const manager) {
00742 
00743     if (tok->getTokenType() != T_RANGE && tok->getTokenType() != T_NRANGE)
00744         ThrowXMLwithMemMgr(IllegalArgumentException, XMLExcepts::Regex_ComplementRangesInvalidArg, manager);
00745 
00746     tok->sortRanges();
00747     tok->compactRanges();
00748 
00749     XMLInt32 lastElem = tok->fRanges[tok->fElemCount - 1];
00750     RangeToken* rangeTok = tokFactory->createRange();
00751 
00752     if (tok->fRanges[0] > 0) {
00753         rangeTok->addRange(0, tok->fRanges[0] - 1);
00754     }
00755 
00756     for (unsigned int i= 1; i< tok->fElemCount - 2; i += 2) {
00757         rangeTok->addRange(tok->fRanges[i] + 1, tok->fRanges[i+1] - 1);
00758     }
00759 
00760     if (lastElem != UTF16_MAX) {
00761         rangeTok->addRange(lastElem + 1, UTF16_MAX);
00762     }
00763 
00764     rangeTok->fCompacted = true;
00765 
00766     return rangeTok;
00767 }
00768 
00769 
00770 // ---------------------------------------------------------------------------
00771 //  RangeToken: Match methods
00772 // ---------------------------------------------------------------------------
00773 bool RangeToken::match(const XMLInt32 ch) {
00774 
00775     createMap();
00776 
00777     bool ret;
00778 
00779     if (getTokenType() == T_RANGE) {
00780 
00781         if (ch < MAPSIZE)
00782             return ((fMap[ch/32] & (1<<(ch&0x1f))) != 0);
00783 
00784         ret = false;
00785 
00786         for (unsigned int i= fNonMapIndex; i< fElemCount; i +=2) {
00787 
00788             if (fRanges[i] <= ch && ch <= fRanges[i+1])
00789                 return true;
00790         }
00791     }
00792     else {
00793 
00794         if (ch < MAPSIZE)
00795             return ((fMap[ch/32] & (1<<(ch&0x1f))) == 0);
00796 
00797         ret = true;
00798 
00799         for (unsigned int i= fNonMapIndex; i< fElemCount; i += 2) {
00800 
00801             if (fRanges[i] <= ch && ch <= fRanges[i+1])
00802                 return false;
00803         }
00804     }
00805 
00806     return ret;
00807 }
00808 
00809 // ---------------------------------------------------------------------------
00810 //  RangeToken: Private helpers methods
00811 // ---------------------------------------------------------------------------
00812 void RangeToken::expand(const unsigned int length) {
00813 
00814     unsigned int newMax = fElemCount + length;
00815 
00816     // Avoid too many reallocations by expanding by a percentage
00817     unsigned int minNewMax = (unsigned int)((double)fElemCount * 1.25);
00818     if (newMax < minNewMax)
00819         newMax = minNewMax;
00820 
00821     XMLInt32* newList = (XMLInt32*) fMemoryManager->allocate
00822     (
00823         newMax * sizeof(XMLInt32)
00824     );//new XMLInt32[newMax];
00825     for (unsigned int index = 0; index < fElemCount; index++)
00826         newList[index] = fRanges[index];
00827 
00828     fMemoryManager->deallocate(fRanges);//delete [] fRanges;
00829     fRanges = newList;
00830     fMaxCount = newMax;
00831 }
00832 
00833 void RangeToken::doCreateMap() {
00834 
00835     assert(!fMap);
00836 
00837     int asize = MAPSIZE/32;
00838     fMap = (int*) fMemoryManager->allocate(asize * sizeof(int));//new int[asize];
00839     fNonMapIndex = fElemCount;
00840 
00841     for (int i = 0; i < asize; i++) {
00842         fMap[i] = 0;
00843     }
00844 
00845     for (unsigned int j= 0; j < fElemCount; j += 2) {
00846 
00847         XMLInt32 begin = fRanges[j];
00848         XMLInt32 end = fRanges[j+1];
00849 
00850         if (begin < MAPSIZE) {
00851 
00852             for (int k = begin; k <= end && k < MAPSIZE; k++) {
00853                 fMap[k/32] |= 1<<(k&0x1F);
00854             }
00855         }
00856         else {
00857 
00858             fNonMapIndex = j;
00859             break;
00860         }
00861 
00862         if (end >= MAPSIZE) {
00863 
00864             fNonMapIndex = j;
00865             break;
00866         }
00867     }
00868 }
00869 
00870 XERCES_CPP_NAMESPACE_END
00871