GME  13
XMLRangeFactory.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: XMLRangeFactory.cpp 678879 2008-07-22 20:05:05Z amassari $
00020  */
00021 
00022 // ---------------------------------------------------------------------------
00023 //  Includes
00024 // ---------------------------------------------------------------------------
00025 #include <xercesc/util/regx/XMLRangeFactory.hpp>
00026 #include <xercesc/internal/CharTypeTables.hpp>
00027 #include <xercesc/util/regx/RegxDefs.hpp>
00028 #include <xercesc/util/regx/TokenFactory.hpp>
00029 #include <xercesc/util/regx/RangeToken.hpp>
00030 #include <xercesc/util/regx/RangeTokenMap.hpp>
00031 #include <xercesc/util/regx/UnicodeRangeFactory.hpp>
00032 #include <xercesc/util/Janitor.hpp>
00033 #include <string.h>
00034 
00035 XERCES_CPP_NAMESPACE_BEGIN
00036 
00037 // ---------------------------------------------------------------------------
00038 //  Local static functions
00039 // ---------------------------------------------------------------------------
00040 static void setupRange(XMLInt32* const rangeMap,
00041                        const XMLCh* const theTable,
00042                        unsigned int startingIndex) {
00043 
00044     const XMLCh* pchCur = theTable;
00045 
00046     // Do the ranges first
00047     while (*pchCur)
00048     {
00049         rangeMap[startingIndex++] = *pchCur++;
00050     }
00051 
00052     // Skip the range terminator
00053     pchCur++;
00054 
00055     // And then the singles until we hit its terminator
00056     while (*pchCur) {
00057 
00058         const XMLCh chSingle = *pchCur++;
00059         rangeMap[startingIndex++] = chSingle;
00060         rangeMap[startingIndex++] = chSingle;
00061     }
00062 }
00063 
00064 static unsigned int getTableLen(const XMLCh* const theTable) {
00065 
00066     XMLSize_t rangeLen = XMLString::stringLen(theTable);
00067 
00068     return (unsigned int)(rangeLen + 2*XMLString::stringLen(theTable + rangeLen + 1));
00069 }
00070 
00071 // ---------------------------------------------------------------------------
00072 //  XMLRangeFactory: Constructors and Destructor
00073 // ---------------------------------------------------------------------------
00074 XMLRangeFactory::XMLRangeFactory()
00075 {
00076 
00077 }
00078 
00079 XMLRangeFactory::~XMLRangeFactory() {
00080 
00081 }
00082 
00083 // ---------------------------------------------------------------------------
00084 //  XMLRangeFactory: Range creation methods
00085 // ---------------------------------------------------------------------------
00086 void XMLRangeFactory::buildRanges(RangeTokenMap *rangeTokMap) {
00087 
00088     if (fRangesCreated)
00089         return;
00090 
00091     if (!fKeywordsInitialized) {
00092         initializeKeywordMap(rangeTokMap);
00093     }
00094 
00095     TokenFactory* tokFactory = rangeTokMap->getTokenFactory();
00096 
00097     // Create space ranges
00098     unsigned int wsTblLen = getTableLen(gWhitespaceChars);
00099     RangeToken* tok = tokFactory->createRange();
00100     XMLInt32* wsRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
00101     (
00102         wsTblLen * sizeof(XMLInt32)
00103     );//new XMLInt32[wsTblLen];
00104 
00105     tok->setRangeValues(wsRange, wsTblLen);
00106     setupRange(wsRange, gWhitespaceChars, 0);
00107     // Build the internal map.
00108     tok->createMap();
00109     rangeTokMap->setRangeToken(fgXMLSpace, tok);
00110 
00111     tok = RangeToken::complementRanges(tok, tokFactory);
00112     // Build the internal map.
00113     tok->createMap();
00114     rangeTokMap->setRangeToken(fgXMLSpace, tok , true);
00115 
00116     // Create digits ranges
00117     tok = tokFactory->createRange();
00118     unsigned int digitTblLen = getTableLen(gDigitChars);
00119     XMLInt32* digitRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
00120     (
00121         digitTblLen * sizeof(XMLInt32)
00122     );//new XMLInt32[digitTblLen];
00123 
00124     tok->setRangeValues(digitRange, digitTblLen);
00125     setupRange(digitRange, gDigitChars, 0);
00126     // Build the internal map.
00127     tok->createMap();
00128     rangeTokMap->setRangeToken(fgXMLDigit, tok);
00129 
00130     tok = RangeToken::complementRanges(tok, tokFactory);
00131     // Build the internal map.
00132     tok->createMap();
00133     rangeTokMap->setRangeToken(fgXMLDigit, tok , true);
00134 
00135     // Build word ranges
00136     unsigned int baseTblLen = getTableLen(gBaseChars);
00137     unsigned int ideoTblLen = getTableLen(gIdeographicChars);
00138     unsigned int wordRangeLen = baseTblLen + ideoTblLen + digitTblLen;
00139     XMLInt32* wordRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
00140     (
00141         wordRangeLen * sizeof(XMLInt32)
00142     );//new XMLInt32[wordRangeLen];
00143     ArrayJanitor<XMLInt32> janWordRange(wordRange, XMLPlatformUtils::fgMemoryManager);
00144 
00145     setupRange(wordRange, gBaseChars, 0);
00146     setupRange(wordRange, gIdeographicChars, baseTblLen);
00147     memcpy(wordRange + baseTblLen + ideoTblLen, digitRange, digitTblLen * sizeof(XMLInt32));
00148 
00149     // Create NameChar ranges
00150     tok = tokFactory->createRange();
00151     unsigned int combTblLen = getTableLen(gCombiningChars);
00152     unsigned int extTblLen = getTableLen(gExtenderChars);
00153     unsigned int nameTblLen = wordRangeLen + combTblLen + extTblLen;
00154     XMLInt32* nameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
00155     (
00156         (nameTblLen + 8) * sizeof(XMLInt32)
00157     );//new XMLInt32[nameTblLen + 8];
00158 
00159     tok->setRangeValues(nameRange, nameTblLen + 8);
00160     memcpy(nameRange, wordRange, wordRangeLen * sizeof(XMLInt32));
00161     setupRange(nameRange, gCombiningChars, wordRangeLen);
00162     setupRange(nameRange, gExtenderChars, wordRangeLen + combTblLen);
00163     nameRange[nameTblLen++] = chDash;
00164     nameRange[nameTblLen++] = chDash;
00165     nameRange[nameTblLen++] = chColon;
00166     nameRange[nameTblLen++] = chColon;
00167     nameRange[nameTblLen++] = chPeriod;
00168     nameRange[nameTblLen++] = chPeriod;
00169     nameRange[nameTblLen++] = chUnderscore;
00170     nameRange[nameTblLen++] = chUnderscore;
00171     tok->sortRanges();
00172     tok->compactRanges();
00173     // Build the internal map.
00174     tok->createMap();
00175     rangeTokMap->setRangeToken(fgXMLNameChar, tok);
00176 
00177     tok = RangeToken::complementRanges(tok, tokFactory);
00178     // Build the internal map.
00179     tok->createMap();
00180     rangeTokMap->setRangeToken(fgXMLNameChar, tok , true);
00181 
00182     // Create initialNameChar ranges
00183     tok = tokFactory->createRange();
00184     unsigned int initialNameTblLen = baseTblLen + ideoTblLen;
00185     XMLInt32* initialNameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate
00186     (
00187         (initialNameTblLen + 4) * sizeof(XMLInt32)
00188     );//new XMLInt32[initialNameTblLen + 4];
00189 
00190     tok->setRangeValues(initialNameRange, initialNameTblLen + 4);
00191     memcpy(initialNameRange, wordRange, initialNameTblLen * sizeof(XMLInt32));
00192     initialNameRange[initialNameTblLen++] = chColon;
00193     initialNameRange[initialNameTblLen++] = chColon;
00194     initialNameRange[initialNameTblLen++] = chUnderscore;
00195     initialNameRange[initialNameTblLen++] = chUnderscore;
00196     tok->sortRanges();
00197     tok->compactRanges();
00198     // Build the internal map.
00199     tok->createMap();
00200     rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok);
00201 
00202     tok = RangeToken::complementRanges(tok, tokFactory);
00203     // Build the internal map.
00204     tok->createMap();
00205     rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok , true);
00206 
00207     // Create word range
00208     // \w = [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters) 
00209     tok = tokFactory->createRange();
00210     for(int i=0; i<=0xFFFF; i++)
00211     {
00212         unsigned short chType=UnicodeRangeFactory::getUniCategory(XMLUniCharacter::getType(i));
00213         if(chType == UnicodeRangeFactory::CHAR_PUNCTUATION || 
00214            chType == UnicodeRangeFactory::CHAR_SEPARATOR || 
00215            chType == UnicodeRangeFactory::CHAR_OTHER)
00216             tok->addRange(i, i);
00217     }
00218     tok->sortRanges();
00219     tok->compactRanges();
00220     // Build the internal map.
00221     tok->createMap();
00222     rangeTokMap->setRangeToken(fgXMLWord, tok , true);
00223 
00224     tok = RangeToken::complementRanges(tok, tokFactory);
00225     // Build the internal map.
00226     tok->createMap();
00227     rangeTokMap->setRangeToken(fgXMLWord, tok);
00228 
00229 
00230     fRangesCreated = true;
00231 }
00232 
00233 // ---------------------------------------------------------------------------
00234 //  XMLRangeFactory: Keyword map initialization methods
00235 // ---------------------------------------------------------------------------
00236 void XMLRangeFactory::initializeKeywordMap(RangeTokenMap *rangeTokMap) {
00237 
00238     if (fKeywordsInitialized)
00239         return;
00240 
00241     rangeTokMap->addKeywordMap(fgXMLSpace, fgXMLCategory);
00242     rangeTokMap->addKeywordMap(fgXMLDigit, fgXMLCategory);
00243     rangeTokMap->addKeywordMap(fgXMLWord, fgXMLCategory);
00244     rangeTokMap->addKeywordMap(fgXMLNameChar, fgXMLCategory);
00245     rangeTokMap->addKeywordMap(fgXMLInitialNameChar, fgXMLCategory);
00246 
00247     fKeywordsInitialized = true;
00248 }
00249 
00250 XERCES_CPP_NAMESPACE_END
00251