GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: XMLRangeFactory.cpp 678879 2008-07-22 20:05:05Z amassari $ 00020 */ 00021 00022 // --------------------------------------------------------------------------- 00023 // Includes 00024 // --------------------------------------------------------------------------- 00025 #include <xercesc/util/regx/XMLRangeFactory.hpp> 00026 #include <xercesc/internal/CharTypeTables.hpp> 00027 #include <xercesc/util/regx/RegxDefs.hpp> 00028 #include <xercesc/util/regx/TokenFactory.hpp> 00029 #include <xercesc/util/regx/RangeToken.hpp> 00030 #include <xercesc/util/regx/RangeTokenMap.hpp> 00031 #include <xercesc/util/regx/UnicodeRangeFactory.hpp> 00032 #include <xercesc/util/Janitor.hpp> 00033 #include <string.h> 00034 00035 XERCES_CPP_NAMESPACE_BEGIN 00036 00037 // --------------------------------------------------------------------------- 00038 // Local static functions 00039 // --------------------------------------------------------------------------- 00040 static void setupRange(XMLInt32* const rangeMap, 00041 const XMLCh* const theTable, 00042 unsigned int startingIndex) { 00043 00044 const XMLCh* pchCur = theTable; 00045 00046 // Do the ranges first 00047 while (*pchCur) 00048 { 00049 rangeMap[startingIndex++] = *pchCur++; 00050 } 00051 00052 // Skip the range terminator 00053 pchCur++; 00054 00055 // And then the singles until we hit its terminator 00056 while (*pchCur) { 00057 00058 const XMLCh chSingle = *pchCur++; 00059 rangeMap[startingIndex++] = chSingle; 00060 rangeMap[startingIndex++] = chSingle; 00061 } 00062 } 00063 00064 static unsigned int getTableLen(const XMLCh* const theTable) { 00065 00066 XMLSize_t rangeLen = XMLString::stringLen(theTable); 00067 00068 return (unsigned int)(rangeLen + 2*XMLString::stringLen(theTable + rangeLen + 1)); 00069 } 00070 00071 // --------------------------------------------------------------------------- 00072 // XMLRangeFactory: Constructors and Destructor 00073 // --------------------------------------------------------------------------- 00074 XMLRangeFactory::XMLRangeFactory() 00075 { 00076 00077 } 00078 00079 XMLRangeFactory::~XMLRangeFactory() { 00080 00081 } 00082 00083 // --------------------------------------------------------------------------- 00084 // XMLRangeFactory: Range creation methods 00085 // --------------------------------------------------------------------------- 00086 void XMLRangeFactory::buildRanges(RangeTokenMap *rangeTokMap) { 00087 00088 if (fRangesCreated) 00089 return; 00090 00091 if (!fKeywordsInitialized) { 00092 initializeKeywordMap(rangeTokMap); 00093 } 00094 00095 TokenFactory* tokFactory = rangeTokMap->getTokenFactory(); 00096 00097 // Create space ranges 00098 unsigned int wsTblLen = getTableLen(gWhitespaceChars); 00099 RangeToken* tok = tokFactory->createRange(); 00100 XMLInt32* wsRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate 00101 ( 00102 wsTblLen * sizeof(XMLInt32) 00103 );//new XMLInt32[wsTblLen]; 00104 00105 tok->setRangeValues(wsRange, wsTblLen); 00106 setupRange(wsRange, gWhitespaceChars, 0); 00107 // Build the internal map. 00108 tok->createMap(); 00109 rangeTokMap->setRangeToken(fgXMLSpace, tok); 00110 00111 tok = RangeToken::complementRanges(tok, tokFactory); 00112 // Build the internal map. 00113 tok->createMap(); 00114 rangeTokMap->setRangeToken(fgXMLSpace, tok , true); 00115 00116 // Create digits ranges 00117 tok = tokFactory->createRange(); 00118 unsigned int digitTblLen = getTableLen(gDigitChars); 00119 XMLInt32* digitRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate 00120 ( 00121 digitTblLen * sizeof(XMLInt32) 00122 );//new XMLInt32[digitTblLen]; 00123 00124 tok->setRangeValues(digitRange, digitTblLen); 00125 setupRange(digitRange, gDigitChars, 0); 00126 // Build the internal map. 00127 tok->createMap(); 00128 rangeTokMap->setRangeToken(fgXMLDigit, tok); 00129 00130 tok = RangeToken::complementRanges(tok, tokFactory); 00131 // Build the internal map. 00132 tok->createMap(); 00133 rangeTokMap->setRangeToken(fgXMLDigit, tok , true); 00134 00135 // Build word ranges 00136 unsigned int baseTblLen = getTableLen(gBaseChars); 00137 unsigned int ideoTblLen = getTableLen(gIdeographicChars); 00138 unsigned int wordRangeLen = baseTblLen + ideoTblLen + digitTblLen; 00139 XMLInt32* wordRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate 00140 ( 00141 wordRangeLen * sizeof(XMLInt32) 00142 );//new XMLInt32[wordRangeLen]; 00143 ArrayJanitor<XMLInt32> janWordRange(wordRange, XMLPlatformUtils::fgMemoryManager); 00144 00145 setupRange(wordRange, gBaseChars, 0); 00146 setupRange(wordRange, gIdeographicChars, baseTblLen); 00147 memcpy(wordRange + baseTblLen + ideoTblLen, digitRange, digitTblLen * sizeof(XMLInt32)); 00148 00149 // Create NameChar ranges 00150 tok = tokFactory->createRange(); 00151 unsigned int combTblLen = getTableLen(gCombiningChars); 00152 unsigned int extTblLen = getTableLen(gExtenderChars); 00153 unsigned int nameTblLen = wordRangeLen + combTblLen + extTblLen; 00154 XMLInt32* nameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate 00155 ( 00156 (nameTblLen + 8) * sizeof(XMLInt32) 00157 );//new XMLInt32[nameTblLen + 8]; 00158 00159 tok->setRangeValues(nameRange, nameTblLen + 8); 00160 memcpy(nameRange, wordRange, wordRangeLen * sizeof(XMLInt32)); 00161 setupRange(nameRange, gCombiningChars, wordRangeLen); 00162 setupRange(nameRange, gExtenderChars, wordRangeLen + combTblLen); 00163 nameRange[nameTblLen++] = chDash; 00164 nameRange[nameTblLen++] = chDash; 00165 nameRange[nameTblLen++] = chColon; 00166 nameRange[nameTblLen++] = chColon; 00167 nameRange[nameTblLen++] = chPeriod; 00168 nameRange[nameTblLen++] = chPeriod; 00169 nameRange[nameTblLen++] = chUnderscore; 00170 nameRange[nameTblLen++] = chUnderscore; 00171 tok->sortRanges(); 00172 tok->compactRanges(); 00173 // Build the internal map. 00174 tok->createMap(); 00175 rangeTokMap->setRangeToken(fgXMLNameChar, tok); 00176 00177 tok = RangeToken::complementRanges(tok, tokFactory); 00178 // Build the internal map. 00179 tok->createMap(); 00180 rangeTokMap->setRangeToken(fgXMLNameChar, tok , true); 00181 00182 // Create initialNameChar ranges 00183 tok = tokFactory->createRange(); 00184 unsigned int initialNameTblLen = baseTblLen + ideoTblLen; 00185 XMLInt32* initialNameRange = (XMLInt32*) XMLPlatformUtils::fgMemoryManager->allocate 00186 ( 00187 (initialNameTblLen + 4) * sizeof(XMLInt32) 00188 );//new XMLInt32[initialNameTblLen + 4]; 00189 00190 tok->setRangeValues(initialNameRange, initialNameTblLen + 4); 00191 memcpy(initialNameRange, wordRange, initialNameTblLen * sizeof(XMLInt32)); 00192 initialNameRange[initialNameTblLen++] = chColon; 00193 initialNameRange[initialNameTblLen++] = chColon; 00194 initialNameRange[initialNameTblLen++] = chUnderscore; 00195 initialNameRange[initialNameTblLen++] = chUnderscore; 00196 tok->sortRanges(); 00197 tok->compactRanges(); 00198 // Build the internal map. 00199 tok->createMap(); 00200 rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok); 00201 00202 tok = RangeToken::complementRanges(tok, tokFactory); 00203 // Build the internal map. 00204 tok->createMap(); 00205 rangeTokMap->setRangeToken(fgXMLInitialNameChar, tok , true); 00206 00207 // Create word range 00208 // \w = [#x0000-#x10FFFF]-[\p{P}\p{Z}\p{C}] (all characters except the set of "punctuation", "separator" and "other" characters) 00209 tok = tokFactory->createRange(); 00210 for(int i=0; i<=0xFFFF; i++) 00211 { 00212 unsigned short chType=UnicodeRangeFactory::getUniCategory(XMLUniCharacter::getType(i)); 00213 if(chType == UnicodeRangeFactory::CHAR_PUNCTUATION || 00214 chType == UnicodeRangeFactory::CHAR_SEPARATOR || 00215 chType == UnicodeRangeFactory::CHAR_OTHER) 00216 tok->addRange(i, i); 00217 } 00218 tok->sortRanges(); 00219 tok->compactRanges(); 00220 // Build the internal map. 00221 tok->createMap(); 00222 rangeTokMap->setRangeToken(fgXMLWord, tok , true); 00223 00224 tok = RangeToken::complementRanges(tok, tokFactory); 00225 // Build the internal map. 00226 tok->createMap(); 00227 rangeTokMap->setRangeToken(fgXMLWord, tok); 00228 00229 00230 fRangesCreated = true; 00231 } 00232 00233 // --------------------------------------------------------------------------- 00234 // XMLRangeFactory: Range creation methods 00235 // --------------------------------------------------------------------------- 00236 void XMLRangeFactory::initializeKeywordMap(RangeTokenMap *rangeTokMap) { 00237 00238 if (fKeywordsInitialized) 00239 return; 00240 00241 rangeTokMap->addKeywordMap(fgXMLSpace, fgXMLCategory); 00242 rangeTokMap->addKeywordMap(fgXMLDigit, fgXMLCategory); 00243 rangeTokMap->addKeywordMap(fgXMLWord, fgXMLCategory); 00244 rangeTokMap->addKeywordMap(fgXMLNameChar, fgXMLCategory); 00245 rangeTokMap->addKeywordMap(fgXMLInitialNameChar, fgXMLCategory); 00246 00247 fKeywordsInitialized = true; 00248 } 00249 00250 XERCES_CPP_NAMESPACE_END 00251