GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: DFAContentModel.cpp 901107 2010-01-20 08:45:02Z borisk $ 00020 */ 00021 00022 00023 // --------------------------------------------------------------------------- 00024 // Includes 00025 // --------------------------------------------------------------------------- 00026 #include <xercesc/util/RuntimeException.hpp> 00027 #include <xercesc/framework/XMLBuffer.hpp> 00028 #include <xercesc/framework/XMLElementDecl.hpp> 00029 #include <xercesc/framework/XMLValidator.hpp> 00030 #include <xercesc/validators/common/CMAny.hpp> 00031 #include <xercesc/validators/common/CMBinaryOp.hpp> 00032 #include <xercesc/validators/common/CMLeaf.hpp> 00033 #include <xercesc/validators/common/CMRepeatingLeaf.hpp> 00034 #include <xercesc/validators/common/CMUnaryOp.hpp> 00035 #include <xercesc/validators/common/DFAContentModel.hpp> 00036 #include <xercesc/validators/common/ContentSpecNode.hpp> 00037 #include <xercesc/validators/common/Grammar.hpp> 00038 #include <xercesc/validators/schema/SchemaSymbols.hpp> 00039 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp> 00040 #include <xercesc/validators/schema/XercesElementWildcard.hpp> 00041 #include <xercesc/util/RefHashTableOf.hpp> 00042 #include <xercesc/util/XMLInteger.hpp> 00043 #include <math.h> 00044 00045 XERCES_CPP_NAMESPACE_BEGIN 00046 00047 struct CMStateSetHasher 00048 { 00049 XMLSize_t getHashVal(const void *const key, XMLSize_t mod) 00050 { 00051 const CMStateSet* const pkey = (const CMStateSet*) key; 00052 return ((pkey->hashCode()) % mod); 00053 } 00054 00055 bool equals(const void *const key1, const void *const key2) 00056 { 00057 const CMStateSet* const pkey1 = (const CMStateSet*) key1; 00058 const CMStateSet* const pkey2 = (const CMStateSet*) key2; 00059 return (*pkey1==*pkey2); 00060 } 00061 }; 00062 00063 // --------------------------------------------------------------------------- 00064 // DFAContentModel: Constructors and Destructor 00065 // --------------------------------------------------------------------------- 00066 DFAContentModel::DFAContentModel( const bool dtd 00067 , ContentSpecNode* const elemContentSpec 00068 , MemoryManager* const manager) : 00069 00070 fElemMap(0) 00071 , fElemMapType(0) 00072 , fElemMapSize(0) 00073 , fEmptyOk(false) 00074 , fEOCPos(0) 00075 , fFinalStateFlags(0) 00076 , fFollowList(0) 00077 , fHeadNode(0) 00078 , fLeafCount(0) 00079 , fLeafList(0) 00080 , fLeafListType(0) 00081 , fTransTable(0) 00082 , fTransTableSize(0) 00083 , fCountingStates(0) 00084 , fDTD(dtd) 00085 , fIsMixed(false) 00086 , fLeafNameTypeVector(0) 00087 , fMemoryManager(manager) 00088 { 00089 // And build the DFA data structures 00090 buildDFA(elemContentSpec); 00091 } 00092 00093 DFAContentModel::DFAContentModel( const bool dtd 00094 , ContentSpecNode* const elemContentSpec 00095 , const bool isMixed 00096 , MemoryManager* const manager): 00097 00098 fElemMap(0) 00099 , fElemMapType(0) 00100 , fElemMapSize(0) 00101 , fEmptyOk(false) 00102 , fEOCPos(0) 00103 , fFinalStateFlags(0) 00104 , fFollowList(0) 00105 , fHeadNode(0) 00106 , fLeafCount(0) 00107 , fLeafList(0) 00108 , fLeafListType(0) 00109 , fTransTable(0) 00110 , fTransTableSize(0) 00111 , fCountingStates(0) 00112 , fDTD(dtd) 00113 , fIsMixed(isMixed) 00114 , fLeafNameTypeVector(0) 00115 , fMemoryManager(manager) 00116 { 00117 // And build the DFA data structures 00118 buildDFA(elemContentSpec); 00119 } 00120 00121 DFAContentModel::~DFAContentModel() 00122 { 00123 // 00124 // Clean up all the stuff that is not just temporary representation 00125 // data that was cleaned up after building the DFA. 00126 // 00127 fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags; 00128 00129 unsigned int index; 00130 for (index = 0; index < fTransTableSize; index++) 00131 fMemoryManager->deallocate(fTransTable[index]); //delete [] fTransTable[index]; 00132 fMemoryManager->deallocate(fTransTable); //delete [] fTransTable; 00133 00134 if(fCountingStates) 00135 { 00136 for (unsigned int j = 0; j < fTransTableSize; ++j) 00137 delete fCountingStates[j]; 00138 fMemoryManager->deallocate(fCountingStates); 00139 } 00140 00141 for (index = 0; index < fLeafCount; index++) 00142 delete fElemMap[index]; 00143 fMemoryManager->deallocate(fElemMap); //delete [] fElemMap; 00144 00145 fMemoryManager->deallocate(fElemMapType); //delete [] fElemMapType; 00146 fMemoryManager->deallocate(fLeafListType); //delete [] fLeafListType; 00147 00148 delete fLeafNameTypeVector; 00149 } 00150 00151 00152 // --------------------------------------------------------------------------- 00153 // DFAContentModel: Implementation of the ContentModel virtual interface 00154 // --------------------------------------------------------------------------- 00155 bool 00156 DFAContentModel::validateContent( QName** const children 00157 , XMLSize_t childCount 00158 , unsigned int 00159 , XMLSize_t* indexFailingChild 00160 , MemoryManager* const) const 00161 { 00162 // 00163 // If there are no children, then either we fail on the 0th element 00164 // or we return success. It depends upon whether this content model 00165 // accepts empty content, which we determined earlier. 00166 // 00167 if (!childCount) 00168 { 00169 // success 00170 if(fEmptyOk) 00171 return true; 00172 *indexFailingChild=0; 00173 return false; 00174 } 00175 00176 // 00177 // Lets loop through the children in the array and move our way 00178 // through the states. Note that we use the fElemMap array to map 00179 // an element index to a state index. 00180 // 00181 unsigned int curState = 0; 00182 unsigned int nextState = 0; 00183 unsigned int loopCount = 0; 00184 unsigned int childIndex = 0; 00185 for (; childIndex < childCount; childIndex++) 00186 { 00187 // Get the current element index out 00188 const QName* curElem = children[childIndex]; 00189 const XMLCh* curElemRawName = 0; 00190 if (fDTD) 00191 curElemRawName = curElem->getRawName(); 00192 00193 // If this is text in a Schema mixed content model, skip it. 00194 if ( fIsMixed && 00195 ( curElem->getURI() == XMLElementDecl::fgPCDataElemId)) 00196 continue; 00197 00198 // Look up this child in our element map 00199 unsigned int elemIndex = 0; 00200 for (; elemIndex < fElemMapSize; elemIndex++) 00201 { 00202 const QName* inElem = fElemMap[elemIndex]; 00203 if (fDTD) { 00204 if (XMLString::equals(inElem->getRawName(), curElemRawName)) { 00205 nextState = fTransTable[curState][elemIndex]; 00206 if (nextState != XMLContentModel::gInvalidTrans) 00207 break; 00208 } 00209 } 00210 else { 00211 ContentSpecNode::NodeTypes type = fElemMapType[elemIndex]; 00212 if (type == ContentSpecNode::Leaf) 00213 { 00214 if ((inElem->getURI() == curElem->getURI()) && 00215 (XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) { 00216 nextState = fTransTable[curState][elemIndex]; 00217 if (nextState != XMLContentModel::gInvalidTrans) 00218 break; 00219 } 00220 } 00221 else if ((type & 0x0f)== ContentSpecNode::Any) 00222 { 00223 nextState = fTransTable[curState][elemIndex]; 00224 if (nextState != XMLContentModel::gInvalidTrans) 00225 break; 00226 } 00227 else if ((type & 0x0f) == ContentSpecNode::Any_NS) 00228 { 00229 if (inElem->getURI() == curElem->getURI()) 00230 { 00231 nextState = fTransTable[curState][elemIndex]; 00232 if (nextState != XMLContentModel::gInvalidTrans) 00233 break; 00234 } 00235 } 00236 else if ((type & 0x0f) == ContentSpecNode::Any_Other) 00237 { 00238 // Here we assume that empty string has id 1. 00239 // 00240 unsigned int uriId = curElem->getURI(); 00241 if (uriId != 1 && uriId != inElem->getURI()) { 00242 nextState = fTransTable[curState][elemIndex]; 00243 if (nextState != XMLContentModel::gInvalidTrans) 00244 break; 00245 } 00246 } 00247 } 00248 }//for elemIndex 00249 00250 // If "nextState" is -1, we found a match, but the transition is invalid 00251 if (nextState == XMLContentModel::gInvalidTrans) 00252 { 00253 *indexFailingChild=childIndex; 00254 return false; 00255 } 00256 00257 // If we didn't find it, then obviously not valid 00258 if (elemIndex == fElemMapSize) 00259 { 00260 *indexFailingChild=childIndex; 00261 return false; 00262 } 00263 00264 unsigned int nextLoop = 0; 00265 if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, 0)) 00266 { 00267 *indexFailingChild=childIndex; 00268 return false; 00269 } 00270 00271 curState = nextState; 00272 loopCount = nextLoop; 00273 nextState = 0; 00274 00275 }//for childIndex 00276 00277 // 00278 // We transitioned all the way through the input list. However, that 00279 // does not mean that we ended in a final state. So check whether 00280 // our ending state is a final state. 00281 // 00282 if (!fFinalStateFlags[curState]) 00283 { 00284 *indexFailingChild=childIndex; 00285 return false; 00286 } 00287 00288 // verify if we exited before the minOccurs was satisfied 00289 if (fCountingStates != 0) { 00290 Occurence* o = fCountingStates[curState]; 00291 if (o != 0 && loopCount < (unsigned int)o->minOccurs) { 00292 // not enough loops on the current state to be considered final. 00293 *indexFailingChild=childIndex; 00294 return false; 00295 } 00296 } 00297 00298 //success 00299 return true; 00300 } 00301 00302 bool DFAContentModel::validateContentSpecial(QName** const children 00303 , XMLSize_t childCount 00304 , unsigned int 00305 , GrammarResolver* const pGrammarResolver 00306 , XMLStringPool* const pStringPool 00307 , XMLSize_t* indexFailingChild 00308 , MemoryManager* const) const 00309 { 00310 00311 SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool); 00312 00313 if (childCount == 0) 00314 { 00315 if(fEmptyOk) 00316 return true; 00317 *indexFailingChild=0; 00318 return false; 00319 } 00320 00321 // 00322 // Lets loop through the children in the array and move our way 00323 // through the states. Note that we use the fElemMap array to map 00324 // an element index to a state index. 00325 // 00326 unsigned int curState = 0; 00327 unsigned int loopCount = 0; 00328 unsigned int nextState = 0; 00329 unsigned int childIndex = 0; 00330 for (; childIndex < childCount; childIndex++) 00331 { 00332 // Get the current element index out 00333 QName* curElem = children[childIndex]; 00334 00335 // If this is text in a Schema mixed content model, skip it. 00336 if ( fIsMixed && 00337 ( curElem->getURI() == XMLElementDecl::fgPCDataElemId)) 00338 continue; 00339 00340 // Look up this child in our element map 00341 unsigned int elemIndex = 0; 00342 for (; elemIndex < fElemMapSize; elemIndex++) 00343 { 00344 QName* inElem = fElemMap[elemIndex]; 00345 ContentSpecNode::NodeTypes type = fElemMapType[elemIndex]; 00346 if (type == ContentSpecNode::Leaf) 00347 { 00348 if (comparator.isEquivalentTo(curElem, inElem) ) 00349 { 00350 nextState = fTransTable[curState][elemIndex]; 00351 if (nextState != XMLContentModel::gInvalidTrans) 00352 break; 00353 } 00354 00355 } 00356 else if ((type & 0x0f)== ContentSpecNode::Any) 00357 { 00358 nextState = fTransTable[curState][elemIndex]; 00359 if (nextState != XMLContentModel::gInvalidTrans) 00360 break; 00361 } 00362 else if ((type & 0x0f) == ContentSpecNode::Any_NS) 00363 { 00364 if (inElem->getURI() == curElem->getURI()) 00365 { 00366 nextState = fTransTable[curState][elemIndex]; 00367 if (nextState != XMLContentModel::gInvalidTrans) 00368 break; 00369 } 00370 } 00371 else if ((type & 0x0f) == ContentSpecNode::Any_Other) 00372 { 00373 // Here we assume that empty string has id 1. 00374 // 00375 unsigned int uriId = curElem->getURI(); 00376 if (uriId != 1 && uriId != inElem->getURI()) 00377 { 00378 nextState = fTransTable[curState][elemIndex]; 00379 if (nextState != XMLContentModel::gInvalidTrans) 00380 break; 00381 } 00382 } 00383 }//for elemIndex 00384 00385 // If "nextState" is -1, we found a match, but the transition is invalid 00386 if (nextState == XMLContentModel::gInvalidTrans) 00387 { 00388 *indexFailingChild=childIndex; 00389 return false; 00390 } 00391 00392 // If we didn't find it, then obviously not valid 00393 if (elemIndex == fElemMapSize) 00394 { 00395 *indexFailingChild=childIndex; 00396 return false; 00397 } 00398 00399 unsigned int nextLoop = 0; 00400 if(!handleRepetitions(curElem, curState, loopCount, nextState, nextLoop, elemIndex, &comparator)) 00401 { 00402 *indexFailingChild=childIndex; 00403 return false; 00404 } 00405 00406 curState = nextState; 00407 loopCount = nextLoop; 00408 nextState = 0; 00409 00410 }//for childIndex 00411 00412 // 00413 // We transitioned all the way through the input list. However, that 00414 // does not mean that we ended in a final state. So check whether 00415 // our ending state is a final state. 00416 // 00417 if (!fFinalStateFlags[curState]) 00418 { 00419 *indexFailingChild=childIndex; 00420 return false; 00421 } 00422 00423 // verify if we exited before the minOccurs was satisfied 00424 if (fCountingStates != 0) { 00425 Occurence* o = fCountingStates[curState]; 00426 if (o != 0) { 00427 if (loopCount < (unsigned int)o->minOccurs) { 00428 // not enough loops on the current state. 00429 *indexFailingChild=childIndex; 00430 return false; 00431 } 00432 } 00433 } 00434 00435 //success 00436 return true; 00437 } 00438 00439 bool DFAContentModel::handleRepetitions(const QName* const curElem, 00440 unsigned int curState, 00441 unsigned int currentLoop, 00442 unsigned int& nextState, 00443 unsigned int& nextLoop, 00444 XMLSize_t elemIndex, 00445 SubstitutionGroupComparator * comparator) const 00446 { 00447 nextLoop = 0; 00448 if (fCountingStates != 0) { 00449 nextLoop = currentLoop; 00450 Occurence* o = fCountingStates[curState]; 00451 if (o != 0) { 00452 if (curState == nextState) { 00453 if (++nextLoop > (unsigned int)o->maxOccurs && o->maxOccurs != -1) { 00454 // It's likely that we looped too many times on the current state 00455 // however it's possible that we actually matched another particle 00456 // which allows the same name. 00457 // 00458 // Consider: 00459 // 00460 // <xs:sequence> 00461 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 00462 // <xs:element name="foo" type="xs:string" fixed="bar"/> 00463 // </xs:sequence> 00464 // 00465 // and 00466 // 00467 // <xs:sequence> 00468 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 00469 // <xs:any namespace="##any" processContents="skip"/> 00470 // </xs:sequence> 00471 // 00472 // In the DFA there will be two transitions from the current state which 00473 // allow "foo". Note that this is not a UPA violation. The ambiguity of which 00474 // transition to take is resolved by the current value of the counter. Since 00475 // we've already seen enough instances of the first "foo" perhaps there is 00476 // another element declaration or wildcard deeper in the element map which 00477 // matches. 00478 unsigned int tempNextState = 0; 00479 00480 while (++elemIndex < fElemMapSize) { 00481 QName* inElem = fElemMap[elemIndex]; 00482 ContentSpecNode::NodeTypes type = fElemMapType[elemIndex]; 00483 if (type == ContentSpecNode::Leaf) 00484 { 00485 if(comparator!=0) { 00486 if (comparator->isEquivalentTo(curElem, inElem) ) 00487 { 00488 tempNextState = fTransTable[curState][elemIndex]; 00489 if (tempNextState != XMLContentModel::gInvalidTrans) 00490 break; 00491 } 00492 } 00493 else if (fDTD) { 00494 if (XMLString::equals(inElem->getRawName(), curElem->getRawName())) { 00495 tempNextState = fTransTable[curState][elemIndex]; 00496 if (tempNextState != XMLContentModel::gInvalidTrans) 00497 break; 00498 } 00499 } 00500 else { 00501 if ((inElem->getURI() == curElem->getURI()) && 00502 (XMLString::equals(inElem->getLocalPart(), curElem->getLocalPart()))) { 00503 tempNextState = fTransTable[curState][elemIndex]; 00504 if (tempNextState != XMLContentModel::gInvalidTrans) 00505 break; 00506 } 00507 } 00508 } 00509 else if ((type & 0x0f)== ContentSpecNode::Any) 00510 { 00511 tempNextState = fTransTable[curState][elemIndex]; 00512 if (tempNextState != XMLContentModel::gInvalidTrans) 00513 break; 00514 } 00515 else if ((type & 0x0f) == ContentSpecNode::Any_NS) 00516 { 00517 if (inElem->getURI() == curElem->getURI()) 00518 { 00519 tempNextState = fTransTable[curState][elemIndex]; 00520 if (tempNextState != XMLContentModel::gInvalidTrans) 00521 break; 00522 } 00523 } 00524 else if ((type & 0x0f) == ContentSpecNode::Any_Other) 00525 { 00526 // Here we assume that empty string has id 1. 00527 // 00528 unsigned int uriId = curElem->getURI(); 00529 if (uriId != 1 && uriId != inElem->getURI()) 00530 { 00531 tempNextState = fTransTable[curState][elemIndex]; 00532 if (tempNextState != XMLContentModel::gInvalidTrans) 00533 break; 00534 } 00535 } 00536 } 00537 00538 // if we still can't find a match, report the error 00539 if (elemIndex == fElemMapSize) 00540 return false; 00541 00542 // if we found a match, set the next state and reset the 00543 // counter if the next state is a counting state. 00544 nextState = tempNextState; 00545 Occurence* o = fCountingStates[nextState]; 00546 if (o != 0) { 00547 nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0; 00548 } 00549 } 00550 } 00551 else if (nextLoop < (unsigned int)o->minOccurs) { 00552 // not enough loops on the current state. 00553 return false; 00554 } 00555 else { 00556 // Exiting a counting state. If we're entering a new 00557 // counting state, reset the counter. 00558 o = fCountingStates[nextState]; 00559 if (o != 0) { 00560 nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0; 00561 } 00562 } 00563 } 00564 else { 00565 o = fCountingStates[nextState]; 00566 if (o != 0) { 00567 // Entering a new counting state. Reset the counter. 00568 // If we've already seen one instance of the looping 00569 // particle set the counter to 1, otherwise set it 00570 // to 0. 00571 nextLoop = (elemIndex == XMLSize_t (o->elemIndex)) ? 1 : 0; 00572 } 00573 } 00574 } 00575 return true; 00576 } 00577 00578 // --------------------------------------------------------------------------- 00579 // DFAContentModel: Private helper methods 00580 // --------------------------------------------------------------------------- 00581 void DFAContentModel::buildDFA(ContentSpecNode* const curNode) 00582 { 00583 unsigned int index; 00584 00585 // 00586 // The first step we need to take is to rewrite the content model using 00587 // our CMNode objects, and in the process get rid of any repetition short 00588 // cuts, converting them into '*' style repetitions or getting rid of 00589 // repetitions altogether. 00590 // 00591 // The conversions done are: 00592 // 00593 // x+ -> (x|x*) 00594 // x? -> (x|epsilon) 00595 // 00596 // This is a relatively complex scenario. What is happening is that we 00597 // create a top level binary node of which the special EOC value is set 00598 // as the right side node. The the left side is set to the rewritten 00599 // syntax tree. The source is the original content model info from the 00600 // decl pool. The rewrite is done by buildSyntaxTree() which recurses the 00601 // decl pool's content of the element and builds a new tree in the 00602 // process. 00603 // 00604 // Note that, during this operation, we set each non-epsilon leaf node's 00605 // DFA state position and count the number of such leafs, which is left 00606 // in the fLeafCount member. 00607 // 00608 fLeafCount=countLeafNodes(curNode); 00609 fEOCPos = fLeafCount++; 00610 00611 // We need to build an array of references to the non-epsilon 00612 // leaf nodes. We will put them in the array according to their position values 00613 // 00614 fLeafList = (CMLeaf**) fMemoryManager->allocate(fLeafCount*sizeof(CMLeaf*)); //new CMLeaf*[fLeafCount]; 00615 fLeafListType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate 00616 ( 00617 fLeafCount * sizeof(ContentSpecNode::NodeTypes) 00618 ); //new ContentSpecNode::NodeTypes[fLeafCount]; 00619 // 00620 // And, moving onward... We now need to build the follow position sets 00621 // for all the nodes. So we allocate an array of pointers to state sets, 00622 // one for each leaf node (i.e. each significant DFA position.) 00623 // 00624 fFollowList = (CMStateSet**) fMemoryManager->allocate 00625 ( 00626 fLeafCount * sizeof(CMStateSet*) 00627 ); //new CMStateSet*[fLeafCount]; 00628 for (index = 0; index < fLeafCount; index++) 00629 fFollowList[index] = new (fMemoryManager) CMStateSet(fLeafCount, fMemoryManager); 00630 00631 // The buildSyntaxTree function will recursively iterate over the ContentSpecNode 00632 // and build the CMNode hierarchy; it will also put every leaf node in the fLeafList 00633 // array, then calculate the first and last position sets of each node. This is 00634 // cached away in each of the nodes. 00635 // 00636 // Along the way we also set the leaf count in each node as the maximum 00637 // state count. They must know this in order to create their first/last 00638 // position sets. 00639 // 00640 unsigned int counter=0; 00641 CMNode* nodeOrgContent = buildSyntaxTree(curNode, counter); 00642 // 00643 // Check to see whether this content model can handle an empty content, 00644 // which is something we need to optimize by looking now before we 00645 // throw away the info that would tell us that. 00646 // 00647 // If the left node of the head (the top level of the original content) 00648 // is nullable, then its true. 00649 // 00650 fEmptyOk = nodeOrgContent->isNullable(); 00651 00652 // 00653 // And handle specially the EOC node, which also must be numbered and 00654 // counted as a non-epsilon leaf node. It could not be handled in the 00655 // above tree build because it was created before all that started. We 00656 // save the EOC position since its used during the DFA building loop. 00657 // 00658 CMLeaf* nodeEOC = new (fMemoryManager) CMLeaf 00659 ( 00660 new (fMemoryManager) QName 00661 ( 00662 XMLUni::fgZeroLenString 00663 , XMLUni::fgZeroLenString 00664 , XMLContentModel::gEOCFakeId 00665 , fMemoryManager 00666 ) 00667 , fEOCPos 00668 , true 00669 , fLeafCount 00670 , fMemoryManager 00671 ); 00672 fHeadNode = new (fMemoryManager) CMBinaryOp 00673 ( 00674 ContentSpecNode::Sequence 00675 , nodeOrgContent 00676 , nodeEOC 00677 , fLeafCount 00678 , fMemoryManager 00679 ); 00680 00681 // Put also the final EOC node in the leaf array 00682 fLeafList[counter] = new (fMemoryManager) CMLeaf 00683 ( 00684 nodeEOC->getElement() 00685 , nodeEOC->getPosition() 00686 , fLeafCount 00687 , fMemoryManager 00688 ); 00689 fLeafListType[counter] = ContentSpecNode::Leaf; 00690 00691 // 00692 // Now handle our top level. We use our left child's last pos set and our 00693 // right child's first pos set, so get them now for convenience. 00694 // 00695 const CMStateSet& last = nodeOrgContent->getLastPos(); 00696 const CMStateSet& first = nodeEOC->getFirstPos(); 00697 00698 // 00699 // Now, for every position which is in our left child's last set 00700 // add all of the states in our right child's first set to the 00701 // follow set for that position. 00702 // 00703 CMStateSetEnumerator enumLast(&last); 00704 while(enumLast.hasMoreElements()) 00705 { 00706 XMLSize_t index=enumLast.nextElement(); 00707 *fFollowList[index] |= first; 00708 } 00709 00710 // 00711 // And finally the big push... Now we build the DFA using all the states 00712 // and the tree we've built up. First we set up the various data 00713 // structures we are going to use while we do this. 00714 // 00715 // First of all we need an array of unique element ids in our content 00716 // model. For each transition table entry, we need a set of contiguous 00717 // indices to represent the transitions for a particular input element. 00718 // So we need to a zero based range of indexes that map to element types. 00719 // This element map provides that mapping. 00720 // 00721 fElemMap = (QName**) fMemoryManager->allocate 00722 ( 00723 fLeafCount * sizeof(QName*) 00724 ); //new QName*[fLeafCount]; 00725 fElemMapType = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate 00726 ( 00727 fLeafCount * sizeof(ContentSpecNode::NodeTypes) 00728 ); //new ContentSpecNode::NodeTypes[fLeafCount]; 00729 fElemMapSize = 0; 00730 00731 Occurence** elemOccurenceMap=0; 00732 for (unsigned int outIndex = 0; outIndex < fLeafCount; outIndex++) 00733 { 00734 fElemMap[outIndex] = new (fMemoryManager) QName(fMemoryManager); 00735 00736 if ( (fLeafListType[outIndex] & 0x0f) != ContentSpecNode::Leaf ) 00737 if (!fLeafNameTypeVector) 00738 fLeafNameTypeVector = new (fMemoryManager) ContentLeafNameTypeVector(fMemoryManager); 00739 00740 // Get the current leaf's element index 00741 CMLeaf* leaf=fLeafList[outIndex]; 00742 const QName* element = leaf->getElement(); 00743 const XMLCh* elementRawName = 0; 00744 if (fDTD && element) 00745 elementRawName = element->getRawName(); 00746 00747 // See if the current leaf node's element index is in the list 00748 unsigned int inIndex = 0; 00749 00750 for (; inIndex < fElemMapSize; inIndex++) 00751 { 00752 const QName* inElem = fElemMap[inIndex]; 00753 if (fDTD) { 00754 if (XMLString::equals(inElem->getRawName(), elementRawName)) { 00755 break; 00756 } 00757 } 00758 else { 00759 if ((fElemMapType[inIndex] == fLeafListType[outIndex]) && 00760 (inElem->getURI() == element->getURI()) && 00761 (XMLString::equals(inElem->getLocalPart(), element->getLocalPart()))) { 00762 break; 00763 } 00764 } 00765 } 00766 00767 // If it was not in the list, then add it and bump the map size 00768 if (inIndex == fElemMapSize) 00769 { 00770 fElemMap[fElemMapSize]->setValues(*element); 00771 if(leaf->isRepeatableLeaf()) 00772 { 00773 if (elemOccurenceMap == 0) { 00774 elemOccurenceMap = (Occurence**)fMemoryManager->allocate(fLeafCount*sizeof(Occurence*)); 00775 memset(elemOccurenceMap, 0, fLeafCount*sizeof(Occurence*)); 00776 } 00777 elemOccurenceMap[fElemMapSize] = new (fMemoryManager) Occurence(((CMRepeatingLeaf*)leaf)->getMinOccurs(), ((CMRepeatingLeaf*)leaf)->getMaxOccurs(), fElemMapSize); 00778 } 00779 fElemMapType[fElemMapSize] = fLeafListType[outIndex]; 00780 ++fElemMapSize; 00781 } 00782 } 00783 00784 // set up the fLeafNameTypeVector object if there is one. 00785 if (fLeafNameTypeVector) { 00786 fLeafNameTypeVector->setValues(fElemMap, fElemMapType, fElemMapSize); 00787 } 00788 00789 /*** 00790 * Optimization(Jan, 2001); We sort fLeafList according to 00791 * elemIndex which is *uniquely* associated to each leaf. 00792 * We are *assuming* that each element appears in at least one leaf. 00793 **/ 00794 // don't forget to delete it 00795 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH 00796 int *fLeafSorter = (int*) fMemoryManager->allocate 00797 ( 00798 (fLeafCount + fElemMapSize) * sizeof(int) 00799 ); //new int[fLeafCount + fElemMapSize]; 00800 unsigned int fSortCount = 0; 00801 00802 for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) 00803 { 00804 const QName* element = fElemMap[elemIndex]; 00805 const XMLCh* elementRawName = 0; 00806 if (fDTD && element) 00807 elementRawName = element->getRawName(); 00808 00809 for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) 00810 { 00811 const QName* leaf = fLeafList[leafIndex]->getElement(); 00812 if (fDTD) { 00813 if (XMLString::equals(leaf->getRawName(), elementRawName)) { 00814 fLeafSorter[fSortCount++] = leafIndex; 00815 } 00816 } 00817 else { 00818 if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) && 00819 (leaf->getURI() == element->getURI()) && 00820 (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) { 00821 fLeafSorter[fSortCount++] = leafIndex; 00822 } 00823 } 00824 } 00825 fLeafSorter[fSortCount++] = -1; 00826 } 00827 #endif 00828 00829 // instead of using a single array with -1 to separate elements, use a bidimensional map 00830 unsigned int** fLeafSorter = (unsigned int**)fMemoryManager->allocate(fElemMapSize * sizeof(unsigned int*)); 00831 unsigned int* tmpSorter = (unsigned int*)fMemoryManager->allocate(fLeafCount * sizeof(unsigned int)); 00832 for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) 00833 { 00834 const QName* element = fElemMap[elemIndex]; 00835 const XMLCh* elementRawName = 0; 00836 if (fDTD && element) 00837 elementRawName = element->getRawName(); 00838 00839 unsigned int fSortCount=0; 00840 for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) 00841 { 00842 const QName* leaf = fLeafList[leafIndex]->getElement(); 00843 if (fDTD) { 00844 if (XMLString::equals(leaf->getRawName(), elementRawName)) { 00845 tmpSorter[fSortCount++] = leafIndex; 00846 } 00847 } 00848 else { 00849 if ((fElemMapType[elemIndex] == fLeafListType[leafIndex]) && 00850 (leaf->getURI() == element->getURI()) && 00851 (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) { 00852 tmpSorter[fSortCount++] = leafIndex; 00853 } 00854 } 00855 } 00856 00857 fLeafSorter[elemIndex]=(unsigned int*)fMemoryManager->allocate((fSortCount+1) * sizeof(unsigned int)); 00858 fLeafSorter[elemIndex][0]=fSortCount; 00859 for (unsigned int index=0;index<fSortCount;index++) 00860 fLeafSorter[elemIndex][index+1]=tmpSorter[index]; 00861 } 00862 fMemoryManager->deallocate(tmpSorter); 00863 00864 // 00865 // Next lets create some arrays, some that that hold transient info 00866 // during the DFA build and some that are permament. These are kind of 00867 // sticky since we cannot know how big they will get, but we don't want 00868 // to use any collection type classes because of performance. 00869 // 00870 // Basically they will probably be about fLeafCount*2 on average, but can 00871 // be as large as 2^(fLeafCount*2), worst case. So we start with 00872 // fLeafCount*4 as a middle ground. This will be very unlikely to ever 00873 // have to expand though, it if does, the overhead will be somewhat ugly. 00874 // 00875 unsigned int curArraySize = fLeafCount * 4; 00876 CMStateSet** statesToDo = (CMStateSet**) 00877 fMemoryManager->allocate 00878 ( 00879 curArraySize * sizeof(CMStateSet*) 00880 ); //new const CMStateSet*[curArraySize]; 00881 fFinalStateFlags = (bool*) fMemoryManager->allocate 00882 ( 00883 curArraySize * sizeof(bool) 00884 ); //new bool[curArraySize]; 00885 fTransTable = (unsigned int**) fMemoryManager->allocate 00886 ( 00887 curArraySize * sizeof(unsigned int*) 00888 ); //new unsigned int*[curArraySize]; 00889 00890 // 00891 // Ok we start with the initial set as the first pos set of the head node 00892 // (which is the seq node that holds the content model and the EOC node.) 00893 // 00894 CMStateSet* setT = new (fMemoryManager) CMStateSet(fHeadNode->getFirstPos()); 00895 00896 // 00897 // Note on memory leak: Bugzilla#2707: 00898 // =================================== 00899 // The CMBinary, pointed to by fHeadNode, shall be released by 00900 // deleted by itself. 00901 // 00902 // fLeafList[] maintains its **OWN** copy of CMLeaf to avoid double deletion 00903 // of CMLeaf. 00904 // 00905 00906 delete fHeadNode; 00907 00908 // 00909 // Init our two state flags. Basically the unmarked state counter is 00910 // always chasing the current state counter. When it catches up, that 00911 // means we made a pass through that did not add any new states to the 00912 // lists, at which time we are done. We could have used a expanding array 00913 // of flags which we used to mark off states as we complete them, but 00914 // this is easier though less readable maybe. 00915 // 00916 unsigned int unmarkedState = 0; 00917 unsigned int curState = 0; 00918 00919 // 00920 // Init the first transition table entry, and put the initial state 00921 // into the states to do list, then bump the current state. 00922 // 00923 fTransTable[curState] = makeDefStateList(); 00924 statesToDo[curState] = setT; 00925 curState++; 00926 00927 // 00928 // the stateTable is an auxiliary means to fast 00929 // identification of new state created (instead 00930 // of sequential loop statesToDo to find out), 00931 // while the role that statesToDo plays remain unchanged. 00932 // 00933 RefHashTableOf<XMLInteger, CMStateSetHasher> *stateTable = 00934 new (fMemoryManager) RefHashTableOf<XMLInteger, CMStateSetHasher> 00935 ( 00936 curArraySize 00937 , true 00938 , fMemoryManager 00939 ); 00940 //stateTable->put((CMStateSet*)setT, new (fMemoryManager) XMLInteger(0)); 00941 00942 // 00943 // Ok, almost done with the algorithm from hell... We now enter the 00944 // loop where we go until the states done counter catches up with 00945 // the states to do counter. 00946 // 00947 CMStateSet* newSet = 0; 00948 while (unmarkedState < curState) 00949 { 00950 // 00951 // Get the next unmarked state out of the list of states to do. 00952 // And get the associated transition table entry. 00953 // 00954 setT = statesToDo[unmarkedState]; 00955 unsigned int* transEntry = fTransTable[unmarkedState]; 00956 00957 // Mark this one final if it contains the EOC state 00958 fFinalStateFlags[unmarkedState] = setT->getBit(fEOCPos); 00959 00960 // Bump up the unmarked state count, marking this state done 00961 unmarkedState++; 00962 00963 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH 00964 // Optimization(Jan, 2001) 00965 unsigned int sorterIndex = 0; 00966 // Optimization(Jan, 2001) 00967 #endif 00968 00969 // Loop through each possible input symbol in the element map 00970 for (unsigned int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) 00971 { 00972 // 00973 // Build up a set of states which is the union of all of the 00974 // follow sets of DFA positions that are in the current state. If 00975 // we gave away the new set last time through then create a new 00976 // one. Otherwise, zero out the existing one. 00977 // 00978 if (!newSet) 00979 newSet = new (fMemoryManager) CMStateSet 00980 ( 00981 fLeafCount 00982 , fMemoryManager 00983 ); 00984 else 00985 newSet->zeroBits(); 00986 00987 #ifdef OBSOLETED 00988 // unoptimized code 00989 for (unsigned int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) 00990 { 00991 // If this leaf index (DFA position) is in the current set... 00992 if (setT->getBit(leafIndex)) 00993 { 00994 // 00995 // If this leaf is the current input symbol, then we want 00996 // to add its follow list to the set of states to transition 00997 // to from the current state. 00998 // 00999 const QName* leaf = fLeafList[leafIndex]->getElement(); 01000 const QName* element = fElemMap[elemIndex]; 01001 if (fDTD) { 01002 if (XMLString::equals(leaf->getRawName(), element->getRawName())) { 01003 *newSet |= *fFollowList[leafIndex]; 01004 } 01005 } 01006 else { 01007 if ((leaf->getURI() == element->getURI()) && 01008 (XMLString::equals(leaf->getLocalPart(), element->getLocalPart()))) { 01009 *newSet |= *fFollowList[leafIndex]; 01010 } 01011 } 01012 } 01013 } // for leafIndex 01014 #endif 01015 01016 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH 01017 // Optimization(Jan, 2001) 01018 int leafIndex = fLeafSorter[sorterIndex++]; 01019 01020 while (leafIndex != -1) 01021 { 01022 // If this leaf index (DFA position) is in the current set... 01023 if (setT->getBit(leafIndex)) 01024 { 01025 // 01026 // If this leaf is the current input symbol, then we 01027 // want to add its follow list to the set of states to 01028 // transition to from the current state. 01029 // 01030 *newSet |= *fFollowList[leafIndex]; 01031 } 01032 leafIndex = fLeafSorter[sorterIndex++]; 01033 } // while (leafIndex != -1) 01034 #endif 01035 01036 unsigned int* fLeafIndexes=fLeafSorter[elemIndex]; 01037 unsigned int fNumItems=fLeafIndexes[0]; 01038 if(fNumItems!=0) 01039 { 01040 // The algorithm requires finding the leaf that is present both in the bitfield of the current state, and in the 01041 // list of places where the currently tested item can appear. When this occurs, the follow list of this parent item 01042 // is added to the bitfield representing the next state. 01043 // Both the bitfield and the list of places are sorted, so we can analyze them in two ways; either iterating over the 01044 // parent items, testing the bitfield for the existence of the parent (N times a constant Tb), or by iterating over the 01045 // bitfield (restricted to the range of the sorted list of places), using a binary search to locate the leaf in the 01046 // sorted list of places (M times log(N) testing operations Ts) 01047 // Assuming that the time to test a bit is roughly the same of the time needed to compute the average of two integers, 01048 // plus a couple of comparisons and additions, we compare N agains M*log(N) to decide which algorithm should be faster given 01049 // the two sets 01050 if(fNumItems <= setT->getBitCountInRange(fLeafIndexes[1], fLeafIndexes[fNumItems])*log((float)fNumItems)) 01051 { 01052 for(unsigned int i=1; i<=fNumItems; ++i) 01053 if(setT->getBit(fLeafIndexes[i])) 01054 { 01055 // 01056 // If this leaf is the current input symbol, then we 01057 // want to add its follow list to the set of states to 01058 // transition to from the current state. 01059 // 01060 *newSet |= *fFollowList[ fLeafIndexes[i] ]; 01061 } 01062 } 01063 else 01064 { 01065 // Further optimization: given that the bitfield enumerator returns the numbers in order, 01066 // every time we raise the lower marker we know it will true also for the next bits, so 01067 // the next binary search will not start from 1 but from this index 01068 unsigned int lowIndex = 1; 01069 // Start the enumerator from the first index in the sorted list of places, 01070 // as nothing before that point will match 01071 CMStateSetEnumerator enumBits(setT, fLeafIndexes[1]); 01072 while(enumBits.hasMoreElements()) 01073 { 01074 unsigned int bitIndex=enumBits.nextElement(); 01075 // if this leaf is greater than the last index in the sorted list of places, 01076 // nothing can be found from now on, so get out of here 01077 if(bitIndex > fLeafIndexes[fNumItems]) 01078 break; 01079 01080 // Check if this leaf index (DFA position) is in the current set 01081 // (using binary search: the indexes are sorted) 01082 unsigned int first=lowIndex,last=fNumItems,i; 01083 while(first<=last) 01084 { 01085 i=(first+last)/2; 01086 if(fLeafIndexes[i]>bitIndex) 01087 last=i-1; 01088 else if(fLeafIndexes[i]<bitIndex) 01089 lowIndex=first=i+1; 01090 else 01091 { 01092 // 01093 // If this leaf is the current input symbol, then we 01094 // want to add its follow list to the set of states to 01095 // transition to from the current state. 01096 // 01097 *newSet |= *fFollowList[bitIndex]; 01098 break; 01099 } 01100 } 01101 } 01102 } 01103 } 01104 01105 // 01106 // If this new set is not empty, then see if its in the list 01107 // of states to do. If not, then add it. 01108 // 01109 if (!newSet->isEmpty()) 01110 { 01111 // 01112 // Search the 'states to do' list to see if this new 01113 // state set is already in there. 01114 // 01115 /*** 01116 unsigned int stateIndex = 0; 01117 for (; stateIndex < curState; stateIndex++) 01118 { 01119 if (*statesToDo[stateIndex] == *newSet) 01120 break; 01121 } 01122 ***/ 01123 01124 XMLInteger *stateObj = stateTable->get(newSet); 01125 unsigned int stateIndex = (stateObj == 0 ? curState : stateObj->intValue()); 01126 01127 // If we did not find it, then add it 01128 if (stateIndex == curState) 01129 { 01130 // 01131 // Put this new state into the states to do and init 01132 // a new entry at the same index in the transition 01133 // table. 01134 // 01135 statesToDo[curState] = newSet; 01136 fTransTable[curState] = makeDefStateList(); 01137 stateTable->put 01138 ( 01139 newSet 01140 , new (fMemoryManager) XMLInteger(curState) 01141 ); 01142 01143 // We now have a new state to do so bump the count 01144 curState++; 01145 01146 // 01147 // Null out the new set to indicate we adopted it. This 01148 // will cause the creation of a new set on the next time 01149 // around the loop. 01150 // 01151 newSet = 0; 01152 } 01153 01154 // 01155 // Now set this state in the transition table's entry for this 01156 // element (using its index), with the DFA state we will move 01157 // to from the current state when we see this input element. 01158 // 01159 transEntry[elemIndex] = stateIndex; 01160 01161 // Expand the arrays if we're full 01162 if (curState == curArraySize) 01163 { 01164 // 01165 // Yikes, we overflowed the initial array size, so we've 01166 // got to expand all of these arrays. So adjust up the 01167 // size by 50% and allocate new arrays. 01168 // 01169 const unsigned int newSize = (unsigned int)(curArraySize * 1.5); 01170 CMStateSet** newToDo = (CMStateSet**) 01171 fMemoryManager->allocate 01172 ( 01173 newSize * sizeof(CMStateSet*) 01174 ); //new const CMStateSet*[newSize]; 01175 bool* newFinalFlags = (bool*) fMemoryManager->allocate 01176 ( 01177 newSize * sizeof(bool) 01178 ); //new bool[newSize]; 01179 unsigned int** newTransTable = (unsigned int**) 01180 fMemoryManager->allocate 01181 ( 01182 newSize * sizeof(unsigned int*) 01183 ); //new unsigned int*[newSize]; 01184 01185 // Copy over all of the existing content 01186 for (unsigned int expIndex = 0; expIndex < curArraySize; expIndex++) 01187 { 01188 newToDo[expIndex] = statesToDo[expIndex]; 01189 newFinalFlags[expIndex] = fFinalStateFlags[expIndex]; 01190 newTransTable[expIndex] = fTransTable[expIndex]; 01191 } 01192 01193 // Clean up the old stuff 01194 fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; 01195 fMemoryManager->deallocate(fFinalStateFlags); //delete [] fFinalStateFlags; 01196 fMemoryManager->deallocate(fTransTable); //delete [] fTransTable; 01197 01198 // Store the new array size and pointers 01199 curArraySize = newSize; 01200 statesToDo = newToDo; 01201 fFinalStateFlags = newFinalFlags; 01202 fTransTable = newTransTable; 01203 } //if (curState == curArraySize) 01204 } //if (!newSet->isEmpty()) 01205 } // for elemIndex 01206 } //while 01207 01208 // Store the current state count in the trans table size 01209 fTransTableSize = curState; 01210 01211 // 01212 // Fill in the occurence information for each looping state 01213 // if we're using counters. 01214 // 01215 if (elemOccurenceMap != 0) { 01216 fCountingStates = (Occurence**)fMemoryManager->allocate(fTransTableSize*sizeof(Occurence)); 01217 memset(fCountingStates, 0, fTransTableSize*sizeof(Occurence*)); 01218 for (unsigned int i = 0; i < fTransTableSize; ++i) { 01219 unsigned int * transitions = fTransTable[i]; 01220 for (unsigned int j = 0; j < fElemMapSize; ++j) { 01221 if (i == transitions[j]) { 01222 Occurence* old=elemOccurenceMap[j]; 01223 if(old!=0) 01224 fCountingStates[i] = new (fMemoryManager) Occurence(old->minOccurs, old->maxOccurs, old->elemIndex); 01225 break; 01226 } 01227 } 01228 } 01229 for (unsigned int j = 0; j < fLeafCount; ++j) { 01230 if(elemOccurenceMap[j]!=0) 01231 delete elemOccurenceMap[j]; 01232 } 01233 fMemoryManager->deallocate(elemOccurenceMap); 01234 } 01235 01236 // If the last temp set was not stored, then clean it up 01237 if (newSet) 01238 delete newSet; 01239 01240 // 01241 // Now we can clean up all of the temporary data that was needed during 01242 // DFA build. 01243 // 01244 01245 for (index = 0; index < fLeafCount; index++) 01246 delete fFollowList[index]; 01247 fMemoryManager->deallocate(fFollowList); //delete [] fFollowList; 01248 01249 // 01250 // removeAll() will delete all data, XMLInteger, 01251 // while the keys are to be deleted by the 01252 // deletion of statesToDo. 01253 // 01254 delete stateTable; 01255 01256 for (index = 0; index < curState; index++) 01257 delete statesToDo[index]; 01258 fMemoryManager->deallocate(statesToDo); //delete [] statesToDo; 01259 01260 for (index = 0; index < fLeafCount; index++) 01261 delete fLeafList[index]; 01262 fMemoryManager->deallocate(fLeafList); //delete [] fLeafList; 01263 01264 #ifdef OPTIMIZED_BUT_STILL_LINEAR_SEARCH 01265 fMemoryManager->deallocate(fLeafSorter); //delete [] fLeafSorter; 01266 #endif 01267 for (index=0; index < fElemMapSize; index++) 01268 fMemoryManager->deallocate(fLeafSorter[index]); 01269 fMemoryManager->deallocate(fLeafSorter); 01270 } 01271 01272 unsigned int DFAContentModel::countLeafNodes(ContentSpecNode* const curNode) 01273 { 01274 unsigned int count = 0; 01275 01276 // Get the spec type of the passed node 01277 const ContentSpecNode::NodeTypes curType = curNode->getType(); 01278 01279 if ((curType & 0x0f) == ContentSpecNode::Any 01280 || (curType & 0x0f) == ContentSpecNode::Any_Other 01281 || (curType & 0x0f) == ContentSpecNode::Any_NS 01282 || curType == ContentSpecNode::Leaf 01283 || curType == ContentSpecNode::Loop) 01284 { 01285 count++; 01286 } 01287 else 01288 { 01289 // 01290 // Its not a leaf, so we have to recurse its left and maybe right 01291 // nodes. Save both values before we recurse and trash the node. 01292 // 01293 ContentSpecNode* leftNode = curNode->getFirst(); 01294 ContentSpecNode* rightNode = curNode->getSecond(); 01295 01296 // Detect if we have a deep tree that can be analyzed using a loop instead of recursion 01297 unsigned int nLoopCount=0; 01298 ContentSpecNode* cursor=curNode; 01299 while(cursor->getType()==ContentSpecNode::Sequence && cursor->getFirst() && cursor->getFirst()->getSecond()==rightNode) 01300 { 01301 nLoopCount++; 01302 cursor=cursor->getFirst(); 01303 } 01304 if(nLoopCount!=0) 01305 { 01306 count += countLeafNodes(cursor); 01307 for(unsigned int i=0;i<nLoopCount;i++) 01308 count += countLeafNodes(rightNode); 01309 return count; 01310 } 01311 if(leftNode) 01312 count+=countLeafNodes(leftNode); 01313 if(rightNode) 01314 count+=countLeafNodes(rightNode); 01315 } 01316 return count; 01317 } 01318 01319 CMNode* DFAContentModel::buildSyntaxTree(ContentSpecNode* const curNode 01320 , unsigned int& curIndex) 01321 { 01322 // Initialize a return node pointer 01323 CMNode* retNode = 0; 01324 01325 // Get the spec type of the passed node 01326 const ContentSpecNode::NodeTypes curType = curNode->getType(); 01327 01328 if ((curType & 0x0f) == ContentSpecNode::Any 01329 || (curType & 0x0f) == ContentSpecNode::Any_Other 01330 || (curType & 0x0f) == ContentSpecNode::Any_NS) 01331 { 01332 retNode = new (fMemoryManager) CMAny 01333 ( 01334 curType 01335 , curNode->getElement()->getURI() 01336 , curIndex 01337 , fLeafCount 01338 , fMemoryManager 01339 ); 01340 fLeafList[curIndex] = new (fMemoryManager) CMLeaf 01341 ( 01342 new (fMemoryManager) QName 01343 ( 01344 XMLUni::fgZeroLenString 01345 , XMLUni::fgZeroLenString 01346 , curNode->getElement()->getURI() 01347 , fMemoryManager 01348 ) 01349 , curIndex 01350 , true 01351 , fLeafCount 01352 , fMemoryManager 01353 ); 01354 fLeafListType[curIndex] = curType; 01355 ++curIndex; 01356 } 01357 else if (curType == ContentSpecNode::Leaf) 01358 { 01359 // 01360 // Create a new leaf node, and pass it the current leaf count, which 01361 // is its DFA state position. Bump the leaf count after storing it. 01362 // This makes the positions zero based since we store first and then 01363 // increment. 01364 // 01365 retNode = new (fMemoryManager) CMLeaf 01366 ( 01367 curNode->getElement() 01368 , curIndex 01369 , fLeafCount 01370 , fMemoryManager 01371 ); 01372 fLeafList[curIndex] = new (fMemoryManager) CMLeaf 01373 ( 01374 curNode->getElement() 01375 , curIndex 01376 , fLeafCount 01377 , fMemoryManager 01378 ); 01379 fLeafListType[curIndex] = ContentSpecNode::Leaf; 01380 ++curIndex; 01381 } 01382 else if (curType == ContentSpecNode::Loop) 01383 { 01384 // 01385 // Create a new leaf node, and pass it the current leaf count, which 01386 // is its DFA state position. Bump the leaf count after storing it. 01387 // This makes the positions zero based since we store first and then 01388 // increment. 01389 // 01390 retNode = new (fMemoryManager) CMRepeatingLeaf 01391 ( 01392 curNode->getFirst()->getElement() 01393 , curNode->getMinOccurs() 01394 , curNode->getMaxOccurs() 01395 , curIndex 01396 , fLeafCount 01397 , fMemoryManager 01398 ); 01399 fLeafList[curIndex] = new (fMemoryManager) CMRepeatingLeaf 01400 ( 01401 curNode->getFirst()->getElement() 01402 , curNode->getMinOccurs() 01403 , curNode->getMaxOccurs() 01404 , curIndex 01405 , fLeafCount 01406 , fMemoryManager 01407 ); 01408 fLeafListType[curIndex] = curNode->getFirst()->getType(); 01409 ++curIndex; 01410 } 01411 else 01412 { 01413 // 01414 // Its not a leaf, so we have to recurse its left and maybe right 01415 // nodes. Save both values before we recurse and trash the node. 01416 // 01417 ContentSpecNode* leftNode = curNode->getFirst(); 01418 ContentSpecNode* rightNode = curNode->getSecond(); 01419 01420 // Detect if we have a deep tree that can be analyzed using a loop instead of recursion 01421 unsigned int nLoopCount=0; 01422 ContentSpecNode* cursor=curNode; 01423 while(cursor->getType()==ContentSpecNode::Sequence && cursor->getFirst() && cursor->getFirst()->getSecond()==rightNode) 01424 { 01425 nLoopCount++; 01426 cursor=cursor->getFirst(); 01427 } 01428 if(nLoopCount!=0) 01429 { 01430 retNode = buildSyntaxTree(cursor, curIndex); 01431 for(unsigned int i=0;i<nLoopCount;i++) 01432 { 01433 CMNode* newRight = buildSyntaxTree(rightNode, curIndex); 01434 // 01435 // Now handle our level. We use our left child's last pos set and our 01436 // right child's first pos set, so get them now for convenience. 01437 // 01438 const CMStateSet& last = retNode->getLastPos(); 01439 const CMStateSet& first = newRight->getFirstPos(); 01440 01441 // 01442 // Now, for every position which is in our left child's last set 01443 // add all of the states in our right child's first set to the 01444 // follow set for that position. 01445 // 01446 CMStateSetEnumerator enumLast(&last); 01447 while(enumLast.hasMoreElements()) 01448 { 01449 XMLSize_t index=enumLast.nextElement(); 01450 *fFollowList[index] |= first; 01451 } 01452 retNode = new (fMemoryManager) CMBinaryOp 01453 ( 01454 ContentSpecNode::Sequence 01455 , retNode 01456 , newRight 01457 , fLeafCount 01458 , fMemoryManager 01459 ); 01460 } 01461 return retNode; 01462 } 01463 01464 if (((curType & 0x0f) == ContentSpecNode::Choice) 01465 || ((curType & 0x0f) == ContentSpecNode::Sequence)) 01466 { 01467 // 01468 // Recurse on both children, and return a binary op node with the 01469 // two created sub nodes as its children. The node type is the 01470 // same type as the source. 01471 // 01472 CMNode* newLeft = buildSyntaxTree(leftNode, curIndex); 01473 CMNode* newRight = buildSyntaxTree(rightNode, curIndex); 01474 if(((curType & 0x0f) == ContentSpecNode::Sequence)) 01475 { 01476 // 01477 // Now handle our level. We use our left child's last pos set and our 01478 // right child's first pos set, so get them now for convenience. 01479 // 01480 const CMStateSet& last = newLeft->getLastPos(); 01481 const CMStateSet& first = newRight->getFirstPos(); 01482 01483 // 01484 // Now, for every position which is in our left child's last set 01485 // add all of the states in our right child's first set to the 01486 // follow set for that position. 01487 // 01488 CMStateSetEnumerator enumLast(&last); 01489 while(enumLast.hasMoreElements()) 01490 { 01491 XMLSize_t index=enumLast.nextElement(); 01492 *fFollowList[index] |= first; 01493 } 01494 } 01495 retNode = new (fMemoryManager) CMBinaryOp 01496 ( 01497 curType 01498 , newLeft 01499 , newRight 01500 , fLeafCount 01501 , fMemoryManager 01502 ); 01503 } 01504 else if (curType == ContentSpecNode::ZeroOrMore 01505 || curType == ContentSpecNode::ZeroOrOne 01506 || curType == ContentSpecNode::OneOrMore) 01507 { 01508 CMNode* newChild = buildSyntaxTree(leftNode, curIndex); 01509 if (curType == ContentSpecNode::ZeroOrMore 01510 || curType == ContentSpecNode::OneOrMore) 01511 { 01512 // 01513 // Now handle our level. We use our own first and last position 01514 // sets, so get them up front. 01515 // 01516 const CMStateSet& first = newChild->getFirstPos(); 01517 const CMStateSet& last = newChild->getLastPos(); 01518 01519 // 01520 // For every position which is in our last position set, add all 01521 // of our first position states to the follow set for that 01522 // position. 01523 // 01524 CMStateSetEnumerator enumLast(&last); 01525 while(enumLast.hasMoreElements()) 01526 { 01527 XMLSize_t index=enumLast.nextElement(); 01528 *fFollowList[index] |= first; 01529 } 01530 } 01531 // This one is fine as is, just change to our form 01532 retNode = new (fMemoryManager) CMUnaryOp 01533 ( 01534 curType 01535 , newChild 01536 , fLeafCount 01537 , fMemoryManager 01538 ); 01539 } 01540 else 01541 { 01542 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMSpecType, fMemoryManager); 01543 } 01544 } 01545 // fault in the first and last pos, then delete it children 01546 retNode->getFirstPos(); 01547 retNode->getLastPos(); 01548 retNode->orphanChild(); 01549 return retNode; 01550 } 01551 01552 // 01553 // gInvalidTrans is used to represent bad transitions in the transition table 01554 // entry for each state. So each entry is initialized to that value. This 01555 // method creates a new entry and initializes it. 01556 // 01557 unsigned int* DFAContentModel::makeDefStateList() const 01558 { 01559 unsigned int* retArray = (unsigned int*) fMemoryManager->allocate 01560 ( 01561 fElemMapSize * sizeof(unsigned int) 01562 ); //new unsigned int[fElemMapSize]; 01563 for (unsigned int index = 0; index < fElemMapSize; index++) 01564 retArray[index] = XMLContentModel::gInvalidTrans; 01565 return retArray; 01566 } 01567 01568 ContentLeafNameTypeVector* DFAContentModel::getContentLeafNameTypeVector() const 01569 { 01570 //later change it to return the data member 01571 return fLeafNameTypeVector; 01572 } 01573 01574 void DFAContentModel::checkUniqueParticleAttribution (SchemaGrammar* const pGrammar, 01575 GrammarResolver* const pGrammarResolver, 01576 XMLStringPool* const pStringPool, 01577 XMLValidator* const pValidator, 01578 unsigned int* const pContentSpecOrgURI, 01579 const XMLCh* pComplexTypeName /*= 0*/) 01580 { 01581 01582 SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool); 01583 01584 unsigned int i, j, k; 01585 01586 // Rename the URI back 01587 for (i = 0; i < fElemMapSize; i++) { 01588 01589 unsigned int orgURIIndex = fElemMap[i]->getURI(); 01590 01591 if ((orgURIIndex != XMLContentModel::gEOCFakeId) && 01592 (orgURIIndex != XMLContentModel::gEpsilonFakeId) && 01593 (orgURIIndex != XMLElementDecl::fgInvalidElemId) && 01594 (orgURIIndex != XMLElementDecl::fgPCDataElemId)) { 01595 fElemMap[i]->setURI(pContentSpecOrgURI[orgURIIndex]); 01596 } 01597 } 01598 01599 // Unique Particle Attribution 01600 // Store the conflict results between any two elements in fElemMap 01601 // 0 - not yet tested, 1 - conflict, (-1) - no conflict 01602 signed char** conflictTable = (signed char**) fMemoryManager->allocate 01603 ( 01604 fElemMapSize * sizeof(signed char*) 01605 ); 01606 01607 // initialize the conflict table 01608 for (j = 0; j < fElemMapSize; j++) { 01609 conflictTable[j] = (signed char*) fMemoryManager->allocate 01610 ( 01611 fElemMapSize * sizeof(signed char) 01612 ); 01613 memset(conflictTable[j], 0, fElemMapSize*sizeof(signed char)); 01614 } 01615 01616 // for each state, check whether it has overlap transitions 01617 for (i = 0; i < fTransTableSize; i++) { 01618 for (j = 0; j < fElemMapSize; j++) { 01619 for (k = j+1; k < fElemMapSize; k++) { 01620 if (fTransTable[i][j] != XMLContentModel::gInvalidTrans && 01621 fTransTable[i][k] != XMLContentModel::gInvalidTrans && 01622 conflictTable[j][k] == 0) { 01623 01624 // If this is text in a Schema mixed content model, skip it. 01625 if ( fIsMixed && 01626 (( fElemMap[j]->getURI() == XMLElementDecl::fgPCDataElemId) || 01627 ( fElemMap[k]->getURI() == XMLElementDecl::fgPCDataElemId))) 01628 continue; 01629 01630 if (XercesElementWildcard::conflict(pGrammar, 01631 fElemMapType[j], 01632 fElemMap[j], 01633 fElemMapType[k], 01634 fElemMap[k], 01635 &comparator)) { 01636 if (fCountingStates != 0) { 01637 Occurence* o = fCountingStates[i]; 01638 // If "i" is a counting state and exactly one of the transitions 01639 // loops back to "i" then the two particles do not overlap if 01640 // minOccurs == maxOccurs. 01641 if (o != 0 && 01642 ((fTransTable[i][j] == i) ^ (fTransTable[i][k] == i)) && 01643 o->minOccurs == o->maxOccurs) { 01644 conflictTable[j][k] = -1; 01645 continue; 01646 } 01647 } 01648 conflictTable[j][k] = 1; 01649 01650 XMLBuffer buf1(1023, fMemoryManager); 01651 if (((fElemMapType[j] & 0x0f) == ContentSpecNode::Any) || 01652 ((fElemMapType[j] & 0x0f) == ContentSpecNode::Any_NS)) 01653 buf1.set(SchemaSymbols::fgATTVAL_TWOPOUNDANY); 01654 else if ((fElemMapType[j] & 0x0f) == ContentSpecNode::Any_Other) 01655 buf1.set(SchemaSymbols::fgATTVAL_TWOPOUNDOTHER); 01656 else 01657 buf1.set(fElemMap[j]->getRawName()); 01658 01659 XMLBuffer buf2(1023, fMemoryManager); 01660 if (((fElemMapType[k] & 0x0f) == ContentSpecNode::Any) || 01661 ((fElemMapType[k] & 0x0f) == ContentSpecNode::Any_NS)) 01662 buf2.set(SchemaSymbols::fgATTVAL_TWOPOUNDANY); 01663 else if ((fElemMapType[k] & 0x0f) == ContentSpecNode::Any_Other) 01664 buf2.set(SchemaSymbols::fgATTVAL_TWOPOUNDOTHER); 01665 else 01666 buf2.set(fElemMap[k]->getRawName()); 01667 01668 pValidator->emitError(XMLValid::UniqueParticleAttributionFail, 01669 pComplexTypeName, 01670 buf1.getRawBuffer(), 01671 buf2.getRawBuffer()); 01672 } 01673 else 01674 conflictTable[j][k] = -1; 01675 } 01676 } 01677 } 01678 } 01679 01680 for (i = 0; i < fElemMapSize; i++) 01681 fMemoryManager->deallocate(conflictTable[i]); 01682 fMemoryManager->deallocate(conflictTable); 01683 } 01684 01685 XERCES_CPP_NAMESPACE_END