GME
13
|
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: IGXMLScanner.cpp 882548 2009-11-20 13:44:14Z borisk $
 */

// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/IGXMLScanner.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/sax/InputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/framework/MemoryManager.hpp>
#include <xercesc/framework/XMLGrammarPool.hpp>
#include <xercesc/framework/XMLDTDDescription.hpp>
#include <xercesc/framework/psvi/PSVIElement.hpp>
#include <xercesc/framework/psvi/PSVIHandler.hpp>
#include <xercesc/framework/psvi/PSVIAttributeList.hpp>
#include <xercesc/validators/common/GrammarResolver.hpp>
#include <xercesc/validators/DTD/DocTypeHandler.hpp>
#include <xercesc/validators/DTD/DTDScanner.hpp>
#include <xercesc/validators/DTD/DTDValidator.hpp>
#include <xercesc/validators/schema/SchemaValidator.hpp>
#include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
#include <xercesc/validators/schema/identity/IC_Selector.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>

XERCES_CPP_NAMESPACE_BEGIN


// Scope guards used below: each is armed with an object and a member
// function, and is explicitly release()d on the paths where that member
// function must not be run.
typedef JanitorMemFunCall<IGXMLScanner>     CleanupType;
typedef JanitorMemFunCall<ReaderMgr>        ReaderMgrResetType;


// ---------------------------------------------------------------------------
//  IGXMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------

//  Constructs a scanner without handlers. All owned pointers start out null
//  and are allocated by commonInit(). The cleanup guard is released once
//  commonInit() has returned, and also on OutOfMemoryException (see the
//  comment in the handler).
IGXMLScanner::IGXMLScanner( XMLValidator* const     valToAdopt
                          , GrammarResolver* const  grammarResolver
                          , MemoryManager* const    manager) :

    XMLScanner(valToAdopt, grammarResolver, manager)
    , fSeeXsi(false)
    , fGrammarType(Grammar::UnKnown)
    , fElemStateSize(16)
    , fElemState(0)
    , fElemLoopState(0)
    , fContent(1023, manager)
    , fRawAttrList(0)
    , fRawAttrColonListSize(32)
    , fRawAttrColonList(0)
    , fDTDValidator(0)
    , fSchemaValidator(0)
    , fDTDGrammar(0)
    , fICHandler(0)
    , fLocationPairs(0)
    , fDTDElemNonDeclPool(0)
    , fSchemaElemNonDeclPool(0)
    , fElemCount(0)
    , fAttDefRegistry(0)
    , fUndeclaredAttrRegistry(0)
    , fPSVIAttrList(0)
    , fModel(0)
    , fPSVIElement(0)
    , fErrorStack(0)
    , fSchemaInfoList(0)
    , fCachedSchemaInfoList (0)
{
    CleanupType cleanup(this, &IGXMLScanner::cleanUp);

    try
    {
        commonInit();
    }
    catch(const OutOfMemoryException&)
    {
        // Don't cleanup when out of memory, since executing the
        // code can cause problems.
        cleanup.release();

        throw;
    }

    cleanup.release();
}

//  Constructs a scanner with the given document, doctype, entity and error
//  handlers, which are forwarded to the XMLScanner base. Member
//  initialization and the guarded commonInit() call are the same as in the
//  constructor above.
IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler
                          , DocTypeHandler* const     docTypeHandler
                          , XMLEntityHandler* const   entityHandler
                          , XMLErrorReporter* const   errHandler
                          , XMLValidator* const       valToAdopt
                          , GrammarResolver* const    grammarResolver
                          , MemoryManager* const      manager) :

    XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
    , fSeeXsi(false)
    , fGrammarType(Grammar::UnKnown)
    , fElemStateSize(16)
    , fElemState(0)
    , fElemLoopState(0)
    , fContent(1023, manager)
    , fRawAttrList(0)
    , fRawAttrColonListSize(32)
    , fRawAttrColonList(0)
    , fDTDValidator(0)
    , fSchemaValidator(0)
    , fDTDGrammar(0)
    , fICHandler(0)
    , fLocationPairs(0)
    , fDTDElemNonDeclPool(0)
    , fSchemaElemNonDeclPool(0)
    , fElemCount(0)
    , fAttDefRegistry(0)
    , fUndeclaredAttrRegistry(0)
    , fPSVIAttrList(0)
    , fModel(0)
    , fPSVIElement(0)
    , fErrorStack(0)
    , fSchemaInfoList(0)
    , fCachedSchemaInfoList (0)
{
    CleanupType cleanup(this, &IGXMLScanner::cleanUp);

    try
    {
        commonInit();
    }
    catch(const OutOfMemoryException&)
    {
        // Don't cleanup when out of memory, since executing the
        // code can cause problems.
        cleanup.release();

        throw;
    }

    cleanup.release();
}

//  Releases everything allocated by commonInit() (and the lazily created
//  members that cleanUp() also deletes).
IGXMLScanner::~IGXMLScanner()
{
    cleanUp();
}

// ---------------------------------------------------------------------------
//  XMLScanner: Getter methods
// ---------------------------------------------------------------------------

//  Returns the entity declaration pool of the current DTD grammar, or 0
//  when no DTD grammar is set.
NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool()
{
    if(!fDTDGrammar)
        return 0;
    return fDTDGrammar->getEntityDeclPool();
}

//  Const overload of the getter above; same null handling.
const NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool() const
{
    if(!fDTDGrammar)
        return 0;
    return fDTDGrammar->getEntityDeclPool();
}

// ---------------------------------------------------------------------------
//  IGXMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------

//  Scans a complete document from 'src': reset, prolog, content, trailing
//  miscellaneous markup, firing startDocument()/endDocument() on the
//  document handler when one is installed. XMLErrs::Codes and
//  XMLValid::Codes exceptions end the scan silently; an XMLException is
//  reported through emitError(); OutOfMemoryException is rethrown after
//  releasing the reader manager reset guard.
void IGXMLScanner::scanDocument(const InputSource& src)
{
    //  Bump up the sequence id for this parser instance. This will invalidate
    //  any previous progressive scan tokens.
    fSequenceId++;

    ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

    try
    {
        //  Reset the scanner and its plugged in stuff for a new run. This
        //  resets all the data structures, creates the initial reader and
        //  pushes it on the stack, and sets up the base document path.
        scanReset(src);

        // If we have a document handler, then call the start document
        if (fDocHandler)
            fDocHandler->startDocument();

        //  Scan the prolog part, which is everything before the root element
        //  including the DTD subsets.
        scanProlog();

        //  If we got to the end of input, then its not a valid XML file.
        //  Else, go on to scan the content.
        if (fReaderMgr.atEOF())
        {
            emitError(XMLErrs::EmptyMainEntity);
        }
        else
        {
            //  Scan content, and tell it its not an external entity
            if (scanContent())
            {
                // Do post-parse validation if required
                if (fValidate)
                {
                    //  We handle ID reference semantics at this level since
                    //  its required by XML 1.0.
                    checkIDRefs();

                    // Then allow the validator to do any extra stuff it wants
//                    fValidator->postParseValidation();
                }

                // That went ok, so scan for any miscellaneous stuff
                if (!fReaderMgr.atEOF())
                    scanMiscellaneous();
            }
        }

        // If we have a document handler, then call the end document
        if (fDocHandler)
            fDocHandler->endDocument();

        //cargill debug:
        //fGrammarResolver->getXSModel();
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception, so fall through
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so fall through
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
        }
        catch(const OutOfMemoryException&)
        {
            // This is a special case for out-of-memory
            // conditions, because resetting the ReaderMgr
            // can be problematic.
            resetReaderMgr.release();

            throw;
        }
    }
    catch(const OutOfMemoryException&)
    {
        // This is a special case for out-of-memory
        // conditions, because resetting the ReaderMgr
        // can be problematic.
        resetReaderMgr.release();

        throw;
    }
}


//  Progressive-scan step: consumes the next top-level token.
//
//  Returns true when a token was processed and more may follow, in which
//  case the reader manager reset guard is released. Returns false on end of
//  file or after a caught XMLErrs::Codes, XMLValid::Codes or XMLException.
//  Throws RuntimeException if 'token' is no longer legal, and rethrows
//  OutOfMemoryException.
bool IGXMLScanner::scanNext(XMLPScanToken& token)
{
    // Make sure this token is still legal
    if (!isLegalToken(token))
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);

    // Find the next token and remember the reader id
    XMLSize_t   orgReader;
    XMLTokens   curToken;

    ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

    bool retVal = true;

    try
    {
        while (true)
        {
            //  We have to handle any end of entity exceptions that happen here.
            //  We could be at the end of X nested entities, each of which will
            //  generate an end of entity exception as we try to move forward.
            try
            {
                curToken = senseNextToken(orgReader);
                break;
            }
            catch(const EndOfEntityException& toCatch)
            {
                // Send an end of entity reference event
                if (fDocHandler)
                    fDocHandler->endEntityReference(toCatch.getEntity());
            }
        }

        if (curToken == Token_CharData)
        {
            scanCharData(fCDataBuf);
        }
        else if (curToken == Token_EOF)
        {
            if (!fElemStack.isEmpty())
            {
                const ElemStack::StackElem* topElem = fElemStack.popTop();
                emitError
                (
                    XMLErrs::EndedWithTagsOnStack
                    , topElem->fThisElement->getFullName()
                );
            }

            retVal = false;
        }
        else
        {
            // Its some sort of markup
            bool gotData = true;
            switch(curToken)
            {
                case Token_CData :
                    // Make sure we are within content
                    if (fElemStack.isEmpty())
                        emitError(XMLErrs::CDATAOutsideOfContent);
                    scanCDSection();
                    break;

                case Token_Comment :
                    scanComment();
                    break;

                case Token_EndTag :
                    scanEndTag(gotData);
                    break;

                case Token_PI :
                    scanPI();
                    break;

                case Token_StartTag :
                    if (fDoNamespaces)
                        scanStartTagNS(gotData);
                    else
                        scanStartTag(gotData);
                    break;

                default :
                    fReaderMgr.skipToChar(chOpenAngle);
                    break;
            }

            if (orgReader != fReaderMgr.getCurrentReaderNum())
                emitError(XMLErrs::PartialMarkupInEntity);

            // If we hit the end, then do the miscellaneous part
            if (!gotData)
            {
                // Do post-parse validation if required
                if (fValidate)
                {
                    //  We handle ID reference semantics at this level since
                    //  its required by XML 1.0.
                    checkIDRefs();

                    // Then allow the validator to do any extra stuff it wants
//                    fValidator->postParseValidation();
                }

                // That went ok, so scan for any miscellaneous stuff
                scanMiscellaneous();

                if (toCheckIdentityConstraint())
                    fICHandler->endDocument();

                if (fDocHandler)
                    fDocHandler->endDocument();
            }
        }
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception so return failure
        retVal = false;
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so return failure
        retVal = false;
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
        }
        catch(const OutOfMemoryException&)
        {
            // This is a special case for out-of-memory
            // conditions, because resetting the ReaderMgr
            // can be problematic.
            resetReaderMgr.release();

            throw;
        }

        retVal = false;
    }
    catch(const OutOfMemoryException&)
    {
        // This is a special case for out-of-memory
        // conditions, because resetting the ReaderMgr
        // can be problematic.
        resetReaderMgr.release();

        throw;
    }

    // If we are not at the end, release the object that will
    // reset the ReaderMgr.
    if (retVal)
        resetReaderMgr.release();

    return retVal;
}



// ---------------------------------------------------------------------------
//  IGXMLScanner: Private helper methods. Most of these are implemented in
//  IGXMLScanner2.Cpp.
// ---------------------------------------------------------------------------

//  This method handles the common initialization, to avoid having to do
//  it redundantly in multiple constructors.
void IGXMLScanner::commonInit()
{

    //  Create the element state array
    fElemState = (unsigned int*) fMemoryManager->allocate
    (
        fElemStateSize * sizeof(unsigned int)
    ); //new unsigned int[fElemStateSize];
    fElemLoopState = (unsigned int*) fMemoryManager->allocate
    (
        fElemStateSize * sizeof(unsigned int)
    ); //new unsigned int[fElemStateSize];

    //  And we need one for the raw attribute scan. This just stores key/
    //  value string pairs (prior to any processing.)
    fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
    fRawAttrColonList = (int*) fMemoryManager->allocate
    (
        fRawAttrColonListSize * sizeof(int)
    );

    //  Create the Validator and init them
    fDTDValidator = new (fMemoryManager) DTDValidator();
    initValidator(fDTDValidator);
    fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
    initValidator(fSchemaValidator);

    // Create IdentityConstraint info
    fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);

    // Create schemaLocation pair info
    fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
    // create pools for undeclared elements
    fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
    fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
    fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
    (
        131, false, fMemoryManager
    );
    fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
    fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);

    fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
    fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);

    // use fDTDValidator as the default validator
    if (!fValidator)
        fValidator = fDTDValidator;
}

//  Frees the buffers and objects created by commonInit(), plus
//  fPSVIElement and fErrorStack, which are not created there. Called from
//  the destructor and bound to the constructors' cleanup guard.
void IGXMLScanner::cleanUp()
{
    fMemoryManager->deallocate(fElemState); //delete [] fElemState;
    fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
    delete fRawAttrList;
    fMemoryManager->deallocate(fRawAttrColonList);
    delete fDTDValidator;
    delete fSchemaValidator;
    delete fICHandler;
    delete fLocationPairs;
    delete fDTDElemNonDeclPool;
    delete fSchemaElemNonDeclPool;
    delete fAttDefRegistry;
    delete fUndeclaredAttrRegistry;
    delete fPSVIAttrList;
    delete fPSVIElement;
    delete fErrorStack;
    delete fSchemaInfoList;
    delete fCachedSchemaInfoList;
}

// ---------------------------------------------------------------------------
//  IGXMLScanner: Private scanning methods
// ---------------------------------------------------------------------------

//  This method is called from scanStartTag() to handle the very raw initial
//  scan of the attributes. It just fills in the passed collection with
//  key/value pairs for each attribute. No processing is done on them at all.
//
//  elemName is only used in error messages. toFill receives the pairs;
//  existing entries are reused before new ones are appended. isEmpty is set
//  to true when the tag ends with "/>". The return value is the number of
//  attributes stored. The colon position of each attribute name is recorded
//  in fRawAttrColonList at the same index.
XMLSize_t
IGXMLScanner::rawAttrScan(const   XMLCh* const                elemName
                          ,       RefVectorOf<KVStringPair>&  toFill
                          ,       bool&                       isEmpty)
{
    //  Keep up with how many attributes we've seen so far, and how many
    //  elements are available in the vector. This way we can reuse old
    //  elements until we run out and then expand it.
    XMLSize_t attCount = 0;
    XMLSize_t curVecSize = toFill.size();

    // Assume it is not empty
    isEmpty = false;

    //  We loop until we either see a /> or >, handling key/value pairs until
    //  we get there. We place them in the passed vector, which we will expand
    //  as required to hold them.
    while (true)
    {
        // Get the next character, which should be non-space
        XMLCh nextCh = fReaderMgr.peekNextChar();

        //  If the next character is not a slash or closed angle bracket,
        //  then it must be whitespace, since whitespace is required
        //  between the end of the last attribute and the name of the next
        //  one.
        //
        if (attCount)
        {
            if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
            {
                bool bFoundSpace;
                fReaderMgr.skipPastSpaces(bFoundSpace);
                if (!bFoundSpace)
                {
                    // Emit the error but keep on going
                    emitError(XMLErrs::ExpectedWhitespace);
                }
                // Ok, peek another char
                nextCh = fReaderMgr.peekNextChar();
            }
        }

        //  Ok, here we first check for any of the special case characters.
        //  If its not one, then we do the normal case processing, which
        //  assumes that we've hit an attribute value, Otherwise, we do all
        //  the special case checks.
        if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
        {
            //  Assume it's going to be an attribute, so get a name from
            //  the input.
            int colonPosition;
            if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
            {
                if (fAttNameBuf.isEmpty())
                    emitError(XMLErrs::ExpectedAttrName);
                else
                    emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
                fReaderMgr.skipPastChar(chCloseAngle);
                return attCount;
            }

            const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();

            // And next must be an equal sign
            if (!scanEq())
            {
                static const XMLCh tmpList[] =
                {
                    chSingleQuote, chDoubleQuote, chCloseAngle
                    , chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedEqSign);

                //  Try to sync back up by skipping forward until we either
                //  hit something meaningful.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                {
                    // Jump back to top for normal processing of these
                    continue;
                }
                else if ((chFound == chSingleQuote)
                      ||  (chFound == chDoubleQuote)
                      ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    // Just fall through assuming that the value is to follow
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return attCount;
                }
                else
                {
                    // Something went really wrong
                    return attCount;
                }
            }

            //  Next should be the quoted attribute value. We just do a simple
            //  and stupid scan of this value. The only thing we do here
            //  is to expand entity references.
            if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
            {
                static const XMLCh tmpList[] =
                {
                    chCloseAngle, chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedAttrValue);

                //  It failed, so lets try to get synced back up. We skip
                //  forward until we find some whitespace or one of the
                //  chars in our list.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle)
                ||  (chFound == chForwardSlash)
                ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    //  Just fall through and process this attribute, though
                    //  the value will be "".
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return attCount;
                }
                else
                {
                    // Something went really wrong
                    return attCount;
                }
            }

            //  And now lets add it to the passed collection. If we have not
            //  filled it up yet, then we use the next element. Else we add
            //  a new one.
            KVStringPair* curPair = 0;
            if (attCount >= curVecSize)
            {
                curPair = new (fMemoryManager) KVStringPair
                (
                    curAttNameBuf
                    , fAttNameBuf.getLen()
                    , fAttValueBuf.getRawBuffer()
                    , fAttValueBuf.getLen()
                    , fMemoryManager
                );
                toFill.addElement(curPair);
            }
            else
            {
                curPair = toFill.elementAt(attCount);
                curPair->set
                (
                    curAttNameBuf,
                    fAttNameBuf.getLen(),
                    fAttValueBuf.getRawBuffer(),
                    fAttValueBuf.getLen()
                );
            }

            if (attCount >= fRawAttrColonListSize) {
                resizeRawAttrColonList();
            }
            // Set the position of the colon and bump the count of attributes we've gotten
            fRawAttrColonList[attCount++] = colonPosition;

            // And go to the top again for another attribute
            continue;
        }

        //  It was some special case character so do all of the checks and
        //  deal with it.
        if (!nextCh)
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

        if (nextCh == chForwardSlash)
        {
            fReaderMgr.getNextChar();
            isEmpty = true;
            if (!fReaderMgr.skippedChar(chCloseAngle))
                emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }
        else if (nextCh == chCloseAngle)
        {
            fReaderMgr.getNextChar();
            break;
        }
        else if (nextCh == chOpenAngle)
        {
            //  Check for this one specially, since its going to be common
            //  and it is kind of auto-recovering since we've already hit the
            //  next open bracket, which is what we would have seeked to (and
            //  skipped this whole tag.)
            emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }
        else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
        {
            //  Check for this one specially, which is probably a missing
            //  attribute name, e.g. ="value". Just issue expected name
            //  error and eat the quoted string, then jump back to the
            //  top again.
            emitError(XMLErrs::ExpectedAttrName);
            fReaderMgr.getNextChar();
            fReaderMgr.skipQuotedString(nextCh);
            fReaderMgr.skipPastSpaces();
            continue;
        }
    }

    return attCount;
}


//  This method will kick off the scanning of the primary content of the
//  document, i.e. the elements.
//
//  Always returns true; errors are reported through emitError() or
//  propagate as exceptions other than EndOfEntityException, which is
//  handled here.
bool IGXMLScanner::scanContent()
{
    //  Go into a loop until we hit the end of the root element, or we fall
    //  out because there is no root element.
    //
    //  We have to do kind of a deeply nested double loop here in order to
    //  avoid doing the setup/teardown of the exception handler on each
    //  round. Doing it this way we only do it when an exception actually
    //  occurs.
    bool gotData = true;
    bool inMarkup = false;
    while (gotData)
    {
        try
        {
            while (gotData)
            {
                //  Sense what the next top level token is. According to what
                //  this tells us, we will call something to handle that kind
                //  of thing.
                XMLSize_t orgReader;
                const XMLTokens curToken = senseNextToken(orgReader);

                //  Handle character data and end of file specially. Char data
                //  is not markup so we don't want to handle it in the loop
                //  below.
                if (curToken == Token_CharData)
                {
                    //  Scan the character data and call appropriate events. Let
                    //  him use our local character data buffer for efficiency.
                    scanCharData(fCDataBuf);
                    continue;
                }
                else if (curToken == Token_EOF)
                {
                    //  The element stack better be empty at this point or we
                    //  ended prematurely before all elements were closed.
                    if (!fElemStack.isEmpty())
                    {
                        const ElemStack::StackElem* topElem = fElemStack.popTop();
                        emitError
                        (
                            XMLErrs::EndedWithTagsOnStack
                            , topElem->fThisElement->getFullName()
                        );
                    }

                    // Its the end of file, so clear the got data flag
                    gotData = false;
                    continue;
                }

                // We are in some sort of markup now
                inMarkup = true;

                //  According to the token we got, call the appropriate
                //  scanning method.
                switch(curToken)
                {
                    case Token_CData :
                        // Make sure we are within content
                        if (fElemStack.isEmpty())
                            emitError(XMLErrs::CDATAOutsideOfContent);
                        scanCDSection();
                        break;

                    case Token_Comment :
                        scanComment();
                        break;

                    case Token_EndTag :
                        scanEndTag(gotData);
                        break;

                    case Token_PI :
                        scanPI();
                        break;

                    case Token_StartTag :
                        if (fDoNamespaces)
                            scanStartTagNS(gotData);
                        else
                            scanStartTag(gotData);
                        break;

                    default :
                        fReaderMgr.skipToChar(chOpenAngle);
                        break;
                }

                if (orgReader != fReaderMgr.getCurrentReaderNum())
                    emitError(XMLErrs::PartialMarkupInEntity);

                // And we are back out of markup again
                inMarkup = false;
            }
        }
        catch(const EndOfEntityException& toCatch)
        {
            //  If we were in some markup when this happened, then its a
            //  partial markup error.
            if (inMarkup)
                emitError(XMLErrs::PartialMarkupInEntity);

            // Send an end of entity reference event
            if (fDocHandler)
                fDocHandler->endEntityReference(toCatch.getEntity());

            inMarkup = false;
        }
    }

    // It went ok, so return success
    return true;
}


void IGXMLScanner::scanEndTag(bool& gotData)
{
    //  Assume we will still have data until proven otherwise. It will only
    //  ever be false if this is the end of the root element.
    gotData = true;

    //  Check if the element stack is empty. If so, then this is an unbalanced
    //  element (i.e. more ends than starts, perhaps because of bad text
    //  causing one to be skipped.)
    if (fElemStack.isEmpty())
    {
        emitError(XMLErrs::MoreEndThanStartTags);
        fReaderMgr.skipPastChar(chCloseAngle);
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
    }

    //  Pop the stack of the element we are supposed to be ending. Remember
    //  that we don't own this. The stack just keeps them and reuses them.
    unsigned int uriId = (fDoNamespaces)
        ?
fElemStack.getCurrentURI() : fEmptyNamespaceId; 00947 00948 // these get initialized below 00949 const ElemStack::StackElem* topElem = 0; 00950 const XMLCh *elemName = 0; 00951 00952 // Make sure that its the end of the element that we expect 00953 // special case for schema validation, whose element decls, 00954 // obviously don't contain prefix information 00955 if(fGrammarType == Grammar::SchemaGrammarType) 00956 { 00957 elemName = fElemStack.getCurrentSchemaElemName(); 00958 topElem = fElemStack.topElement(); 00959 } 00960 else 00961 { 00962 topElem = fElemStack.topElement(); 00963 elemName = topElem->fThisElement->getFullName(); 00964 } 00965 if (!fReaderMgr.skippedStringLong(elemName)) 00966 { 00967 emitError 00968 ( 00969 XMLErrs::ExpectedEndOfTagX 00970 , elemName 00971 ); 00972 fReaderMgr.skipPastChar(chCloseAngle); 00973 fElemStack.popTop(); 00974 return; 00975 } 00976 00977 // Make sure we are back on the same reader as where we started 00978 if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum()) 00979 emitError(XMLErrs::PartialTagMarkupError); 00980 00981 // Skip optional whitespace 00982 fReaderMgr.skipPastSpaces(); 00983 00984 // Make sure we find the closing bracket 00985 if (!fReaderMgr.skippedChar(chCloseAngle)) 00986 { 00987 emitError 00988 ( 00989 XMLErrs::UnterminatedEndTag 00990 , topElem->fThisElement->getFullName() 00991 ); 00992 } 00993 00994 if (fGrammarType == Grammar::SchemaGrammarType) 00995 { 00996 // reset error occurred 00997 fPSVIElemContext.fErrorOccurred = fErrorStack->pop(); 00998 if (fValidate && topElem->fThisElement->isDeclared()) 00999 { 01000 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo(); 01001 if(!fPSVIElemContext.fCurrentTypeInfo) 01002 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); 01003 else 01004 fPSVIElemContext.fCurrentDV = 0; 01005 if(fPSVIHandler) 01006 { 01007 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) 
fValidator)->getNormalizedValue(); 01008 01009 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString)) 01010 fPSVIElemContext.fNormalizedValue = 0; 01011 } 01012 } 01013 else 01014 { 01015 fPSVIElemContext.fCurrentDV = 0; 01016 fPSVIElemContext.fCurrentTypeInfo = 0; 01017 fPSVIElemContext.fNormalizedValue = 0; 01018 } 01019 } 01020 01021 // If validation is enabled, then lets pass him the list of children and 01022 // this element and let him validate it. 01023 DatatypeValidator* psviMemberType = 0; 01024 if (fValidate) 01025 { 01026 01027 // 01028 // XML1.0-3rd 01029 // Validity Constraint: 01030 // The declaration matches EMPTY and the element has no content (not even 01031 // entity references, comments, PIs or white space). 01032 // 01033 if ( (fGrammarType == Grammar::DTDGrammarType) && 01034 (topElem->fCommentOrPISeen) && 01035 (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty)) 01036 { 01037 fValidator->emitError 01038 ( 01039 XMLValid::EmptyElemHasContent 01040 , topElem->fThisElement->getFullName() 01041 ); 01042 } 01043 01044 // 01045 // XML1.0-3rd 01046 // Validity Constraint: 01047 // 01048 // The declaration matches children and the sequence of child elements 01049 // belongs to the language generated by the regular expression in the 01050 // content model, with optional white space, comments and PIs 01051 // (i.e. markup matching production [27] Misc) between the start-tag and 01052 // the first child element, between child elements, or between the last 01053 // child element and the end-tag. 
01054 // 01055 // Note that 01056 // a CDATA section containing only white space or 01057 // a reference to an entity whose replacement text is character references 01058 // expanding to white space do not match the nonterminal S, and hence 01059 // cannot appear in these positions; however, 01060 // a reference to an internal entity with a literal value consisting 01061 // of character references expanding to white space does match S, 01062 // since its replacement text is the white space resulting from expansion 01063 // of the character references. 01064 // 01065 if ( (fGrammarType == Grammar::DTDGrammarType) && 01066 (topElem->fReferenceEscaped) && 01067 (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children)) 01068 { 01069 fValidator->emitError 01070 ( 01071 XMLValid::ElemChildrenHasInvalidWS 01072 , topElem->fThisElement->getFullName() 01073 ); 01074 } 01075 XMLSize_t failure; 01076 bool res = fValidator->checkContent 01077 ( 01078 topElem->fThisElement 01079 , topElem->fChildren 01080 , topElem->fChildCount 01081 , &failure 01082 ); 01083 01084 if (!res) 01085 { 01086 // One of the elements is not valid for the content. NOTE that 01087 // if no children were provided but the content model requires 01088 // them, it comes back with a zero value. But we cannot use that 01089 // to index the child array in this case, and have to put out a 01090 // special message. 
01091 if (!topElem->fChildCount) 01092 { 01093 fValidator->emitError 01094 ( 01095 XMLValid::EmptyNotValidForContent 01096 , topElem->fThisElement->getFormattedContentModel() 01097 ); 01098 } 01099 else if (failure >= topElem->fChildCount) 01100 { 01101 fValidator->emitError 01102 ( 01103 XMLValid::NotEnoughElemsForCM 01104 , topElem->fThisElement->getFormattedContentModel() 01105 ); 01106 } 01107 else 01108 { 01109 fValidator->emitError 01110 ( 01111 XMLValid::ElementNotValidForContent 01112 , topElem->fChildren[failure]->getRawName() 01113 , topElem->fThisElement->getFormattedContentModel() 01114 ); 01115 } 01116 } 01117 01118 01119 if (fGrammarType == Grammar::SchemaGrammarType) { 01120 if (((SchemaValidator*) fValidator)->getErrorOccurred()) 01121 fPSVIElemContext.fErrorOccurred = true; 01122 else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union) 01123 psviMemberType = fValidationContext->getValidatingMemberType(); 01124 01125 if (fPSVIHandler) 01126 { 01127 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified(); 01128 if(fPSVIElemContext.fIsSpecified) 01129 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue(); 01130 } 01131 01132 // call matchers and de-activate context 01133 if (toCheckIdentityConstraint()) 01134 { 01135 fICHandler->deactivateContext 01136 ( 01137 (SchemaElementDecl *) topElem->fThisElement 01138 , fContent.getRawBuffer() 01139 , fValidationContext 01140 , fPSVIElemContext.fCurrentDV 01141 ); 01142 } 01143 01144 } 01145 } 01146 01147 // QName dv needed topElem to resolve URIs on the checkContent 01148 fElemStack.popTop(); 01149 01150 // See if it was the root element, to avoid multiple calls below 01151 const bool isRoot = fElemStack.isEmpty(); 01152 01153 if (fGrammarType == Grammar::SchemaGrammarType) 01154 { 01155 if (fPSVIHandler) 01156 { 01157 endElementPSVI( 01158 (SchemaElementDecl*)topElem->fThisElement, 
psviMemberType); 01159 } 01160 // now we can reset the datatype buffer, since the 01161 // application has had a chance to copy the characters somewhere else 01162 ((SchemaValidator *)fValidator)->clearDatatypeBuffer(); 01163 } 01164 01165 // If we have a doc handler, tell it about the end tag 01166 if (fDocHandler) 01167 { 01168 if (fGrammarType == Grammar::SchemaGrammarType) { 01169 if (topElem->fPrefixColonPos != -1) 01170 fPrefixBuf.set(elemName, topElem->fPrefixColonPos); 01171 else 01172 fPrefixBuf.reset(); 01173 } 01174 else { 01175 fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix()); 01176 } 01177 fDocHandler->endElement 01178 ( 01179 *topElem->fThisElement 01180 , uriId 01181 , isRoot 01182 , fPrefixBuf.getRawBuffer() 01183 ); 01184 } 01185 01186 if (fGrammarType == Grammar::SchemaGrammarType) { 01187 if (!isRoot) 01188 { 01189 // update error information 01190 fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred); 01191 01192 01193 } 01194 } 01195 01196 // If this was the root, then done with content 01197 gotData = !isRoot; 01198 01199 if (gotData) { 01200 if (fDoNamespaces) { 01201 // Restore the grammar 01202 fGrammar = fElemStack.getCurrentGrammar(); 01203 fGrammarType = fGrammar->getGrammarType(); 01204 if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) { 01205 if (fValidatorFromUser) 01206 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); 01207 else { 01208 fValidator = fSchemaValidator; 01209 } 01210 } 01211 else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) { 01212 if (fValidatorFromUser) 01213 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); 01214 else { 01215 fValidator = fDTDValidator; 01216 } 01217 } 01218 01219 fValidator->setGrammar(fGrammar); 01220 } 01221 01222 // Restore the validation flag 01223 fValidate = fElemStack.getValidationFlag(); 01224 } 
01225 } 01226 01227 01228 // This method handles the high level logic of scanning the DOCType 01229 // declaration. This calls the DTDScanner and kicks off both the scanning of 01230 // the internal subset and the scanning of the external subset, if any. 01231 // 01232 // When we get here the '<!DOCTYPE' part has already been scanned, which is 01233 // what told us that we had a doc type decl to parse. 01234 void IGXMLScanner::scanDocTypeDecl() 01235 { 01236 // We have a doc type. So, switch the Grammar. 01237 switchGrammar(XMLUni::fgDTDEntityString); 01238 01239 if (fDocTypeHandler) 01240 fDocTypeHandler->resetDocType(); 01241 01242 // There must be some space after DOCTYPE 01243 bool skippedSomething; 01244 fReaderMgr.skipPastSpaces(skippedSomething); 01245 if (!skippedSomething) 01246 { 01247 emitError(XMLErrs::ExpectedWhitespace); 01248 01249 // Just skip the Doctype declaration and return 01250 fReaderMgr.skipPastChar(chCloseAngle); 01251 return; 01252 } 01253 01254 // Get a buffer for the root element 01255 XMLBufBid bbRootName(&fBufMgr); 01256 01257 // Get a name from the input, which should be the name of the root 01258 // element of the upcoming content. 01259 int colonPosition; 01260 bool validName = fDoNamespaces ? fReaderMgr.getQName(bbRootName.getBuffer(), &colonPosition) : 01261 fReaderMgr.getName(bbRootName.getBuffer()); 01262 if (!validName) 01263 { 01264 if (bbRootName.isEmpty()) 01265 emitError(XMLErrs::NoRootElemInDOCTYPE); 01266 else 01267 emitError(XMLErrs::InvalidRootElemInDOCTYPE, bbRootName.getRawBuffer()); 01268 fReaderMgr.skipPastChar(chCloseAngle); 01269 return; 01270 } 01271 01272 // Store the root element name for later check 01273 setRootElemName(bbRootName.getRawBuffer()); 01274 01275 // This element obviously is not going to exist in the element decl 01276 // pool yet, but we need to call docTypeDecl. So force it into 01277 // the element decl pool, marked as being there because it was in 01278 // the DOCTYPE. 
Later, when its declared, the status will be updated. 01279 // 01280 // Only do this if we are not reusing the validator! If we are reusing, 01281 // then look it up instead. It has to exist! 01282 MemoryManager* const rootDeclMgr = 01283 fUseCachedGrammar ? fMemoryManager : fGrammarPoolMemoryManager; 01284 01285 DTDElementDecl* rootDecl = new (rootDeclMgr) DTDElementDecl 01286 ( 01287 bbRootName.getRawBuffer() 01288 , fEmptyNamespaceId 01289 , DTDElementDecl::Any 01290 , rootDeclMgr 01291 ); 01292 01293 Janitor<DTDElementDecl> rootDeclJanitor(rootDecl); 01294 rootDecl->setCreateReason(DTDElementDecl::AsRootElem); 01295 rootDecl->setExternalElemDeclaration(true); 01296 if(!fUseCachedGrammar) 01297 { 01298 fGrammar->putElemDecl(rootDecl); 01299 rootDeclJanitor.release(); 01300 } else 01301 { 01302 // attach this to the undeclared element pool so that it gets deleted 01303 XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer()); 01304 if (elemDecl) 01305 { 01306 rootDecl->setId(elemDecl->getId()); 01307 } 01308 else 01309 { 01310 rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl)); 01311 rootDeclJanitor.release(); 01312 } 01313 } 01314 01315 // Skip any spaces after the name 01316 fReaderMgr.skipPastSpaces(); 01317 01318 // And now if we are looking at a >, then we are done. It is not 01319 // required to have an internal or external subset, though why you 01320 // would not escapes me. 01321 if (fReaderMgr.skippedChar(chCloseAngle)) { 01322 01323 // If we have a doc type handler and advanced callbacks are enabled, 01324 // call the doctype event. 
01325 if (fDocTypeHandler) 01326 fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false); 01327 return; 01328 } 01329 01330 // either internal/external subset 01331 if (fValScheme == Val_Auto && !fValidate) 01332 fValidate = true; 01333 01334 bool hasIntSubset = false; 01335 bool hasExtSubset = false; 01336 XMLCh* sysId = 0; 01337 XMLCh* pubId = 0; 01338 01339 DTDScanner dtdScanner 01340 ( 01341 (DTDGrammar*) fGrammar 01342 , fDocTypeHandler 01343 , fGrammarPoolMemoryManager 01344 , fMemoryManager 01345 ); 01346 dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr); 01347 01348 // If the next character is '[' then we have no external subset cause 01349 // there is no system id, just the opening character of the internal 01350 // subset. Else, has to be an id. 01351 // 01352 // Just look at the next char, don't eat it. 01353 if (fReaderMgr.peekNextChar() == chOpenSquare) 01354 { 01355 hasIntSubset = true; 01356 } 01357 else 01358 { 01359 // Indicate we have an external subset 01360 hasExtSubset = true; 01361 fHasNoDTD = false; 01362 01363 // Get buffers for the ids 01364 XMLBufBid bbPubId(&fBufMgr); 01365 XMLBufBid bbSysId(&fBufMgr); 01366 01367 // Get the external subset id 01368 if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External)) 01369 { 01370 fReaderMgr.skipPastChar(chCloseAngle); 01371 return; 01372 } 01373 01374 // Get copies of the ids we got 01375 pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager); 01376 sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager); 01377 01378 // Skip spaces and check again for the opening of an internal subset 01379 fReaderMgr.skipPastSpaces(); 01380 01381 // Just look at the next char, don't eat it. 
01382 if (fReaderMgr.peekNextChar() == chOpenSquare) { 01383 hasIntSubset = true; 01384 } 01385 } 01386 01387 // Insure that the ids get cleaned up, if they got allocated 01388 ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager); 01389 ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager); 01390 01391 // If we have a doc type handler and advanced callbacks are enabled, 01392 // call the doctype event. 01393 if (fDocTypeHandler) 01394 fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset); 01395 01396 // Ok, if we had an internal subset, we are just past the [ character 01397 // and need to parse that first. 01398 if (hasIntSubset) 01399 { 01400 // Eat the opening square bracket 01401 fReaderMgr.getNextChar(); 01402 01403 checkInternalDTD(hasExtSubset, sysId, pubId); 01404 01405 // And try to scan the internal subset. If we fail, try to recover 01406 // by skipping forward tot he close angle and returning. 01407 if (!dtdScanner.scanInternalSubset()) 01408 { 01409 fReaderMgr.skipPastChar(chCloseAngle); 01410 return; 01411 } 01412 01413 // Do a sanity check that some expanded PE did not propogate out of 01414 // the doctype. This could happen if it was terminated early by bad 01415 // syntax. 01416 if (fReaderMgr.getReaderDepth() > 1) 01417 { 01418 emitError(XMLErrs::PEPropogated); 01419 01420 // Ask the reader manager to pop back down to the main level 01421 fReaderMgr.cleanStackBackTo(1); 01422 } 01423 01424 fReaderMgr.skipPastSpaces(); 01425 } 01426 01427 // And that should leave us at the closing > of the DOCTYPE line 01428 if (!fReaderMgr.skippedChar(chCloseAngle)) 01429 { 01430 // Do a special check for the common scenario of an extra ] char at 01431 // the end. This is easy to recover from. 
01432 if (fReaderMgr.skippedChar(chCloseSquare) 01433 && fReaderMgr.skippedChar(chCloseAngle)) 01434 { 01435 emitError(XMLErrs::ExtraCloseSquare); 01436 } 01437 else 01438 { 01439 emitError(XMLErrs::UnterminatedDOCTYPE); 01440 fReaderMgr.skipPastChar(chCloseAngle); 01441 } 01442 } 01443 01444 // If we had an external subset, then we need to deal with that one 01445 // next. If we are reusing the validator, then don't scan it. 01446 if (hasExtSubset) { 01447 01448 InputSource* srcUsed=0; 01449 Janitor<InputSource> janSrc(srcUsed); 01450 // If we had an internal subset and we're using the cached grammar, it 01451 // means that the ignoreCachedDTD is set, so we ignore the cached 01452 // grammar 01453 if (fUseCachedGrammar && !hasIntSubset) 01454 { 01455 srcUsed = resolveSystemId(sysId, pubId); 01456 if (srcUsed) { 01457 janSrc.reset(srcUsed); 01458 Grammar* grammar = fGrammarResolver->getGrammar(srcUsed->getSystemId()); 01459 01460 if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) { 01461 01462 fDTDGrammar = (DTDGrammar*) grammar; 01463 fGrammar = fDTDGrammar; 01464 fValidator->setGrammar(fGrammar); 01465 // If we don't report at least the external subset boundaries, 01466 // an advanced document handler cannot know when the DTD end, 01467 // since we've already sent a doctype decl that indicates there's 01468 // there's an external subset. 
01469 if (fDocTypeHandler) 01470 { 01471 fDocTypeHandler->startExtSubset(); 01472 fDocTypeHandler->endExtSubset(); 01473 } 01474 01475 return; 01476 } 01477 } 01478 } 01479 01480 if (fLoadExternalDTD || fValidate) 01481 { 01482 // And now create a reader to read this entity 01483 XMLReader* reader; 01484 if (srcUsed) { 01485 reader = fReaderMgr.createReader 01486 ( 01487 *srcUsed 01488 , false 01489 , XMLReader::RefFrom_NonLiteral 01490 , XMLReader::Type_General 01491 , XMLReader::Source_External 01492 , fCalculateSrcOfs 01493 , fLowWaterMark 01494 ); 01495 } 01496 else { 01497 reader = fReaderMgr.createReader 01498 ( 01499 sysId 01500 , pubId 01501 , false 01502 , XMLReader::RefFrom_NonLiteral 01503 , XMLReader::Type_General 01504 , XMLReader::Source_External 01505 , srcUsed 01506 , fCalculateSrcOfs 01507 , fLowWaterMark 01508 , fDisableDefaultEntityResolution 01509 ); 01510 janSrc.reset(srcUsed); 01511 } 01512 // If it failed then throw an exception 01513 if (!reader) 01514 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed ? srcUsed->getSystemId() : sysId, fMemoryManager); 01515 01516 if (fToCacheGrammar) { 01517 01518 unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId()); 01519 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId); 01520 01521 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString); 01522 ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr); 01523 fGrammarResolver->putGrammar(fGrammar); 01524 } 01525 01526 // In order to make the processing work consistently, we have to 01527 // make this look like an external entity. So create an entity 01528 // decl and fill it in and push it with the reader, as happens 01529 // with an external entity. Put a janitor on it to insure it gets 01530 // cleaned up. The reader manager does not adopt them. 
01531 const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull }; 01532 DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); 01533 declDTD->setSystemId(sysId); 01534 declDTD->setIsExternal(true); 01535 Janitor<DTDEntityDecl> janDecl(declDTD); 01536 01537 // Mark this one as a throw at end 01538 reader->setThrowAtEnd(true); 01539 01540 // And push it onto the stack, with its pseudo name 01541 fReaderMgr.pushReader(reader, declDTD); 01542 01543 // Tell it its not in an include section 01544 dtdScanner.scanExtSubsetDecl(false, true); 01545 } 01546 } 01547 } 01548 01549 bool IGXMLScanner::scanStartTag(bool& gotData) 01550 { 01551 // Assume we will still have data until proven otherwise. It will only 01552 // ever be false if this is the root and its empty. 01553 gotData = true; 01554 01555 // Get the QName. In this case, we are not doing namespaces, so we just 01556 // use it as is and don't have to break it into parts. 01557 if (!fReaderMgr.getName(fQNameBuf)) 01558 { 01559 emitError(XMLErrs::ExpectedElementName); 01560 fReaderMgr.skipToChar(chOpenAngle); 01561 return false; 01562 } 01563 01564 // Assume it won't be an empty tag 01565 bool isEmpty = false; 01566 01567 // Lets try to look up the element in the validator's element decl pool 01568 // We can pass bogus values for the URI id and the base name. We know that 01569 // this can only be called if we are doing a DTD style validator and that 01570 // he will only look at the QName. 01571 // 01572 // We tell him to fault in a decl if he does not find one. 
01573 // Actually, we *don't* tell him to fault in a decl if he does not find one- NG 01574 bool wasAdded = false; 01575 const XMLCh *rawQName = fQNameBuf.getRawBuffer(); 01576 XMLElementDecl* elemDecl = fGrammar->getElemDecl 01577 ( 01578 fEmptyNamespaceId 01579 , 0 01580 , rawQName 01581 , Grammar::TOP_LEVEL_SCOPE 01582 ); 01583 // look for it in the undeclared pool: 01584 if(!elemDecl) 01585 { 01586 elemDecl = fDTDElemNonDeclPool->getByKey(rawQName); 01587 } 01588 if(!elemDecl) 01589 { 01590 // we're assuming this must be a DTD element. DTD's can be 01591 // used with or without namespaces, but schemas cannot be used without 01592 // namespaces. 01593 wasAdded = true; 01594 elemDecl = new (fMemoryManager) DTDElementDecl 01595 ( 01596 rawQName 01597 , fEmptyNamespaceId 01598 , DTDElementDecl::Any 01599 , fMemoryManager 01600 ); 01601 elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl)); 01602 } 01603 01604 // We do something different here according to whether we found the 01605 // element or not. 
01606 if (wasAdded) 01607 { 01608 // If validating then emit an error 01609 if (fValidate) 01610 { 01611 // This is to tell the reuse Validator that this element was 01612 // faulted-in, was not an element in the validator pool originally 01613 elemDecl->setCreateReason(XMLElementDecl::JustFaultIn); 01614 01615 fValidator->emitError 01616 ( 01617 XMLValid::ElementNotDefined 01618 , elemDecl->getFullName() 01619 ); 01620 } 01621 } 01622 else 01623 { 01624 // If its not marked declared and validating, then emit an error 01625 if (fValidate && !elemDecl->isDeclared()) 01626 { 01627 fValidator->emitError 01628 ( 01629 XMLValid::ElementNotDefined 01630 , elemDecl->getFullName() 01631 ); 01632 } 01633 } 01634 01635 // See if its the root element 01636 const bool isRoot = fElemStack.isEmpty(); 01637 01638 // Expand the element stack and add the new element 01639 fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum()); 01640 fElemStack.setValidationFlag(fValidate); 01641 01642 // Validate the element 01643 if (fValidate) 01644 fValidator->validateElement(elemDecl); 01645 01646 // If this is the first element and we are validating, check the root 01647 // element. 01648 if (isRoot) 01649 { 01650 fRootGrammar = fGrammar; 01651 01652 if (fValidate) 01653 { 01654 // If a DocType exists, then check if it matches the root name there. 01655 if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName)) 01656 fValidator->emitError(XMLValid::RootElemNotLikeDocType); 01657 } 01658 } 01659 else 01660 { 01661 // If the element stack is not empty, then add this element as a 01662 // child of the previous top element. If its empty, this is the root 01663 // elem and is not the child of anything. 01664 fElemStack.addChild(elemDecl->getElementName(), true); 01665 } 01666 01667 // Skip any whitespace after the name 01668 fReaderMgr.skipPastSpaces(); 01669 01670 // We loop until we either see a /> or >, handling attribute/value 01671 // pairs until we get there. 
01672 XMLSize_t attCount = 0; 01673 XMLSize_t curAttListSize = fAttrList->size(); 01674 wasAdded = false; 01675 01676 fElemCount++; 01677 01678 while (true) 01679 { 01680 // And get the next non-space character 01681 XMLCh nextCh = fReaderMgr.peekNextChar(); 01682 01683 // If the next character is not a slash or closed angle bracket, 01684 // then it must be whitespace, since whitespace is required 01685 // between the end of the last attribute and the name of the next 01686 // one. 01687 if (attCount) 01688 { 01689 if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle)) 01690 { 01691 bool bFoundSpace; 01692 fReaderMgr.skipPastSpaces(bFoundSpace); 01693 if (!bFoundSpace) 01694 { 01695 // Emit the error but keep on going 01696 emitError(XMLErrs::ExpectedWhitespace); 01697 } 01698 // Ok, peek another char 01699 nextCh = fReaderMgr.peekNextChar(); 01700 } 01701 } 01702 01703 // Ok, here we first check for any of the special case characters. 01704 // If its not one, then we do the normal case processing, which 01705 // assumes that we've hit an attribute value, Otherwise, we do all 01706 // the special case checks. 01707 if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh)) 01708 { 01709 // Assume its going to be an attribute, so get a name from 01710 // the input. 01711 if (!fReaderMgr.getName(fAttNameBuf)) 01712 { 01713 emitError(XMLErrs::ExpectedAttrName); 01714 fReaderMgr.skipPastChar(chCloseAngle); 01715 return false; 01716 } 01717 01718 // And next must be an equal sign 01719 if (!scanEq()) 01720 { 01721 static const XMLCh tmpList[] = 01722 { 01723 chSingleQuote, chDoubleQuote, chCloseAngle 01724 , chOpenAngle, chForwardSlash, chNull 01725 }; 01726 01727 emitError(XMLErrs::ExpectedEqSign); 01728 01729 // Try to sync back up by skipping forward until we either 01730 // hit something meaningful. 
01731 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); 01732 01733 if ((chFound == chCloseAngle) || (chFound == chForwardSlash)) 01734 { 01735 // Jump back to top for normal processing of these 01736 continue; 01737 } 01738 else if ((chFound == chSingleQuote) 01739 || (chFound == chDoubleQuote) 01740 || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) 01741 { 01742 // Just fall through assuming that the value is to follow 01743 } 01744 else if (chFound == chOpenAngle) 01745 { 01746 // Assume a malformed tag and that new one is starting 01747 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName()); 01748 return false; 01749 } 01750 else 01751 { 01752 // Something went really wrong 01753 return false; 01754 } 01755 } 01756 // See if this attribute is declared for this element. If we are 01757 // not validating of course it will not be at first, but we will 01758 // fault it into the pool (to avoid lots of redundant errors.) 01759 XMLCh * namePtr = fAttNameBuf.getRawBuffer(); 01760 XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr); 01761 01762 // Add this attribute to the attribute list that we use to 01763 // pass them to the handler. We reuse its existing elements 01764 // but expand it as required. 01765 // Note that we want to this first since this will 01766 // make a copy of the namePtr; we can then make use of 01767 // that copy in the hashtable lookup that checks 01768 // for duplicates. This will mean we may have to update 01769 // the type of the XMLAttr later. 
01770 XMLAttr* curAtt; 01771 if (attCount >= curAttListSize) 01772 { 01773 curAtt = new (fMemoryManager) XMLAttr 01774 ( 01775 0 01776 , namePtr 01777 , XMLUni::fgZeroLenString 01778 , XMLUni::fgZeroLenString 01779 , (attDef)?attDef->getType():XMLAttDef::CData 01780 , true 01781 , fMemoryManager 01782 ); 01783 fAttrList->addElement(curAtt); 01784 } 01785 else 01786 { 01787 curAtt = fAttrList->elementAt(attCount); 01788 curAtt->set 01789 ( 01790 0 01791 , namePtr 01792 , XMLUni::fgZeroLenString 01793 , XMLUni::fgZeroLenString 01794 , (attDef)?attDef->getType():XMLAttDef::CData 01795 ); 01796 curAtt->setSpecified(true); 01797 } 01798 // reset namePtr so it refers to newly-allocated memory 01799 namePtr = (XMLCh *)curAtt->getName(); 01800 01801 if (!attDef) 01802 { 01803 // If there is a validation handler, then we are validating 01804 // so emit an error. 01805 if (fValidate) 01806 { 01807 fValidator->emitError 01808 ( 01809 XMLValid::AttNotDefinedForElement 01810 , fAttNameBuf.getRawBuffer() 01811 , elemDecl->getFullName() 01812 ); 01813 } 01814 if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0)) 01815 { 01816 emitError 01817 ( 01818 XMLErrs::AttrAlreadyUsedInSTag 01819 , namePtr 01820 , elemDecl->getFullName() 01821 ); 01822 } 01823 } 01824 else 01825 { 01826 // prepare for duplicate detection 01827 unsigned int *curCountPtr = fAttDefRegistry->get(attDef); 01828 if(!curCountPtr) 01829 { 01830 curCountPtr = getNewUIntPtr(); 01831 *curCountPtr = fElemCount; 01832 fAttDefRegistry->put(attDef, curCountPtr); 01833 } 01834 else if(*curCountPtr < fElemCount) 01835 *curCountPtr = fElemCount; 01836 else 01837 { 01838 emitError 01839 ( 01840 XMLErrs::AttrAlreadyUsedInSTag 01841 , attDef->getFullName() 01842 , elemDecl->getFullName() 01843 ); 01844 } 01845 } 01846 01847 // Skip any whitespace before the value and then scan the att 01848 // value. This will come back normalized with entity refs and 01849 // char refs expanded. 
01850 fReaderMgr.skipPastSpaces(); 01851 if (!scanAttValue(attDef, namePtr, fAttValueBuf)) 01852 { 01853 static const XMLCh tmpList[] = 01854 { 01855 chCloseAngle, chOpenAngle, chForwardSlash, chNull 01856 }; 01857 01858 emitError(XMLErrs::ExpectedAttrValue); 01859 01860 // It failed, so lets try to get synced back up. We skip 01861 // forward until we find some whitespace or one of the 01862 // chars in our list. 01863 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList); 01864 01865 if ((chFound == chCloseAngle) 01866 || (chFound == chForwardSlash) 01867 || fReaderMgr.getCurrentReader()->isWhitespace(chFound)) 01868 { 01869 // Just fall through and process this attribute, though 01870 // the value will be "". 01871 } 01872 else if (chFound == chOpenAngle) 01873 { 01874 // Assume a malformed tag and that new one is starting 01875 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName()); 01876 return false; 01877 } 01878 else 01879 { 01880 // Something went really wrong 01881 return false; 01882 } 01883 } 01884 // must set the newly-minted value on the XMLAttr: 01885 curAtt->setValue(fAttValueBuf.getRawBuffer()); 01886 01887 // Now that its all stretched out, lets look at its type and 01888 // determine if it has a valid value. It will output any needed 01889 // errors, but we just keep going. We only need to do this if 01890 // we are validating. 01891 if (attDef) 01892 { 01893 // Let the validator pass judgement on the attribute value 01894 if (fValidate) 01895 { 01896 fValidator->validateAttrValue 01897 ( 01898 attDef 01899 , fAttValueBuf.getRawBuffer() 01900 , false 01901 , elemDecl 01902 ); 01903 } 01904 } 01905 01906 attCount++; 01907 // And jump back to the top of the loop 01908 continue; 01909 } 01910 01911 // It was some special case character so do all of the checks and 01912 // deal with it. 
01913 if (!nextCh) 01914 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); 01915 01916 if (nextCh == chForwardSlash) 01917 { 01918 fReaderMgr.getNextChar(); 01919 isEmpty = true; 01920 if (!fReaderMgr.skippedChar(chCloseAngle)) 01921 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName()); 01922 break; 01923 } 01924 else if (nextCh == chCloseAngle) 01925 { 01926 fReaderMgr.getNextChar(); 01927 break; 01928 } 01929 else if (nextCh == chOpenAngle) 01930 { 01931 // Check for this one specially, since its going to be common 01932 // and it is kind of auto-recovering since we've already hit the 01933 // next open bracket, which is what we would have seeked to (and 01934 // skipped this whole tag.) 01935 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName()); 01936 break; 01937 } 01938 else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote)) 01939 { 01940 // Check for this one specially, which is probably a missing 01941 // attribute name, e.g. ="value". Just issue expected name 01942 // error and eat the quoted string, then jump back to the 01943 // top again. 01944 emitError(XMLErrs::ExpectedAttrName); 01945 fReaderMgr.getNextChar(); 01946 fReaderMgr.skipQuotedString(nextCh); 01947 fReaderMgr.skipPastSpaces(); 01948 continue; 01949 } 01950 } 01951 01952 if(attCount) 01953 { 01954 // clean up after ourselves: 01955 // clear the map used to detect duplicate attributes 01956 fUndeclaredAttrRegistry->removeAll(); 01957 } 01958 01959 // Ok, so lets get an enumerator for the attributes of this element 01960 // and run through them for well formedness and validity checks. But 01961 // make sure that we had any attributes before we do it, since the list 01962 // would have have gotten faulted in anyway. 01963 if (elemDecl->hasAttDefs()) 01964 { 01965 // N.B.: this assumes DTD validation. 
01966 XMLAttDefList& attDefList = elemDecl->getAttDefList(); 01967 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++) 01968 { 01969 // Get the current att def, for convenience and its def type 01970 const XMLAttDef& curDef = attDefList.getAttDef(i); 01971 const XMLAttDef::DefAttTypes defType = curDef.getDefaultType(); 01972 01973 unsigned int *attCountPtr = fAttDefRegistry->get(&curDef); 01974 if (!attCountPtr || *attCountPtr < fElemCount) 01975 { // did not occur 01976 if (fValidate) 01977 { 01978 // If we are validating and its required, then an error 01979 if (defType == XMLAttDef::Required) 01980 { 01981 fValidator->emitError 01982 ( 01983 XMLValid::RequiredAttrNotProvided 01984 , curDef.getFullName() 01985 ); 01986 } 01987 else if ((defType == XMLAttDef::Default) || 01988 (defType == XMLAttDef::Fixed) ) 01989 { 01990 if (fStandalone && curDef.isExternal()) 01991 { 01992 // XML 1.0 Section 2.9 01993 // Document is standalone, so attributes must not be defaulted. 01994 fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName()); 01995 01996 } 01997 } 01998 } 01999 02000 // Fault in the value if needed, and bump the att count 02001 if ((defType == XMLAttDef::Default) 02002 || (defType == XMLAttDef::Fixed)) 02003 { 02004 // Let the validator pass judgement on the attribute value 02005 if (fValidate) 02006 { 02007 fValidator->validateAttrValue 02008 ( 02009 &curDef 02010 , curDef.getValue() 02011 , false 02012 , elemDecl 02013 ); 02014 } 02015 02016 XMLAttr* curAtt; 02017 if (attCount >= curAttListSize) 02018 { 02019 curAtt = new (fMemoryManager) XMLAttr 02020 ( 02021 0 02022 , curDef.getFullName() 02023 , XMLUni::fgZeroLenString 02024 , curDef.getValue() 02025 , curDef.getType() 02026 , false 02027 , fMemoryManager 02028 ); 02029 fAttrList->addElement(curAtt); 02030 curAttListSize++; 02031 } 02032 else 02033 { 02034 curAtt = fAttrList->elementAt(attCount); 02035 curAtt->set 02036 ( 02037 0 02038 , curDef.getFullName() 
02039 , XMLUni::fgZeroLenString 02040 , curDef.getValue() 02041 , curDef.getType() 02042 ); 02043 curAtt->setSpecified(false); 02044 } 02045 attCount++; 02046 } 02047 } 02048 } 02049 } 02050 02051 // If empty, validate content right now if we are validating and then 02052 // pop the element stack top. Else, we have to update the current stack 02053 // top's namespace mapping elements. 02054 if (isEmpty) 02055 { 02056 // If validating, then insure that its legal to have no content 02057 if (fValidate) 02058 { 02059 XMLSize_t failure; 02060 bool res = fValidator->checkContent(elemDecl, 0, 0, &failure); 02061 if (!res) 02062 { 02063 fValidator->emitError 02064 ( 02065 XMLValid::ElementNotValidForContent 02066 , elemDecl->getFullName() 02067 , elemDecl->getFormattedContentModel() 02068 ); 02069 } 02070 } 02071 02072 // Pop the element stack back off since it'll never be used now 02073 fElemStack.popTop(); 02074 02075 // If the elem stack is empty, then it was an empty root 02076 if (isRoot) 02077 gotData = false; 02078 else { 02079 // Restore the validation flag 02080 fValidate = fElemStack.getValidationFlag(); 02081 } 02082 } 02083 02084 // If we have a document handler, then tell it about this start tag. We 02085 // don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send 02086 // any prefix since its just one big name if we are not doing namespaces. 02087 if (fDocHandler) 02088 { 02089 fDocHandler->startElement 02090 ( 02091 *elemDecl 02092 , fEmptyNamespaceId 02093 , 0 02094 , *fAttrList 02095 , attCount 02096 , isEmpty 02097 , isRoot 02098 ); 02099 } 02100 02101 return true; 02102 } 02103 02104 02105 // This method is called to scan a start tag when we are processing 02106 // namespaces. There are two different versions of this method, one for 02107 // namespace aware processing and one for non-namespace aware processing. 02108 // 02109 // This method is called after we've scanned the < of a start tag. 
So we 02110 // have to get the element name, then scan the attributes, after which 02111 // we are either going to see >, />, or attributes followed by one of those 02112 // sequences. 02113 bool IGXMLScanner::scanStartTagNS(bool& gotData) 02114 { 02115 // Assume we will still have data until proven otherwise. It will only 02116 // ever be false if this is the root and its empty. 02117 gotData = true; 02118 02119 // Reset element content buffer 02120 fContent.reset(); 02121 02122 // The current position is after the open bracket, so we need to read in 02123 // in the element name. 02124 int prefixColonPos; 02125 if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos)) 02126 { 02127 if (fQNameBuf.isEmpty()) 02128 emitError(XMLErrs::ExpectedElementName); 02129 else 02130 emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer()); 02131 fReaderMgr.skipToChar(chOpenAngle); 02132 return false; 02133 } 02134 02135 // See if its the root element 02136 const bool isRoot = fElemStack.isEmpty(); 02137 02138 // Skip any whitespace after the name 02139 fReaderMgr.skipPastSpaces(); 02140 02141 // First we have to do the rawest attribute scan. We don't do any 02142 // normalization of them at all, since we don't know yet what type they 02143 // might be (since we need the element decl in order to do that.) 
02144 bool isEmpty; 02145 XMLSize_t attCount = rawAttrScan 02146 ( 02147 fQNameBuf.getRawBuffer() 02148 , *fRawAttrList 02149 , isEmpty 02150 ); 02151 02152 // save the contentleafname and currentscope before addlevel, for later use 02153 ContentLeafNameTypeVector* cv = 0; 02154 XMLContentModel* cm = 0; 02155 unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE; 02156 bool laxThisOne = false; 02157 02158 if (!isRoot && fGrammarType == Grammar::SchemaGrammarType) 02159 { 02160 // schema validator will have correct type if validating 02161 SchemaElementDecl* tempElement = (SchemaElementDecl*) 02162 fElemStack.topElement()->fThisElement; 02163 SchemaElementDecl::ModelTypes modelType = tempElement->getModelType(); 02164 ComplexTypeInfo *currType = 0; 02165 02166 if (fValidate) 02167 { 02168 currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 02169 if (currType) 02170 modelType = (SchemaElementDecl::ModelTypes)currType->getContentType(); 02171 else // something must have gone wrong 02172 modelType = SchemaElementDecl::Any; 02173 } 02174 else 02175 { 02176 currType = tempElement->getComplexTypeInfo(); 02177 } 02178 02179 if ((modelType == SchemaElementDecl::Mixed_Simple) 02180 || (modelType == SchemaElementDecl::Mixed_Complex) 02181 || (modelType == SchemaElementDecl::Children)) 02182 { 02183 cm = currType->getContentModel(); 02184 cv = cm->getContentLeafNameTypeVector(); 02185 currentScope = fElemStack.getCurrentScope(); 02186 } 02187 else if (modelType == SchemaElementDecl::Any) { 02188 laxThisOne = true; 02189 } 02190 } 02191 02192 // Now, since we might have to update the namespace map for this element, 02193 // but we don't have the element decl yet, we just tell the element stack 02194 // to expand up to get ready. 
02195 XMLSize_t elemDepth = fElemStack.addLevel(); 02196 fElemStack.setValidationFlag(fValidate); 02197 fElemStack.setPrefixColonPos(prefixColonPos); 02198 02199 // Check if there is any external schema location specified, and if we are at root, 02200 // go through them first before scanning those specified in the instance document 02201 if (isRoot && fDoSchema 02202 && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) { 02203 02204 if (fExternalSchemaLocation) 02205 parseSchemaLocation(fExternalSchemaLocation, true); 02206 if (fExternalNoNamespaceSchemaLocation) 02207 resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true); 02208 } 02209 02210 // Make an initial pass through the list and find any xmlns attributes or 02211 // schema attributes. 02212 if (attCount) { 02213 scanRawAttrListforNameSpaces(attCount); 02214 } 02215 02216 // Also find any default or fixed xmlns attributes in DTD defined for 02217 // this element. 02218 XMLElementDecl* elemDecl = 0; 02219 const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer(); 02220 02221 if (fGrammarType == Grammar::DTDGrammarType) { 02222 02223 if (!fSkipDTDValidation) { 02224 elemDecl = fGrammar->getElemDecl( 02225 fEmptyNamespaceId, 0, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE 02226 ); 02227 02228 if (elemDecl) { 02229 if (elemDecl->hasAttDefs()) { 02230 XMLAttDefList& attDefList = elemDecl->getAttDefList(); 02231 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++) 02232 { 02233 // Get the current att def, for convenience and its def type 02234 const XMLAttDef& curDef = attDefList.getAttDef(i); 02235 const XMLAttDef::DefAttTypes defType = curDef.getDefaultType(); 02236 02237 // update the NSMap if there are any default/fixed xmlns attributes 02238 if ((defType == XMLAttDef::Default) 02239 || (defType == XMLAttDef::Fixed)) 02240 { 02241 const XMLCh* rawPtr = curDef.getFullName(); 02242 if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6) 02243 || 
XMLString::equals(rawPtr, XMLUni::fgXMLNSString)) 02244 updateNSMap(rawPtr, curDef.getValue()); 02245 } 02246 } 02247 } 02248 } 02249 } 02250 02251 if (!elemDecl) { 02252 elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf); 02253 } 02254 } 02255 02256 // Resolve the qualified name to a URI and name so that we can look up 02257 // the element decl for this element. We have now update the prefix to 02258 // namespace map so we should get the correct element now. 02259 unsigned int uriId = resolveQNameWithColon( 02260 qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos 02261 ); 02262 02263 //if schema, check if we should lax or skip the validation of this element 02264 bool parentValidation = fValidate; 02265 if (cv) { 02266 QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager); 02267 // elementDepth will be > 0, as cv is only constructed if element is not 02268 // root. 02269 laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1); 02270 } 02271 02272 // Look up the element now in the grammar. This will get us back a 02273 // generic element decl object. We tell him to fault one in if he does 02274 // not find it. 
02275 bool wasAdded = false; 02276 const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1]; 02277 02278 if (fDoSchema) { 02279 02280 if (fGrammarType == Grammar::DTDGrammarType) { 02281 if (!switchGrammar(getURIText(uriId))) { 02282 fValidator->emitError( 02283 XMLValid::GrammarNotFound, getURIText(uriId) 02284 ); 02285 } 02286 } 02287 02288 if (fGrammarType == Grammar::SchemaGrammarType) { 02289 elemDecl = fGrammar->getElemDecl( 02290 uriId, nameRawBuf, qnameRawBuf, currentScope 02291 ); 02292 02293 // if not found, then it may be a reference, try TOP_LEVEL_SCOPE 02294 if (!elemDecl) { 02295 bool checkTopLevel = (currentScope != Grammar::TOP_LEVEL_SCOPE); 02296 const XMLCh* original_uriStr = fGrammar->getTargetNamespace(); 02297 unsigned int orgGrammarUri = fURIStringPool->getId(original_uriStr); 02298 02299 if (orgGrammarUri != uriId) { 02300 if (switchGrammar(getURIText(uriId))) { 02301 checkTopLevel = true; 02302 } 02303 else { 02304 // the laxElementValidation routine (called above) will 02305 // set fValidate to false for a "skipped" element 02306 if (!laxThisOne && fValidate) { 02307 fValidator->emitError( 02308 XMLValid::GrammarNotFound, getURIText(uriId) 02309 ); 02310 } 02311 checkTopLevel = false; 02312 } 02313 } 02314 02315 if (checkTopLevel) { 02316 elemDecl = fGrammar->getElemDecl( 02317 uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE 02318 ); 02319 } 02320 02321 if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) { 02322 02323 if (orgGrammarUri == uriId) { 02324 // still not found in specified uri 02325 // try emptyNamespace see if element should be 02326 // un-qualified. 
02327 // Use a temp variable until we decide this is the case 02328 if (uriId != fEmptyNamespaceId) { 02329 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl( 02330 fEmptyNamespaceId, nameRawBuf, qnameRawBuf, currentScope 02331 ); 02332 02333 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) { 02334 fValidator->emitError( 02335 XMLValid::ElementNotUnQualified, qnameRawBuf 02336 ); 02337 elemDecl = tempElemDecl; 02338 } 02339 } 02340 } 02341 // still Not found in specified uri 02342 // go to original Grammar again to see if element needs 02343 // to be fully qualified. 02344 // Use a temp variable until we decide this is the case 02345 else if (uriId == fEmptyNamespaceId) { 02346 02347 if (switchGrammar(original_uriStr)) { 02348 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl( 02349 orgGrammarUri, nameRawBuf, qnameRawBuf, currentScope 02350 ); 02351 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) { 02352 fValidator->emitError( 02353 XMLValid::ElementNotQualified, qnameRawBuf 02354 ); 02355 elemDecl = tempElemDecl; 02356 } 02357 } 02358 else if (!laxThisOne && fValidate) { 02359 fValidator->emitError( 02360 XMLValid::GrammarNotFound,original_uriStr 02361 ); 02362 } 02363 } 02364 } 02365 02366 if (!elemDecl) { 02367 // still not found 02368 // switch back to original grammar first if necessary 02369 if (orgGrammarUri != uriId) { 02370 switchGrammar(original_uriStr); 02371 } 02372 02373 // look in the list of undeclared elements, as would have been 02374 // done before we made grammars stateless: 02375 elemDecl = fSchemaElemNonDeclPool->getByKey( 02376 nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE 02377 ); 02378 } 02379 } 02380 } 02381 } 02382 02383 if (!elemDecl) { 02384 02385 if (fGrammarType == Grammar::DTDGrammarType) { 02386 elemDecl = new (fMemoryManager) DTDElementDecl( 02387 qnameRawBuf, uriId, DTDElementDecl::Any, fMemoryManager 02388 ); 02389 
elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl)); 02390 } 02391 else if (fGrammarType == Grammar::SchemaGrammarType) { 02392 elemDecl = new (fMemoryManager) SchemaElementDecl( 02393 fPrefixBuf.getRawBuffer(), nameRawBuf, uriId 02394 , SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE 02395 , fMemoryManager 02396 ); 02397 elemDecl->setId( 02398 fSchemaElemNonDeclPool->put((void*)elemDecl->getBaseName() 02399 , uriId, (int)Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl) 02400 ); 02401 } 02402 wasAdded = true; 02403 } 02404 02405 // this info needed for DOMTypeInfo 02406 fPSVIElemContext.fErrorOccurred = false; 02407 02408 // We do something different here according to whether we found the 02409 // element or not. 02410 bool bXsiTypeSet= (fValidator && fGrammarType == Grammar::SchemaGrammarType)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false; 02411 if (wasAdded) 02412 { 02413 if (laxThisOne && !bXsiTypeSet) { 02414 fValidate = false; 02415 fElemStack.setValidationFlag(fValidate); 02416 } 02417 else if (fValidate) 02418 { 02419 // If validating then emit an error 02420 02421 // This is to tell the reuse Validator that this element was 02422 // faulted-in, was not an element in the grammar pool originally 02423 elemDecl->setCreateReason(XMLElementDecl::JustFaultIn); 02424 02425 // xsi:type was specified, don't complain about missing definition 02426 if(!bXsiTypeSet) 02427 { 02428 fValidator->emitError 02429 ( 02430 XMLValid::ElementNotDefined 02431 , elemDecl->getFullName() 02432 ); 02433 02434 if(fGrammarType == Grammar::SchemaGrammarType) 02435 { 02436 fPSVIElemContext.fErrorOccurred = true; 02437 } 02438 } 02439 } 02440 } 02441 else 02442 { 02443 // If its not marked declared and validating, then emit an error 02444 if (!elemDecl->isDeclared()) { 02445 if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) { 02446 if(!bXsiTypeSet && fGrammarType == Grammar::SchemaGrammarType) { 02447 fPSVIElemContext.fErrorOccurred = true; 
02448 } 02449 } 02450 02451 if (laxThisOne) { 02452 fValidate = false; 02453 fElemStack.setValidationFlag(fValidate); 02454 } 02455 else if (fValidate && !bXsiTypeSet) 02456 { 02457 fValidator->emitError 02458 ( 02459 XMLValid::ElementNotDefined 02460 , elemDecl->getFullName() 02461 ); 02462 } 02463 } 02464 } 02465 02466 // Now we can update the element stack to set the current element 02467 // decl. We expanded the stack above, but couldn't store the element 02468 // decl because we didn't know it yet. 02469 fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum()); 02470 fElemStack.setCurrentURI(uriId); 02471 02472 if (isRoot) 02473 { 02474 fRootGrammar = fGrammar; 02475 if (fGrammarType == Grammar::SchemaGrammarType && !fRootElemName) 02476 fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager); 02477 } 02478 02479 if (fGrammarType == Grammar::SchemaGrammarType && fPSVIHandler) 02480 { 02481 02482 fPSVIElemContext.fElemDepth++; 02483 if (elemDecl->isDeclared()) 02484 { 02485 fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth; 02486 } 02487 else 02488 { 02489 fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth; 02490 02491 /****** 02492 * While we report an error for historical reasons, this should 02493 * actually result in lax assessment - NG. 
02494 if (isRoot && fValidate) 02495 fPSVIElemContext.fErrorOccurred = true; 02496 *****/ 02497 } 02498 } 02499 02500 // Validate the element 02501 if (fValidate) 02502 { 02503 fValidator->validateElement(elemDecl); 02504 if (fValidator->handlesSchema()) 02505 { 02506 if (((SchemaValidator*) fValidator)->getErrorOccurred()) 02507 fPSVIElemContext.fErrorOccurred = true; 02508 } 02509 } 02510 02511 if (fGrammarType == Grammar::SchemaGrammarType) { 02512 02513 // squirrel away the element's QName, so that we can do an efficient 02514 // end-tag match 02515 fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer()); 02516 02517 ComplexTypeInfo* typeinfo = (fValidate) 02518 ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo() 02519 : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo(); 02520 02521 if (typeinfo) { 02522 currentScope = typeinfo->getScopeDefined(); 02523 02524 // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type) 02525 XMLCh* typeName = typeinfo->getTypeName(); 02526 const int comma = XMLString::indexOf(typeName, chComma); 02527 if (comma > 0) { 02528 XMLBuffer prefixBuf(comma+1, fMemoryManager); 02529 prefixBuf.append(typeName, comma); 02530 const XMLCh* uriStr = prefixBuf.getRawBuffer(); 02531 02532 bool errorCondition = !switchGrammar(uriStr) && fValidate; 02533 if (errorCondition && !laxThisOne) 02534 { 02535 fValidator->emitError 02536 ( 02537 XMLValid::GrammarNotFound 02538 , prefixBuf.getRawBuffer() 02539 ); 02540 } 02541 } 02542 else if (comma == 0) { 02543 bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate; 02544 if (errorCondition && !laxThisOne) 02545 { 02546 fValidator->emitError 02547 ( 02548 XMLValid::GrammarNotFound 02549 , XMLUni::fgZeroLenString 02550 ); 02551 } 02552 } 02553 } 02554 fElemStack.setCurrentScope(currentScope); 02555 02556 // Set element next state 02557 if (elemDepth >= fElemStateSize) { 02558 resizeElemState(); 02559 } 02560 02561 fElemState[elemDepth] = 0; 
02562 fElemLoopState[elemDepth] = 0; 02563 } 02564 02565 fElemStack.setCurrentGrammar(fGrammar); 02566 02567 // If this is the first element and we are validating, check the root 02568 // element. 02569 if (isRoot) 02570 { 02571 if (fValidate) 02572 { 02573 // If a DocType exists, then check if it matches the root name there. 02574 if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName)) 02575 fValidator->emitError(XMLValid::RootElemNotLikeDocType); 02576 } 02577 } 02578 else if (parentValidation) 02579 { 02580 // If the element stack is not empty, then add this element as a 02581 // child of the previous top element. If its empty, this is the root 02582 // elem and is not the child of anything. 02583 fElemStack.addChild(elemDecl->getElementName(), true); 02584 } 02585 02586 // PSVI handling: even if it turns out there are 02587 // no attributes, we need to reset this list... 02588 if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType ) 02589 fPSVIAttrList->reset(); 02590 02591 // Now lets get the fAttrList filled in. This involves faulting in any 02592 // defaulted and fixed attributes and normalizing the values of any that 02593 // we got explicitly. 02594 // 02595 // We update the attCount value with the total number of attributes, but 02596 // it goes in with the number of values we got during the raw scan of 02597 // explictly provided attrs above. 
02598 attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList); 02599 if(attCount) 02600 { 02601 // clean up after ourselves: 02602 // clear the map used to detect duplicate attributes 02603 fUndeclaredAttrRegistry->removeAll(); 02604 } 02605 02606 // activate identity constraints 02607 if (fGrammar && 02608 fGrammarType == Grammar::SchemaGrammarType && 02609 toCheckIdentityConstraint()) 02610 { 02611 fICHandler->activateIdentityConstraint 02612 ( 02613 (SchemaElementDecl*) elemDecl 02614 , (int) elemDepth 02615 , uriId 02616 , fPrefixBuf.getRawBuffer() 02617 , *fAttrList 02618 , attCount 02619 , fValidationContext 02620 ); 02621 } 02622 02623 // Since the element may have default values, call start tag now regardless if it is empty or not 02624 // If we have a document handler, then tell it about this start tag 02625 if (fDocHandler) 02626 { 02627 fDocHandler->startElement 02628 ( 02629 *elemDecl 02630 , uriId 02631 , fPrefixBuf.getRawBuffer() 02632 , *fAttrList 02633 , attCount 02634 , false 02635 , isRoot 02636 ); 02637 } 02638 02639 // if we have a PSVIHandler, now's the time to call 02640 // its handleAttributesPSVI method: 02641 if(fPSVIHandler && fGrammarType == Grammar::SchemaGrammarType) 02642 { 02643 QName *eName = elemDecl->getElementName(); 02644 fPSVIHandler->handleAttributesPSVI 02645 ( 02646 eName->getLocalPart() 02647 , fURIStringPool->getValueForId(eName->getURI()) 02648 , fPSVIAttrList 02649 ); 02650 } 02651 02652 // If empty, validate content right now if we are validating and then 02653 // pop the element stack top. Else, we have to update the current stack 02654 // top's namespace mapping elements. 
02655 if (isEmpty) 02656 { 02657 // Pop the element stack back off since it'll never be used now 02658 fElemStack.popTop(); 02659 02660 // reset current type info 02661 DatatypeValidator* psviMemberType = 0; 02662 if (fGrammarType == Grammar::SchemaGrammarType) 02663 { 02664 if (fValidate && elemDecl->isDeclared()) 02665 { 02666 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo(); 02667 if(!fPSVIElemContext.fCurrentTypeInfo) 02668 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); 02669 else 02670 fPSVIElemContext.fCurrentDV = 0; 02671 if(fPSVIHandler) 02672 { 02673 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue(); 02674 02675 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString)) 02676 fPSVIElemContext.fNormalizedValue = 0; 02677 } 02678 } 02679 else 02680 { 02681 fPSVIElemContext.fCurrentDV = 0; 02682 fPSVIElemContext.fCurrentTypeInfo = 0; 02683 fPSVIElemContext.fNormalizedValue = 0; 02684 } 02685 } 02686 02687 // If validating, then insure that its legal to have no content 02688 if (fValidate) 02689 { 02690 XMLSize_t failure; 02691 bool res = fValidator->checkContent(elemDecl, 0, 0, &failure); 02692 if (!res) 02693 { 02694 fValidator->emitError 02695 ( 02696 XMLValid::ElementNotValidForContent 02697 , elemDecl->getFullName() 02698 , elemDecl->getFormattedContentModel() 02699 ); 02700 } 02701 02702 if (fGrammarType == Grammar::SchemaGrammarType) { 02703 02704 if (((SchemaValidator*) fValidator)->getErrorOccurred()) 02705 { 02706 fPSVIElemContext.fErrorOccurred = true; 02707 } 02708 else 02709 { 02710 if (fPSVIHandler) 02711 { 02712 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified(); 02713 if(fPSVIElemContext.fIsSpecified) 02714 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue(); 02715 } 02716 // note that if we're empty, won't be a current DV 
02717 if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union) 02718 psviMemberType = fValidationContext->getValidatingMemberType(); 02719 } 02720 02721 // call matchers and de-activate context 02722 if (toCheckIdentityConstraint()) 02723 { 02724 fICHandler->deactivateContext 02725 ( 02726 (SchemaElementDecl *) elemDecl 02727 , fContent.getRawBuffer() 02728 , fValidationContext 02729 , fPSVIElemContext.fCurrentDV 02730 ); 02731 } 02732 02733 } 02734 } 02735 else if (fGrammarType == Grammar::SchemaGrammarType) { 02736 ((SchemaValidator*)fValidator)->resetNillable(); 02737 } 02738 02739 if (fGrammarType == Grammar::SchemaGrammarType) 02740 { 02741 if (fPSVIHandler) 02742 { 02743 endElementPSVI((SchemaElementDecl*)elemDecl, psviMemberType); 02744 } 02745 } 02746 02747 // If we have a doc handler, tell it about the end tag 02748 if (fDocHandler) 02749 { 02750 fDocHandler->endElement 02751 ( 02752 *elemDecl 02753 , uriId 02754 , isRoot 02755 , fPrefixBuf.getRawBuffer() 02756 ); 02757 } 02758 02759 // If the elem stack is empty, then it was an empty root 02760 if (isRoot) 02761 gotData = false; 02762 else 02763 { 02764 // Restore the grammar 02765 fGrammar = fElemStack.getCurrentGrammar(); 02766 fGrammarType = fGrammar->getGrammarType(); 02767 if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) { 02768 if (fValidatorFromUser) 02769 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); 02770 else { 02771 fValidator = fSchemaValidator; 02772 } 02773 } 02774 else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) { 02775 if (fValidatorFromUser) 02776 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); 02777 else { 02778 fValidator = fDTDValidator; 02779 } 02780 } 02781 02782 fValidator->setGrammar(fGrammar); 02783 02784 // Restore the validation flag 02785 fValidate = fElemStack.getValidationFlag(); 02786 } 
02787 } 02788 else if (fGrammarType == Grammar::SchemaGrammarType) 02789 { 02790 // send a partial element psvi 02791 if (fPSVIHandler) 02792 { 02793 02794 ComplexTypeInfo* curTypeInfo = 0; 02795 DatatypeValidator* curDV = 0; 02796 XSTypeDefinition* typeDef = 0; 02797 02798 if (fValidate && elemDecl->isDeclared()) 02799 { 02800 curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo(); 02801 02802 if (curTypeInfo) 02803 { 02804 typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo); 02805 } 02806 else 02807 { 02808 curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); 02809 02810 if (curDV) 02811 { 02812 typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV); 02813 } 02814 } 02815 } 02816 02817 fPSVIElement->reset 02818 ( 02819 PSVIElement::VALIDITY_NOTKNOWN 02820 , PSVIElement::VALIDATION_NONE 02821 , fRootElemName 02822 , ((SchemaValidator*) fValidator)->getIsElemSpecified() 02823 , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0 02824 , typeDef 02825 , 0 //memberType 02826 , fModel 02827 , ((SchemaElementDecl*)elemDecl)->getDefaultValue() 02828 , 0 02829 , 0 02830 , 0 02831 ); 02832 02833 02834 fPSVIHandler->handlePartialElementPSVI 02835 ( 02836 elemDecl->getBaseName() 02837 , fURIStringPool->getValueForId(elemDecl->getURI()) 02838 , fPSVIElement 02839 ); 02840 02841 } 02842 02843 // not empty 02844 fErrorStack->push(fPSVIElemContext.fErrorOccurred); 02845 } 02846 02847 return true; 02848 } 02849 02850 02851 // --------------------------------------------------------------------------- 02852 // IGXMLScanner: Helper methos 02853 // --------------------------------------------------------------------------- 02854 void IGXMLScanner::resizeElemState() { 02855 02856 unsigned int newSize = fElemStateSize * 2; 02857 unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate 02858 ( 02859 newSize * sizeof(unsigned int) 02860 ); //new unsigned int[newSize]; 02861 unsigned int* 
newElemLoopState = (unsigned int*) fMemoryManager->allocate 02862 ( 02863 newSize * sizeof(unsigned int) 02864 ); //new unsigned int[newSize]; 02865 02866 // Copy the existing values 02867 unsigned int index = 0; 02868 for (; index < fElemStateSize; index++) 02869 { 02870 newElemState[index] = fElemState[index]; 02871 newElemLoopState[index] = fElemLoopState[index]; 02872 } 02873 02874 for (; index < newSize; index++) 02875 newElemLoopState[index] = newElemState[index] = 0; 02876 02877 // Delete the old array and udpate our members 02878 fMemoryManager->deallocate(fElemState); //delete [] fElemState; 02879 fMemoryManager->deallocate(fElemLoopState); //delete [] fElemState; 02880 fElemState = newElemState; 02881 fElemLoopState = newElemLoopState; 02882 fElemStateSize = newSize; 02883 } 02884 02885 void IGXMLScanner::resizeRawAttrColonList() { 02886 02887 unsigned int newSize = fRawAttrColonListSize * 2; 02888 int* newRawAttrColonList = (int*) fMemoryManager->allocate 02889 ( 02890 newSize * sizeof(int) 02891 ); //new int[newSize]; 02892 02893 // Copy the existing values 02894 unsigned int index = 0; 02895 for (; index < fRawAttrColonListSize; index++) 02896 newRawAttrColonList[index] = fRawAttrColonList[index]; 02897 02898 // Delete the old array and udpate our members 02899 fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList; 02900 fRawAttrColonList = newRawAttrColonList; 02901 fRawAttrColonListSize = newSize; 02902 } 02903 02904 // --------------------------------------------------------------------------- 02905 // IGXMLScanner: Grammar preparsing 02906 // --------------------------------------------------------------------------- 02907 Grammar* IGXMLScanner::loadGrammar(const InputSource& src 02908 , const short grammarType 02909 , const bool toCache) 02910 { 02911 Grammar* loadedGrammar = 0; 02912 02913 ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset); 02914 02915 try 02916 { 02917 
fGrammarResolver->cacheGrammarFromParse(false); 02918 // if the new grammar has to be cached, better use the already cached 02919 // grammars, or the an exception will be thrown when caching an already 02920 // cached grammar 02921 fGrammarResolver->useCachedGrammarInParse(toCache); 02922 fRootGrammar = 0; 02923 02924 if (fValScheme == Val_Auto) { 02925 fValidate = true; 02926 } 02927 02928 // Reset some status flags 02929 fInException = false; 02930 fStandalone = false; 02931 fErrorCount = 0; 02932 fHasNoDTD = true; 02933 fSeeXsi = false; 02934 02935 if (grammarType == Grammar::SchemaGrammarType) { 02936 loadedGrammar = loadXMLSchemaGrammar(src, toCache); 02937 } 02938 else if (grammarType == Grammar::DTDGrammarType) { 02939 loadedGrammar = loadDTDGrammar(src, toCache); 02940 } 02941 } 02942 // NOTE: 02943 // 02944 // In all of the error processing below, the emitError() call MUST come 02945 // before the flush of the reader mgr, or it will fail because it tries 02946 // to find out the position in the XML source of the error. 02947 catch(const XMLErrs::Codes) 02948 { 02949 // This is a 'first fatal error' type exit, so fall through 02950 } 02951 catch(const XMLValid::Codes) 02952 { 02953 // This is a 'first fatal error' type exit, so fall through 02954 } 02955 catch(const XMLException& excToCatch) 02956 { 02957 // Emit the error and catch any user exception thrown from here. Make 02958 // sure in all cases we flush the reader manager. 
02959 fInException = true; 02960 try 02961 { 02962 if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning) 02963 emitError 02964 ( 02965 XMLErrs::XMLException_Warning 02966 , excToCatch.getCode() 02967 , excToCatch.getMessage() 02968 ); 02969 else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal) 02970 emitError 02971 ( 02972 XMLErrs::XMLException_Fatal 02973 , excToCatch.getCode() 02974 , excToCatch.getMessage() 02975 ); 02976 else 02977 emitError 02978 ( 02979 XMLErrs::XMLException_Error 02980 , excToCatch.getCode() 02981 , excToCatch.getMessage() 02982 ); 02983 } 02984 catch(const OutOfMemoryException&) 02985 { 02986 // This is a special case for out-of-memory 02987 // conditions, because resetting the ReaderMgr 02988 // can be problematic. 02989 resetReaderMgr.release(); 02990 02991 throw; 02992 } 02993 } 02994 catch(const OutOfMemoryException&) 02995 { 02996 // This is a special case for out-of-memory 02997 // conditions, because resetting the ReaderMgr 02998 // can be problematic. 
02999 resetReaderMgr.release(); 03000 03001 throw; 03002 } 03003 03004 return loadedGrammar; 03005 } 03006 03007 void IGXMLScanner::resetCachedGrammar () 03008 { 03009 fCachedSchemaInfoList->removeAll (); 03010 } 03011 03012 Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src, 03013 const bool toCache) 03014 { 03015 // Reset the validators 03016 fDTDValidator->reset(); 03017 if (fValidatorFromUser) 03018 fValidator->reset(); 03019 03020 if (!fValidator->handlesDTD()) { 03021 if (fValidatorFromUser && fValidate) 03022 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); 03023 else { 03024 fValidator = fDTDValidator; 03025 } 03026 } 03027 03028 fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString); 03029 03030 if (fDTDGrammar) { 03031 fDTDGrammar->reset(); 03032 } 03033 else { 03034 fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager); 03035 fGrammarResolver->putGrammar(fDTDGrammar); 03036 } 03037 03038 fGrammar = fDTDGrammar; 03039 fGrammarType = fGrammar->getGrammarType(); 03040 fValidator->setGrammar(fGrammar); 03041 03042 // And for all installed handlers, send reset events. This gives them 03043 // a chance to flush any cached data. 03044 if (fDocHandler) 03045 fDocHandler->resetDocument(); 03046 if (fEntityHandler) 03047 fEntityHandler->resetEntities(); 03048 if (fErrorReporter) 03049 fErrorReporter->resetErrors(); 03050 03051 // Clear out the id reference list 03052 resetValidationContext(); 03053 // and clear out the darned undeclared DTD element pool... 
03054 fDTDElemNonDeclPool->removeAll(); 03055 03056 if (toCache) { 03057 03058 unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId()); 03059 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId); 03060 03061 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString); 03062 ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr); 03063 fGrammarResolver->putGrammar(fGrammar); 03064 } 03065 03066 // Handle the creation of the XML reader object for this input source. 03067 // This will provide us with transcoding and basic lexing services. 03068 XMLReader* newReader = fReaderMgr.createReader 03069 ( 03070 src 03071 , false 03072 , XMLReader::RefFrom_NonLiteral 03073 , XMLReader::Type_General 03074 , XMLReader::Source_External 03075 , fCalculateSrcOfs 03076 , fLowWaterMark 03077 ); 03078 if (!newReader) { 03079 if (src.getIssueFatalErrorIfNotFound()) 03080 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager); 03081 else 03082 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager); 03083 } 03084 03085 // In order to make the processing work consistently, we have to 03086 // make this look like an external entity. So create an entity 03087 // decl and fill it in and push it with the reader, as happens 03088 // with an external entity. Put a janitor on it to insure it gets 03089 // cleaned up. The reader manager does not adopt them. 
03090 const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull }; 03091 DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager); 03092 declDTD->setSystemId(src.getSystemId()); 03093 declDTD->setIsExternal(true); 03094 Janitor<DTDEntityDecl> janDecl(declDTD); 03095 03096 // Mark this one as a throw at end 03097 newReader->setThrowAtEnd(true); 03098 03099 // And push it onto the stack, with its pseudo name 03100 fReaderMgr.pushReader(newReader, declDTD); 03101 03102 // If we have a doc type handler and advanced callbacks are enabled, 03103 // call the doctype event. 03104 if (fDocTypeHandler) { 03105 03106 // Create a dummy root 03107 DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl 03108 ( 03109 gDTDStr 03110 , fEmptyNamespaceId 03111 , DTDElementDecl::Any 03112 , fGrammarPoolMemoryManager 03113 ); 03114 rootDecl->setCreateReason(DTDElementDecl::AsRootElem); 03115 rootDecl->setExternalElemDeclaration(true); 03116 Janitor<DTDElementDecl> janSrc(rootDecl); 03117 03118 fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true); 03119 } 03120 03121 // Create DTDScanner 03122 DTDScanner dtdScanner 03123 ( 03124 (DTDGrammar*) fGrammar 03125 , fDocTypeHandler 03126 , fGrammarPoolMemoryManager 03127 , fMemoryManager 03128 ); 03129 dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr); 03130 03131 // Tell it its not in an include section 03132 dtdScanner.scanExtSubsetDecl(false, true); 03133 03134 if (fValidate) { 03135 // validate the DTD scan so far 03136 fValidator->preContentValidation(false, true); 03137 } 03138 03139 if (toCache) 03140 fGrammarResolver->cacheGrammars(); 03141 03142 return fDTDGrammar; 03143 } 03144 03145 // --------------------------------------------------------------------------- 03146 // IGXMLScanner: Helper methods 03147 // --------------------------------------------------------------------------- 03148 void 
IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc) 03149 { 03150 XMLCh* locStr = schemaLoc; 03151 XMLReader* curReader = fReaderMgr.getCurrentReader(); 03152 03153 fLocationPairs->removeAllElements(); 03154 while (*locStr) 03155 { 03156 do { 03157 // Do we have an escaped character ? 03158 if (*locStr == 0xFFFF) 03159 continue; 03160 03161 if (!curReader->isWhitespace(*locStr)) 03162 break; 03163 03164 *locStr = chNull; 03165 } while (*++locStr); 03166 03167 if (*locStr) { 03168 03169 fLocationPairs->addElement(locStr); 03170 03171 while (*++locStr) { 03172 // Do we have an escaped character ? 03173 if (*locStr == 0xFFFF) 03174 continue; 03175 if (curReader->isWhitespace(*locStr)) 03176 break; 03177 } 03178 } 03179 } 03180 } 03181 03182 void IGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl, 03183 DatatypeValidator* const memberDV) 03184 { 03185 PSVIElement::ASSESSMENT_TYPE validationAttempted; 03186 PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN; 03187 03188 if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth) 03189 validationAttempted = PSVIElement::VALIDATION_FULL; 03190 else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth) 03191 validationAttempted = PSVIElement::VALIDATION_NONE; 03192 else 03193 { 03194 validationAttempted = PSVIElement::VALIDATION_PARTIAL; 03195 fPSVIElemContext.fFullValidationDepth = 03196 fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1; 03197 } 03198 03199 if (fValidate && elemDecl->isDeclared()) 03200 { 03201 validity = (fPSVIElemContext.fErrorOccurred) 03202 ? 
PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID; 03203 } 03204 03205 XSTypeDefinition* typeDef = 0; 03206 bool isMixed = false; 03207 if (fPSVIElemContext.fCurrentTypeInfo) 03208 { 03209 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo); 03210 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType(); 03211 isMixed = (modelType == SchemaElementDecl::Mixed_Simple 03212 || modelType == SchemaElementDecl::Mixed_Complex); 03213 } 03214 else if (fPSVIElemContext.fCurrentDV) 03215 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV); 03216 03217 XMLCh* canonicalValue = 0; 03218 if (fPSVIElemContext.fNormalizedValue && !isMixed && 03219 validity == PSVIElement::VALIDITY_VALID) 03220 { 03221 if (memberDV) 03222 canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager); 03223 else if (fPSVIElemContext.fCurrentDV) 03224 canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager); 03225 } 03226 03227 fPSVIElement->reset 03228 ( 03229 validity 03230 , validationAttempted 03231 , fRootElemName 03232 , fPSVIElemContext.fIsSpecified 03233 , (elemDecl->isDeclared()) 03234 ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0 03235 , typeDef 03236 , (memberDV) ? 
(XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0 03237 , fModel 03238 , elemDecl->getDefaultValue() 03239 , fPSVIElemContext.fNormalizedValue 03240 , canonicalValue 03241 ); 03242 03243 fPSVIHandler->handleElementPSVI 03244 ( 03245 elemDecl->getBaseName() 03246 , fURIStringPool->getValueForId(elemDecl->getURI()) 03247 , fPSVIElement 03248 ); 03249 03250 // decrease element depth 03251 fPSVIElemContext.fElemDepth--; 03252 03253 } 03254 03255 void IGXMLScanner::resetPSVIElemContext() 03256 { 03257 fPSVIElemContext.fIsSpecified = false; 03258 fPSVIElemContext.fErrorOccurred = false; 03259 fPSVIElemContext.fElemDepth = -1; 03260 fPSVIElemContext.fFullValidationDepth = -1; 03261 fPSVIElemContext.fNoneValidationDepth = -1; 03262 fPSVIElemContext.fCurrentDV = 0; 03263 fPSVIElemContext.fCurrentTypeInfo = 0; 03264 fPSVIElemContext.fNormalizedValue = 0; 03265 } 03266 03267 XERCES_CPP_NAMESPACE_END