GME
13
|
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: SGXMLScanner.cpp 925236 2010-03-19 14:29:47Z borisk $
 */


// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/SGXMLScanner.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/util/XMLUri.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/framework/MemoryManager.hpp>
#include <xercesc/framework/XMLGrammarPool.hpp>
#include <xercesc/framework/psvi/PSVIElement.hpp>
#include <xercesc/framework/psvi/PSVIHandler.hpp>
#include <xercesc/framework/psvi/PSVIAttributeList.hpp>
#include <xercesc/framework/psvi/XSAnnotation.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
#include <xercesc/validators/schema/SchemaValidator.hpp>
#include <xercesc/validators/schema/TraverseSchema.hpp>
#include <xercesc/validators/schema/XSDDOMParser.hpp>
#include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
#include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
#include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
#include <xercesc/validators/schema/identity/IC_Selector.hpp>
#include <xercesc/validators/schema/identity/ValueStore.hpp>
#include <xercesc/util/OutOfMemoryException.hpp>
#include <xercesc/util/XMLStringTokenizer.hpp>

XERCES_CPP_NAMESPACE_BEGIN

// Forward declaration only; the definition is not in this part of the file.
inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl);


// Janitor bound to SGXMLScanner::cleanUp; used by the constructors below.
typedef JanitorMemFunCall<SGXMLScanner> CleanupType;
// Janitor bound to ReaderMgr::reset; used by scanDocument() and scanNext().
typedef JanitorMemFunCall<ReaderMgr> ReaderMgrResetType;


// ---------------------------------------------------------------------------
//  SGXMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------

//  Constructs a scanner without any handlers. Every member is given a null
//  or fixed starting value here; commonInit() performs the real set-up.
//
//  The 'cleanup' janitor is bound to cleanUp() and is released both on the
//  normal path and on the out-of-memory path, so the only way it stays armed
//  is when commonInit() throws something other than OutOfMemoryException.
//  NOTE(review): presumably the janitor then calls cleanUp() from its
//  destructor - confirm against JanitorMemFunCall.
SGXMLScanner::SGXMLScanner( XMLValidator* const valToAdopt
                          , GrammarResolver* const grammarResolver
                          , MemoryManager* const manager) :

    XMLScanner(valToAdopt, grammarResolver, manager)
    , fSeeXsi(false)
    , fGrammarType(Grammar::UnKnown)
    , fElemStateSize(16)
    , fElemState(0)
    , fElemLoopState(0)
    , fContent(1023, manager)
    , fEntityTable(0)
    , fRawAttrList(0)
    , fRawAttrColonListSize(32)
    , fRawAttrColonList(0)
    , fSchemaGrammar(0)
    , fSchemaValidator(0)
    , fICHandler(0)
    , fElemNonDeclPool(0)
    , fElemCount(0)
    , fAttDefRegistry(0)
    , fUndeclaredAttrRegistry(0)
    , fPSVIAttrList(0)
    , fModel(0)
    , fPSVIElement(0)
    , fErrorStack(0)
    , fSchemaInfoList(0)
    , fCachedSchemaInfoList(0)
{
    CleanupType cleanup(this, &SGXMLScanner::cleanUp);

    try
    {
        commonInit();
    }
    catch(const OutOfMemoryException&)
    {
        // Don't cleanup when out of memory, since executing the
        // code can cause problems.
        cleanup.release();

        throw;
    }

    cleanup.release();
}

//  Constructs a scanner with the document, doctype, entity and error handlers
//  supplied up front; they are simply forwarded to the XMLScanner base.
//  Member initialisation and the commonInit()/janitor handling are the same
//  as in the handler-less constructor above.
SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler
                          , DocTypeHandler* const docTypeHandler
                          , XMLEntityHandler* const entityHandler
                          , XMLErrorReporter* const errHandler
                          , XMLValidator* const valToAdopt
                          , GrammarResolver* const grammarResolver
                          , MemoryManager* const manager) :

    XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
    , fSeeXsi(false)
    , fGrammarType(Grammar::UnKnown)
    , fElemStateSize(16)
    , fElemState(0)
    , fElemLoopState(0)
    , fContent(1023, manager)
    , fEntityTable(0)
    , fRawAttrList(0)
    , fRawAttrColonListSize(32)
    , fRawAttrColonList(0)
    , fSchemaGrammar(0)
    , fSchemaValidator(0)
    , fICHandler(0)
    , fElemNonDeclPool(0)
    , fElemCount(0)
    , fAttDefRegistry(0)
    , fUndeclaredAttrRegistry(0)
    , fPSVIAttrList(0)
    , fModel(0)
    , fPSVIElement(0)
    , fErrorStack(0)
    , fSchemaInfoList(0)
    , fCachedSchemaInfoList(0)
{
    CleanupType cleanup(this, &SGXMLScanner::cleanUp);

    try
    {
        commonInit();
    }
    catch(const OutOfMemoryException&)
    {
        // Don't cleanup when out of memory, since executing the
        // code can cause problems.
        cleanup.release();

        throw;
    }

    cleanup.release();
}

//  All tear-down is delegated to cleanUp().
SGXMLScanner::~SGXMLScanner()
{
    cleanUp();
}

// ---------------------------------------------------------------------------
//  XMLScanner: Getter methods
// ---------------------------------------------------------------------------

//  This scanner never exposes an entity declaration pool: both overloads
//  always return a null pointer.
NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool()
{
    return 0;
}

const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const
{
    return 0;
}

// ---------------------------------------------------------------------------
//  SGXMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------

//  Scans the whole of 'src' in a single call. The sequence is: scanReset(),
//  startDocument event, scanProlog(), scanContent(), checkIDRefs() when
//  fValidate is set, scanMiscellaneous(), endDocument event.
//
//  Exceptions of type XMLErrs::Codes and XMLValid::Codes are swallowed here.
//  An XMLException is reported through emitError() with a code chosen from
//  its error type. OutOfMemoryException is always rethrown, after releasing
//  the janitor that would otherwise reset the reader manager.
//
//  NOTE(review): unlike scanNext(), this path never calls
//  fICHandler->endDocument(), and it only calls scanMiscellaneous() when the
//  reader manager is not at EOF - confirm both differences are intentional.
void SGXMLScanner::scanDocument(const InputSource& src)
{
    //  Bump up the sequence id for this parser instance. This will invalidate
    //  any previous progressive scan tokens.
    fSequenceId++;

    ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

    try
    {
        //  Reset the scanner and its plugged in stuff for a new run. This
        //  resets all the data structures, creates the initial reader and
        //  pushes it on the stack, and sets up the base document path.
        scanReset(src);

        // If we have a document handler, then call the start document
        if (fDocHandler)
            fDocHandler->startDocument();

        //  Scan the prolog part, which is everything before the root element
        //  including the DTD subsets.
        scanProlog();

        //  If we got to the end of input, then it's not a valid XML file.
        //  Else, go on to scan the content.
        if (fReaderMgr.atEOF())
        {
            emitError(XMLErrs::EmptyMainEntity);
        }
        else
        {
            // Scan the element content of the document
            if (scanContent())
            {
                // Do post-parse validation if required
                if (fValidate)
                {
                    //  We handle ID reference semantics at this level since
                    //  it's required by XML 1.0.
                    checkIDRefs();

                    // Then allow the validator to do any extra stuff it wants
                    // fValidator->postParseValidation();
                }

                // That went ok, so scan for any miscellaneous stuff
                if (!fReaderMgr.atEOF())
                    scanMiscellaneous();
            }
        }

        // If we have a document handler, then call the end document
        if (fDocHandler)
            fDocHandler->endDocument();
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception, so fall through
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so fall through
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            // Warning, fatal-or-worse and plain error each map to their own
            // XMLErrs wrapper code.
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
        }
        catch(const OutOfMemoryException&)
        {
            // This is a special case for out-of-memory
            // conditions, because resetting the ReaderMgr
            // can be problematic.
            resetReaderMgr.release();

            throw;
        }
    }
    catch(const OutOfMemoryException&)
    {
        // This is a special case for out-of-memory
        // conditions, because resetting the ReaderMgr
        // can be problematic.
        resetReaderMgr.release();

        throw;
    }
}


//  Progressive scan: consumes exactly one top level token (character data,
//  one piece of markup, or EOF) per call.
//
//  Returns true when the token was processed and the caller may call again;
//  in that case the reader-manager reset janitor is released so the readers
//  stay alive between calls. Returns false at EOF, or when an
//  XMLErrs::Codes, XMLValid::Codes or XMLException exception was caught.
//  Throws RuntimeException (via ThrowXMLwithMemMgr) when 'token' is not
//  legal, and rethrows OutOfMemoryException.
//
//  Note that when the root end tag is seen (gotData comes back false) the
//  end-of-document work is done but retVal stays true; the false return
//  only comes from a later call that senses Token_EOF.
bool SGXMLScanner::scanNext(XMLPScanToken& token)
{
    // Make sure this token is still legal
    if (!isLegalToken(token))
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);

    // Find the next token and remember the reader id
    XMLSize_t orgReader;
    XMLTokens curToken;

    ReaderMgrResetType resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

    bool retVal = true;

    try
    {
        while (true)
        {
            //  We have to handle any end of entity exceptions that happen here.
            //  We could be at the end of X nested entities, each of which will
            //  generate an end of entity exception as we try to move forward.
            try
            {
                curToken = senseNextToken(orgReader);
                break;
            }
            catch(const EndOfEntityException& toCatch)
            {
                // Send an end of entity reference event
                if (fDocHandler)
                    fDocHandler->endEntityReference(toCatch.getEntity());
            }
        }

        if (curToken == Token_CharData)
        {
            scanCharData(fCDataBuf);
        }
        else if (curToken == Token_EOF)
        {
            // Input ended; any element still on the stack was never closed.
            if (!fElemStack.isEmpty())
            {
                const ElemStack::StackElem* topElem = fElemStack.popTop();
                emitError
                (
                    XMLErrs::EndedWithTagsOnStack
                    , topElem->fThisElement->getFullName()
                );
            }

            retVal = false;
        }
        else
        {
            // It's some sort of markup
            bool gotData = true;
            switch(curToken)
            {
                case Token_CData :
                    // Make sure we are within content
                    if (fElemStack.isEmpty())
                        emitError(XMLErrs::CDATAOutsideOfContent);
                    scanCDSection();
                    break;

                case Token_Comment :
                    scanComment();
                    break;

                case Token_EndTag :
                    scanEndTag(gotData);
                    break;

                case Token_PI :
                    scanPI();
                    break;

                case Token_StartTag :
                    scanStartTag(gotData);
                    break;

                default :
                    // Unrecognised token: resync on the next '<'
                    fReaderMgr.skipToChar(chOpenAngle);
                    break;
            }

            // The markup must end in the same reader (entity) it started in
            if (orgReader != fReaderMgr.getCurrentReaderNum())
                emitError(XMLErrs::PartialMarkupInEntity);

            // If we hit the end, then do the miscellaneous part
            if (!gotData)
            {
                // Do post-parse validation if required
                if (fValidate)
                {
                    //  We handle ID reference semantics at this level since
                    //  it's required by XML 1.0.
                    checkIDRefs();

                    // Then allow the validator to do any extra stuff it wants
                    // fValidator->postParseValidation();
                }

                // That went ok, so scan for any miscellaneous stuff
                scanMiscellaneous();

                if (toCheckIdentityConstraint())
                    fICHandler->endDocument();

                if (fDocHandler)
                    fDocHandler->endDocument();
            }
        }
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception, so return failure
        retVal = false;
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so return failure
        retVal = false;
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
        }
        catch(const OutOfMemoryException&)
        {
            // This is a special case for out-of-memory
            // conditions, because resetting the ReaderMgr
            // can be problematic.
            resetReaderMgr.release();

            throw;
        }

        retVal = false;
    }
    catch(const OutOfMemoryException&)
    {
        // This is a special case for out-of-memory
        // conditions, because resetting the ReaderMgr
        // can be problematic.
        resetReaderMgr.release();

        throw;
    }

    // If we are not at the end, release the object that will
    // reset the ReaderMgr.
    if (retVal)
        resetReaderMgr.release();

    return retVal;
}

// ---------------------------------------------------------------------------
//  SGXMLScanner: Private scanning methods
// ---------------------------------------------------------------------------

//  This method is called from scanStartTag() to handle the very raw initial
//  scan of the attributes. It just fills in the passed collection with
//  key/value pairs for each attribute. No processing is done on them at all.
//
//  elemName  Name of the element being scanned; only used in error messages.
//  toFill    Receives one KVStringPair per attribute. Existing entries are
//            overwritten in place and new ones appended once those run out.
//  isEmpty   Set to true when the tag is closed with a '/', false otherwise.
//
//  Returns the number of attributes stored. The colon position reported by
//  getQName() for each attribute name is recorded at the same index in
//  fRawAttrColonList. Throws UnexpectedEOFException (via ThrowXMLwithMemMgr)
//  when a null character is peeked.
XMLSize_t
SGXMLScanner::rawAttrScan(const XMLCh* const elemName
                          , RefVectorOf<KVStringPair>& toFill
                          , bool& isEmpty)
{
    //  Keep up with how many attributes we've seen so far, and how many
    //  elements are available in the vector. This way we can reuse old
    //  elements until we run out and then expand it.
    XMLSize_t attCount = 0;
    XMLSize_t curVecSize = toFill.size();

    // Assume it is not empty
    isEmpty = false;

    //  We loop until we either see a /> or >, handling key/value pairs until
    //  we get there. We place them in the passed vector, which we will expand
    //  as required to hold them.
    while (true)
    {
        // Get the next character, which should be non-space
        XMLCh nextCh = fReaderMgr.peekNextChar();

        //  If the next character is not a slash or closed angle bracket,
        //  then it must be whitespace, since whitespace is required
        //  between the end of the last attribute and the name of the next
        //  one. This only applies once at least one attribute has been seen.
        //
        if (attCount)
        {
            if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
            {
                if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                {
                    // Ok, skip by them and get another char
                    fReaderMgr.getNextChar();
                    fReaderMgr.skipPastSpaces();
                    nextCh = fReaderMgr.peekNextChar();
                }
                else
                {
                    // Emit the error but keep on going
                    emitError(XMLErrs::ExpectedWhitespace);
                }
            }
        }

        //  Ok, here we first check for any of the special case characters.
        //  If it's not one, then we do the normal case processing, which
        //  assumes that we've hit an attribute value. Otherwise, we do all
        //  the special case checks.
        if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
        {
            //  Assume it's going to be an attribute, so get a name from
            //  the input.
            int colonPosition;
            if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
            {
                if (fAttNameBuf.isEmpty())
                    emitError(XMLErrs::ExpectedAttrName);
                else
                    emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
                fReaderMgr.skipPastChar(chCloseAngle);
                return attCount;
            }

            const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();

            // And next must be an equal sign
            if (!scanEq())
            {
                static const XMLCh tmpList[] =
                {
                    chSingleQuote, chDoubleQuote, chCloseAngle
                    , chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedEqSign);

                //  Try to sync back up by skipping forward until we hit
                //  whitespace or one of the characters in the list above.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                {
                    // Jump back to top for normal processing of these
                    continue;
                }
                else if ((chFound == chSingleQuote)
                      || (chFound == chDoubleQuote)
                      || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    // Just fall through assuming that the value is to follow
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return attCount;
                }
                else
                {
                    // Something went really wrong
                    return attCount;
                }
            }

            //  Next should be the quoted attribute value. We just do a simple
            //  and stupid scan of this value. The only thing we do here
            //  is to expand entity references.
            if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
            {
                static const XMLCh tmpList[] =
                {
                    chCloseAngle, chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedAttrValue);

                //  It failed, so let's try to get synced back up. We skip
                //  forward until we find some whitespace or one of the
                //  chars in our list.
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                if ((chFound == chCloseAngle)
                ||  (chFound == chForwardSlash)
                ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                {
                    //  Just fall through and process this attribute, though
                    //  the value will be "".
                }
                else if (chFound == chOpenAngle)
                {
                    // Assume a malformed tag and that new one is starting
                    emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return attCount;
                }
                else
                {
                    // Something went really wrong
                    return attCount;
                }
            }

            //  And now let's add it to the passed collection. If we have not
            //  filled it up yet, then we use the next element. Else we add
            //  a new one.
            KVStringPair* curPair = 0;
            if (attCount >= curVecSize)
            {
                curPair = new (fMemoryManager) KVStringPair
                (
                    curAttNameBuf
                    , fAttNameBuf.getLen()
                    , fAttValueBuf.getRawBuffer()
                    , fAttValueBuf.getLen()
                    , fMemoryManager
                );
                toFill.addElement(curPair);
            }
            else
            {
                curPair = toFill.elementAt(attCount);
                curPair->set
                (
                    curAttNameBuf
                    , fAttNameBuf.getLen()
                    , fAttValueBuf.getRawBuffer()
                    , fAttValueBuf.getLen()
                );
            }
            // Make room for this attribute's colon position before storing it
            if (attCount >= fRawAttrColonListSize) {
                resizeRawAttrColonList();
            }
            fRawAttrColonList[attCount] = colonPosition;

            // And bump the count of attributes we've gotten
            attCount++;

            // And go to the top again for another attribute
            continue;
        }

        //  It was some special case character so do all of the checks and
        //  deal with it.
        if (!nextCh)
            ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

        if (nextCh == chForwardSlash)
        {
            fReaderMgr.getNextChar();
            isEmpty = true;
            if (!fReaderMgr.skippedChar(chCloseAngle))
                emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }
        else if (nextCh == chCloseAngle)
        {
            fReaderMgr.getNextChar();
            break;
        }
        else if (nextCh == chOpenAngle)
        {
            //  Check for this one specially, since it's going to be common
            //  and it is kind of auto-recovering since we've already hit the
            //  next open bracket, which is what we would have seeked to (and
            //  skipped this whole tag.)
            emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }
        else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
        {
            //  Check for this one specially, which is probably a missing
            //  attribute name, e.g. ="value". Just issue expected name
            //  error and eat the quoted string, then jump back to the
            //  top again.
            emitError(XMLErrs::ExpectedAttrName);
            fReaderMgr.getNextChar();
            fReaderMgr.skipQuotedString(nextCh);
            fReaderMgr.skipPastSpaces();
            continue;
        }
    }

    return attCount;
}


//  This method will kick off the scanning of the primary content of the
//  document, i.e. the elements.
//
//  It keeps pulling top level tokens until gotData is cleared, which happens
//  at Token_EOF or when scanEndTag()/scanStartTag() report no more data.
//  Whenever the function returns normally the result is true; failures
//  leave through exceptions other than EndOfEntityException, which is
//  handled here.
bool SGXMLScanner::scanContent()
{
    //  Go into a loop until we hit the end of the root element, or we fall
    //  out because there is no root element.
    //
    //  We have to do kind of a deeply nested double loop here in order to
    //  avoid doing the setup/teardown of the exception handler on each
    //  round. Doing it this way we only do it when an exception actually
    //  occurs.
    bool gotData = true;
    bool inMarkup = false;
    while (gotData)
    {
        try
        {
            while (gotData)
            {
                //  Sense what the next top level token is. According to what
                //  this tells us, we will call something to handle that kind
                //  of thing.
                XMLSize_t orgReader;
                const XMLTokens curToken = senseNextToken(orgReader);

                //  Handle character data and end of file specially. Char data
                //  is not markup so we don't want to handle it in the loop
                //  below.
                if (curToken == Token_CharData)
                {
                    //  Scan the character data and call appropriate events. Let
                    //  him use our local character data buffer for efficiency.
                    scanCharData(fCDataBuf);
                    continue;
                }
                else if (curToken == Token_EOF)
                {
                    //  The element stack better be empty at this point or we
                    //  ended prematurely before all elements were closed.
                    if (!fElemStack.isEmpty())
                    {
                        const ElemStack::StackElem* topElem = fElemStack.popTop();
                        emitError
                        (
                            XMLErrs::EndedWithTagsOnStack
                            , topElem->fThisElement->getFullName()
                        );
                    }

                    // It's the end of file, so clear the got data flag
                    gotData = false;
                    continue;
                }

                // We are in some sort of markup now
                inMarkup = true;

                //  According to the token we got, call the appropriate
                //  scanning method.
                switch(curToken)
                {
                    case Token_CData :
                        // Make sure we are within content
                        if (fElemStack.isEmpty())
                            emitError(XMLErrs::CDATAOutsideOfContent);
                        scanCDSection();
                        break;

                    case Token_Comment :
                        scanComment();
                        break;

                    case Token_EndTag :
                        scanEndTag(gotData);
                        break;

                    case Token_PI :
                        scanPI();
                        break;

                    case Token_StartTag :
                        scanStartTag(gotData);
                        break;

                    default :
                        // Unrecognised token: resync on the next '<'
                        fReaderMgr.skipToChar(chOpenAngle);
                        break;
                }

                // The markup must end in the same reader it started in
                if (orgReader != fReaderMgr.getCurrentReaderNum())
                    emitError(XMLErrs::PartialMarkupInEntity);

                // And we are back out of markup again
                inMarkup = false;
            }
        }
        catch(const EndOfEntityException& toCatch)
        {
            //  If we were in some markup when this happened, then it's a
            //  partial markup error.
            if (inMarkup)
                emitError(XMLErrs::PartialMarkupInEntity);

            // Send an end of entity reference event
            if (fDocHandler)
                fDocHandler->endEntityReference(toCatch.getEntity());

            inMarkup = false;
        }
    }

    // It went ok, so return success
    return true;
}


//  Scans an end tag and closes the element on top of the element stack:
//  checks the tag name, validates the element's content when fValidate is
//  set, updates the PSVI element context, deactivates identity constraint
//  matchers, fires the endElement event and restores the parent's grammar
//  and validation flag.
//
//  gotData  Set to false only when the element just closed was the root.
//
//  Throws RuntimeException (via ThrowXMLwithMemMgr) when the element stack
//  is already empty.
//
//  NOTE(review): fValidator is cast to SchemaValidator* unconditionally
//  below (clearDatatypeBuffer), so this assumes the validator is always a
//  SchemaValidator - confirm against the rest of the class.
void SGXMLScanner::scanEndTag(bool& gotData)
{
    //  Assume we will still have data until proven otherwise. It will only
    //  ever be false if this is the end of the root element.
    gotData = true;

    //  Check if the element stack is empty. If so, then this is an unbalanced
    //  element (i.e. more ends than starts, perhaps because of bad text
    //  causing one to be skipped.)
    if (fElemStack.isEmpty())
    {
        emitError(XMLErrs::MoreEndThanStartTags);
        fReaderMgr.skipPastChar(chCloseAngle);
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
    }

    //  Look at the element we are supposed to be ending. Remember
    //  that we don't own this. The stack just keeps them and reuses them.
    //  The actual popTop() is deferred until after content validation
    //  further down (or done in the name-mismatch path just below).
    unsigned int uriId = (fDoNamespaces)
                       ? fElemStack.getCurrentURI() : fEmptyNamespaceId;

    // Make sure that it's the end of the element that we expect
    const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
    const ElemStack::StackElem* topElem = fElemStack.topElement();
    if (!fReaderMgr.skippedStringLong(elemName))
    {
        emitError
        (
            XMLErrs::ExpectedEndOfTagX
            , elemName
        );
        fReaderMgr.skipPastChar(chCloseAngle);
        // NOTE(review): this early exit pops fElemStack but not fErrorStack
        // (its pop() only happens after this point) - confirm the two
        // stacks cannot get out of step here.
        fElemStack.popTop();
        return;
    }

    fPSVIElemContext.fErrorOccurred = fErrorStack->pop();

    // Make sure we are back on the same reader as where we started
    if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
        emitError(XMLErrs::PartialTagMarkupError);

    // Skip optional whitespace
    fReaderMgr.skipPastSpaces();

    // Make sure we find the closing bracket
    if (!fReaderMgr.skippedChar(chCloseAngle))
    {
        emitError
        (
            XMLErrs::UnterminatedEndTag
            , topElem->fThisElement->getFullName()
        );
    }

    // Capture the type / datatype validator / normalized value for PSVI.
    // For undeclared elements, or when not validating, all three are cleared.
    if (fValidate && topElem->fThisElement->isDeclared())
    {
        fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
        if(!fPSVIElemContext.fCurrentTypeInfo)
            fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
        else
            fPSVIElemContext.fCurrentDV = 0;
        if (fPSVIHandler)
        {
            fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();

            // An empty normalized value is reported as a null pointer
            if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
                fPSVIElemContext.fNormalizedValue = 0;

        }
    }
    else
    {
        fPSVIElemContext.fCurrentDV = 0;
        fPSVIElemContext.fCurrentTypeInfo = 0;
        fPSVIElemContext.fNormalizedValue = 0;
    }

    //  If validation is enabled, then let's pass him the list of children and
    //  this element and let him validate it.
    DatatypeValidator* psviMemberType = 0;
    if (fValidate)
    {
        XMLSize_t failure;
        bool res = fValidator->checkContent
        (
            topElem->fThisElement
            , topElem->fChildren
            , topElem->fChildCount
            , &failure
        );

        if (!res)
        {
            //  One of the elements is not valid for the content. NOTE that
            //  if no children were provided but the content model requires
            //  them, it comes back with a zero value. But we cannot use that
            //  to index the child array in this case, and have to put out a
            //  special message.
            if (!topElem->fChildCount)
            {
                fValidator->emitError
                (
                    XMLValid::EmptyNotValidForContent
                    , topElem->fThisElement->getFormattedContentModel()
                );
            }
            else if (failure >= topElem->fChildCount)
            {
                fValidator->emitError
                (
                    XMLValid::NotEnoughElemsForCM
                    , topElem->fThisElement->getFormattedContentModel()
                );
            }
            else
            {
                fValidator->emitError
                (
                    XMLValid::ElementNotValidForContent
                    , topElem->fChildren[failure]->getRawName()
                    , topElem->fThisElement->getFormattedContentModel()
                );
            }

        }

        // update PSVI info
        if (((SchemaValidator*) fValidator)->getErrorOccurred())
            fPSVIElemContext.fErrorOccurred = true;
        else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
            psviMemberType = fValidationContext->getValidatingMemberType();
        if (fPSVIHandler)
        {
            fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
            if(fPSVIElemContext.fIsSpecified)
                fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
        }

        // call matchers and de-activate context
        if (toCheckIdentityConstraint())
        {
            fICHandler->deactivateContext
            (
                (SchemaElementDecl *) topElem->fThisElement
                , fContent.getRawBuffer()
                , fValidationContext
                , fPSVIElemContext.fCurrentDV
            );
        }

    }

    // QName dv needed topElem to resolve URIs on the checkContent
    fElemStack.popTop();

    // See if it was the root element, to avoid multiple calls below
    const bool isRoot = fElemStack.isEmpty();

    if (fPSVIHandler)
    {
        endElementPSVI
        (
            (SchemaElementDecl*)topElem->fThisElement, psviMemberType
        );
    }
    // now we can reset the datatype buffer, since the
    // application has had a chance to copy the characters somewhere else
    ((SchemaValidator *)fValidator)->clearDatatypeBuffer();

    // If we have a doc handler, tell it about the end tag
    if (fDocHandler)
    {
        // For a schema grammar the prefix is cut out of the raw element
        // name at the stored colon position; otherwise it is taken from
        // the element declaration.
        if (fGrammarType == Grammar::SchemaGrammarType) {
            if (topElem->fPrefixColonPos != -1)
                fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
            else
                fPrefixBuf.reset();
        }
        else {
            fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
        }
        fDocHandler->endElement
        (
            *topElem->fThisElement
            , uriId
            , isRoot
            , fPrefixBuf.getRawBuffer()
        );
    }

    if (!isRoot)
    {
        // update error information: the parent's flag becomes true if it
        // was already set or if this child had an error
        fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
    }

    // If this was the root, then done with content
    gotData = !isRoot;

    if (gotData) {

        // Restore the grammar
        fGrammar = fElemStack.getCurrentGrammar();
        fGrammarType = fGrammar->getGrammarType();
        fValidator->setGrammar(fGrammar);

        // Restore the validation flag
        fValidate = fElemStack.getValidationFlag();
    }
}


//
//  This method handles the high level logic of scanning the DOCType
//  declaration. This calls the DTDScanner and kicks off both the scanning of
//  the internal subset and the scanning of the external subset, if any.
//
//  When we get here the '<!DOCTYPE' part has already been scanned, which is
//  what told us that we had a doc type decl to parse.
//
//  NOTE: despite the description above, the body below never invokes a DTD
//  scanner. It only skips ahead to the first '[' or '>', then (if an internal
//  subset was opened with '[') past the next ']', and finally past the next
//  '>'. Nothing inside the declaration is interpreted, and no error or
//  warning is emitted for it.
void SGXMLScanner::scanDocTypeDecl()
{
    // Just skips over it
    // REVISIT: Should we issue a warning
    static const XMLCh doctypeIE[] =
    {
        chOpenSquare, chCloseAngle, chNull
    };
    // Stop at whichever of '[' / '>' comes first
    XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);

    // An internal subset was started; skip to just past the next ']'.
    // NOTE(review): a ']' appearing inside the subset itself would end the
    // skip early - confirm whether that matters for this scanner.
    if (nextCh == chOpenSquare)
        fReaderMgr.skipPastChar(chCloseSquare);

    fReaderMgr.skipPastChar(chCloseAngle);
}

//  This method is called to scan a start tag when we are processing
//  namespaces. This method is called after we've scanned the < of a
//  start tag. So we have to get the element name, then scan the attributes,
//  after which we are either going to see >, />, or attributes followed
//  by one of those sequences.
//
//  gotData is an output flag: it is set to true on entry and only reset to
//  false when the element scanned is an empty root element.
//
//  Returns false if no valid element name could be read (an error is emitted
//  and the reader is advanced to the next '<'); returns true otherwise.
bool SGXMLScanner::scanStartTag(bool& gotData)
{
    //  Assume we will still have data until proven otherwise. It will only
    //  ever be false if this is the root and its empty.
    gotData = true;

    // Reset element content
    fContent.reset();

    //  The current position is after the open bracket, so we need to read in
    //  in the element name.
    int prefixColonPos;
    if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
    {
        if (fQNameBuf.isEmpty())
            emitError(XMLErrs::ExpectedElementName);
        else
            emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
        fReaderMgr.skipToChar(chOpenAngle);
        return false;
    }

    // See if its the root element
    const bool isRoot = fElemStack.isEmpty();

    // Skip any whitespace after the name
    fReaderMgr.skipPastSpaces();

    //  First we have to do the rawest attribute scan. We don't do any
    //  normalization of them at all, since we don't know yet what type they
    //  might be (since we need the element decl in order to do that.)
    const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
    bool isEmpty;
    XMLSize_t attCount = rawAttrScan
    (
        qnameRawBuf
        , *fRawAttrList
        , isEmpty
    );

    // save the contentleafname and currentscope before addlevel, for later use
    ContentLeafNameTypeVector* cv = 0;
    XMLContentModel* cm = 0;
    unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
    bool laxThisOne = false;
    if (!isRoot)
    {
        // schema validator will have correct type if validating
        SchemaElementDecl* tempElement = (SchemaElementDecl*)
            fElemStack.topElement()->fThisElement;
        SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
        ComplexTypeInfo *currType = 0;

        if (fValidate)
        {
            currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
            if (currType)
                modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
            else // something must have gone wrong
                modelType = SchemaElementDecl::Any;
        }
        else
        {
            currType = tempElement->getComplexTypeInfo();
        }

        // NOTE(review): when not validating, currType comes from the parent
        // element decl and is dereferenced below without a null check; this
        // relies on the model type only being Mixed_*/Children when a complex
        // type is present - confirm.
        if ((modelType == SchemaElementDecl::Mixed_Simple)
          || (modelType == SchemaElementDecl::Mixed_Complex)
          || (modelType == SchemaElementDecl::Children))
        {
            cm = currType->getContentModel();
            cv = cm->getContentLeafNameTypeVector();
            currentScope = fElemStack.getCurrentScope();
        }
        else if (modelType == SchemaElementDecl::Any) {
            laxThisOne = true;
        }
    }

    //  Now, since we might have to update the namespace map for this element,
    //  but we don't have the element decl yet, we just tell the element stack
    //  to expand up to get ready.
    XMLSize_t elemDepth = fElemStack.addLevel();
    fElemStack.setValidationFlag(fValidate);
    fElemStack.setPrefixColonPos(prefixColonPos);

    //  Check if there is any external schema location specified, and if we are at root,
    //  go through them first before scanning those specified in the instance document
    if (isRoot
        && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {

        if (fExternalSchemaLocation)
            parseSchemaLocation(fExternalSchemaLocation, true);
        if (fExternalNoNamespaceSchemaLocation)
            resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
    }

    //  Make an initial pass through the list and find any xmlns attributes or
    //  schema attributes.
    if (attCount)
        scanRawAttrListforNameSpaces(attCount);

    //  Resolve the qualified name to a URI and name so that we can look up
    //  the element decl for this element. We have now updated the prefix to
    //  namespace map so we should get the correct element now.
    unsigned int uriId = resolveQNameWithColon
    (
        qnameRawBuf
        , fPrefixBuf
        , ElemStack::Mode_Element
        , prefixColonPos
    );

    //if schema, check if we should lax or skip the validation of this element
    //  (fValidate may be cleared further down for this element; remember the
    //  value that applied to the parent so it can be consulted later.)
    bool parentValidation = fValidate;
    if (cv) {
        QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
        // elementDepth will be > 0, as cv is only constructed if element is not
        // root.
        laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
    }

    //  Look up the element now in the grammar. This will get us back a
    //  generic element decl object. We tell him to fault one in if he does
    //  not find it.
    XMLElementDecl* elemDecl = 0;
    bool wasAdded = false;
    // Local part of the QName (when prefixColonPos is -1 this is the whole name)
    const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
    const XMLCh* original_uriStr = fGrammar->getTargetNamespace();

    if (uriId != fEmptyNamespaceId) {

        // Check in current grammar before switching if necessary
        elemDecl = fGrammar->getElemDecl
        (
          uriId
          , nameRawBuf
          , qnameRawBuf
          , currentScope
        );
        if(!elemDecl)
        {
            // look in the list of undeclared elements, as would have been done
            // before we made grammars stateless:
            elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
        }
        // this is initialized correctly only if there is
        // no element decl.  The other uses in this scope will only
        // be encountered if there continues to be no element decl--which
        // implies that this will have been initialized correctly.
        unsigned int orgGrammarUri = uriId;
        if (!elemDecl && ( orgGrammarUri = fURIStringPool->getId(original_uriStr)) != uriId) {
            // not found, switch to the specified grammar
            const XMLCh* uriStr = getURIText(uriId);
            bool errorCondition = !switchGrammar(uriStr) && fValidate;
            if (errorCondition && !laxThisOne)
            {
                fValidator->emitError
                (
                    XMLValid::GrammarNotFound
                    ,uriStr
                );
            }

            elemDecl = fGrammar->getElemDecl
            (
              uriId
              , nameRawBuf
              , qnameRawBuf
              , currentScope
            );
        }

        if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
            // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
            elemDecl = fGrammar->getElemDecl
                       (
                           uriId
                           , nameRawBuf
                           , qnameRawBuf
                           , Grammar::TOP_LEVEL_SCOPE
                       );
            if(!elemDecl)
            {
                // look in the list of undeclared elements, as would have been done
                // before we made grammars stateless:
                elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
            }
            if(!elemDecl) {
                // still not found in specified uri
                // try emptyNamespace see if element should be un-qualified.
                // Use a temp variable until we decide this is the case
                XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
                               (
                                   fEmptyNamespaceId
                                   , nameRawBuf
                                   , qnameRawBuf
                                   , currentScope
                               );
                if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
                    fValidator->emitError
                    (
                        XMLValid::ElementNotUnQualified
                        , qnameRawBuf
                    );
                    elemDecl = tempElemDecl;
                }
            }
        }

        if (!elemDecl) {
            // still not found, fault this in and issue error later
            // switch back to original grammar first (if necessary)
            if(orgGrammarUri != uriId)
            {
                switchGrammar(original_uriStr);
            }
            elemDecl = new (fMemoryManager) SchemaElementDecl
            (
                fPrefixBuf.getRawBuffer()
                , nameRawBuf
                , uriId
                , SchemaElementDecl::Any
                , Grammar::TOP_LEVEL_SCOPE
                , fMemoryManager
            );
            // Register the faulted-in decl in the pool of undeclared elements
            elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
            wasAdded = true;
        }
    }
    else if (!elemDecl)
    {
        //the element has no prefix,
        //thus it is either a non-qualified element defined in current targetNS
        //or an element that is defined in the globalNS

        //try unqualifed first
        elemDecl = fGrammar->getElemDecl
                   (
                      uriId
                    , nameRawBuf
                    , qnameRawBuf
                    , currentScope
                    );
        if(!elemDecl)
        {
            // look in the list of undeclared elements, as would have been done
            // before we made grammars stateless:
            elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
        }
        // this is initialized correctly only if there is
        // no element decl.  The other uses in this scope will only
        // be encountered if there continues to be no element decl--which
        // implies that this will have been initialized correctly.
        unsigned int orgGrammarUri = fEmptyNamespaceId;
        if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
            //not found, switch grammar and try globalNS
            bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
            if (errorCondition && !laxThisOne)
            {
                fValidator->emitError
                (
                    XMLValid::GrammarNotFound
                  , XMLUni::fgZeroLenString
                );
            }

            elemDecl = fGrammar->getElemDecl
            (
              uriId
              , nameRawBuf
              , qnameRawBuf
              , currentScope
            );
        }

        if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
            // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
            elemDecl = fGrammar->getElemDecl
                       (
                           uriId
                           , nameRawBuf
                           , qnameRawBuf
                           , Grammar::TOP_LEVEL_SCOPE
                       );
            if(!elemDecl)
            {
                // look in the list of undeclared elements, as would have been done
                // before we made grammars stateless:
                elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
            }
            if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
                // still Not found in specified uri
                // go to original Grammar again to see if element needs to be fully qualified.
                bool errorCondition = !switchGrammar(original_uriStr) && fValidate;
                if (errorCondition && !laxThisOne)
                {
                    fValidator->emitError
                    (
                        XMLValid::GrammarNotFound
                        ,original_uriStr
                    );
                }

                // Use a temp variable until we decide this is the case
                XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
                           (
                               orgGrammarUri
                               , nameRawBuf
                               , qnameRawBuf
                               , currentScope
                           );
                if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
                    fValidator->emitError
                    (
                        XMLValid::ElementNotQualified
                        , qnameRawBuf
                    );
                    elemDecl=tempElemDecl;
                }
            }
        }

        if (!elemDecl) {
            // still not found, fault this in and issue error later
            // switch back to original grammar first (if necessary)
            if(orgGrammarUri != fEmptyNamespaceId)
            {
                switchGrammar(original_uriStr);
            }
            elemDecl = new (fMemoryManager) SchemaElementDecl
            (
                fPrefixBuf.getRawBuffer()
                , nameRawBuf
                , uriId
                , SchemaElementDecl::Any
                , Grammar::TOP_LEVEL_SCOPE
                , fMemoryManager
            );
            // Register the faulted-in decl in the pool of undeclared elements
            elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
            wasAdded = true;
        }
    }

    // this info needed for DOMTypeInfo
    fPSVIElemContext.fErrorOccurred = false;

    //  We do something different here according to whether we found the
    //  element or not.
    bool bXsiTypeSet= (fValidator)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
    if (wasAdded)
    {
        // Lax processing of an element we had to fault in: stop validating
        // from here on (the flag is also recorded on the element stack).
        if (laxThisOne && !bXsiTypeSet) {
            fValidate = false;
            fElemStack.setValidationFlag(fValidate);
        }

        // If validating then emit an error
        if (fValidate)
        {
            // This is to tell the reuse Validator that this element was
            // faulted-in, was not an element in the grammar pool originally
            elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);

            if(!bXsiTypeSet)
            {
                fValidator->emitError
                (
                    XMLValid::ElementNotDefined
                    , elemDecl->getFullName()
                );
                fPSVIElemContext.fErrorOccurred = true;
            }
        }
    }
    else
    {
        // If its not marked declared and validating, then emit an error
        if (!elemDecl->isDeclared()) {
            if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
                if(!bXsiTypeSet)
                    fPSVIElemContext.fErrorOccurred = true;
            }
            if (laxThisOne) {
                fValidate = false;
                fElemStack.setValidationFlag(fValidate);
            }

            if (fValidate && !bXsiTypeSet)
            {
                fValidator->emitError
                (
                    XMLValid::ElementNotDefined
                    , elemDecl->getFullName()
                );
            }
        }
    }


    //  Now we can update the element stack to set the current element
    //  decl. We expanded the stack above, but couldn't store the element
    //  decl because we didn't know it yet.
    fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
    fElemStack.setCurrentURI(uriId);

    if (isRoot)
    {
        // Remember the grammar and a private copy of the root element's QName
        fRootGrammar = fGrammar;
        fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
    }

    if (fPSVIHandler)
    {
        fPSVIElemContext.fElemDepth++;

        if (elemDecl->isDeclared())
        {
            fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
        }
        else
        {
            fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;

            /******
             * While we report an error for historical reasons, this should
             * actually result in lax assessment - NG.
            if (isRoot && fValidate)
                fPSVIElemContext.fErrorOccurred = true;
            ******/
        }
    }

    //  Validate the element
    if (fValidate)
    {
        fValidator->validateElement(elemDecl);
        if (((SchemaValidator*) fValidator)->getErrorOccurred())
            fPSVIElemContext.fErrorOccurred = true;
    }

    // squirrel away the element's QName, so that we can do an efficient
    // end-tag match
    fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());

    // When validating, the validator's current type is used; otherwise the
    // complex type recorded on the element decl.
    ComplexTypeInfo* typeinfo = (fValidate)
        ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
        : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();

    if (typeinfo) {
        currentScope = typeinfo->getScopeDefined();

        // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
        // The text before the first comma of the type name is passed to
        // switchGrammar() as the namespace URI; a comma at index 0 selects
        // the empty namespace.
        XMLCh* typeName = typeinfo->getTypeName();
        const int comma = XMLString::indexOf(typeName, chComma);
        if (comma > 0) {
            XMLBuffer prefixBuf(comma+1, fMemoryManager);
            prefixBuf.append(typeName, comma);
            const XMLCh* uriStr = prefixBuf.getRawBuffer();

            bool errorCondition = !switchGrammar(uriStr) && fValidate;
            if (errorCondition && !laxThisOne)
            {
                fValidator->emitError
                (
                    XMLValid::GrammarNotFound
                    , prefixBuf.getRawBuffer()
                );
            }
        }
        else if (comma == 0) {
            bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
            if (errorCondition && !laxThisOne)
            {
                fValidator->emitError
                (
                    XMLValid::GrammarNotFound
                    , XMLUni::fgZeroLenString
                );
            }
        }
    }
    fElemStack.setCurrentScope(currentScope);

    // Set element next state
    //  (grow the per-depth state arrays first if this depth does not fit)
    if (elemDepth >= fElemStateSize) {
        resizeElemState();
    }

    fElemState[elemDepth] = 0;
    fElemLoopState[elemDepth] = 0;
    fElemStack.setCurrentGrammar(fGrammar);

    //  If this is not the root element and validation was on for the parent
    //  (as captured in parentValidation above), record this element as a
    //  child of its parent.
    if (!isRoot && parentValidation)
    {
        //  If the element stack is not empty, then add this element as a
        //  child of the previous top element. If its empty, this is the root
        //  elem and is not the child of anything.
        fElemStack.addChild(elemDecl->getElementName(), true);
    }

    // PSVI handling:  must reset this, even if no attributes...
    if(getPSVIHandler())
        fPSVIAttrList->reset();

    //  Now lets get the fAttrList filled in. This involves faulting in any
    //  defaulted and fixed attributes and normalizing the values of any that
    //  we got explicitly.
    //
    //  We update the attCount value with the total number of attributes, but
    //  it goes in with the number of values we got during the raw scan of
    //  explicitly provided attrs above.
    attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);

    if(attCount)
    {
        // clean up after ourselves:
        // clear the map used to detect duplicate attributes
        fUndeclaredAttrRegistry->removeAll();
    }

    // activate identity constraints
    if (toCheckIdentityConstraint())
    {
        fICHandler->activateIdentityConstraint
                        (
                          (SchemaElementDecl*) elemDecl
                        , (int) elemDepth
                        , uriId
                        , fPrefixBuf.getRawBuffer()
                        , *fAttrList
                        , attCount
                        , fValidationContext
                        );

    }

    // Since the element may have default values, call start tag now regardless if it is empty or not
    // If we have a document handler, then tell it about this start tag
    if (fDocHandler)
    {
        fDocHandler->startElement
        (
            *elemDecl
            , uriId
            , fPrefixBuf.getRawBuffer()
            , *fAttrList
            , attCount
            , false
            , isRoot
        );
    } // may be where we output something...

    // if we have a PSVIHandler, now's the time to call
    // its handleAttributesPSVI method:
    if(fPSVIHandler)
    {
        QName *eName = elemDecl->getElementName();
        fPSVIHandler->handleAttributesPSVI
        (
            eName->getLocalPart()
            , fURIStringPool->getValueForId(eName->getURI())
            , fPSVIAttrList
        );
    }

    //  If empty, validate content right now if we are validating and then
    //  pop the element stack top. Else, we have to update the current stack
    //  top's namespace mapping elements.
    if (isEmpty)
    {
        // Pop the element stack back off since it'll never be used now
        fElemStack.popTop();

        // reset current type info
        DatatypeValidator* psviMemberType = 0;
        if (fGrammarType == Grammar::SchemaGrammarType)
        {
            if (fValidate && elemDecl->isDeclared())
            {
                fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
                if(!fPSVIElemContext.fCurrentTypeInfo)
                    fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
                else
                    fPSVIElemContext.fCurrentDV = 0;
                if(fPSVIHandler)
                {
                    fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();

                    // An empty normalized value is reported as "no value"
                    if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
                        fPSVIElemContext.fNormalizedValue = 0;
                }
            }
            else
            {
                fPSVIElemContext.fCurrentDV = 0;
                fPSVIElemContext.fCurrentTypeInfo = 0;
                fPSVIElemContext.fNormalizedValue = 0;
            }
        }

        // If validating, then ensure that its legal to have no content
        if (fValidate)
        {
            XMLSize_t failure;
            bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
            if (!res)
            {
                // REVISIT:  in the case of xsi:type, this may
                // return the wrong string...
                fValidator->emitError
                (
                    XMLValid::ElementNotValidForContent
                    , elemDecl->getFullName()
                    , elemDecl->getFormattedContentModel()
                );
            }

            if (((SchemaValidator*) fValidator)->getErrorOccurred())
                fPSVIElemContext.fErrorOccurred = true;
            // note that if we're empty, won't be a current DV
            else
            {
                if (fPSVIHandler)
                {
                    fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
                    if(fPSVIElemContext.fIsSpecified)
                        fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
                }
                if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
                    psviMemberType = fValidationContext->getValidatingMemberType();
            }

            // call matchers and de-activate context
            if (toCheckIdentityConstraint())
            {
                fICHandler->deactivateContext
                               (
                                (SchemaElementDecl *) elemDecl
                              , fContent.getRawBuffer()
                              , fValidationContext
                              , fPSVIElemContext.fCurrentDV
                               );
            }

        }
        else if (fGrammarType == Grammar::SchemaGrammarType) {
            ((SchemaValidator*)fValidator)->resetNillable();
        }

        if (fPSVIHandler)
        {
            endElementPSVI
            (
                (SchemaElementDecl*)elemDecl, psviMemberType
            );
        }

        // If we have a doc handler, tell it about the end tag
        if (fDocHandler)
        {
            fDocHandler->endElement
            (
                *elemDecl
                , uriId
                , isRoot
                , fPrefixBuf.getRawBuffer()
            );
        }

        // If the elem stack is empty, then it was an empty root
        if (isRoot)
            gotData = false;
        else
        {
            // Restore the grammar
            fGrammar = fElemStack.getCurrentGrammar();
            fGrammarType = fGrammar->getGrammarType();
            fValidator->setGrammar(fGrammar);

            // Restore the validation flag
            fValidate = fElemStack.getValidationFlag();
        }
    }
    else    // not empty
    {

        // send a partial element psvi
        if (fPSVIHandler)
        {

            ComplexTypeInfo*   curTypeInfo = 0;
            DatatypeValidator* curDV = 0;
            XSTypeDefinition*  typeDef = 0;

            if (fValidate && elemDecl->isDeclared())
            {
                curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();

                if (curTypeInfo)
                {
                    typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
                }
                else
                {
                    curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();

                    if (curDV)
                    {
                        typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
                    }
                }
            }

            // Validity and assessment are not known yet for a non-empty
            // element, so the partial PSVI carries NOTKNOWN / NONE.
            fPSVIElement->reset
                (
                  PSVIElement::VALIDITY_NOTKNOWN
                , PSVIElement::VALIDATION_NONE
                , fRootElemName
                , ((SchemaValidator*) fValidator)->getIsElemSpecified()
                , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
                , typeDef
                , 0 //memberType
                , fModel
                , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
                , 0
                , 0
                , 0
                );


            fPSVIHandler->handlePartialElementPSVI
                (
                  elemDecl->getBaseName()
                , fURIStringPool->getValueForId(elemDecl->getURI())
                , fPSVIElement
                );

        }

        // Record this element's error state on the error stack
        fErrorStack->push(fPSVIElemContext.fErrorOccurred);
    }

    return true;
}


// ---------------------------------------------------------------------------
//  SGXMLScanner: Grammar preparsing
// ---------------------------------------------------------------------------
//  Loads a grammar from the given input source. Only
//  Grammar::SchemaGrammarType is handled; for any other grammarType nothing
//  is loaded and 0 is returned. toCache is forwarded to
//  loadXMLSchemaGrammar() and to the grammar resolver's
//  useCachedGrammarInParse().
//
//  XMLErrs::Codes and XMLValid::Codes exceptions are swallowed, other
//  XMLExceptions are reported through emitError(), and
//  OutOfMemoryException is rethrown to the caller.
Grammar* SGXMLScanner::loadGrammar(const   InputSource&    src
                                  , const short            grammarType
                                  , const bool             toCache)
{
    Grammar* loadedGrammar = 0;

    // NOTE(review): presumably a scope guard that calls ReaderMgr::reset on
    // fReaderMgr when it goes out of scope unless release() is called first -
    // confirm against the ReaderMgrResetType definition.
    ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

    try
    {
        fGrammarResolver->cacheGrammarFromParse(false);
        // if the new grammar has to be cached, better use the already cached
        // grammars, or an exception will be thrown when caching an already
        // cached grammar
        fGrammarResolver->useCachedGrammarInParse(toCache);
        fRootGrammar = 0;

        if (fValScheme == Val_Auto) {
            fValidate = true;
        }

        // Reset some status flags
        fInException = false;
        fStandalone = false;
        fErrorCount = 0;
        fHasNoDTD = true;
        fSeeXsi = false;

        if (grammarType == Grammar::SchemaGrammarType) {
            loadedGrammar = loadXMLSchemaGrammar(src, toCache);
        }
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch(const XMLErrs::Codes)
    {
        // This is a 'first failure' exception, so fall through
    }
    catch(const XMLValid::Codes)
    {
        // This is a 'first fatal error' type exit, so fall through
    }
    catch(const XMLException& excToCatch)
    {
        //  Emit the error and catch any user exception thrown from here. Make
        //  sure in all cases we flush the reader manager.
        fInException = true;
        try
        {
            // Map the exception's error type onto the matching scanner error
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                emitError
                (
                    XMLErrs::XMLException_Warning
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                emitError
                (
                    XMLErrs::XMLException_Fatal
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
            else
                emitError
                (
                    XMLErrs::XMLException_Error
                    , excToCatch.getCode()
                    , excToCatch.getMessage()
                );
        }
        catch(const OutOfMemoryException&)
        {
            // This is a special case for out-of-memory
            // conditions, because resetting the ReaderMgr
            // can be problematic.
            resetReaderMgr.release();

            throw;
        }
    }
    catch(const OutOfMemoryException&)
    {
        // This is a special case for out-of-memory
        // conditions, because resetting the ReaderMgr
        // can be problematic.
        resetReaderMgr.release();

        throw;
    }

    return loadedGrammar;
}

//  Removes every entry from the cached schema info list.
void SGXMLScanner::resetCachedGrammar ()
{
  fCachedSchemaInfoList->removeAll ();
}

// ---------------------------------------------------------------------------
//  SGXMLScanner: Private helper methods
// ---------------------------------------------------------------------------
//  This method handles the common initialization, to avoid having to do
//  it redundantly in multiple constructors.
01941 void SGXMLScanner::commonInit() 01942 { 01943 // Create the element state array 01944 fElemState = (unsigned int*) fMemoryManager->allocate 01945 ( 01946 fElemStateSize * sizeof(unsigned int) 01947 ); //new unsigned int[fElemStateSize]; 01948 fElemLoopState = (unsigned int*) fMemoryManager->allocate 01949 ( 01950 fElemStateSize * sizeof(unsigned int) 01951 ); //new unsigned int[fElemStateSize]; 01952 01953 // And we need one for the raw attribute scan. This just stores key/ 01954 // value string pairs (prior to any processing.) 01955 fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager); 01956 fRawAttrColonList = (int*) fMemoryManager->allocate 01957 ( 01958 fRawAttrColonListSize * sizeof(int) 01959 ); 01960 01961 // Create the Validator and init them 01962 fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager); 01963 initValidator(fSchemaValidator); 01964 01965 // Create IdentityConstraint info 01966 fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager); 01967 01968 // Add the default entity entries for the character refs that must always 01969 // be present. 
01970 fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager); 01971 fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand); 01972 fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle); 01973 fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle); 01974 fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote); 01975 fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote); 01976 fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager); 01977 fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher> 01978 ( 01979 131, false, fMemoryManager 01980 ); 01981 fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager); 01982 fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager); 01983 01984 fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager); 01985 fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager); 01986 01987 if (fValidator) 01988 { 01989 if (!fValidator->handlesSchema()) 01990 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); 01991 } 01992 else 01993 { 01994 fValidator = fSchemaValidator; 01995 } 01996 } 01997 01998 void SGXMLScanner::cleanUp() 01999 { 02000 fMemoryManager->deallocate(fElemState); //delete [] fElemState; 02001 fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState; 02002 delete fSchemaGrammar; 02003 delete fEntityTable; 02004 delete fRawAttrList; 02005 fMemoryManager->deallocate(fRawAttrColonList); 02006 delete fSchemaValidator; 02007 delete fICHandler; 02008 delete fElemNonDeclPool; 02009 delete fAttDefRegistry; 02010 delete fUndeclaredAttrRegistry; 02011 delete fPSVIAttrList; 02012 if (fPSVIElement) 02013 delete fPSVIElement; 02014 02015 if (fErrorStack) 02016 delete fErrorStack; 02017 02018 delete fSchemaInfoList; 02019 delete fCachedSchemaInfoList; 02020 } 02021 
02022 void SGXMLScanner::resizeElemState() { 02023 02024 unsigned int newSize = fElemStateSize * 2; 02025 unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate 02026 ( 02027 newSize * sizeof(unsigned int) 02028 ); //new unsigned int[newSize]; 02029 unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate 02030 ( 02031 newSize * sizeof(unsigned int) 02032 ); //new unsigned int[newSize]; 02033 02034 // Copy the existing values 02035 unsigned int index = 0; 02036 for (; index < fElemStateSize; index++) 02037 { 02038 newElemState[index] = fElemState[index]; 02039 newElemLoopState[index] = fElemLoopState[index]; 02040 } 02041 02042 for (; index < newSize; index++) 02043 newElemLoopState[index] = newElemState[index] = 0; 02044 02045 // Delete the old array and udpate our members 02046 fMemoryManager->deallocate(fElemState); //delete [] fElemState; 02047 fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState; 02048 fElemState = newElemState; 02049 fElemLoopState = newElemLoopState; 02050 fElemStateSize = newSize; 02051 } 02052 02053 void SGXMLScanner::resizeRawAttrColonList() { 02054 02055 unsigned int newSize = fRawAttrColonListSize * 2; 02056 int* newRawAttrColonList = (int*) fMemoryManager->allocate 02057 ( 02058 newSize * sizeof(int) 02059 ); //new int[newSize]; 02060 02061 // Copy the existing values 02062 unsigned int index = 0; 02063 for (; index < fRawAttrColonListSize; index++) 02064 newRawAttrColonList[index] = fRawAttrColonList[index]; 02065 02066 // Delete the old array and udpate our members 02067 fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList; 02068 fRawAttrColonList = newRawAttrColonList; 02069 fRawAttrColonListSize = newSize; 02070 } 02071 02072 // This method is called from scanStartTag() to build up the list of 02073 // XMLAttr objects that will be passed out in the start tag callout. 
We 02074 // get the key/value pairs from the raw scan of explicitly provided attrs, 02075 // which have not been normalized. And we get the element declaration from 02076 // which we will get any defaulted or fixed attribute defs and add those 02077 // in as well. 02078 XMLSize_t 02079 SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs 02080 , const XMLSize_t attCount 02081 , XMLElementDecl* elemDecl 02082 , RefVectorOf<XMLAttr>& toFill) 02083 { 02084 // Ask the element to clear the 'provided' flag on all of the att defs 02085 // that it owns, and to return us a boolean indicating whether it has 02086 // any defs. 02087 DatatypeValidator *currDV = 0; 02088 ComplexTypeInfo *currType = 0; 02089 02090 if (fValidate) 02091 { 02092 currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 02093 if (!currType) { 02094 currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator(); 02095 } 02096 } 02097 02098 const bool hasDefs = (currType && fValidate) 02099 ? currType->hasAttDefs() 02100 : elemDecl->hasAttDefs(); 02101 02102 fElemCount++; 02103 02104 // If there are no expliclitily provided attributes and there are no 02105 // defined attributes for the element, the we don't have anything to do. 02106 // So just return zero in this case. 02107 if (!hasDefs && !attCount) 02108 return 0; 02109 02110 // Keep up with how many attrs we end up with total 02111 XMLSize_t retCount = 0; 02112 02113 // And get the current size of the output vector. This lets us use 02114 // existing elements until we fill it, then start adding new ones. 02115 const XMLSize_t curAttListSize = toFill.size(); 02116 02117 // We need a buffer into which raw scanned attribute values will be 02118 // normalized. 
02119 XMLBufBid bbNormal(&fBufMgr); 02120 XMLBuffer& normBuf = bbNormal.getBuffer(); 02121 02122 XMLBufBid bbPrefix(&fBufMgr); 02123 XMLBuffer& prefixBuf = bbPrefix.getBuffer(); 02124 02125 // Loop through our explicitly provided attributes, which are in the raw 02126 // scanned form, and build up XMLAttr objects. 02127 XMLSize_t index; 02128 const XMLCh* prefPtr, *suffPtr; 02129 for (index = 0; index < attCount; index++) 02130 { 02131 PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID; 02132 PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL; 02133 const KVStringPair* curPair = providedAttrs.elementAt(index); 02134 02135 // We have to split the name into its prefix and name parts. Then 02136 // we map the prefix to its URI. 02137 const XMLCh* const namePtr = curPair->getKey(); 02138 02139 const int colonInd = fRawAttrColonList[index]; 02140 unsigned int uriId; 02141 if (colonInd != -1) 02142 { 02143 prefixBuf.set(namePtr, colonInd); 02144 prefPtr = prefixBuf.getRawBuffer(); 02145 suffPtr = namePtr + colonInd + 1; 02146 // Map the prefix to a URI id 02147 uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); 02148 } 02149 else 02150 { 02151 // No colon, so we just have a name with no prefix 02152 prefPtr = XMLUni::fgZeroLenString; 02153 suffPtr = namePtr; 02154 // an empty prefix is always the empty namespace, when dealing with attributes 02155 uriId = fEmptyNamespaceId; 02156 } 02157 02158 // If the uri comes back as the xmlns or xml URI or its just a name 02159 // and that name is 'xmlns', then we handle it specially. So set a 02160 // boolean flag that lets us quickly below know which we are dealing 02161 // with. 02162 const bool isNSAttr = (uriId == fEmptyNamespaceId)? 
02163 XMLString::equals(suffPtr, XMLUni::fgXMLNSString) : 02164 (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)); 02165 02166 // If its not a special case namespace attr of some sort, then we 02167 // do normal checking and processing. 02168 XMLAttDef::AttTypes attType = XMLAttDef::CData; 02169 DatatypeValidator *attrValidator = 0; 02170 PSVIAttribute *psviAttr = 0; 02171 bool otherXSI = false; 02172 02173 if (isNSAttr) 02174 { 02175 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId)) 02176 { 02177 emitError 02178 ( 02179 XMLErrs::AttrAlreadyUsedInSTag 02180 , namePtr 02181 , elemDecl->getFullName() 02182 ); 02183 fPSVIElemContext.fErrorOccurred = true; 02184 } 02185 else 02186 { 02187 bool ValueValidate = false; 02188 bool tokenizeBuffer = false; 02189 02190 if (uriId == fXMLNSNamespaceId) 02191 { 02192 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); 02193 } 02194 else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)) 02195 { 02196 if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)) 02197 { 02198 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN); 02199 02200 ValueValidate = true; 02201 } 02202 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION)) 02203 { 02204 // use anyURI as the validator 02205 // tokenize the data and use the anyURI data for each piece 02206 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); 02207 //We should validate each value in the schema location however 02208 //this lead to a performance degradation of around 4%. Since 02209 //the first value of each pair needs to match what is in the 02210 //schema document and the second value needs to be valid in 02211 //order to open the document we won't validate it. Need to 02212 //do performance analysis of the anyuri datatype. 
02213 //ValueValidate = true; 02214 ValueValidate = false; 02215 tokenizeBuffer = true; 02216 } 02217 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION)) 02218 { 02219 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); 02220 //We should validate this value however 02221 //this lead to a performance degradation of around 4%. Since 02222 //the value needs to be valid in 02223 //order to open the document we won't validate it. Need to 02224 //do performance analysis of the anyuri datatype. 02225 //ValueValidate = true; 02226 ValueValidate = false; 02227 } 02228 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) 02229 { 02230 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME); 02231 02232 ValueValidate = true; 02233 } 02234 else { 02235 otherXSI = true; 02236 } 02237 } 02238 02239 if (!otherXSI) { 02240 normalizeAttRawValue 02241 ( 02242 namePtr 02243 , curPair->getValue() 02244 , normBuf 02245 ); 02246 02247 if (fValidate && attrValidator && ValueValidate) 02248 { 02249 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true); 02250 02251 ValidationContext* const theContext = 02252 getValidationContext(); 02253 02254 if (theContext) 02255 { 02256 try 02257 { 02258 if (tokenizeBuffer) { 02259 XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager); 02260 while (tokenizer.hasMoreTokens()) { 02261 attrValidator->validate( 02262 tokenizer.nextToken(), 02263 theContext, 02264 fMemoryManager); 02265 } 02266 } 02267 else { 02268 attrValidator->validate( 02269 normBuf.getRawBuffer(), 02270 theContext, 02271 fMemoryManager); 02272 } 02273 } 02274 catch (const XMLException& idve) 02275 { 02276 fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage()); 02277 } 02278 } 02279 } 02280 02281 if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType) 02282 
{ 02283 psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId)); 02284 XSSimpleTypeDefinition *validatingType = (attrValidator) 02285 ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator) 02286 : 0; 02287 // no attribute declarations for these... 02288 psviAttr->reset( 02289 fRootElemName 02290 , PSVIItem::VALIDITY_NOTKNOWN 02291 , PSVIItem::VALIDATION_NONE 02292 , validatingType 02293 , 0 02294 , 0 02295 , false 02296 , 0 02297 , attrValidator 02298 ); 02299 } 02300 } 02301 } 02302 } 02303 02304 if (!isNSAttr || otherXSI) 02305 { 02306 // Some checking for attribute wild card first (for schema) 02307 bool laxThisOne = false; 02308 bool skipThisOne = false; 02309 02310 XMLAttDef* attDefForWildCard = 0; 02311 XMLAttDef* attDef = 0; 02312 02313 if (fGrammarType == Grammar::SchemaGrammarType) { 02314 02315 //retrieve the att def 02316 SchemaAttDef* attWildCard = 0; 02317 if (currType) { 02318 attDef = currType->getAttDef(suffPtr, uriId); 02319 attWildCard = currType->getAttWildCard(); 02320 } 02321 else if (!currDV) { // check explicitly-set wildcard 02322 attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard(); 02323 } 02324 02325 // if not found or faulted in - check for a matching wildcard attribute 02326 // if no matching wildcard attribute, check (un)qualifed cases and flag 02327 // appropriate errors 02328 if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) { 02329 02330 if (attWildCard) { 02331 //if schema, see if we should lax or skip the validation of this attribute 02332 if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) { 02333 02334 if(!skipThisOne) 02335 { 02336 SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId)); 02337 if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) { 02338 RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry(); 02339 if (attRegistry) { 02340 
attDefForWildCard = attRegistry->get(suffPtr); 02341 } 02342 } 02343 } 02344 } 02345 } 02346 else if (currType) { 02347 // not found, see if the attDef should be qualified or not 02348 if (uriId == fEmptyNamespaceId) { 02349 attDef = currType->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace())); 02350 if (fValidate 02351 && attDef 02352 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { 02353 // the attribute should be qualified 02354 fValidator->emitError 02355 ( 02356 XMLValid::AttributeNotQualified 02357 , attDef->getFullName() 02358 ); 02359 fPSVIElemContext.fErrorOccurred = true; 02360 if (getPSVIHandler()) 02361 { 02362 attrValid = PSVIItem::VALIDITY_INVALID; 02363 } 02364 } 02365 } 02366 else { 02367 attDef = currType->getAttDef(suffPtr, fEmptyNamespaceId); 02368 if (fValidate 02369 && attDef 02370 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { 02371 // the attribute should be qualified 02372 fValidator->emitError 02373 ( 02374 XMLValid::AttributeNotUnQualified 02375 , attDef->getFullName() 02376 ); 02377 fPSVIElemContext.fErrorOccurred = true; 02378 if (getPSVIHandler()) 02379 { 02380 attrValid = PSVIItem::VALIDITY_INVALID; 02381 } 02382 } 02383 } 02384 } 02385 } 02386 } 02387 02388 // now need to prepare for duplicate detection 02389 if(attDef) 02390 { 02391 unsigned int *curCountPtr = fAttDefRegistry->get(attDef); 02392 if(!curCountPtr) 02393 { 02394 curCountPtr = getNewUIntPtr(); 02395 *curCountPtr = fElemCount; 02396 fAttDefRegistry->put(attDef, curCountPtr); 02397 } 02398 else if(*curCountPtr < fElemCount) 02399 *curCountPtr = fElemCount; 02400 else 02401 { 02402 emitError 02403 ( 02404 XMLErrs::AttrAlreadyUsedInSTag 02405 , attDef->getFullName() 02406 , elemDecl->getFullName() 02407 ); 02408 fPSVIElemContext.fErrorOccurred = true; 02409 } 02410 } 02411 else 02412 { 02413 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId)) 02414 { 02415 emitError 02416 ( 02417 XMLErrs::AttrAlreadyUsedInSTag 02418 , 
namePtr 02419 , elemDecl->getFullName() 02420 ); 02421 fPSVIElemContext.fErrorOccurred = true; 02422 } 02423 } 02424 02425 // if we've found either an attDef or an attDefForWildCard, 02426 // then we're doing full validation and it may still be valid. 02427 if(!attDef && !attDefForWildCard) 02428 { 02429 if(!laxThisOne && !skipThisOne) 02430 { 02431 fPSVIElemContext.fErrorOccurred = true; 02432 } 02433 if(getPSVIHandler()) 02434 { 02435 if(!laxThisOne && !skipThisOne) 02436 { 02437 attrValid = PSVIItem::VALIDITY_INVALID; 02438 } 02439 else if(laxThisOne) 02440 { 02441 attrValid = PSVIItem::VALIDITY_NOTKNOWN; 02442 attrAssessed = PSVIItem::VALIDATION_PARTIAL; 02443 } 02444 else 02445 { 02446 attrValid = PSVIItem::VALIDITY_NOTKNOWN; 02447 attrAssessed = PSVIItem::VALIDATION_NONE; 02448 } 02449 } 02450 } 02451 02452 bool errorCondition = fValidate && !attDefForWildCard && !attDef; 02453 if (errorCondition && !skipThisOne && !laxThisOne) 02454 { 02455 // 02456 // Its not valid for this element, so issue an error if we are 02457 // validating. 02458 // 02459 XMLBufBid bbMsg(&fBufMgr); 02460 XMLBuffer& bufMsg = bbMsg.getBuffer(); 02461 if (uriId != fEmptyNamespaceId) { 02462 XMLBufBid bbURI(&fBufMgr); 02463 XMLBuffer& bufURI = bbURI.getBuffer(); 02464 02465 getURIText(uriId, bufURI); 02466 02467 bufMsg.append(chOpenCurly); 02468 bufMsg.append(bufURI.getRawBuffer()); 02469 bufMsg.append(chCloseCurly); 02470 } 02471 bufMsg.append(suffPtr); 02472 fValidator->emitError 02473 ( 02474 XMLValid::AttNotDefinedForElement 02475 , bufMsg.getRawBuffer() 02476 , elemDecl->getFullName() 02477 ); 02478 } 02479 02480 // Now normalize the raw value since we have the attribute type. We 02481 // don't care about the return status here. If it failed, an error 02482 // was issued, which is all we care about. 
02483 if (attDefForWildCard) { 02484 normalizeAttValue( 02485 attDefForWildCard, namePtr, curPair->getValue(), normBuf 02486 ); 02487 02488 // If we found an attdef for this one, then lets validate it. 02489 const XMLCh* xsNormalized = normBuf.getRawBuffer(); 02490 DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator(); 02491 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 02492 { 02493 // normalize the attribute according to schema whitespace facet 02494 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true); 02495 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 02496 if (fNormalizeData && fValidate) { 02497 normBuf.set(xsNormalized); 02498 } 02499 } 02500 02501 if (fValidate ) { 02502 fValidator->validateAttrValue( 02503 attDefForWildCard, xsNormalized, false, elemDecl 02504 ); 02505 attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator(); 02506 if(((SchemaValidator *)fValidator)->getErrorOccurred()) 02507 { 02508 fPSVIElemContext.fErrorOccurred = true; 02509 if(getPSVIHandler()) 02510 attrValid = PSVIItem::VALIDITY_INVALID; 02511 } 02512 } 02513 else { // no decl; default DOMTypeInfo to anySimpleType 02514 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); 02515 } 02516 02517 // Save the type for later use 02518 attType = attDefForWildCard->getType(); 02519 } 02520 else { 02521 normalizeAttValue( 02522 attDef, namePtr, curPair->getValue(), normBuf 02523 ); 02524 02525 // If we found an attdef for this one, then lets validate it. 
02526 if (attDef) 02527 { 02528 const XMLCh* xsNormalized = normBuf.getRawBuffer(); 02529 if (fGrammarType == Grammar::SchemaGrammarType) 02530 { 02531 DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator(); 02532 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 02533 { 02534 // normalize the attribute according to schema whitespace facet 02535 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true); 02536 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 02537 if (fNormalizeData && fValidate && !skipThisOne) { 02538 normBuf.set(xsNormalized); 02539 } 02540 } 02541 } 02542 02543 if (fValidate && !skipThisOne) 02544 { 02545 fValidator->validateAttrValue( 02546 attDef, xsNormalized, false, elemDecl 02547 ); 02548 attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator(); 02549 if(((SchemaValidator *)fValidator)->getErrorOccurred()) 02550 { 02551 fPSVIElemContext.fErrorOccurred = true; 02552 if(getPSVIHandler()) 02553 attrValid = PSVIItem::VALIDITY_INVALID; 02554 } 02555 } 02556 else { 02557 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); 02558 } 02559 } 02560 else { 02561 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); 02562 } 02563 02564 // Save the type for later use 02565 if (attDef) 02566 { 02567 attType = attDef->getType(); 02568 } 02569 } 02570 02571 // now fill in the PSVIAttributes entry for this attribute: 02572 if(getPSVIHandler()) 02573 { 02574 psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId)); 02575 SchemaAttDef *actualAttDef = 0; 02576 if(attDef) 02577 actualAttDef = (SchemaAttDef *)attDef; 02578 else if (attDefForWildCard) 02579 actualAttDef = (SchemaAttDef *)attDefForWildCard; 02580 if(actualAttDef) 02581 { 02582 XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration 
*)fModel->getXSObject(actualAttDef); 02583 DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator(); 02584 XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType); 02585 if(attrValid != PSVIItem::VALIDITY_VALID) 02586 { 02587 psviAttr->reset 02588 ( 02589 fRootElemName 02590 , attrValid 02591 , attrAssessed 02592 , validatingType 02593 , 0 02594 , actualAttDef->getValue() 02595 , false 02596 , attrDecl 02597 , 0 02598 ); 02599 } 02600 else 02601 { 02602 XSSimpleTypeDefinition *memberType = 0; 02603 if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION) 02604 memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator); 02605 psviAttr->reset 02606 ( 02607 fRootElemName 02608 , attrValid 02609 , attrAssessed 02610 , validatingType 02611 , memberType 02612 , actualAttDef->getValue() 02613 , false 02614 , attrDecl 02615 , (memberType)?attrValidator:attrDataType 02616 ); 02617 } 02618 } 02619 else 02620 { 02621 psviAttr->reset 02622 ( 02623 fRootElemName 02624 , attrValid 02625 , attrAssessed 02626 , 0 02627 , 0 02628 , 0 02629 , false 02630 , 0 02631 , 0 02632 ); 02633 } 02634 } 02635 } 02636 02637 // Add this attribute to the attribute list that we use to pass them 02638 // to the handler. We reuse its existing elements but expand it as 02639 // required. 
02640 XMLAttr* curAttr; 02641 if (retCount >= curAttListSize) 02642 { 02643 curAttr = new (fMemoryManager) XMLAttr 02644 ( 02645 uriId 02646 , suffPtr 02647 , prefPtr 02648 , normBuf.getRawBuffer() 02649 , attType 02650 , true 02651 , fMemoryManager 02652 ); 02653 toFill.addElement(curAttr); 02654 } 02655 else 02656 { 02657 curAttr = toFill.elementAt(retCount); 02658 curAttr->set 02659 ( 02660 uriId 02661 , suffPtr 02662 , prefPtr 02663 , normBuf.getRawBuffer() 02664 , attType 02665 ); 02666 curAttr->setSpecified(true); 02667 } 02668 if(psviAttr) 02669 psviAttr->setValue(curAttr->getValue()); 02670 02671 // Bump the count of attrs in the list 02672 retCount++; 02673 } 02674 02675 // Now, if there are any attributes declared by this element, let's 02676 // go through them and make sure that any required ones are provided, 02677 // and fault in any fixed ones and defaulted ones that are not provided 02678 // literally. 02679 if (hasDefs) 02680 { 02681 // Check after all specified attrs are scanned 02682 // (1) report error for REQUIRED attrs that are missing (V_TAGc) 02683 // (2) add default attrs if missing (FIXED and NOT_FIXED) 02684 02685 XMLAttDefList& attDefList = getAttDefList(currType, elemDecl); 02686 02687 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++) 02688 { 02689 // Get the current att def, for convenience and its def type 02690 XMLAttDef *curDef = &attDefList.getAttDef(i); 02691 const XMLAttDef::DefAttTypes defType = curDef->getDefaultType(); 02692 02693 unsigned int *attCountPtr = fAttDefRegistry->get(curDef); 02694 if (!attCountPtr || *attCountPtr < fElemCount) 02695 { // did not occur 02696 // note that since there is no attribute information 02697 // item present, there is no PSVI infoset to augment here *except* 02698 // that the element is invalid 02699 02700 //the attribute is not provided 02701 if (fValidate) 02702 { 02703 // If we are validating and its required, then an error 02704 if ((defType == XMLAttDef::Required) || 02705 (defType 
== XMLAttDef::Required_And_Fixed) ) 02706 02707 { 02708 fValidator->emitError 02709 ( 02710 XMLValid::RequiredAttrNotProvided 02711 , curDef->getFullName() 02712 ); 02713 fPSVIElemContext.fErrorOccurred = true; 02714 } 02715 else if ((defType == XMLAttDef::Default) || 02716 (defType == XMLAttDef::Fixed) ) 02717 { 02718 if (fStandalone && curDef->isExternal()) 02719 { 02720 // XML 1.0 Section 2.9 02721 // Document is standalone, so attributes must not be defaulted. 02722 fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName()); 02723 } 02724 } 02725 } 02726 02727 // Fault in the value if needed, and bump the att count. 02728 if ((defType == XMLAttDef::Default) 02729 || (defType == XMLAttDef::Fixed)) 02730 { 02731 // Let the validator pass judgement on the attribute value 02732 if (fValidate) 02733 { 02734 fValidator->validateAttrValue 02735 ( 02736 curDef 02737 , curDef->getValue() 02738 , false 02739 , elemDecl 02740 ); 02741 } 02742 02743 XMLAttr* curAtt; 02744 if (retCount >= curAttListSize) 02745 { 02746 curAtt = new (fMemoryManager) XMLAttr(fMemoryManager); 02747 fValidator->faultInAttr(*curAtt, *curDef); 02748 fAttrList->addElement(curAtt); 02749 } 02750 else 02751 { 02752 curAtt = fAttrList->elementAt(retCount); 02753 fValidator->faultInAttr(*curAtt, *curDef); 02754 } 02755 02756 // Indicate it was not explicitly specified and bump count 02757 curAtt->setSpecified(false); 02758 retCount++; 02759 if(getPSVIHandler()) 02760 { 02761 QName *attName = ((SchemaAttDef *)curDef)->getAttName(); 02762 PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill 02763 ( 02764 attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI()) 02765 ); 02766 XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef); 02767 DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator(); 02768 XSSimpleTypeDefinition *defAttrType = 02769 
(XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType); 02770 // would have occurred during validation of default value 02771 if(((SchemaValidator *)fValidator)->getErrorOccurred()) 02772 { 02773 defAttrToFill->reset( 02774 fRootElemName 02775 , PSVIItem::VALIDITY_INVALID 02776 , PSVIItem::VALIDATION_FULL 02777 , defAttrType 02778 , 0 02779 , curDef->getValue() 02780 , true 02781 , defAttrDecl 02782 , 0 02783 ); 02784 } 02785 else 02786 { 02787 XSSimpleTypeDefinition *defAttrMemberType = 0; 02788 if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION) 02789 { 02790 defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject 02791 ( 02792 ((SchemaValidator*)fValidator)->getMostRecentAttrValidator() 02793 ); 02794 } 02795 defAttrToFill->reset 02796 ( 02797 fRootElemName 02798 , PSVIItem::VALIDITY_VALID 02799 , PSVIItem::VALIDATION_FULL 02800 , defAttrType 02801 , defAttrMemberType 02802 , curDef->getValue() 02803 , true 02804 , defAttrDecl 02805 , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType 02806 ); 02807 } 02808 defAttrToFill->setValue(curDef->getValue()); 02809 } 02810 } 02811 } 02812 else if (attCountPtr) 02813 { 02814 //attribute is provided 02815 // (schema) report error for PROHIBITED attrs that are present (V_TAGc) 02816 if (defType == XMLAttDef::Prohibited && fValidate) 02817 { 02818 fValidator->emitError 02819 ( 02820 XMLValid::ProhibitedAttributePresent 02821 , curDef->getFullName() 02822 ); 02823 fPSVIElemContext.fErrorOccurred = true; 02824 if (getPSVIHandler()) 02825 { 02826 QName *attQName = ((SchemaAttDef *)curDef)->getAttName(); 02827 // bad luck... 
02828 PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName 02829 ( 02830 attQName->getLocalPart(), 02831 fURIStringPool->getValueForId(attQName->getURI()) 02832 ); 02833 prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID); 02834 } 02835 } 02836 } 02837 } 02838 } 02839 02840 return retCount; 02841 } 02842 02843 02844 // This method will take a raw attribute value and normalize it according to 02845 // the rules of the attribute type. It will put the resulting value into the 02846 // passed buffer. 02847 // 02848 // This code assumes that escaped characters in the original value (via char 02849 // refs) are prefixed by a 0xFFFF character. This is because some characters 02850 // are legal if escaped only. And some escape chars are not subject to 02851 // normalization rules. 02852 bool SGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef 02853 , const XMLCh* const attName 02854 , const XMLCh* const value 02855 , XMLBuffer& toFill) 02856 { 02857 // A simple state value for a whitespace processing state machine 02858 enum States 02859 { 02860 InWhitespace 02861 , InContent 02862 }; 02863 02864 // Get the type and name 02865 const XMLAttDef::AttTypes type = (attDef) 02866 ?attDef->getType() 02867 :XMLAttDef::CData; 02868 02869 // Assume its going to go fine, and empty the target buffer in preperation 02870 bool retVal = true; 02871 toFill.reset(); 02872 02873 // Get attribute def - to check to see if it's declared externally or not 02874 bool isAttExternal = (attDef) 02875 ?attDef->isExternal() 02876 :false; 02877 02878 // Loop through the chars of the source value and normalize it according 02879 // to the type. 02880 States curState = InContent; 02881 bool firstNonWS = false; 02882 XMLCh nextCh; 02883 const XMLCh* srcPtr = value; 02884 02885 if (type == XMLAttDef::CData || type > XMLAttDef::Notation) { 02886 while (*srcPtr) { 02887 // Get the next character from the source. 
We have to watch for 02888 // escaped characters (which are indicated by a 0xFFFF value followed 02889 // by the char that was escaped.) 02890 nextCh = *srcPtr; 02891 02892 // Do we have an escaped character ? 02893 if (nextCh == 0xFFFF) 02894 { 02895 nextCh = *++srcPtr; 02896 } 02897 else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) { 02898 // Check Validity Constraint for Standalone document declaration 02899 // XML 1.0, Section 2.9 02900 if (fStandalone && fValidate && isAttExternal) 02901 { 02902 // Can't have a standalone document declaration of "yes" if attribute 02903 // values are subject to normalisation 02904 fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); 02905 } 02906 nextCh = chSpace; 02907 } 02908 else if (nextCh == chOpenAngle) { 02909 // If its not escaped, then make sure its not a < character, which is 02910 // not allowed in attribute values. 02911 emitError(XMLErrs::BracketInAttrValue, attName); 02912 retVal = false; 02913 } 02914 02915 // Add this char to the target buffer 02916 toFill.append(nextCh); 02917 02918 // And move up to the next character in the source 02919 srcPtr++; 02920 } 02921 } 02922 else { 02923 while (*srcPtr) 02924 { 02925 // Get the next character from the source. We have to watch for 02926 // escaped characters (which are indicated by a 0xFFFF value followed 02927 // by the char that was escaped.) 02928 nextCh = *srcPtr; 02929 02930 // Do we have an escaped character ? 02931 if (nextCh == 0xFFFF) 02932 { 02933 nextCh = *++srcPtr; 02934 } 02935 else if (nextCh == chOpenAngle) { 02936 // If its not escaped, then make sure its not a < character, which is 02937 // not allowed in attribute values. 
02938 emitError(XMLErrs::BracketInAttrValue, attName); 02939 retVal = false; 02940 } 02941 02942 if (curState == InWhitespace) 02943 { 02944 if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) 02945 { 02946 if (firstNonWS) 02947 toFill.append(chSpace); 02948 curState = InContent; 02949 firstNonWS = true; 02950 } 02951 else 02952 { 02953 srcPtr++; 02954 continue; 02955 } 02956 } 02957 else if (curState == InContent) 02958 { 02959 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) 02960 { 02961 curState = InWhitespace; 02962 srcPtr++; 02963 02964 // Check Validity Constraint for Standalone document declaration 02965 // XML 1.0, Section 2.9 02966 if (fStandalone && fValidate && isAttExternal) 02967 { 02968 if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr)) 02969 { 02970 // Can't have a standalone document declaration of "yes" if attribute 02971 // values are subject to normalisation 02972 fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); 02973 } 02974 } 02975 continue; 02976 } 02977 firstNonWS = true; 02978 } 02979 02980 // Add this char to the target buffer 02981 toFill.append(nextCh); 02982 02983 // And move up to the next character in the source 02984 srcPtr++; 02985 } 02986 } 02987 02988 return retVal; 02989 } 02990 02991 // This method will just normalize the input value as CDATA without 02992 // any standalone checking. 02993 bool SGXMLScanner::normalizeAttRawValue( const XMLCh* const attrName 02994 , const XMLCh* const value 02995 , XMLBuffer& toFill) 02996 { 02997 // Assume its going to go fine, and empty the target buffer in preperation 02998 bool retVal = true; 02999 toFill.reset(); 03000 03001 // Loop through the chars of the source value and normalize it according 03002 // to the type. 03003 bool escaped; 03004 XMLCh nextCh; 03005 const XMLCh* srcPtr = value; 03006 while (*srcPtr) 03007 { 03008 // Get the next character from the source. 
We have to watch for 03009 // escaped characters (which are indicated by a 0xFFFF value followed 03010 // by the char that was escaped.) 03011 nextCh = *srcPtr; 03012 escaped = (nextCh == 0xFFFF); 03013 if (escaped) 03014 nextCh = *++srcPtr; 03015 03016 // If its not escaped, then make sure its not a < character, which is 03017 // not allowed in attribute values. 03018 if (!escaped && (*srcPtr == chOpenAngle)) 03019 { 03020 emitError(XMLErrs::BracketInAttrValue, attrName); 03021 retVal = false; 03022 } 03023 03024 if (!escaped) 03025 { 03026 // NOTE: Yes this is a little redundant in that a 0x20 is 03027 // replaced with an 0x20. But its faster to do this (I think) 03028 // than checking for 9, A, and D separately. 03029 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) 03030 nextCh = chSpace; 03031 } 03032 03033 // Add this char to the target buffer 03034 toFill.append(nextCh); 03035 03036 // And move up to the next character in the source 03037 srcPtr++; 03038 } 03039 return retVal; 03040 } 03041 03042 // This method will reset the scanner data structures, and related plugged 03043 // in stuff, for a new scan session. We get the input source for the primary 03044 // XML entity, create the reader for it, and push it on the stack so that 03045 // upon successful return from here we are ready to go. 03046 void SGXMLScanner::scanReset(const InputSource& src) 03047 { 03048 03049 // This call implicitly tells us that we are going to reuse the scanner 03050 // if it was previously used. So tell the validator to reset itself. 03051 // 03052 // But, if the fUseCacheGrammar flag is set, then don't reset it. 03053 // 03054 // NOTE: The ReaderMgr is flushed on the way out, because that is 03055 // required to insure that files are closed. 03056 fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar); 03057 fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar); 03058 03059 // Clear transient schema info list. 
03060 // 03061 fSchemaInfoList->removeAll (); 03062 03063 // fModel may need updating, as fGrammarResolver could have cleaned it 03064 if(fModel && getPSVIHandler()) 03065 fModel = fGrammarResolver->getXSModel(); 03066 03067 // Create dummy schema grammar 03068 if (!fSchemaGrammar) { 03069 fSchemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager); 03070 } 03071 03072 fGrammar = fSchemaGrammar; 03073 fGrammarType = Grammar::DTDGrammarType; 03074 fRootGrammar = 0; 03075 03076 fValidator->setGrammar(fGrammar); 03077 if (fValidatorFromUser) { 03078 03079 ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter); 03080 ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver); 03081 ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal); 03082 } 03083 03084 // Reset validation 03085 fValidate = (fValScheme == Val_Always) ? true : false; 03086 03087 // And for all installed handlers, send reset events. This gives them 03088 // a chance to flush any cached data. 03089 if (fDocHandler) 03090 fDocHandler->resetDocument(); 03091 if (fEntityHandler) 03092 fEntityHandler->resetEntities(); 03093 if (fErrorReporter) 03094 fErrorReporter->resetErrors(); 03095 03096 // Clear out the id reference list 03097 resetValidationContext(); 03098 03099 // Reset the Root Element Name 03100 fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName; 03101 fRootElemName = 0; 03102 03103 // Reset IdentityConstraints 03104 if (fICHandler) 03105 fICHandler->reset(); 03106 03107 // Reset the element stack, and give it the latest ids for the special 03108 // URIs it has to know about. 
03109 fElemStack.reset 03110 ( 03111 fEmptyNamespaceId 03112 , fUnknownNamespaceId 03113 , fXMLNamespaceId 03114 , fXMLNSNamespaceId 03115 ); 03116 03117 if (!fSchemaNamespaceId) 03118 fSchemaNamespaceId = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI); 03119 03120 // Reset some status flags 03121 fInException = false; 03122 fStandalone = false; 03123 fErrorCount = 0; 03124 fHasNoDTD = true; 03125 fSeeXsi = false; 03126 fDoNamespaces = true; 03127 fDoSchema = true; 03128 03129 // Reset PSVI context 03130 // Note that we always need this around for DOMTypeInfo 03131 if (!fPSVIElement) 03132 fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager); 03133 03134 if (!fErrorStack) 03135 { 03136 fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager); 03137 } 03138 else 03139 { 03140 fErrorStack->removeAllElements(); 03141 } 03142 03143 resetPSVIElemContext(); 03144 03145 // Reset the validators 03146 fSchemaValidator->reset(); 03147 fSchemaValidator->setErrorReporter(fErrorReporter); 03148 fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal); 03149 fSchemaValidator->setGrammarResolver(fGrammarResolver); 03150 if (fValidatorFromUser) 03151 fValidator->reset(); 03152 03153 // Handle the creation of the XML reader object for this input source. 03154 // This will provide us with transcoding and basic lexing services. 
03155 XMLReader* newReader = fReaderMgr.createReader 03156 ( 03157 src 03158 , true 03159 , XMLReader::RefFrom_NonLiteral 03160 , XMLReader::Type_General 03161 , XMLReader::Source_External 03162 , fCalculateSrcOfs 03163 , fLowWaterMark 03164 ); 03165 03166 if (!newReader) { 03167 if (src.getIssueFatalErrorIfNotFound()) 03168 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager); 03169 else 03170 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager); 03171 } 03172 03173 // Push this read onto the reader manager 03174 fReaderMgr.pushReader(newReader, 0); 03175 03176 // and reset security-related things if necessary: 03177 if(fSecurityManager != 0) 03178 { 03179 fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit(); 03180 fEntityExpansionCount = 0; 03181 } 03182 fElemCount = 0; 03183 if(fUIntPoolRowTotal >= 32) 03184 { // 8 KB tied up with validating attributes... 03185 fAttDefRegistry->removeAll(); 03186 recreateUIntPool(); 03187 } 03188 else 03189 { 03190 // note that this will implicitly reset the values of the hashtables, 03191 // though their buckets will still be tied up 03192 resetUIntPool(); 03193 } 03194 fUndeclaredAttrRegistry->removeAll(); 03195 } 03196 03197 03198 // This method is called between markup in content. It scans for character 03199 // data that is sent to the document handler. It watches for any markup 03200 // characters that would indicate that the character data has ended. It also 03201 // handles expansion of general and character entities. 03202 // 03203 // sendData() is a local static helper for this method which handles some 03204 // code that must be done in three different places here. 
03205 void SGXMLScanner::sendCharData(XMLBuffer& toSend) 03206 { 03207 // If no data in the buffer, then nothing to do 03208 if (toSend.isEmpty()) 03209 return; 03210 03211 // We do different things according to whether we are validating or 03212 // not. If not, its always just characters; else, it depends on the 03213 // current element's content model. 03214 if (fValidate) 03215 { 03216 // Get the raw data we need for the callback 03217 const XMLCh* rawBuf = toSend.getRawBuffer(); 03218 const XMLSize_t len = toSend.getLen(); 03219 03220 // Get the character data opts for the current element 03221 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; 03222 // And see if the current element is a 'Children' style content model 03223 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 03224 if(currType) 03225 { 03226 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); 03227 if(modelType == SchemaElementDecl::Children || 03228 modelType == SchemaElementDecl::ElementOnlyEmpty) 03229 charOpts = XMLElementDecl::SpacesOk; 03230 else if(modelType == SchemaElementDecl::Empty) 03231 charOpts = XMLElementDecl::NoCharData; 03232 } 03233 03234 // should not be necessary once PSVI method on element decls 03235 // are removed 03236 if (charOpts == XMLElementDecl::NoCharData) 03237 { 03238 // They definitely cannot handle any type of char data 03239 fValidator->emitError(XMLValid::NoCharDataInCM); 03240 if (getPSVIHandler()) 03241 { 03242 // REVISIT: 03243 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 03244 } 03245 } 03246 else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len)) 03247 { 03248 // Its all spaces. So, if they can take spaces, then send it 03249 // as ignorable whitespace. If they can handle any char data 03250 // send it as characters. 
03251 if (charOpts == XMLElementDecl::SpacesOk) { 03252 if (fDocHandler) 03253 fDocHandler->ignorableWhitespace(rawBuf, len, false); 03254 } 03255 else if (charOpts == XMLElementDecl::AllCharData) 03256 { 03257 XMLSize_t xsLen; 03258 const XMLCh* xsNormalized; 03259 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); 03260 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 03261 { 03262 // normalize the character according to schema whitespace facet 03263 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf); 03264 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 03265 xsLen = fWSNormalizeBuf.getLen(); 03266 } 03267 else { 03268 xsNormalized = rawBuf; 03269 xsLen = len; 03270 } 03271 03272 // tell the schema validation about the character data for checkContent later 03273 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized); 03274 03275 // call all active identity constraints 03276 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) { 03277 fContent.append(xsNormalized, xsLen); 03278 } 03279 03280 if (fDocHandler) { 03281 if (fNormalizeData) { 03282 fDocHandler->docCharacters(xsNormalized, xsLen, false); 03283 } 03284 else { 03285 fDocHandler->docCharacters(rawBuf, len, false); 03286 } 03287 } 03288 } 03289 } 03290 else 03291 { 03292 // If they can take any char data, then send it. Otherwise, they 03293 // can only handle whitespace and can't handle this stuff so 03294 // issue an error. 
03295 if (charOpts == XMLElementDecl::AllCharData) 03296 { 03297 XMLSize_t xsLen; 03298 const XMLCh *xsNormalized; 03299 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); 03300 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 03301 { 03302 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf); 03303 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 03304 xsLen = fWSNormalizeBuf.getLen(); 03305 } 03306 else { 03307 xsNormalized = rawBuf; 03308 xsLen = len; 03309 } 03310 03311 // tell the schema validation about the character data for checkContent later 03312 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized); 03313 03314 // call all active identity constraints 03315 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) { 03316 fContent.append(xsNormalized, xsLen); 03317 } 03318 03319 if (fDocHandler) { 03320 if (fNormalizeData) { 03321 fDocHandler->docCharacters(xsNormalized, xsLen, false); 03322 } 03323 else { 03324 fDocHandler->docCharacters(rawBuf, len, false); 03325 } 03326 } 03327 } 03328 else 03329 { 03330 fValidator->emitError(XMLValid::NoCharDataInCM); 03331 if (getPSVIHandler()) 03332 { 03333 // REVISIT: 03334 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 03335 } 03336 } 03337 } 03338 } 03339 else 03340 { 03341 // call all active identity constraints 03342 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) 03343 fContent.append(toSend.getRawBuffer(), toSend.getLen()); 03344 03345 // Always assume its just char data if not validating 03346 if (fDocHandler) 03347 fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false); 03348 } 03349 03350 // Reset buffer 03351 toSend.reset(); 03352 } 03353 03354 03355 03356 // This method is called with a key/value string pair that represents an 03357 // xmlns="yyy" or xmlns:xxx="yyy" attribute. 
This method will update the 03358 // current top of the element stack based on this data. We know that when 03359 // we get here, that it is one of these forms, so we don't bother confirming 03360 // it. 03361 // 03362 // But we have to ensure 03363 // 1. xxx is not xmlns 03364 // 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa 03365 // 3. yyy is not XMLUni::fgXMLNSURIName 03366 // 4. if xxx is not null, then yyy cannot be an empty string. 03367 void SGXMLScanner::updateNSMap(const XMLCh* const attrName 03368 , const XMLCh* const attrValue) 03369 { 03370 updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon)); 03371 } 03372 03373 void SGXMLScanner::updateNSMap(const XMLCh* const attrName 03374 , const XMLCh* const attrValue 03375 , const int colonOfs) 03376 { 03377 // We need a buffer to normalize the attribute value into 03378 XMLBufBid bbNormal(&fBufMgr); 03379 XMLBuffer& normalBuf = bbNormal.getBuffer(); 03380 03381 // Normalize the value into the passed buffer. In this case, we don't 03382 // care about the return value. An error was issued for the error, which 03383 // is all we care about here. 03384 normalizeAttRawValue(attrName, attrValue, normalBuf); 03385 XMLCh* namespaceURI = normalBuf.getRawBuffer(); 03386 03387 // We either have the default prefix (""), or we point it into the attr 03388 // name parameter. Note that the xmlns is not the prefix we care about 03389 // here. To us, the 'prefix' is really the local part of the attrName 03390 // parameter. 03391 // 03392 // Check 1. xxx is not xmlns 03393 // 2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa 03394 // 3. yyy is not XMLUni::fgXMLNSURIName 03395 // 4. if xxx is not null, then yyy cannot be an empty string. 
03396 const XMLCh* prefPtr = XMLUni::fgZeroLenString; 03397 if (colonOfs != -1) { 03398 prefPtr = &attrName[colonOfs + 1]; 03399 03400 if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString)) 03401 emitError(XMLErrs::NoUseOfxmlnsAsPrefix); 03402 else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) { 03403 if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) 03404 emitError(XMLErrs::PrefixXMLNotMatchXMLURI); 03405 } 03406 03407 if (!namespaceURI) 03408 emitError(XMLErrs::NoEmptyStrNamespace, attrName); 03409 else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0) 03410 emitError(XMLErrs::NoEmptyStrNamespace, attrName); 03411 } 03412 03413 if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName)) 03414 emitError(XMLErrs::NoUseOfxmlnsURI); 03415 else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) { 03416 if (!XMLString::equals(prefPtr, XMLUni::fgXMLString)) 03417 emitError(XMLErrs::XMLURINotMatchXMLPrefix); 03418 } 03419 03420 // Ok, we have to get the unique id for the attribute value, which is the 03421 // URI that this value should be mapped to. The validator has the 03422 // namespace string pool, so we ask him to find or add this new one. Then 03423 // we ask the element stack to add this prefix to URI Id mapping. 03424 fElemStack.addPrefix 03425 ( 03426 prefPtr 03427 , fURIStringPool->addOrFind(namespaceURI) 03428 ); 03429 } 03430 03431 void SGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount) 03432 { 03433 // Make an initial pass through the list and find any xmlns attributes or 03434 // schema attributes. 03435 // When we find one, send it off to be used to update the element stack's 03436 // namespace mappings. 
03437 for (XMLSize_t index = 0; index < attCount; index++) 03438 { 03439 // each attribute has the prefix:suffix="value" 03440 const KVStringPair* curPair = fRawAttrList->elementAt(index); 03441 const XMLCh* rawPtr = curPair->getKey(); 03442 03443 // If either the key begins with "xmlns:" or its just plain 03444 // "xmlns", then use it to update the map. 03445 if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6) 03446 || XMLString::equals(rawPtr, XMLUni::fgXMLNSString)) 03447 { 03448 const XMLCh* valuePtr = curPair->getValue(); 03449 03450 updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]); 03451 03452 // if the schema URI is seen in the the valuePtr, set the boolean seeXsi 03453 if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) { 03454 fSeeXsi = true; 03455 } 03456 } 03457 } 03458 03459 // walk through the list again to deal with "xsi:...." 03460 if (fSeeXsi) 03461 { 03462 // Schema Xsi Type yyyy (e.g. xsi:type="yyyyy") 03463 XMLBufBid bbXsi(&fBufMgr); 03464 XMLBuffer& fXsiType = bbXsi.getBuffer(); 03465 03466 for (XMLSize_t index = 0; index < attCount; index++) 03467 { 03468 // each attribute has the prefix:suffix="value" 03469 const KVStringPair* curPair = fRawAttrList->elementAt(index); 03470 const XMLCh* rawPtr = curPair->getKey(); 03471 const XMLCh* prefPtr; 03472 03473 int colonInd = fRawAttrColonList[index]; 03474 03475 if (colonInd != -1) { 03476 fURIBuf.set(rawPtr, colonInd); 03477 prefPtr = fURIBuf.getRawBuffer(); 03478 } 03479 else { 03480 prefPtr = XMLUni::fgZeroLenString; 03481 } 03482 03483 // if schema URI has been seen, scan for the schema location and uri 03484 // and resolve the schema grammar; or scan for schema type 03485 if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) { 03486 03487 const XMLCh* valuePtr = curPair->getValue(); 03488 const XMLCh* suffPtr = &rawPtr[colonInd + 1]; 03489 03490 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION)) 03491 
parseSchemaLocation(valuePtr); 03492 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION)) 03493 resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString); 03494 03495 if( fValidator && fValidator->handlesSchema() ) 03496 { 03497 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) 03498 { 03499 // normalize the attribute according to schema whitespace facet 03500 DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME); 03501 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiType, true); 03502 } 03503 else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)) 03504 { 03505 // normalize the attribute according to schema whitespace facet 03506 XMLBuffer& fXsiNil = fBufMgr.bidOnBuffer(); 03507 DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN); 03508 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiNil, true); 03509 if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE)) 03510 ((SchemaValidator*)fValidator)->setNillable(true); 03511 else if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE)) 03512 ((SchemaValidator*)fValidator)->setNillable(false); 03513 else 03514 emitError(XMLErrs::InvalidAttValue, fXsiNil.getRawBuffer(), valuePtr); 03515 fBufMgr.releaseBuffer(fXsiNil); 03516 } 03517 } 03518 } 03519 } 03520 03521 if (fValidator && fValidator->handlesSchema()) { 03522 if (!fXsiType.isEmpty()) { 03523 int colonPos = -1; 03524 unsigned int uriId = resolveQName ( 03525 fXsiType.getRawBuffer() 03526 , fPrefixBuf 03527 , ElemStack::Mode_Element 03528 , colonPos 03529 ); 03530 ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId); 03531 } 03532 } 03533 } 03534 } 03535 03536 void SGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema) 
03537 { 03538 BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr, fMemoryManager); 03539 Janitor<BaseRefVectorOf<XMLCh> > janLoc(schemaLocation); 03540 03541 XMLSize_t size = schemaLocation->size(); 03542 if (size % 2 != 0 ) { 03543 emitError(XMLErrs::BadSchemaLocation); 03544 } else { 03545 // We need a buffer to normalize the attribute value into 03546 XMLBuffer normalBuf(1023, fMemoryManager); 03547 for(XMLSize_t i=0; i<size; i=i+2) { 03548 normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, schemaLocation->elementAt(i), normalBuf); 03549 resolveSchemaGrammar(schemaLocation->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema); 03550 } 03551 } 03552 } 03553 03554 void SGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) { 03555 03556 Grammar* grammar = 0; 03557 03558 { 03559 XMLSchemaDescriptionImpl theSchemaDescription(uri, fMemoryManager); 03560 theSchemaDescription.setLocationHints(loc); 03561 grammar = fGrammarResolver->getGrammar(&theSchemaDescription); 03562 } 03563 03564 // If multi-import is enabled, make sure the existing grammar came 03565 // from the import directive. Otherwise we may end up reloading 03566 // the same schema that came from the external grammar pool. Ideally, 03567 // we would move fSchemaInfoList to XMLGrammarPool so that it survives 03568 // the destruction of the scanner in which case we could rely on the 03569 // same logic we use to weed out duplicate schemas below. 
03570 // 03571 if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType || 03572 (getHandleMultipleImports() && 03573 ((XMLSchemaDescription*)grammar->getGrammarDescription())-> 03574 getContextType () == XMLSchemaDescription::CONTEXT_IMPORT)) 03575 { 03576 if (fLoadSchema || ignoreLoadSchema) 03577 { 03578 XSDDOMParser parser(0, fMemoryManager, 0); 03579 03580 parser.setValidationScheme(XercesDOMParser::Val_Never); 03581 parser.setDoNamespaces(true); 03582 parser.setUserEntityHandler(fEntityHandler); 03583 parser.setUserErrorReporter(fErrorReporter); 03584 03585 //Normalize sysId 03586 XMLBufBid nnSys(&fBufMgr); 03587 XMLBuffer& normalizedSysId = nnSys.getBuffer(); 03588 XMLString::removeChar(loc, 0xFFFF, normalizedSysId); 03589 const XMLCh* normalizedURI = normalizedSysId.getRawBuffer(); 03590 03591 // Create a buffer for expanding the system id 03592 XMLBufBid bbSys(&fBufMgr); 03593 XMLBuffer& expSysId = bbSys.getBuffer(); 03594 03595 // Allow the entity handler to expand the system id if they choose 03596 // to do so. 03597 InputSource* srcToFill = 0; 03598 if (fEntityHandler) 03599 { 03600 if (!fEntityHandler->expandSystemId(normalizedURI, expSysId)) 03601 expSysId.set(normalizedURI); 03602 03603 ReaderMgr::LastExtEntityInfo lastInfo; 03604 fReaderMgr.getLastExtEntityInfo(lastInfo); 03605 XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar, 03606 expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId, 03607 &fReaderMgr); 03608 srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier); 03609 } 03610 else 03611 { 03612 expSysId.set(normalizedURI); 03613 } 03614 03615 // If they didn't create a source via the entity handler, then we 03616 // have to create one on our own. 
03617 if (!srcToFill) 03618 { 03619 if (fDisableDefaultEntityResolution) 03620 return; 03621 03622 ReaderMgr::LastExtEntityInfo lastInfo; 03623 fReaderMgr.getLastExtEntityInfo(lastInfo); 03624 03625 XMLURL urlTmp(fMemoryManager); 03626 if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) || 03627 (urlTmp.isRelative())) 03628 { 03629 if (!fStandardUriConformant) 03630 { 03631 XMLBufBid ddSys(&fBufMgr); 03632 XMLBuffer& resolvedSysId = ddSys.getBuffer(); 03633 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId); 03634 03635 srcToFill = new (fMemoryManager) LocalFileInputSource 03636 ( 03637 lastInfo.systemId 03638 , resolvedSysId.getRawBuffer() 03639 , fMemoryManager 03640 ); 03641 } 03642 else 03643 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 03644 } 03645 else 03646 { 03647 if (fStandardUriConformant && urlTmp.hasInvalidChar()) 03648 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 03649 03650 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager); 03651 } 03652 } 03653 03654 // Put a janitor on the input source 03655 Janitor<InputSource> janSrc(srcToFill); 03656 03657 // Check if this exact schema has already been seen. 03658 // 03659 const XMLCh* sysId = srcToFill->getSystemId(); 03660 unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId; 03661 SchemaInfo* importSchemaInfo = 0; 03662 03663 if (fUseCachedGrammar) 03664 importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId); 03665 03666 if (!importSchemaInfo && !fToCacheGrammar) 03667 importSchemaInfo = fSchemaInfoList->get(sysId, uriId); 03668 03669 if (importSchemaInfo) 03670 { 03671 // We haven't added any new grammars so it is safe to just 03672 // return. 
03673 // 03674 return; 03675 } 03676 03677 // Should just issue warning if the schema is not found 03678 bool flag = srcToFill->getIssueFatalErrorIfNotFound(); 03679 srcToFill->setIssueFatalErrorIfNotFound(false); 03680 03681 parser.parse(*srcToFill); 03682 03683 // Reset the InputSource 03684 srcToFill->setIssueFatalErrorIfNotFound(flag); 03685 03686 if (parser.getSawFatal() && fExitOnFirstFatal) 03687 emitError(XMLErrs::SchemaScanFatalError); 03688 03689 DOMDocument* document = parser.getDocument(); //Our Grammar 03690 03691 if (document != 0) { 03692 03693 DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema 03694 if (root != 0) 03695 { 03696 const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE); 03697 bool newGrammar = false; 03698 if (!XMLString::equals(newUri, uri)) { 03699 if (fValidate || fValScheme == Val_Auto) { 03700 fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri); 03701 } 03702 03703 grammar = fGrammarResolver->getGrammar(newUri); 03704 newGrammar = true; 03705 } 03706 03707 if (!grammar || 03708 grammar->getGrammarType() == Grammar::DTDGrammarType || 03709 (getHandleMultipleImports() && 03710 ((XMLSchemaDescription*) grammar->getGrammarDescription())-> 03711 getContextType () == XMLSchemaDescription::CONTEXT_IMPORT)) 03712 { 03713 // If we switched namespace URI, recheck the schema info. 03714 // 03715 if (newGrammar) 03716 { 03717 unsigned int newUriId = (newUri && *newUri) ? 
fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId; 03718 03719 if (fUseCachedGrammar) 03720 importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId); 03721 03722 if (!importSchemaInfo && !fToCacheGrammar) 03723 importSchemaInfo = fSchemaInfoList->get(sysId, newUriId); 03724 03725 if (importSchemaInfo) 03726 return; 03727 } 03728 03729 // Since we have seen a grammar, set our validation flag 03730 // at this point if the validation scheme is auto 03731 if (fValScheme == Val_Auto && !fValidate) { 03732 fValidate = true; 03733 fElemStack.setValidationFlag(fValidate); 03734 } 03735 03736 bool grammarFound = grammar && 03737 grammar->getGrammarType() == Grammar::SchemaGrammarType; 03738 03739 SchemaGrammar* schemaGrammar; 03740 03741 if (grammarFound) { 03742 schemaGrammar = (SchemaGrammar*) grammar; 03743 } 03744 else { 03745 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager); 03746 } 03747 03748 XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription(); 03749 03750 gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE); 03751 gramDesc->setLocationHints(sysId); 03752 03753 TraverseSchema traverseSchema 03754 ( 03755 root 03756 , fURIStringPool 03757 , schemaGrammar 03758 , fGrammarResolver 03759 , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList 03760 , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList 03761 , this 03762 , sysId 03763 , fEntityHandler 03764 , fErrorReporter 03765 , fMemoryManager 03766 , grammarFound 03767 ); 03768 03769 // Reset the now invalid schema roots in the collected 03770 // schema info entries. 03771 // 03772 { 03773 RefHash2KeysTableOfEnumerator<SchemaInfo> i ( 03774 fToCacheGrammar ? 
fCachedSchemaInfoList : fSchemaInfoList); 03775 03776 while (i.hasMoreElements ()) 03777 i.nextElement().resetRoot (); 03778 } 03779 03780 if (fGrammarType == Grammar::DTDGrammarType) { 03781 fGrammar = schemaGrammar; 03782 fGrammarType = Grammar::SchemaGrammarType; 03783 fValidator->setGrammar(fGrammar); 03784 } 03785 03786 if (fValidate) { 03787 // validate the Schema scan so far 03788 fValidator->preContentValidation(false); 03789 } 03790 } 03791 } 03792 } 03793 } 03794 } 03795 else 03796 { 03797 // Since we have seen a grammar, set our validation flag 03798 // at this point if the validation scheme is auto 03799 if (fValScheme == Val_Auto && !fValidate) { 03800 fValidate = true; 03801 fElemStack.setValidationFlag(fValidate); 03802 } 03803 03804 // we have seen a schema, so set up the fValidator as fSchemaValidator 03805 if (fGrammarType == Grammar::DTDGrammarType) { 03806 fGrammar = grammar; 03807 fGrammarType = Grammar::SchemaGrammarType; 03808 fValidator->setGrammar(fGrammar); 03809 } 03810 } 03811 // update fModel; rely on the grammar resolver to do this 03812 // efficiently 03813 if(getPSVIHandler()) 03814 fModel = fGrammarResolver->getXSModel(); 03815 } 03816 03817 InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId 03818 ,const XMLCh* const pubId) 03819 { 03820 //Normalize sysId 03821 XMLBufBid nnSys(&fBufMgr); 03822 XMLBuffer& normalizedSysId = nnSys.getBuffer(); 03823 XMLString::removeChar(sysId, 0xFFFF, normalizedSysId); 03824 const XMLCh* normalizedURI = normalizedSysId.getRawBuffer(); 03825 03826 // Create a buffer for expanding the system id 03827 XMLBufBid bbSys(&fBufMgr); 03828 XMLBuffer& expSysId = bbSys.getBuffer(); 03829 03830 // Allow the entity handler to expand the system id if they choose 03831 // to do so. 
03832 InputSource* srcToFill = 0; 03833 if (fEntityHandler) 03834 { 03835 if (!fEntityHandler->expandSystemId(normalizedURI, expSysId)) 03836 expSysId.set(normalizedURI); 03837 03838 ReaderMgr::LastExtEntityInfo lastInfo; 03839 fReaderMgr.getLastExtEntityInfo(lastInfo); 03840 XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity, 03841 expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId, 03842 &fReaderMgr); 03843 srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier); 03844 } 03845 else 03846 { 03847 expSysId.set(normalizedURI); 03848 } 03849 03850 // If they didn't create a source via the entity handler, then we 03851 // have to create one on our own. 03852 if (!srcToFill) 03853 { 03854 if (fDisableDefaultEntityResolution) 03855 return 0; 03856 03857 ReaderMgr::LastExtEntityInfo lastInfo; 03858 fReaderMgr.getLastExtEntityInfo(lastInfo); 03859 03860 XMLURL urlTmp(fMemoryManager); 03861 if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) || 03862 (urlTmp.isRelative())) 03863 { 03864 if (!fStandardUriConformant) 03865 { 03866 XMLBufBid ddSys(&fBufMgr); 03867 XMLBuffer& resolvedSysId = ddSys.getBuffer(); 03868 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId); 03869 03870 srcToFill = new (fMemoryManager) LocalFileInputSource 03871 ( 03872 lastInfo.systemId 03873 , resolvedSysId.getRawBuffer() 03874 , fMemoryManager 03875 ); 03876 } 03877 else 03878 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 03879 } 03880 else 03881 { 03882 if (fStandardUriConformant && urlTmp.hasInvalidChar()) 03883 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 03884 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager); 03885 } 03886 } 03887 03888 return srcToFill; 03889 } 03890 03891 03892 // --------------------------------------------------------------------------- 03893 // SGXMLScanner: Private grammar preparsing methods 03894 
// --------------------------------------------------------------------------- 03895 Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src, 03896 const bool toCache) 03897 { 03898 // Reset the validators 03899 fSchemaValidator->reset(); 03900 fSchemaValidator->setErrorReporter(fErrorReporter); 03901 fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal); 03902 fSchemaValidator->setGrammarResolver(fGrammarResolver); 03903 03904 if (fValidatorFromUser) 03905 fValidator->reset(); 03906 03907 XSDDOMParser parser(0, fMemoryManager, 0); 03908 03909 parser.setValidationScheme(XercesDOMParser::Val_Never); 03910 parser.setDoNamespaces(true); 03911 parser.setUserEntityHandler(fEntityHandler); 03912 parser.setUserErrorReporter(fErrorReporter); 03913 03914 // Should just issue warning if the schema is not found 03915 bool flag = src.getIssueFatalErrorIfNotFound(); 03916 ((InputSource&) src).setIssueFatalErrorIfNotFound(false); 03917 03918 parser.parse(src); 03919 03920 // Reset the InputSource 03921 ((InputSource&) src).setIssueFatalErrorIfNotFound(flag); 03922 03923 if (parser.getSawFatal() && fExitOnFirstFatal) 03924 emitError(XMLErrs::SchemaScanFatalError); 03925 03926 DOMDocument* document = parser.getDocument(); //Our Grammar 03927 03928 if (document != 0) { 03929 03930 DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema 03931 if (root != 0) 03932 { 03933 const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE); 03934 Grammar* grammar = fGrammarResolver->getGrammar(nsUri); 03935 03936 // Check if this exact schema has already been seen. 
03937 // 03938 const XMLCh* sysId = src.getSystemId(); 03939 SchemaInfo* importSchemaInfo = 0; 03940 03941 if (grammar) 03942 { 03943 if (nsUri && *nsUri) 03944 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri)); 03945 else 03946 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId); 03947 } 03948 03949 if (!importSchemaInfo) 03950 { 03951 bool grammarFound = grammar && 03952 grammar->getGrammarType() == Grammar::SchemaGrammarType && 03953 getHandleMultipleImports(); 03954 03955 SchemaGrammar* schemaGrammar; 03956 03957 if (grammarFound) 03958 schemaGrammar = (SchemaGrammar*) grammar; 03959 else 03960 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager); 03961 03962 XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription(); 03963 gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE); 03964 gramDesc->setLocationHints(sysId); 03965 03966 TraverseSchema traverseSchema 03967 ( 03968 root 03969 , fURIStringPool 03970 , schemaGrammar 03971 , fGrammarResolver 03972 , fCachedSchemaInfoList 03973 , toCache ? fCachedSchemaInfoList : fSchemaInfoList 03974 , this 03975 , sysId 03976 , fEntityHandler 03977 , fErrorReporter 03978 , fMemoryManager 03979 , grammarFound 03980 ); 03981 03982 grammar = schemaGrammar; 03983 03984 // Reset the now invalid schema roots in the collected 03985 // schema info entries. 03986 // 03987 { 03988 RefHash2KeysTableOfEnumerator<SchemaInfo> i ( 03989 toCache ? 
fCachedSchemaInfoList : fSchemaInfoList); 03990 03991 while (i.hasMoreElements ()) 03992 i.nextElement().resetRoot (); 03993 } 03994 } 03995 03996 if (fValidate) { 03997 // validate the Schema scan so far 03998 fValidator->setGrammar(grammar); 03999 fValidator->preContentValidation(false); 04000 } 04001 04002 if (toCache) { 04003 fGrammarResolver->cacheGrammars(); 04004 } 04005 04006 if(getPSVIHandler()) 04007 fModel = fGrammarResolver->getXSModel(); 04008 04009 return grammar; 04010 } 04011 } 04012 04013 return 0; 04014 } 04015 04016 04017 04018 // --------------------------------------------------------------------------- 04019 // SGXMLScanner: Private parsing methods 04020 // --------------------------------------------------------------------------- 04021 04022 // This method is called to do a raw scan of an attribute value. It does not 04023 // do normalization (since we don't know their types yet.) It just scans the 04024 // value and does entity expansion. 04025 // 04026 // End of entity's must be dealt with here. During DTD scan, they can come 04027 // from external entities. During content, they can come from any entity. 04028 // We just eat the end of entity and continue with our scan until we come 04029 // to the closing quote. If an unterminated value causes us to go through 04030 // subsequent entities, that will cause errors back in the calling code, 04031 // but there's little we can do about it here. 04032 bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill) 04033 { 04034 // Reset the target buffer 04035 toFill.reset(); 04036 04037 // Get the next char which must be a single or double quote 04038 XMLCh quoteCh; 04039 if (!fReaderMgr.skipIfQuote(quoteCh)) 04040 return false; 04041 04042 // We have to get the current reader because we have to ignore closing 04043 // quotes until we hit the same reader again. 
04044 const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); 04045 04046 // Loop until we get the attribute value. Note that we use a double 04047 // loop here to avoid the setup/teardown overhead of the exception 04048 // handler on every round. 04049 while (true) 04050 { 04051 try 04052 { 04053 while(true) 04054 { 04055 XMLCh nextCh = fReaderMgr.getNextChar(); 04056 04057 if (nextCh != quoteCh) 04058 { 04059 if (nextCh != chAmpersand) 04060 { 04061 if ((nextCh < 0xD800) || (nextCh > 0xDFFF)) 04062 { 04063 // Its got to at least be a valid XML character 04064 if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) 04065 { 04066 if (nextCh == 0) 04067 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); 04068 04069 XMLCh tmpBuf[9]; 04070 XMLString::binToText 04071 ( 04072 nextCh 04073 , tmpBuf 04074 , 8 04075 , 16 04076 , fMemoryManager 04077 ); 04078 emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); 04079 } 04080 } else // its a surrogate 04081 { 04082 // Deal with surrogate pairs 04083 04084 // we expect a a leading surrogate. 04085 if (nextCh <= 0xDBFF) 04086 { 04087 toFill.append(nextCh); 04088 04089 // process the trailing surrogate 04090 nextCh = fReaderMgr.getNextChar(); 04091 04092 // it should be a trailing surrogate. 04093 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF)) 04094 { 04095 emitError(XMLErrs::Expected2ndSurrogateChar); 04096 } 04097 } else 04098 { 04099 // Its a trailing surrogate, but we are not expecting it 04100 emitError(XMLErrs::Unexpected2ndSurrogateChar); 04101 } 04102 } 04103 } else // its a chAmpersand 04104 { 04105 // Check for an entity ref . We ignore the empty flag in 04106 // this one. 04107 04108 bool escaped; 04109 XMLCh firstCh; 04110 XMLCh secondCh 04111 ; 04112 // If it was not returned directly, then jump back up 04113 if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned) 04114 { 04115 // If it was escaped, then put in a 0xFFFF value. 
This will 04116 // be used later during validation and normalization of the 04117 // value to know that the following character was via an 04118 // escape char. 04119 if (escaped) 04120 toFill.append(0xFFFF); 04121 04122 toFill.append(firstCh); 04123 if (secondCh) 04124 toFill.append(secondCh); 04125 } 04126 continue; 04127 } 04128 } else // its a quoteCh 04129 { 04130 // Check for our ending quote. It has to be in the same entity 04131 // as where we started. Quotes in nested entities are ignored. 04132 04133 if (curReader == fReaderMgr.getCurrentReaderNum()) 04134 { 04135 return true; 04136 } 04137 04138 // Watch for spillover into a previous entity 04139 if (curReader > fReaderMgr.getCurrentReaderNum()) 04140 { 04141 emitError(XMLErrs::PartialMarkupInEntity); 04142 return false; 04143 } 04144 } 04145 04146 // add it to the buffer 04147 toFill.append(nextCh); 04148 04149 } 04150 } 04151 catch(const EndOfEntityException&) 04152 { 04153 // Just eat it and continue. 04154 } 04155 } 04156 return true; 04157 } 04158 04159 04160 // This method scans a CDATA section. It collects the character into one 04161 // of the temp buffers and calls the document handler, if any, with the 04162 // characters. It assumes that the <![CDATA string has been scanned before 04163 // this call. 04164 void SGXMLScanner::scanCDSection() 04165 { 04166 static const XMLCh CDataClose[] = 04167 { 04168 chCloseSquare, chCloseAngle, chNull 04169 }; 04170 04171 // The next character should be the opening square bracket. If not 04172 // issue an error, but then try to recover by skipping any whitespace 04173 // and checking again. 
04174 if (!fReaderMgr.skippedChar(chOpenSquare)) 04175 { 04176 emitError(XMLErrs::ExpectedOpenSquareBracket); 04177 fReaderMgr.skipPastSpaces(); 04178 04179 // If we still don't find it, then give up, else keep going 04180 if (!fReaderMgr.skippedChar(chOpenSquare)) 04181 return; 04182 } 04183 04184 // Get a buffer for this 04185 XMLBufBid bbCData(&fBufMgr); 04186 04187 // We just scan forward until we hit the end of CDATA section sequence. 04188 // CDATA is effectively a big escape mechanism so we don't treat markup 04189 // characters specially here. 04190 bool emittedError = false; 04191 bool gotLeadingSurrogate = false; 04192 04193 // Get the character data opts for the current element 04194 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; 04195 // And see if the current element is a 'Children' style content model 04196 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 04197 if(currType) 04198 { 04199 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); 04200 if(modelType == SchemaElementDecl::Children || 04201 modelType == SchemaElementDecl::ElementOnlyEmpty) 04202 charOpts = XMLElementDecl::SpacesOk; 04203 else if(modelType == SchemaElementDecl::Empty) 04204 charOpts = XMLElementDecl::NoCharData; 04205 } 04206 04207 // should not be necessary when PSVI on element decl removed 04208 const ElemStack::StackElem* topElem = fElemStack.topElement(); 04209 04210 while (true) 04211 { 04212 const XMLCh nextCh = fReaderMgr.getNextChar(); 04213 04214 // Watch for unexpected end of file 04215 if (!nextCh) 04216 { 04217 emitError(XMLErrs::UnterminatedCDATASection); 04218 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); 04219 } 04220 04221 if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))) 04222 { 04223 // This document is standalone; this ignorable CDATA whitespace is forbidden. 
04224 // XML 1.0, Section 2.9 04225 // And see if the current element is a 'Children' style content model 04226 if (topElem->fThisElement->isExternal()) { 04227 04228 if (charOpts == XMLElementDecl::SpacesOk) // Element Content 04229 { 04230 // Error - standalone should have a value of "no" as whitespace detected in an 04231 // element type with element content whose element declaration was external 04232 fValidator->emitError(XMLValid::NoWSForStandalone); 04233 if (getPSVIHandler()) 04234 { 04235 // REVISIT: 04236 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 04237 } 04238 } 04239 } 04240 } 04241 04242 // If this is a close square bracket it could be our closing 04243 // sequence. 04244 if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose)) 04245 { 04246 // make sure we were not expecting a trailing surrogate. 04247 if (gotLeadingSurrogate) { 04248 emitError(XMLErrs::Expected2ndSurrogateChar); 04249 } 04250 04251 XMLSize_t xsLen = bbCData.getLen(); 04252 const XMLCh* xsNormalized = bbCData.getRawBuffer(); 04253 if (fValidate) { 04254 04255 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); 04256 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 04257 { 04258 // normalize the character according to schema whitespace facet 04259 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf); 04260 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 04261 xsLen = fWSNormalizeBuf.getLen(); 04262 } 04263 04264 // tell the schema validation about the character data for checkContent later 04265 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized); 04266 04267 if (charOpts != XMLElementDecl::AllCharData) 04268 { 04269 // They definitely cannot handle any type of char data 04270 fValidator->emitError(XMLValid::NoCharDataInCM); 04271 if (getPSVIHandler()) 04272 { 04273 // REVISIT: 04274 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 04275 } 04276 } 
04277 } 04278 04279 // call all active identity constraints 04280 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) { 04281 fContent.append(xsNormalized, xsLen); 04282 } 04283 04284 // If we have a doc handler, call it 04285 if (fDocHandler) 04286 { 04287 if (fNormalizeData) { 04288 fDocHandler->docCharacters(xsNormalized, xsLen, true); 04289 } 04290 else { 04291 fDocHandler->docCharacters( 04292 bbCData.getRawBuffer(), bbCData.getLen(), true 04293 ); 04294 } 04295 } 04296 04297 // And we are done 04298 break; 04299 } 04300 04301 // Make sure its a valid character. But if we've emitted an error 04302 // already, don't bother with the overhead since we've already told 04303 // them about it. 04304 if (!emittedError) 04305 { 04306 // Deal with surrogate pairs 04307 if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) 04308 { 04309 // Its a leading surrogate. If we already got one, then 04310 // issue an error, else set leading flag to make sure that 04311 // we look for a trailing next time. 04312 if (gotLeadingSurrogate) 04313 emitError(XMLErrs::Expected2ndSurrogateChar); 04314 else 04315 gotLeadingSurrogate = true; 04316 } 04317 else 04318 { 04319 // If its a trailing surrogate, make sure that we are 04320 // prepared for that. Else, its just a regular char so make 04321 // sure that we were not expected a trailing surrogate. 04322 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) 04323 { 04324 // Its trailing, so make sure we were expecting it 04325 if (!gotLeadingSurrogate) 04326 emitError(XMLErrs::Unexpected2ndSurrogateChar); 04327 } 04328 else 04329 { 04330 // Its just a char, so make sure we were not expecting a 04331 // trailing surrogate. 
04332 if (gotLeadingSurrogate) 04333 emitError(XMLErrs::Expected2ndSurrogateChar); 04334 04335 // Its got to at least be a valid XML character 04336 else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) 04337 { 04338 XMLCh tmpBuf[9]; 04339 XMLString::binToText 04340 ( 04341 nextCh 04342 , tmpBuf 04343 , 8 04344 , 16 04345 , fMemoryManager 04346 ); 04347 emitError(XMLErrs::InvalidCharacter, tmpBuf); 04348 emittedError = true; 04349 } 04350 } 04351 gotLeadingSurrogate = false; 04352 } 04353 } 04354 04355 // Add it to the buffer 04356 bbCData.append(nextCh); 04357 } 04358 } 04359 04360 04361 void SGXMLScanner::scanCharData(XMLBuffer& toUse) 04362 { 04363 // We have to watch for the stupid ]]> sequence, which is illegal in 04364 // character data. So this is a little state machine that handles that. 04365 enum States 04366 { 04367 State_Waiting 04368 , State_GotOne 04369 , State_GotTwo 04370 }; 04371 04372 // Reset the buffer before we start 04373 toUse.reset(); 04374 04375 // Turn on the 'throw at end' flag of the reader manager 04376 ThrowEOEJanitor jan(&fReaderMgr, true); 04377 04378 // In order to be more efficient we have to use kind of a deeply nested 04379 // set of blocks here. The outer block puts on a try and catches end of 04380 // entity exceptions. The inner loop is the per-character loop. If we 04381 // put the try inside the inner loop, it would work but would require 04382 // the exception handling code setup/teardown code to be invoked for 04383 // each character. 04384 XMLCh nextCh; 04385 XMLCh secondCh = 0; 04386 States curState = State_Waiting; 04387 bool escaped = false; 04388 bool gotLeadingSurrogate = false; 04389 bool notDone = true; 04390 while (notDone) 04391 { 04392 try 04393 { 04394 while (true) 04395 { 04396 // Eat through as many plain content characters as possible without 04397 // needing special handling. 
Moving most content characters here, 04398 // in this one call, rather than running the overall loop once 04399 // per content character, is a speed optimization. 04400 if (curState == State_Waiting && !gotLeadingSurrogate) 04401 { 04402 fReaderMgr.movePlainContentChars(toUse); 04403 } 04404 04405 // Try to get another char from the source 04406 // The code from here on down covers all contengencies, 04407 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) 04408 { 04409 // If we were waiting for a trailing surrogate, its an error 04410 if (gotLeadingSurrogate) 04411 emitError(XMLErrs::Expected2ndSurrogateChar); 04412 04413 notDone = false; 04414 break; 04415 } 04416 04417 // Watch for a reference. Note that the escapement mechanism 04418 // is ignored in this content. 04419 escaped = false; 04420 if (nextCh == chAmpersand) 04421 { 04422 sendCharData(toUse); 04423 04424 // Turn off the throwing at the end of entity during this 04425 ThrowEOEJanitor jan(&fReaderMgr, false); 04426 04427 if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned) 04428 { 04429 gotLeadingSurrogate = false; 04430 continue; 04431 } 04432 } 04433 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) 04434 { 04435 // Deal with surrogate pairs 04436 // Its a leading surrogate. If we already got one, then 04437 // issue an error, else set leading flag to make sure that 04438 // we look for a trailing next time. 04439 if (gotLeadingSurrogate) 04440 emitError(XMLErrs::Expected2ndSurrogateChar); 04441 else 04442 gotLeadingSurrogate = true; 04443 } 04444 else 04445 { 04446 // If its a trailing surrogate, make sure that we are 04447 // prepared for that. Else, its just a regular char so make 04448 // sure that we were not expected a trailing surrogate. 
04449 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) 04450 { 04451 // Its trailing, so make sure we were expecting it 04452 if (!gotLeadingSurrogate) 04453 emitError(XMLErrs::Unexpected2ndSurrogateChar); 04454 } 04455 else 04456 { 04457 // Its just a char, so make sure we were not expecting a 04458 // trailing surrogate. 04459 if (gotLeadingSurrogate) 04460 emitError(XMLErrs::Expected2ndSurrogateChar); 04461 04462 // Make sure the returned char is a valid XML char 04463 if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) 04464 { 04465 XMLCh tmpBuf[9]; 04466 XMLString::binToText 04467 ( 04468 nextCh 04469 , tmpBuf 04470 , 8 04471 , 16 04472 , fMemoryManager 04473 ); 04474 emitError(XMLErrs::InvalidCharacter, tmpBuf); 04475 } 04476 } 04477 gotLeadingSurrogate = false; 04478 } 04479 04480 // Keep the state machine up to date 04481 if (!escaped) 04482 { 04483 if (nextCh == chCloseSquare) 04484 { 04485 if (curState == State_Waiting) 04486 curState = State_GotOne; 04487 else if (curState == State_GotOne) 04488 curState = State_GotTwo; 04489 } 04490 else if (nextCh == chCloseAngle) 04491 { 04492 if (curState == State_GotTwo) 04493 emitError(XMLErrs::BadSequenceInCharData); 04494 curState = State_Waiting; 04495 } 04496 else 04497 { 04498 curState = State_Waiting; 04499 } 04500 } 04501 else 04502 { 04503 curState = State_Waiting; 04504 } 04505 04506 // Add this char to the buffer 04507 toUse.append(nextCh); 04508 04509 if (secondCh) 04510 { 04511 toUse.append(secondCh); 04512 secondCh=0; 04513 } 04514 } 04515 } 04516 catch(const EndOfEntityException& toCatch) 04517 { 04518 // Some entity ended, so we have to send any accumulated 04519 // chars and send an end of entity event. 
04520 sendCharData(toUse); 04521 gotLeadingSurrogate = false; 04522 04523 if (fDocHandler) 04524 fDocHandler->endEntityReference(toCatch.getEntity()); 04525 } 04526 } 04527 04528 // Check the validity constraints as per XML 1.0 Section 2.9 04529 if (fValidate && fStandalone) 04530 { 04531 // See if the text contains whitespace 04532 // Get the raw data we need for the callback 04533 const XMLCh* rawBuf = toUse.getRawBuffer(); 04534 const XMLSize_t len = toUse.getLen(); 04535 const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len); 04536 04537 if (isSpaces) 04538 { 04539 // And see if the current element is a 'Children' style content model 04540 const ElemStack::StackElem* topElem = fElemStack.topElement(); 04541 04542 if (topElem->fThisElement->isExternal()) { 04543 04544 // Get the character data opts for the current element 04545 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; 04546 // And see if the current element is a 'Children' style content model 04547 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 04548 if(currType) 04549 { 04550 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); 04551 if(modelType == SchemaElementDecl::Children || 04552 modelType == SchemaElementDecl::ElementOnlyEmpty) 04553 charOpts = XMLElementDecl::SpacesOk; 04554 } 04555 04556 if (charOpts == XMLElementDecl::SpacesOk) // => Element Content 04557 { 04558 // Error - standalone should have a value of "no" as whitespace detected in an 04559 // element type with element content whose element declaration was external 04560 // 04561 fValidator->emitError(XMLValid::NoWSForStandalone); 04562 if (getPSVIHandler()) 04563 { 04564 // REVISIT: 04565 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 04566 } 04567 } 04568 } 04569 } 04570 } 04571 // Send any char data that we accumulated into the buffer 04572 sendCharData(toUse); 04573 } 04574 04575 04576 // 
This method will scan a general/character entity ref. It will either 04577 // expand a char ref and return it directly, or push a reader for a general 04578 // entity. 04579 // 04580 // The return value indicates whether the char parameters hold the value 04581 // or whether the value was pushed as a reader, or that it failed. 04582 // 04583 // The escaped flag tells the caller whether the returned parameter resulted 04584 // from a character reference, which escapes the character in some cases. It 04585 // only makes any difference if the return value indicates the value was 04586 // returned directly. 04587 SGXMLScanner::EntityExpRes 04588 SGXMLScanner::scanEntityRef( const bool 04589 , XMLCh& firstCh 04590 , XMLCh& secondCh 04591 , bool& escaped) 04592 { 04593 // Assume no escape 04594 secondCh = 0; 04595 escaped = false; 04596 04597 // We have to insure that its all in one entity 04598 const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); 04599 04600 // If the next char is a pound, then its a character reference and we 04601 // need to expand it always. 04602 if (fReaderMgr.skippedChar(chPound)) 04603 { 04604 // Its a character reference, so scan it and get back the numeric 04605 // value it represents. 04606 if (!scanCharRef(firstCh, secondCh)) 04607 return EntityExp_Failed; 04608 04609 escaped = true; 04610 04611 if (curReader != fReaderMgr.getCurrentReaderNum()) 04612 emitError(XMLErrs::PartialMarkupInEntity); 04613 04614 return EntityExp_Returned; 04615 } 04616 04617 // Expand it since its a normal entity ref 04618 XMLBufBid bbName(&fBufMgr); 04619 int colonPosition; 04620 if (!fReaderMgr.getQName(bbName.getBuffer(), &colonPosition)) 04621 { 04622 if (bbName.isEmpty()) 04623 emitError(XMLErrs::ExpectedEntityRefName); 04624 else 04625 emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer()); 04626 return EntityExp_Failed; 04627 } 04628 04629 // Next char must be a semi-colon. But if its not, just emit 04630 // an error and try to continue. 
04631 if (!fReaderMgr.skippedChar(chSemiColon)) 04632 emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer()); 04633 04634 // Make sure we ended up on the same entity reader as the & char 04635 if (curReader != fReaderMgr.getCurrentReaderNum()) 04636 emitError(XMLErrs::PartialMarkupInEntity); 04637 04638 // Look up the name in the general entity pool 04639 // If it does not exist, then obviously an error 04640 if (!fEntityTable->containsKey(bbName.getRawBuffer())) 04641 { 04642 // XML 1.0 Section 4.1 04643 // Well-formedness Constraint for entity not found: 04644 // In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references, 04645 // or a document with "standalone='yes'", for an entity reference that does not occur within the external subset 04646 // or a parameter entity 04647 if (fStandalone || fHasNoDTD) 04648 emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer()); 04649 04650 return EntityExp_Failed; 04651 } 04652 04653 // here's where we need to check if there's a SecurityManager, 04654 // how many entity references we've had 04655 if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) { 04656 XMLCh expLimStr[32]; 04657 XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager); 04658 emitError 04659 ( 04660 XMLErrs::EntityExpansionLimitExceeded 04661 , expLimStr 04662 ); 04663 // there seems nothing better to be done than to reset the entity expansion limit 04664 fEntityExpansionCount = 0; 04665 } 04666 04667 firstCh = fEntityTable->get(bbName.getRawBuffer()); 04668 escaped = true; 04669 return EntityExp_Returned; 04670 } 04671 04672 04673 bool SGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace) 04674 { 04675 Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace); 04676 04677 if (!tempGrammar) { 04678 tempGrammar = fSchemaGrammar; 04679 } 04680 04681 if (!tempGrammar) 04682 return false; 04683 else { 04684 
fGrammar = tempGrammar; 04685 fGrammarType = fGrammar->getGrammarType(); 04686 if (fGrammarType == Grammar::DTDGrammarType) { 04687 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); 04688 } 04689 04690 fValidator->setGrammar(fGrammar); 04691 return true; 04692 } 04693 } 04694 04695 // check if we should skip or lax the validation of the element 04696 // if skip - no validation 04697 // if lax - validate only if the element if found 04698 bool SGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv, 04699 const XMLContentModel* const cm, 04700 const XMLSize_t parentElemDepth) 04701 { 04702 bool skipThisOne = false; 04703 bool laxThisOne = false; 04704 unsigned int elementURI = element->getURI(); 04705 unsigned int currState = fElemState[parentElemDepth]; 04706 unsigned int currLoop = fElemLoopState[parentElemDepth]; 04707 04708 if (currState == XMLContentModel::gInvalidTrans) { 04709 return laxThisOne; 04710 } 04711 04712 SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool); 04713 04714 if (cv) { 04715 XMLSize_t i = 0; 04716 XMLSize_t leafCount = cv->getLeafCount(); 04717 unsigned int nextState = 0; 04718 04719 for (; i < leafCount; i++) { 04720 04721 QName* fElemMap = cv->getLeafNameAt(i); 04722 unsigned int uri = fElemMap->getURI(); 04723 ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i); 04724 04725 if (type == ContentSpecNode::Leaf) { 04726 if (((uri == elementURI) 04727 && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart())) 04728 || comparator.isEquivalentTo(element, fElemMap)) { 04729 04730 nextState = cm->getNextState(currState, i); 04731 04732 if (nextState != XMLContentModel::gInvalidTrans) 04733 break; 04734 } 04735 } else if ((type & 0x0f) == ContentSpecNode::Any) { 04736 nextState = cm->getNextState(currState, i); 04737 if (nextState != XMLContentModel::gInvalidTrans) 04738 break; 04739 } 04740 else if ((type & 0x0f) == ContentSpecNode::Any_Other) { 
04741 if (uri != elementURI && elementURI != fEmptyNamespaceId) { 04742 nextState = cm->getNextState(currState, i); 04743 if (nextState != XMLContentModel::gInvalidTrans) 04744 break; 04745 } 04746 } 04747 else if ((type & 0x0f) == ContentSpecNode::Any_NS) { 04748 if (uri == elementURI) { 04749 nextState = cm->getNextState(currState, i); 04750 if (nextState != XMLContentModel::gInvalidTrans) 04751 break; 04752 } 04753 } 04754 04755 } // for 04756 04757 if (i == leafCount) { // no match 04758 fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans; 04759 fElemLoopState[parentElemDepth] = 0; 04760 return laxThisOne; 04761 } 04762 04763 ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i); 04764 if ((type & 0x0f) == ContentSpecNode::Any || 04765 (type & 0x0f) == ContentSpecNode::Any_Other || 04766 (type & 0x0f) == ContentSpecNode::Any_NS) 04767 { 04768 if (type == ContentSpecNode::Any_Skip || 04769 type == ContentSpecNode::Any_NS_Skip || 04770 type == ContentSpecNode::Any_Other_Skip) { 04771 skipThisOne = true; 04772 } 04773 else if (type == ContentSpecNode::Any_Lax || 04774 type == ContentSpecNode::Any_NS_Lax || 04775 type == ContentSpecNode::Any_Other_Lax) { 04776 laxThisOne = true; 04777 } 04778 } 04779 fElemState[parentElemDepth] = nextState; 04780 fElemLoopState[parentElemDepth] = currLoop; 04781 } // if 04782 04783 if (skipThisOne) { 04784 fValidate = false; 04785 fElemStack.setValidationFlag(fValidate); 04786 } 04787 04788 return laxThisOne; 04789 } 04790 04791 04792 // check if there is an AnyAttribute, and if so, see if we should lax or skip 04793 // if skip - no validation 04794 // if lax - validate only if the attribute if found 04795 bool SGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne) 04796 { 04797 XMLAttDef::AttTypes wildCardType = attWildCard->getType(); 04798 bool anyEncountered = false; 04799 skipThisOne = false; 04800 laxThisOne = false; 04801 if (wildCardType == 
XMLAttDef::Any_Any) 04802 anyEncountered = true; 04803 else if (wildCardType == XMLAttDef::Any_Other) { 04804 if (attWildCard->getAttName()->getURI() != uriId 04805 && uriId != fEmptyNamespaceId) 04806 anyEncountered = true; 04807 } 04808 else if (wildCardType == XMLAttDef::Any_List) { 04809 ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList(); 04810 XMLSize_t listSize = (nameURIList) ? nameURIList->size() : 0; 04811 04812 if (listSize) { 04813 for (XMLSize_t i=0; i < listSize; i++) { 04814 if (nameURIList->elementAt(i) == uriId) 04815 anyEncountered = true; 04816 } 04817 } 04818 } 04819 04820 if (anyEncountered) { 04821 XMLAttDef::DefAttTypes defType = attWildCard->getDefaultType(); 04822 if (defType == XMLAttDef::ProcessContents_Skip) { 04823 // attribute should just be bypassed, 04824 skipThisOne = true; 04825 if (getPSVIHandler()) 04826 { 04827 // REVISIT: 04828 // PSVIAttribute->setValidationAttempted(PSVIItem::VALIDATION_NONE); 04829 } 04830 } 04831 else if (defType == XMLAttDef::ProcessContents_Lax) { 04832 laxThisOne = true; 04833 } 04834 } 04835 04836 return anyEncountered; 04837 } 04838 04839 inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl) 04840 { 04841 if (currType) 04842 return currType->getAttDefList(); 04843 else 04844 return elemDecl->getAttDefList(); 04845 } 04846 04847 void SGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl, 04848 DatatypeValidator* const memberDV) 04849 { 04850 PSVIElement::ASSESSMENT_TYPE validationAttempted; 04851 PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN; 04852 04853 if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth) 04854 validationAttempted = PSVIElement::VALIDATION_FULL; 04855 else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth) 04856 validationAttempted = PSVIElement::VALIDATION_NONE; 04857 else 04858 { 04859 validationAttempted = PSVIElement::VALIDATION_PARTIAL; 04860 
fPSVIElemContext.fFullValidationDepth = 04861 fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1; 04862 } 04863 04864 if (fValidate && elemDecl->isDeclared()) 04865 { 04866 validity = (fPSVIElemContext.fErrorOccurred) 04867 ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID; 04868 } 04869 04870 XSTypeDefinition* typeDef = 0; 04871 bool isMixed = false; 04872 if (fPSVIElemContext.fCurrentTypeInfo) 04873 { 04874 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo); 04875 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType(); 04876 isMixed = (modelType == SchemaElementDecl::Mixed_Simple 04877 || modelType == SchemaElementDecl::Mixed_Complex); 04878 } 04879 else if (fPSVIElemContext.fCurrentDV) 04880 typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV); 04881 04882 XMLCh* canonicalValue = 0; 04883 if (fPSVIElemContext.fNormalizedValue && !isMixed && 04884 validity == PSVIElement::VALIDITY_VALID) 04885 { 04886 if (memberDV) 04887 canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager); 04888 else if (fPSVIElemContext.fCurrentDV) 04889 canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager); 04890 } 04891 04892 fPSVIElement->reset 04893 ( 04894 validity 04895 , validationAttempted 04896 , fRootElemName 04897 , fPSVIElemContext.fIsSpecified 04898 , (elemDecl->isDeclared()) 04899 ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0 04900 , typeDef 04901 , (memberDV) ? 
(XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0 04902 , fModel 04903 , elemDecl->getDefaultValue() 04904 , fPSVIElemContext.fNormalizedValue 04905 , canonicalValue 04906 ); 04907 04908 fPSVIHandler->handleElementPSVI 04909 ( 04910 elemDecl->getBaseName() 04911 , fURIStringPool->getValueForId(elemDecl->getURI()) 04912 , fPSVIElement 04913 ); 04914 04915 // decrease element depth 04916 fPSVIElemContext.fElemDepth--; 04917 04918 } 04919 04920 void SGXMLScanner::resetPSVIElemContext() 04921 { 04922 fPSVIElemContext.fIsSpecified = false; 04923 fPSVIElemContext.fErrorOccurred = false; 04924 fPSVIElemContext.fElemDepth = -1; 04925 fPSVIElemContext.fFullValidationDepth = -1; 04926 fPSVIElemContext.fNoneValidationDepth = -1; 04927 fPSVIElemContext.fCurrentDV = 0; 04928 fPSVIElemContext.fCurrentTypeInfo = 0; 04929 fPSVIElemContext.fNormalizedValue = 0; 04930 } 04931 04932 XERCES_CPP_NAMESPACE_END