GME: IGXMLScanner.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: IGXMLScanner.cpp 882548 2009-11-20 13:44:14Z borisk $
00020  */
00021 
00022 // ---------------------------------------------------------------------------
00023 //  Includes
00024 // ---------------------------------------------------------------------------
00025 #include <xercesc/internal/IGXMLScanner.hpp>
00026 #include <xercesc/util/RuntimeException.hpp>
00027 #include <xercesc/util/UnexpectedEOFException.hpp>
00028 #include <xercesc/sax/InputSource.hpp>
00029 #include <xercesc/framework/XMLDocumentHandler.hpp>
00030 #include <xercesc/framework/XMLEntityHandler.hpp>
00031 #include <xercesc/framework/XMLPScanToken.hpp>
00032 #include <xercesc/internal/EndOfEntityException.hpp>
00033 #include <xercesc/framework/MemoryManager.hpp>
00034 #include <xercesc/framework/XMLGrammarPool.hpp>
00035 #include <xercesc/framework/XMLDTDDescription.hpp>
00036 #include <xercesc/framework/psvi/PSVIElement.hpp>
00037 #include <xercesc/framework/psvi/PSVIHandler.hpp>
00038 #include <xercesc/framework/psvi/PSVIAttributeList.hpp>
00039 #include <xercesc/validators/common/GrammarResolver.hpp>
00040 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
00041 #include <xercesc/validators/DTD/DTDScanner.hpp>
00042 #include <xercesc/validators/DTD/DTDValidator.hpp>
00043 #include <xercesc/validators/schema/SchemaValidator.hpp>
00044 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
00045 #include <xercesc/validators/schema/identity/IC_Selector.hpp>
00046 #include <xercesc/util/OutOfMemoryException.hpp>
00047 
00048 XERCES_CPP_NAMESPACE_BEGIN
00049 
00050 
00051 typedef JanitorMemFunCall<IGXMLScanner> CleanupType;
00052 typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;
00053 
00054 
00055 // ---------------------------------------------------------------------------
00056 //  IGXMLScanner: Constructors and Destructor
00057 // ---------------------------------------------------------------------------
00058 IGXMLScanner::IGXMLScanner( XMLValidator* const  valToAdopt
00059                           , GrammarResolver* const grammarResolver
00060                           , MemoryManager* const manager) :
00061 
00062     XMLScanner(valToAdopt, grammarResolver, manager)
00063     , fSeeXsi(false)
00064     , fGrammarType(Grammar::UnKnown)
00065     , fElemStateSize(16)
00066     , fElemState(0)
00067     , fElemLoopState(0)
00068     , fContent(1023, manager)
00069     , fRawAttrList(0)
00070     , fRawAttrColonListSize(32)
00071     , fRawAttrColonList(0)
00072     , fDTDValidator(0)
00073     , fSchemaValidator(0)
00074     , fDTDGrammar(0)
00075     , fICHandler(0)
00076     , fLocationPairs(0)
00077     , fDTDElemNonDeclPool(0)
00078     , fSchemaElemNonDeclPool(0)
00079     , fElemCount(0)
00080     , fAttDefRegistry(0)
00081     , fUndeclaredAttrRegistry(0)
00082     , fPSVIAttrList(0)
00083     , fModel(0)
00084     , fPSVIElement(0)
00085     , fErrorStack(0)
00086     , fSchemaInfoList(0)
00087     , fCachedSchemaInfoList (0)
00088 {
00089     CleanupType cleanup(this, &IGXMLScanner::cleanUp);
00090 
00091     try
00092     {
00093         commonInit();
00094     }
00095     catch(const OutOfMemoryException&)
00096     {
00097         // Don't cleanup when out of memory, since executing the
00098         // code can cause problems.
00099         cleanup.release();
00100 
00101         throw;
00102     }
00103 
00104     cleanup.release();
00105 }
00106 
00107 IGXMLScanner::IGXMLScanner( XMLDocumentHandler* const docHandler
00108                           , DocTypeHandler* const     docTypeHandler
00109                           , XMLEntityHandler* const   entityHandler
00110                           , XMLErrorReporter* const   errHandler
00111                           , XMLValidator* const       valToAdopt
00112                           , GrammarResolver* const    grammarResolver
00113                           , MemoryManager* const      manager) :
00114 
00115     XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
00116     , fSeeXsi(false)
00117     , fGrammarType(Grammar::UnKnown)
00118     , fElemStateSize(16)
00119     , fElemState(0)
00120     , fElemLoopState(0)
00121     , fContent(1023, manager)
00122     , fRawAttrList(0)
00123     , fRawAttrColonListSize(32)
00124     , fRawAttrColonList(0)
00125     , fDTDValidator(0)
00126     , fSchemaValidator(0)
00127     , fDTDGrammar(0)
00128     , fICHandler(0)
00129     , fLocationPairs(0)
00130     , fDTDElemNonDeclPool(0)
00131     , fSchemaElemNonDeclPool(0)
00132     , fElemCount(0)
00133     , fAttDefRegistry(0)
00134     , fUndeclaredAttrRegistry(0)
00135     , fPSVIAttrList(0)
00136     , fModel(0)
00137     , fPSVIElement(0)
00138     , fErrorStack(0)
00139     , fSchemaInfoList(0)
00140     , fCachedSchemaInfoList (0)
00141 {
00142     CleanupType cleanup(this, &IGXMLScanner::cleanUp);
00143 
00144     try
00145     {
00146         commonInit();
00147     }
00148     catch(const OutOfMemoryException&)
00149     {
00150         // Don't cleanup when out of memory, since executing the
00151         // code can cause problems.
00152         cleanup.release();
00153 
00154         throw;
00155     }
00156 
00157     cleanup.release();
00158 }
00159 
00160 IGXMLScanner::~IGXMLScanner()
00161 {
00162     cleanUp();
00163 }
00164 
00165 // ---------------------------------------------------------------------------
00166 //  XMLScanner: Getter methods
00167 // ---------------------------------------------------------------------------
00168 NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool()
00169 {
00170     if(!fDTDGrammar)
00171         return 0;
00172     return fDTDGrammar->getEntityDeclPool();
00173 }
00174 
00175 const NameIdPool<DTDEntityDecl>* IGXMLScanner::getEntityDeclPool() const
00176 {
00177     if(!fDTDGrammar)
00178         return 0;
00179     return fDTDGrammar->getEntityDeclPool();
00180 }
00181 
00182 // ---------------------------------------------------------------------------
00183 //  IGXMLScanner: Main entry point to scan a document
00184 // ---------------------------------------------------------------------------
00185 void IGXMLScanner::scanDocument(const InputSource& src)
00186 {
00187     //  Bump up the sequence id for this parser instance. This will invalidate
00188     //  any previous progressive scan tokens.
00189     fSequenceId++;
00190 
00191     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
00192 
00193     try
00194     {
00195         //  Reset the scanner and its plugged in stuff for a new run. This
00196         //  resets all the data structures, creates the initial reader and
00197         //  pushes it on the stack, and sets up the base document path.
00198         scanReset(src);
00199 
00200         // If we have a document handler, then call the start document
00201         if (fDocHandler)
00202             fDocHandler->startDocument();
00203 
00204         //  Scan the prolog part, which is everything before the root element
00205         //  including the DTD subsets.
00206         scanProlog();
00207 
00208         //  If we got to the end of input, then its not a valid XML file.
00209         //  Else, go on to scan the content.
00210         if (fReaderMgr.atEOF())
00211         {
00212             emitError(XMLErrs::EmptyMainEntity);
00213         }
00214         else
00215         {
00216             // Scan content, and tell it its not an external entity
00217             if (scanContent())
00218             {
00219                 // Do post-parse validation if required
00220                 if (fValidate)
00221                 {
00222                     //  We handle ID reference semantics at this level since
00223                     //  its required by XML 1.0.
00224                     checkIDRefs();
00225 
00226                     // Then allow the validator to do any extra stuff it wants
00227 //                    fValidator->postParseValidation();
00228                 }
00229 
00230                 // That went ok, so scan for any miscellaneous stuff
00231                 if (!fReaderMgr.atEOF())
00232                     scanMiscellaneous();
00233             }
00234         }
00235 
00236         // If we have a document handler, then call the end document
00237         if (fDocHandler)
00238             fDocHandler->endDocument();
00239 
00240         //cargill debug:
00241         //fGrammarResolver->getXSModel();
00242     }
00243     //  NOTE:
00244     //
00245     //  In all of the error processing below, the emitError() call MUST come
00246     //  before the flush of the reader mgr, or it will fail because it tries
00247     //  to find out the position in the XML source of the error.
00248     catch(const XMLErrs::Codes)
00249     {
00250         // This is a 'first failure' exception, so fall through
00251     }
00252     catch(const XMLValid::Codes)
00253     {
00254         // This is a 'first fatal error' type exit, so fall through
00255     }
00256     catch(const XMLException& excToCatch)
00257     {
00258         //  Emit the error and catch any user exception thrown from here. Make
00259         //  sure in all cases we flush the reader manager.
00260         fInException = true;
00261         try
00262         {
00263             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
00264                 emitError
00265                 (
00266                     XMLErrs::XMLException_Warning
00267                     , excToCatch.getCode()
00268                     , excToCatch.getMessage()
00269                 );
00270             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
00271                 emitError
00272                 (
00273                     XMLErrs::XMLException_Fatal
00274                     , excToCatch.getCode()
00275                     , excToCatch.getMessage()
00276                 );
00277             else
00278                 emitError
00279                 (
00280                     XMLErrs::XMLException_Error
00281                     , excToCatch.getCode()
00282                     , excToCatch.getMessage()
00283                 );
00284         }
00285         catch(const OutOfMemoryException&)
00286         {
00287             // This is a special case for out-of-memory
00288             // conditions, because resetting the ReaderMgr
00289             // can be problematic.
00290             resetReaderMgr.release();
00291 
00292             throw;
00293         }
00294     }
00295     catch(const OutOfMemoryException&)
00296     {
00297         // This is a special case for out-of-memory
00298         // conditions, because resetting the ReaderMgr
00299         // can be problematic.
00300         resetReaderMgr.release();
00301 
00302         throw;
00303     }
00304 }
00305 
00306 
00307 bool IGXMLScanner::scanNext(XMLPScanToken& token)
00308 {
00309     // Make sure this token is still legal
00310     if (!isLegalToken(token))
00311         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
00312 
00313     // Find the next token and remember the reader id
00314     XMLSize_t orgReader;
00315     XMLTokens curToken;
00316 
00317     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
00318 
00319     bool retVal = true;
00320 
00321     try
00322     {
00323         while (true)
00324         {
00325             //  We have to handle any end of entity exceptions that happen here.
00326             //  We could be at the end of X nested entities, each of which will
00327             //  generate an end of entity exception as we try to move forward.
00328             try
00329             {
00330                 curToken = senseNextToken(orgReader);
00331                 break;
00332             }
00333             catch(const EndOfEntityException& toCatch)
00334             {
00335                 // Send an end of entity reference event
00336                 if (fDocHandler)
00337                     fDocHandler->endEntityReference(toCatch.getEntity());
00338             }
00339         }
00340 
00341         if (curToken == Token_CharData)
00342         {
00343             scanCharData(fCDataBuf);
00344         }
00345         else if (curToken == Token_EOF)
00346         {
00347             if (!fElemStack.isEmpty())
00348             {
00349                 const ElemStack::StackElem* topElem = fElemStack.popTop();
00350                 emitError
00351                 (
00352                     XMLErrs::EndedWithTagsOnStack
00353                     , topElem->fThisElement->getFullName()
00354                 );
00355             }
00356 
00357             retVal = false;
00358         }
00359         else
00360         {
00361             // Its some sort of markup
00362             bool gotData = true;
00363             switch(curToken)
00364             {
00365                 case Token_CData :
00366                     // Make sure we are within content
00367                     if (fElemStack.isEmpty())
00368                         emitError(XMLErrs::CDATAOutsideOfContent);
00369                     scanCDSection();
00370                     break;
00371 
00372                 case Token_Comment :
00373                     scanComment();
00374                     break;
00375 
00376                 case Token_EndTag :
00377                     scanEndTag(gotData);
00378                     break;
00379 
00380                 case Token_PI :
00381                     scanPI();
00382                     break;
00383 
00384                 case Token_StartTag :
00385                     if (fDoNamespaces)
00386                         scanStartTagNS(gotData);
00387                     else
00388                         scanStartTag(gotData);
00389                     break;
00390 
00391                 default :
00392                     fReaderMgr.skipToChar(chOpenAngle);
00393                     break;
00394             }
00395 
00396             if (orgReader != fReaderMgr.getCurrentReaderNum())
00397                 emitError(XMLErrs::PartialMarkupInEntity);
00398 
00399             // If we hit the end, then do the miscellaneous part
00400             if (!gotData)
00401             {
00402                 // Do post-parse validation if required
00403                 if (fValidate)
00404                 {
00405                     //  We handle ID reference semantics at this level since
00406                     //  its required by XML 1.0.
00407                     checkIDRefs();
00408 
00409                     // Then allow the validator to do any extra stuff it wants
00410 //                    fValidator->postParseValidation();
00411                 }
00412 
00413                 // That went ok, so scan for any miscellaneous stuff
00414                 scanMiscellaneous();
00415 
00416                 if (toCheckIdentityConstraint())
00417                     fICHandler->endDocument();
00418 
00419                 if (fDocHandler)
00420                     fDocHandler->endDocument();
00421             }
00422         }
00423     }
00424     //  NOTE:
00425     //
00426     //  In all of the error processing below, the emitError() call MUST come
00427     //  before the flush of the reader mgr, or it will fail because it tries
00428     //  to find out the position in the XML source of the error.
00429     catch(const XMLErrs::Codes)
00430     {
00431         // This is a 'first failure' exception so return failure
00432         retVal = false;
00433     }
00434     catch(const XMLValid::Codes)
00435     {
00436         // This is a 'first fatal error' type exit, so return failure
00437         retVal = false;
00438     }
00439     catch(const XMLException& excToCatch)
00440     {
00441         //  Emit the error and catch any user exception thrown from here. Make
00442         //  sure in all cases we flush the reader manager.
00443         fInException = true;
00444         try
00445         {
00446             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
00447                 emitError
00448                 (
00449                     XMLErrs::XMLException_Warning
00450                     , excToCatch.getCode()
00451                     , excToCatch.getMessage()
00452                 );
00453             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
00454                 emitError
00455                 (
00456                     XMLErrs::XMLException_Fatal
00457                     , excToCatch.getCode()
00458                     , excToCatch.getMessage()
00459                 );
00460             else
00461                 emitError
00462                 (
00463                     XMLErrs::XMLException_Error
00464                     , excToCatch.getCode()
00465                     , excToCatch.getMessage()
00466                 );
00467         }
00468         catch(const OutOfMemoryException&)
00469         {
00470             // This is a special case for out-of-memory
00471             // conditions, because resetting the ReaderMgr
00472             // can be problematic.
00473             resetReaderMgr.release();
00474 
00475             throw;
00476         }
00477 
00478         retVal = false;
00479     }
00480     catch(const OutOfMemoryException&)
00481     {
00482         // This is a special case for out-of-memory
00483         // conditions, because resetting the ReaderMgr
00484         // can be problematic.
00485         resetReaderMgr.release();
00486 
00487         throw;
00488     }
00489 
00490     // If we are not at the end, release the object that will
00491     // reset the ReaderMgr.
00492     if (retVal)
00493         resetReaderMgr.release();
00494 
00495     return retVal;
00496 }
00497 
00498 
00499 
00500 // ---------------------------------------------------------------------------
00501 //  IGXMLScanner: Private helper methods. Most of these are implemented in
00502 //  IGXMLScanner2.Cpp.
00503 // ---------------------------------------------------------------------------
00504 
00505 //  This method handles the common initialization, to avoid having to do
00506 //  it redundantly in multiple constructors.
00507 void IGXMLScanner::commonInit()
00508 {
00509 
00510     //  Create the element state array
00511     fElemState = (unsigned int*) fMemoryManager->allocate
00512     (
00513         fElemStateSize * sizeof(unsigned int)
00514     ); //new unsigned int[fElemStateSize];
00515     fElemLoopState = (unsigned int*) fMemoryManager->allocate
00516     (
00517         fElemStateSize * sizeof(unsigned int)
00518     ); //new unsigned int[fElemStateSize];
00519 
00520     //  And we need one for the raw attribute scan. This just stores key/
00521     //  value string pairs (prior to any processing.)
00522     fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
00523     fRawAttrColonList = (int*) fMemoryManager->allocate
00524     (
00525         fRawAttrColonListSize * sizeof(int)
00526     );
00527 
00528     //  Create the Validator and init them
00529     fDTDValidator = new (fMemoryManager) DTDValidator();
00530     initValidator(fDTDValidator);
00531     fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
00532     initValidator(fSchemaValidator);
00533 
00534     // Create IdentityConstraint info
00535     fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
00536 
00537     // Create schemaLocation pair info
00538     fLocationPairs = new (fMemoryManager) ValueVectorOf<XMLCh*>(8, fMemoryManager);
00539     // create pools for undeclared elements
00540     fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
00541     fSchemaElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
00542     fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
00543     (
00544         131, false, fMemoryManager
00545     );
00546     fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
00547     fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
00548 
00549     fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
00550     fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
00551 
00552     // use fDTDValidator as the default validator
00553     if (!fValidator)
00554         fValidator = fDTDValidator;
00555 }
00556 
00557 void IGXMLScanner::cleanUp()
00558 {
00559     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
00560     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
00561     delete fRawAttrList;
00562     fMemoryManager->deallocate(fRawAttrColonList);
00563     delete fDTDValidator;
00564     delete fSchemaValidator;
00565     delete fICHandler;
00566     delete fLocationPairs;
00567     delete fDTDElemNonDeclPool;
00568     delete fSchemaElemNonDeclPool;
00569     delete fAttDefRegistry;
00570     delete fUndeclaredAttrRegistry;
00571     delete fPSVIAttrList;
00572     delete fPSVIElement;
00573     delete fErrorStack;
00574     delete fSchemaInfoList;
00575     delete fCachedSchemaInfoList;
00576 }
00577 
00578 // ---------------------------------------------------------------------------
00579 //  IGXMLScanner: Private scanning methods
00580 // ---------------------------------------------------------------------------
00581 
00582 //  This method is called from scanStartTag() to handle the very raw initial
00583 //  scan of the attributes. It just fills in the passed collection with
00584 //  key/value pairs for each attribute. No processing is done on them at all.
00585 XMLSize_t
00586 IGXMLScanner::rawAttrScan(const   XMLCh* const                elemName
00587                           ,       RefVectorOf<KVStringPair>&  toFill
00588                           ,       bool&                       isEmpty)
00589 {
00590     //  Keep up with how many attributes we've seen so far, and how many
00591     //  elements are available in the vector. This way we can reuse old
00592     //  elements until we run out and then expand it.
00593     XMLSize_t attCount = 0;
00594     XMLSize_t curVecSize = toFill.size();
00595 
00596     // Assume it is not empty
00597     isEmpty = false;
00598 
00599     //  We loop until we either see a /> or >, handling key/value pairs util
00600     //  we get there. We place them in the passed vector, which we will expand
00601     //  as required to hold them.
00602     while (true)
00603     {
00604         // Get the next character, which should be non-space
00605         XMLCh nextCh = fReaderMgr.peekNextChar();
00606 
00607         //  If the next character is not a slash or closed angle bracket,
00608         //  then it must be whitespace, since whitespace is required
00609         //  between the end of the last attribute and the name of the next
00610         //  one.
00611         //
00612         if (attCount)
00613         {
00614             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
00615             {
00616                 bool bFoundSpace;
00617                 fReaderMgr.skipPastSpaces(bFoundSpace);
00618                 if (!bFoundSpace)
00619                 {
00620                     // Emit the error but keep on going
00621                     emitError(XMLErrs::ExpectedWhitespace);
00622                 }
00623                 // Ok, peek another char
00624                 nextCh = fReaderMgr.peekNextChar();
00625             }
00626         }
00627 
00628         //  Ok, here we first check for any of the special case characters.
00629         //  If its not one, then we do the normal case processing, which
00630         //  assumes that we've hit an attribute value, Otherwise, we do all
00631         //  the special case checks.
00632         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
00633         {
00634             //  Assume it's going to be an attribute, so get a name from
00635             //  the input.
00636             int colonPosition;
00637             if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
00638             {
00639                 if (fAttNameBuf.isEmpty())
00640                     emitError(XMLErrs::ExpectedAttrName);
00641                 else
00642                     emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
00643                 fReaderMgr.skipPastChar(chCloseAngle);
00644                 return attCount;
00645             }
00646 
00647             const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();
00648 
00649             // And next must be an equal sign
00650             if (!scanEq())
00651             {
00652                 static const XMLCh tmpList[] =
00653                 {
00654                     chSingleQuote, chDoubleQuote, chCloseAngle
00655                     , chOpenAngle, chForwardSlash, chNull
00656                 };
00657 
00658                 emitError(XMLErrs::ExpectedEqSign);
00659 
00660                 //  Try to sync back up by skipping forward until we either
00661                 //  hit something meaningful.
00662                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
00663 
00664                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
00665                 {
00666                     // Jump back to top for normal processing of these
00667                     continue;
00668                 }
00669                 else if ((chFound == chSingleQuote)
00670                       ||  (chFound == chDoubleQuote)
00671                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
00672                 {
00673                     // Just fall through assuming that the value is to follow
00674                 }
00675                 else if (chFound == chOpenAngle)
00676                 {
00677                     // Assume a malformed tag and that new one is starting
00678                     emitError(XMLErrs::UnterminatedStartTag, elemName);
00679                     return attCount;
00680                 }
00681                 else
00682                 {
00683                     // Something went really wrong
00684                     return attCount;
00685                 }
00686             }
00687 
00688             //  Next should be the quoted attribute value. We just do a simple
00689             //  and stupid scan of this value. The only thing we do here
00690             //  is to expand entity references.
00691             if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
00692             {
00693                 static const XMLCh tmpList[] =
00694                 {
00695                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
00696                 };
00697 
00698                 emitError(XMLErrs::ExpectedAttrValue);
00699 
00700                 //  It failed, so lets try to get synced back up. We skip
00701                 //  forward until we find some whitespace or one of the
00702                 //  chars in our list.
00703                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
00704 
00705                 if ((chFound == chCloseAngle)
00706                 ||  (chFound == chForwardSlash)
00707                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
00708                 {
00709                     //  Just fall through and process this attribute, though
00710                     //  the value will be "".
00711                 }
00712                 else if (chFound == chOpenAngle)
00713                 {
00714                     // Assume a malformed tag and that new one is starting
00715                     emitError(XMLErrs::UnterminatedStartTag, elemName);
00716                     return attCount;
00717                 }
00718                 else
00719                 {
00720                     // Something went really wrong
00721                     return attCount;
00722                 }
00723             }
00724 
00725             //  And now lets add it to the passed collection. If we have not
00726             //  filled it up yet, then we use the next element. Else we add
00727             //  a new one.
00728             KVStringPair* curPair = 0;
00729             if (attCount >= curVecSize)
00730             {
00731                 curPair = new (fMemoryManager) KVStringPair
00732                 (
00733                     curAttNameBuf
00734                     , fAttNameBuf.getLen()
00735                     , fAttValueBuf.getRawBuffer()
00736                     , fAttValueBuf.getLen()
00737                     , fMemoryManager
00738                 );
00739                 toFill.addElement(curPair);
00740             }
00741              else
00742             {
00743                 curPair = toFill.elementAt(attCount);
00744                 curPair->set
00745                 (
00746                     curAttNameBuf,
00747                     fAttNameBuf.getLen(),
00748                     fAttValueBuf.getRawBuffer(),
00749                     fAttValueBuf.getLen()
00750                 );
00751             }
00752 
00753             if (attCount >= fRawAttrColonListSize) {
00754                 resizeRawAttrColonList();
00755             }
00756             // Set the position of the colon and bump the count of attributes we've gotten
00757             fRawAttrColonList[attCount++] = colonPosition;
00758 
00759             // And go to the top again for another attribute
00760             continue;
00761         }
00762 
00763         //  It was some special case character so do all of the checks and
00764         //  deal with it.
00765         if (!nextCh)
00766             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
00767 
00768         if (nextCh == chForwardSlash)
00769         {
00770             fReaderMgr.getNextChar();
00771             isEmpty = true;
00772             if (!fReaderMgr.skippedChar(chCloseAngle))
00773                 emitError(XMLErrs::UnterminatedStartTag, elemName);
00774             break;
00775         }
00776         else if (nextCh == chCloseAngle)
00777         {
00778             fReaderMgr.getNextChar();
00779             break;
00780         }
00781         else if (nextCh == chOpenAngle)
00782         {
00783             //  Check for this one specially, since its going to be common
00784             //  and it is kind of auto-recovering since we've already hit the
00785             //  next open bracket, which is what we would have seeked to (and
00786             //  skipped this whole tag.)
00787             emitError(XMLErrs::UnterminatedStartTag, elemName);
00788             break;
00789         }
00790         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
00791         {
00792             //  Check for this one specially, which is probably a missing
00793             //  attribute name, e.g. ="value". Just issue expected name
00794             //  error and eat the quoted string, then jump back to the
00795             //  top again.
00796             emitError(XMLErrs::ExpectedAttrName);
00797             fReaderMgr.getNextChar();
00798             fReaderMgr.skipQuotedString(nextCh);
00799             fReaderMgr.skipPastSpaces();
00800             continue;
00801         }
00802     }
00803 
00804     return attCount;
00805 }
00806 
00807 
00808 //  This method will kick off the scanning of the primary content of the
00809 //  document, i.e. the elements.
00810 bool IGXMLScanner::scanContent()
00811 {
00812     //  Go into a loop until we hit the end of the root element, or we fall
00813     //  out because there is no root element.
00814     //
00815     //  We have to do kind of a deeply nested double loop here in order to
00816     //  avoid doing the setup/teardown of the exception handler on each
00817     //  round. Doing it this way we only do it when an exception actually
00818     //  occurs.
00819     bool gotData = true;
00820     bool inMarkup = false;
00821     while (gotData)
00822     {
00823         try
00824         {
00825             while (gotData)
00826             {
00827                 //  Sense what the next top level token is. According to what
00828                 //  this tells us, we will call something to handle that kind
00829                 //  of thing.
00830                 XMLSize_t orgReader;
00831                 const XMLTokens curToken = senseNextToken(orgReader);
00832 
00833                 //  Handle character data and end of file specially. Char data
00834                 //  is not markup so we don't want to handle it in the loop
00835                 //  below.
00836                 if (curToken == Token_CharData)
00837                 {
00838                     //  Scan the character data and call appropriate events. Let
00839                     //  him use our local character data buffer for efficiency.
00840                     scanCharData(fCDataBuf);
00841                     continue;
00842                 }
00843                 else if (curToken == Token_EOF)
00844                 {
00845                     //  The element stack better be empty at this point or we
00846                     //  ended prematurely before all elements were closed.
00847                     if (!fElemStack.isEmpty())
00848                     {
00849                         const ElemStack::StackElem* topElem = fElemStack.popTop();
00850                         emitError
00851                         (
00852                             XMLErrs::EndedWithTagsOnStack
00853                             , topElem->fThisElement->getFullName()
00854                         );
00855                     }
00856 
00857                     // Its the end of file, so clear the got data flag
00858                     gotData = false;
00859                     continue;
00860                 }
00861 
00862                 // We are in some sort of markup now
00863                 inMarkup = true;
00864 
00865                 //  According to the token we got, call the appropriate
00866                 //  scanning method.
00867                 switch(curToken)
00868                 {
00869                     case Token_CData :
00870                         // Make sure we are within content
00871                         if (fElemStack.isEmpty())
00872                             emitError(XMLErrs::CDATAOutsideOfContent);
00873                         scanCDSection();
00874                         break;
00875 
00876                     case Token_Comment :
00877                         scanComment();
00878                         break;
00879 
00880                     case Token_EndTag :
00881                         scanEndTag(gotData);
00882                         break;
00883 
00884                     case Token_PI :
00885                         scanPI();
00886                         break;
00887 
00888                     case Token_StartTag :
00889                         if (fDoNamespaces)
00890                             scanStartTagNS(gotData);
00891                         else
00892                             scanStartTag(gotData);
00893                         break;
00894 
00895                     default :
00896                         fReaderMgr.skipToChar(chOpenAngle);
00897                         break;
00898                 }
00899 
00900                 if (orgReader != fReaderMgr.getCurrentReaderNum())
00901                     emitError(XMLErrs::PartialMarkupInEntity);
00902 
00903                 // And we are back out of markup again
00904                 inMarkup = false;
00905             }
00906         }
00907         catch(const EndOfEntityException& toCatch)
00908         {
00909             //  If we were in some markup when this happened, then its a
00910             //  partial markup error.
00911             if (inMarkup)
00912                 emitError(XMLErrs::PartialMarkupInEntity);
00913 
00914             // Send an end of entity reference event
00915             if (fDocHandler)
00916                 fDocHandler->endEntityReference(toCatch.getEntity());
00917 
00918             inMarkup = false;
00919         }
00920     }
00921 
00922     // It went ok, so return success
00923     return true;
00924 }
00925 
00926 
00927 void IGXMLScanner::scanEndTag(bool& gotData)
00928 {
00929     //  Assume we will still have data until proven otherwise. It will only
00930     //  ever be false if this is the end of the root element.
00931     gotData = true;
00932 
00933     //  Check if the element stack is empty. If so, then this is an unbalanced
00934     //  element (i.e. more ends than starts, perhaps because of bad text
00935     //  causing one to be skipped.)
00936     if (fElemStack.isEmpty())
00937     {
00938         emitError(XMLErrs::MoreEndThanStartTags);
00939         fReaderMgr.skipPastChar(chCloseAngle);
00940         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
00941     }
00942 
00943     //  Pop the stack of the element we are supposed to be ending. Remember
00944     //  that we don't own this. The stack just keeps them and reuses them.
00945     unsigned int uriId = (fDoNamespaces)
00946         ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
00947 
00948     // these get initialized below
00949     const ElemStack::StackElem* topElem = 0;
00950     const XMLCh *elemName = 0;
00951 
00952     // Make sure that its the end of the element that we expect
00953     // special case for schema validation, whose element decls,
00954     // obviously don't contain prefix information
00955     if(fGrammarType == Grammar::SchemaGrammarType)
00956     {
00957         elemName = fElemStack.getCurrentSchemaElemName();
00958         topElem = fElemStack.topElement();
00959     }
00960     else
00961     {
00962         topElem = fElemStack.topElement();
00963         elemName = topElem->fThisElement->getFullName();
00964     }
00965     if (!fReaderMgr.skippedStringLong(elemName))
00966     {
00967         emitError
00968         (
00969             XMLErrs::ExpectedEndOfTagX
00970             , elemName
00971         );
00972         fReaderMgr.skipPastChar(chCloseAngle);
00973         fElemStack.popTop();
00974         return;
00975     }
00976 
00977     // Make sure we are back on the same reader as where we started
00978     if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
00979         emitError(XMLErrs::PartialTagMarkupError);
00980 
00981     // Skip optional whitespace
00982     fReaderMgr.skipPastSpaces();
00983 
00984     // Make sure we find the closing bracket
00985     if (!fReaderMgr.skippedChar(chCloseAngle))
00986     {
00987         emitError
00988         (
00989             XMLErrs::UnterminatedEndTag
00990             , topElem->fThisElement->getFullName()
00991         );
00992     }
00993 
00994     if (fGrammarType == Grammar::SchemaGrammarType)
00995     {
00996         // reset error occurred
00997         fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
00998         if (fValidate && topElem->fThisElement->isDeclared())
00999         {
01000             fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
01001             if(!fPSVIElemContext.fCurrentTypeInfo)
01002                 fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
01003             else
01004                 fPSVIElemContext.fCurrentDV = 0;
01005             if(fPSVIHandler)
01006             {
01007                 fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
01008 
01009                 if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
01010                     fPSVIElemContext.fNormalizedValue = 0;
01011             }
01012         }
01013         else
01014         {
01015             fPSVIElemContext.fCurrentDV = 0;
01016             fPSVIElemContext.fCurrentTypeInfo = 0;
01017             fPSVIElemContext.fNormalizedValue = 0;
01018         }
01019     }
01020 
01021     //  If validation is enabled, then lets pass him the list of children and
01022     //  this element and let him validate it.
01023     DatatypeValidator* psviMemberType = 0;
01024     if (fValidate)
01025     {
01026 
01027        //
01028        // XML1.0-3rd
01029        // Validity Constraint:
01030        // The declaration matches EMPTY and the element has no content (not even
01031        // entity references, comments, PIs or white space).
01032        //
01033        if ( (fGrammarType == Grammar::DTDGrammarType) &&
01034             (topElem->fCommentOrPISeen)               &&
01035             (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))
01036        {
01037            fValidator->emitError
01038                (
01039                XMLValid::EmptyElemHasContent
01040                , topElem->fThisElement->getFullName()
01041                );
01042        }
01043 
01044        //
01045        // XML1.0-3rd
01046        // Validity Constraint:
01047        //
01048        // The declaration matches children and the sequence of child elements
01049        // belongs to the language generated by the regular expression in the
01050        // content model, with optional white space, comments and PIs
01051        // (i.e. markup matching production [27] Misc) between the start-tag and
01052        // the first child element, between child elements, or between the last
01053        // child element and the end-tag.
01054        //
01055        // Note that
01056        //    a CDATA section containing only white space or
01057        //    a reference to an entity whose replacement text is character references
01058        //       expanding to white space do not match the nonterminal S, and hence
01059        //       cannot appear in these positions; however,
01060        //    a reference to an internal entity with a literal value consisting
01061        //       of character references expanding to white space does match S,
01062        //       since its replacement text is the white space resulting from expansion
01063        //       of the character references.
01064        //
01065        if ( (fGrammarType == Grammar::DTDGrammarType)  &&
01066             (topElem->fReferenceEscaped)               &&
01067             (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))
01068        {
01069            fValidator->emitError
01070                (
01071                XMLValid::ElemChildrenHasInvalidWS
01072                , topElem->fThisElement->getFullName()
01073                );
01074        }
01075         XMLSize_t failure;
01076         bool res = fValidator->checkContent
01077         (
01078             topElem->fThisElement
01079             , topElem->fChildren
01080             , topElem->fChildCount
01081             , &failure
01082         );
01083 
01084         if (!res)
01085         {
01086             //  One of the elements is not valid for the content. NOTE that
01087             //  if no children were provided but the content model requires
01088             //  them, it comes back with a zero value. But we cannot use that
01089             //  to index the child array in this case, and have to put out a
01090             //  special message.
01091             if (!topElem->fChildCount)
01092             {
01093                 fValidator->emitError
01094                 (
01095                     XMLValid::EmptyNotValidForContent
01096                     , topElem->fThisElement->getFormattedContentModel()
01097                 );
01098             }
01099             else if (failure >= topElem->fChildCount)
01100             {
01101                 fValidator->emitError
01102                 (
01103                     XMLValid::NotEnoughElemsForCM
01104                     , topElem->fThisElement->getFormattedContentModel()
01105                 );
01106             }
01107             else
01108             {
01109                 fValidator->emitError
01110                 (
01111                     XMLValid::ElementNotValidForContent
01112                     , topElem->fChildren[failure]->getRawName()
01113                     , topElem->fThisElement->getFormattedContentModel()
01114                 );
01115             }
01116         }
01117 
01118 
01119         if (fGrammarType == Grammar::SchemaGrammarType) {
01120             if (((SchemaValidator*) fValidator)->getErrorOccurred())
01121                 fPSVIElemContext.fErrorOccurred = true;
01122             else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
01123                 psviMemberType = fValidationContext->getValidatingMemberType();
01124 
01125             if (fPSVIHandler)
01126             {
01127                 fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
01128                 if(fPSVIElemContext.fIsSpecified)
01129                     fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
01130             }
01131 
01132             // call matchers and de-activate context
01133             if (toCheckIdentityConstraint())
01134             {
01135                 fICHandler->deactivateContext
01136                              (
01137                               (SchemaElementDecl *) topElem->fThisElement
01138                             , fContent.getRawBuffer()
01139                             , fValidationContext
01140                             , fPSVIElemContext.fCurrentDV
01141                              );
01142             }
01143 
01144         }
01145     }
01146 
01147     // QName dv needed topElem to resolve URIs on the checkContent
01148     fElemStack.popTop();
01149 
01150     // See if it was the root element, to avoid multiple calls below
01151     const bool isRoot = fElemStack.isEmpty();
01152 
01153     if (fGrammarType == Grammar::SchemaGrammarType)
01154     {
01155         if (fPSVIHandler)
01156         {
01157             endElementPSVI(
01158                 (SchemaElementDecl*)topElem->fThisElement, psviMemberType);
01159         }
01160         // now we can reset the datatype buffer, since the
01161         // application has had a chance to copy the characters somewhere else
01162         ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
01163     }
01164 
01165     // If we have a doc handler, tell it about the end tag
01166     if (fDocHandler)
01167     {
01168         if (fGrammarType == Grammar::SchemaGrammarType) {
01169             if (topElem->fPrefixColonPos != -1)
01170                 fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
01171             else
01172                 fPrefixBuf.reset();
01173         }
01174         else {
01175             fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
01176         }
01177         fDocHandler->endElement
01178         (
01179             *topElem->fThisElement
01180             , uriId
01181             , isRoot
01182             , fPrefixBuf.getRawBuffer()
01183         );
01184     }
01185 
01186     if (fGrammarType == Grammar::SchemaGrammarType) {
01187         if (!isRoot)
01188         {
01189             // update error information
01190             fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
01191 
01192 
01193         }
01194     }
01195 
01196     // If this was the root, then done with content
01197     gotData = !isRoot;
01198 
01199     if (gotData) {
01200         if (fDoNamespaces) {
01201             // Restore the grammar
01202             fGrammar = fElemStack.getCurrentGrammar();
01203             fGrammarType = fGrammar->getGrammarType();
01204             if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
01205                 if (fValidatorFromUser)
01206                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
01207                 else {
01208                     fValidator = fSchemaValidator;
01209                 }
01210             }
01211             else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
01212                 if (fValidatorFromUser)
01213                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
01214                 else {
01215                     fValidator = fDTDValidator;
01216                 }
01217             }
01218 
01219             fValidator->setGrammar(fGrammar);
01220         }
01221 
01222         // Restore the validation flag
01223         fValidate = fElemStack.getValidationFlag();
01224     }
01225 }
01226 
01227 
01228 //  This method handles the high level logic of scanning the DOCType
01229 //  declaration. This calls the DTDScanner and kicks off both the scanning of
01230 //  the internal subset and the scanning of the external subset, if any.
01231 //
01232 //  When we get here the '<!DOCTYPE' part has already been scanned, which is
01233 //  what told us that we had a doc type decl to parse.
01234 void IGXMLScanner::scanDocTypeDecl()
01235 {
01236     //  We have a doc type. So, switch the Grammar.
01237     switchGrammar(XMLUni::fgDTDEntityString);
01238 
01239     if (fDocTypeHandler)
01240         fDocTypeHandler->resetDocType();
01241 
01242     // There must be some space after DOCTYPE
01243     bool skippedSomething;
01244     fReaderMgr.skipPastSpaces(skippedSomething);
01245     if (!skippedSomething)
01246     {
01247         emitError(XMLErrs::ExpectedWhitespace);
01248 
01249         // Just skip the Doctype declaration and return
01250         fReaderMgr.skipPastChar(chCloseAngle);
01251         return;
01252     }
01253 
01254     // Get a buffer for the root element
01255     XMLBufBid bbRootName(&fBufMgr);
01256 
01257     //  Get a name from the input, which should be the name of the root
01258     //  element of the upcoming content.
01259     int  colonPosition;
01260     bool validName = fDoNamespaces ? fReaderMgr.getQName(bbRootName.getBuffer(), &colonPosition) :
01261                                      fReaderMgr.getName(bbRootName.getBuffer());
01262     if (!validName)
01263     {
01264         if (bbRootName.isEmpty())
01265             emitError(XMLErrs::NoRootElemInDOCTYPE);
01266         else
01267             emitError(XMLErrs::InvalidRootElemInDOCTYPE, bbRootName.getRawBuffer());
01268         fReaderMgr.skipPastChar(chCloseAngle);
01269         return;
01270     }
01271 
01272     //  Store the root element name for later check
01273     setRootElemName(bbRootName.getRawBuffer());
01274 
01275     //  This element obviously is not going to exist in the element decl
01276     //  pool yet, but we need to call docTypeDecl. So force it into
01277     //  the element decl pool, marked as being there because it was in
01278     //  the DOCTYPE. Later, when its declared, the status will be updated.
01279     //
01280     //  Only do this if we are not reusing the validator! If we are reusing,
01281     //  then look it up instead. It has to exist!
01282     MemoryManager* const  rootDeclMgr =
01283         fUseCachedGrammar ? fMemoryManager : fGrammarPoolMemoryManager;
01284 
01285     DTDElementDecl* rootDecl = new (rootDeclMgr) DTDElementDecl
01286     (
01287         bbRootName.getRawBuffer()
01288         , fEmptyNamespaceId
01289         , DTDElementDecl::Any
01290         , rootDeclMgr
01291     );
01292 
01293     Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);
01294     rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
01295     rootDecl->setExternalElemDeclaration(true);
01296     if(!fUseCachedGrammar)
01297     {
01298         fGrammar->putElemDecl(rootDecl);
01299         rootDeclJanitor.release();
01300     } else
01301     {
01302         // attach this to the undeclared element pool so that it gets deleted
01303         XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());
01304         if (elemDecl)
01305         {
01306             rootDecl->setId(elemDecl->getId());
01307         }
01308         else
01309         {
01310             rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
01311             rootDeclJanitor.release();
01312         }
01313     }
01314 
01315     // Skip any spaces after the name
01316     fReaderMgr.skipPastSpaces();
01317 
01318     //  And now if we are looking at a >, then we are done. It is not
01319     //  required to have an internal or external subset, though why you
01320     //  would not escapes me.
01321     if (fReaderMgr.skippedChar(chCloseAngle)) {
01322 
01323         //  If we have a doc type handler and advanced callbacks are enabled,
01324         //  call the doctype event.
01325         if (fDocTypeHandler)
01326             fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
01327         return;
01328     }
01329 
01330     // either internal/external subset
01331     if (fValScheme == Val_Auto && !fValidate)
01332         fValidate = true;
01333 
01334     bool    hasIntSubset = false;
01335     bool    hasExtSubset = false;
01336     XMLCh*  sysId = 0;
01337     XMLCh*  pubId = 0;
01338 
01339     DTDScanner dtdScanner
01340     (
01341         (DTDGrammar*) fGrammar
01342         , fDocTypeHandler
01343         , fGrammarPoolMemoryManager
01344         , fMemoryManager
01345     );
01346     dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
01347 
01348     //  If the next character is '[' then we have no external subset cause
01349     //  there is no system id, just the opening character of the internal
01350     //  subset. Else, has to be an id.
01351     //
01352     // Just look at the next char, don't eat it.
01353     if (fReaderMgr.peekNextChar() == chOpenSquare)
01354     {
01355         hasIntSubset = true;
01356     }
01357     else
01358     {
01359         // Indicate we have an external subset
01360         hasExtSubset = true;
01361         fHasNoDTD = false;
01362 
01363         // Get buffers for the ids
01364         XMLBufBid bbPubId(&fBufMgr);
01365         XMLBufBid bbSysId(&fBufMgr);
01366 
01367         // Get the external subset id
01368         if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
01369         {
01370             fReaderMgr.skipPastChar(chCloseAngle);
01371             return;
01372         }
01373 
01374         // Get copies of the ids we got
01375         pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
01376         sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
01377 
01378         // Skip spaces and check again for the opening of an internal subset
01379         fReaderMgr.skipPastSpaces();
01380 
01381         // Just look at the next char, don't eat it.
01382         if (fReaderMgr.peekNextChar() == chOpenSquare) {
01383             hasIntSubset = true;
01384         }
01385     }
01386 
01387     // Insure that the ids get cleaned up, if they got allocated
01388     ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
01389     ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
01390 
01391     //  If we have a doc type handler and advanced callbacks are enabled,
01392     //  call the doctype event.
01393     if (fDocTypeHandler)
01394         fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
01395 
01396     //  Ok, if we had an internal subset, we are just past the [ character
01397     //  and need to parse that first.
01398     if (hasIntSubset)
01399     {
01400         // Eat the opening square bracket
01401         fReaderMgr.getNextChar();
01402 
01403         checkInternalDTD(hasExtSubset, sysId, pubId);
01404 
01405         //  And try to scan the internal subset. If we fail, try to recover
01406         //  by skipping forward tot he close angle and returning.
01407         if (!dtdScanner.scanInternalSubset())
01408         {
01409             fReaderMgr.skipPastChar(chCloseAngle);
01410             return;
01411         }
01412 
01413         //  Do a sanity check that some expanded PE did not propogate out of
01414         //  the doctype. This could happen if it was terminated early by bad
01415         //  syntax.
01416         if (fReaderMgr.getReaderDepth() > 1)
01417         {
01418             emitError(XMLErrs::PEPropogated);
01419 
01420             // Ask the reader manager to pop back down to the main level
01421             fReaderMgr.cleanStackBackTo(1);
01422         }
01423 
01424         fReaderMgr.skipPastSpaces();
01425     }
01426 
01427     // And that should leave us at the closing > of the DOCTYPE line
01428     if (!fReaderMgr.skippedChar(chCloseAngle))
01429     {
01430         //  Do a special check for the common scenario of an extra ] char at
01431         //  the end. This is easy to recover from.
01432         if (fReaderMgr.skippedChar(chCloseSquare)
01433         &&  fReaderMgr.skippedChar(chCloseAngle))
01434         {
01435             emitError(XMLErrs::ExtraCloseSquare);
01436         }
01437          else
01438         {
01439             emitError(XMLErrs::UnterminatedDOCTYPE);
01440             fReaderMgr.skipPastChar(chCloseAngle);
01441         }
01442     }
01443 
01444     //  If we had an external subset, then we need to deal with that one
01445     //  next. If we are reusing the validator, then don't scan it.
01446     if (hasExtSubset) {
01447 
01448         InputSource* srcUsed=0;
01449         Janitor<InputSource> janSrc(srcUsed);
01450         // If we had an internal subset and we're using the cached grammar, it
01451         // means that the ignoreCachedDTD is set, so we ignore the cached
01452         // grammar
01453         if (fUseCachedGrammar && !hasIntSubset)
01454         {
01455             srcUsed = resolveSystemId(sysId, pubId);
01456             if (srcUsed) {
01457                 janSrc.reset(srcUsed);
01458                 Grammar* grammar = fGrammarResolver->getGrammar(srcUsed->getSystemId());
01459 
01460                 if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
01461 
01462                     fDTDGrammar = (DTDGrammar*) grammar;
01463                     fGrammar = fDTDGrammar;
01464                     fValidator->setGrammar(fGrammar);
01465                     // If we don't report at least the external subset boundaries,
01466                     // an advanced document handler cannot know when the DTD end,
01467                     // since we've already sent a doctype decl that indicates there's
01468                     // there's an external subset.
01469                     if (fDocTypeHandler)
01470                     {
01471                         fDocTypeHandler->startExtSubset();
01472                         fDocTypeHandler->endExtSubset();
01473                     }
01474 
01475                     return;
01476                 }
01477             }
01478         }
01479 
01480         if (fLoadExternalDTD || fValidate)
01481         {
01482             // And now create a reader to read this entity
01483             XMLReader* reader;
01484             if (srcUsed) {
01485                 reader = fReaderMgr.createReader
01486                         (
01487                             *srcUsed
01488                             , false
01489                             , XMLReader::RefFrom_NonLiteral
01490                             , XMLReader::Type_General
01491                             , XMLReader::Source_External
01492                             , fCalculateSrcOfs
01493                             , fLowWaterMark
01494                         );
01495             }
01496             else {
01497                 reader = fReaderMgr.createReader
01498                         (
01499                             sysId
01500                             , pubId
01501                             , false
01502                             , XMLReader::RefFrom_NonLiteral
01503                             , XMLReader::Type_General
01504                             , XMLReader::Source_External
01505                             , srcUsed
01506                             , fCalculateSrcOfs
01507                             , fLowWaterMark
01508                             , fDisableDefaultEntityResolution
01509                         );
01510                 janSrc.reset(srcUsed);
01511             }
01512             //  If it failed then throw an exception
01513             if (!reader)
01514                 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed ? srcUsed->getSystemId() : sysId, fMemoryManager);
01515 
01516             if (fToCacheGrammar) {
01517 
01518                 unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
01519                 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
01520 
01521                 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
01522                 ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
01523                 fGrammarResolver->putGrammar(fGrammar);
01524             }
01525 
01526             //  In order to make the processing work consistently, we have to
01527             //  make this look like an external entity. So create an entity
01528             //  decl and fill it in and push it with the reader, as happens
01529             //  with an external entity. Put a janitor on it to insure it gets
01530             //  cleaned up. The reader manager does not adopt them.
01531             const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
01532             DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
01533             declDTD->setSystemId(sysId);
01534             declDTD->setIsExternal(true);
01535             Janitor<DTDEntityDecl> janDecl(declDTD);
01536 
01537             // Mark this one as a throw at end
01538             reader->setThrowAtEnd(true);
01539 
01540             // And push it onto the stack, with its pseudo name
01541             fReaderMgr.pushReader(reader, declDTD);
01542 
01543             // Tell it its not in an include section
01544             dtdScanner.scanExtSubsetDecl(false, true);
01545         }
01546     }
01547 }
01548 
//  Scans a start tag for the case where namespaces are not being processed:
//  the element name is read as one raw QName and is never split into a
//  prefix and a local part.
//
//  The element decl is looked up in fGrammar first and then in
//  fDTDElemNonDeclPool; if neither has it, a new DTDElementDecl is created
//  with DTDElementDecl::Any and put into fDTDElemNonDeclPool. The attributes
//  are then scanned into fAttrList, attributes that were not specified but
//  have a Default or Fixed declaration are added from the element decl, and
//  fDocHandler->startElement() is called if a document handler is set.
//  When fValidate is set, the validator is also asked to check the element,
//  the attribute values, required attributes and (for an empty element) the
//  empty content.
//
//  gotData is set to true on entry and is set to false only when this turns
//  out to be an empty root element.
//
//  Returns false if the element name or an attribute name cannot be read, or
//  if resynchronizing after a missing '=' or a bad attribute value fails;
//  returns true otherwise. Throws UnexpectedEOFException if the character
//  peeked inside the tag is null.
//
//  NOTE(review): the false returns inside the attribute loop happen after
//  fElemStack.addLevel() and do not pop that level here; presumably the
//  caller copes with this - confirm.
01549 bool IGXMLScanner::scanStartTag(bool& gotData)
01550 {
01551     //  Assume we will still have data until proven otherwise. It will only
01552     //  ever be false if this is the root and it's empty.
01553     gotData = true;
01554 
01555     //  Get the QName. In this case, we are not doing namespaces, so we just
01556     //  use it as is and don't have to break it into parts.
01557     if (!fReaderMgr.getName(fQNameBuf))
01558     {
01559         emitError(XMLErrs::ExpectedElementName);
01560         fReaderMgr.skipToChar(chOpenAngle);
01561         return false;
01562     }
01563 
01564     // Assume it won't be an empty tag
01565     bool isEmpty = false;
01566 
01567     //  Lets try to look up the element in the validator's element decl pool
01568     //  We can pass bogus values for the URI id and the base name. We know that
01569     //  this can only be called if we are doing a DTD style validator and that
01570     //  he will only look at the QName.
01571     //
01572     //  We tell him to fault in a decl if he does not find one.
01573     //  Actually, we *don't* tell him to fault in a decl if he does not find one- NG
01574     bool wasAdded = false;
01575     const XMLCh *rawQName = fQNameBuf.getRawBuffer();
01576     XMLElementDecl* elemDecl = fGrammar->getElemDecl
01577     (
01578         fEmptyNamespaceId
01579         , 0
01580         , rawQName
01581         , Grammar::TOP_LEVEL_SCOPE
01582     );
01583     // look for it in the undeclared pool:
01584     if(!elemDecl)
01585     {
01586         elemDecl = fDTDElemNonDeclPool->getByKey(rawQName);
01587     }
01588     if(!elemDecl)
01589     {
01590         // we're assuming this must be a DTD element.  DTD's can be
01591         // used with or without namespaces, but schemas cannot be used without
01592         // namespaces.
01593         wasAdded = true;
01594         elemDecl = new (fMemoryManager) DTDElementDecl
01595         (
01596             rawQName
01597             , fEmptyNamespaceId
01598             , DTDElementDecl::Any
01599             , fMemoryManager
01600         );
          // The new decl goes into the undeclared pool (not the grammar), and
          // the value returned by put() becomes its id.
01601         elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
01602     }
01603 
01604     //  We do something different here according to whether we found the
01605     //  element or not.
01606     if (wasAdded)
01607     {
01608         // If validating then emit an error
01609         if (fValidate)
01610         {
01611             // This is to tell the reuse Validator that this element was
01612             // faulted-in, was not an element in the validator pool originally
01613             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
01614 
01615             fValidator->emitError
01616             (
01617                 XMLValid::ElementNotDefined
01618                 , elemDecl->getFullName()
01619             );
01620         }
01621     }
01622     else
01623     {
01624         // If it's not marked declared and validating, then emit an error
01625         if (fValidate && !elemDecl->isDeclared())
01626         {
01627             fValidator->emitError
01628             (
01629                 XMLValid::ElementNotDefined
01630                 , elemDecl->getFullName()
01631             );
01632         }
01633     }
01634 
01635     // See if it's the root element (must be checked before addLevel below)
01636     const bool isRoot = fElemStack.isEmpty();
01637 
01638     // Expand the element stack and add the new element
01639     fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
01640     fElemStack.setValidationFlag(fValidate);
01641 
01642     //  Validate the element
01643     if (fValidate)
01644         fValidator->validateElement(elemDecl);
01645 
01646     //  If this is the first element and we are validating, check the root
01647     //  element.
01648     if (isRoot)
01649     {
01650         fRootGrammar = fGrammar;
01651 
01652         if (fValidate)
01653         {
01654             //  If a DocType exists, then check if it matches the root name there.
01655             if (fRootElemName && !XMLString::equals(fQNameBuf.getRawBuffer(), fRootElemName))
01656                 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
01657         }
01658     }
01659     else
01660     {
01661         //  If the element stack is not empty, then add this element as a
01662         //  child of the previous top element. If it's empty, this is the root
01663         //  elem and is not the child of anything.
01664         fElemStack.addChild(elemDecl->getElementName(), true);
01665     }
01666 
01667     // Skip any whitespace after the name
01668     fReaderMgr.skipPastSpaces();
01669 
01670     //  We loop until we either see a /> or >, handling attribute/value
01671     //  pairs until we get there.
      //
      //  attCount is the number of attributes stored in fAttrList for this
      //  element so far. curAttListSize is the number of XMLAttr objects that
      //  fAttrList already holds; those are reused before new ones are
      //  allocated.
01672     XMLSize_t    attCount = 0;
01673     XMLSize_t    curAttListSize = fAttrList->size();
      // NOTE(review): wasAdded is not read again after this reset within
      // this function.
01674     wasAdded = false;
01675 
      // fElemCount is bumped once per start tag. The counts stored in
      // fAttDefRegistry are compared against it below to tell whether an
      // attribute def has already been seen in this start tag.
01676     fElemCount++;
01677 
01678     while (true)
01679     {
01680         // And get the next non-space character
01681         XMLCh nextCh = fReaderMgr.peekNextChar();
01682 
01683         //  If the next character is not a slash or closed angle bracket,
01684         //  then it must be whitespace, since whitespace is required
01685         //  between the end of the last attribute and the name of the next
01686         //  one.
01687         if (attCount)
01688         {
01689             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
01690             {
01691                 bool bFoundSpace;
01692                 fReaderMgr.skipPastSpaces(bFoundSpace);
01693                 if (!bFoundSpace)
01694                 {
01695                     // Emit the error but keep on going
01696                     emitError(XMLErrs::ExpectedWhitespace);
01697                 }
01698                 // Ok, peek another char
01699                 nextCh = fReaderMgr.peekNextChar();
01700             }
01701         }
01702 
01703         //  Ok, here we first check for any of the special case characters.
01704         //  If it's not one, then we do the normal case processing, which
01705         //  assumes that we've hit an attribute value. Otherwise, we do all
01706         //  the special case checks.
01707         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
01708         {
01709             //  Assume it's going to be an attribute, so get a name from
01710             //  the input.
01711             if (!fReaderMgr.getName(fAttNameBuf))
01712             {
01713                 emitError(XMLErrs::ExpectedAttrName);
01714                 fReaderMgr.skipPastChar(chCloseAngle);
01715                 return false;
01716             }
01717 
01718             // And next must be an equal sign
01719             if (!scanEq())
01720             {
01721                 static const XMLCh tmpList[] =
01722                 {
01723                     chSingleQuote, chDoubleQuote, chCloseAngle
01724                     , chOpenAngle, chForwardSlash, chNull
01725                 };
01726 
01727                 emitError(XMLErrs::ExpectedEqSign);
01728 
01729                 //  Try to sync back up by skipping forward until we
01730                 //  hit something meaningful.
01731                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
01732 
01733                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
01734                 {
01735                     // Jump back to top for normal processing of these
                      // (the attribute name just read is dropped: nothing
                      // has been stored in fAttrList for it yet)
01736                     continue;
01737                 }
01738                 else if ((chFound == chSingleQuote)
01739                       ||  (chFound == chDoubleQuote)
01740                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
01741                 {
01742                     // Just fall through assuming that the value is to follow
01743                 }
01744                 else if (chFound == chOpenAngle)
01745                 {
01746                     // Assume a malformed tag and that new one is starting
01747                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01748                     return false;
01749                 }
01750                 else
01751                 {
01752                     // Something went really wrong
01753                     return false;
01754                 }
01755             }
01756             //  See if this attribute is declared for this element. If we are
01757             //  not validating of course it will not be at first, but we will
01758             //  fault it into the pool (to avoid lots of redundant errors.)
01759             XMLCh * namePtr = fAttNameBuf.getRawBuffer();
01760             XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr);
01761 
01762             //  Add this attribute to the attribute list that we use to
01763             //  pass them to the handler. We reuse its existing elements
01764             //  but expand it as required.
01765             // Note that we want to do this first since this will
01766             // make a copy of the namePtr; we can then make use of
01767             // that copy in the hashtable lookup that checks
01768             // for duplicates.  This will mean we may have to update
01769             // the type of the XMLAttr later.
01770             XMLAttr* curAtt;
01771             if (attCount >= curAttListSize)
01772             {
01773                 curAtt = new (fMemoryManager) XMLAttr
01774                 (
01775                     0
01776                     , namePtr
01777                     , XMLUni::fgZeroLenString
01778                     , XMLUni::fgZeroLenString
01779                     , (attDef)?attDef->getType():XMLAttDef::CData
01780                     , true
01781                     , fMemoryManager
01782                 );
01783                 fAttrList->addElement(curAtt);
01784             }
01785             else
01786             {
01787                 curAtt = fAttrList->elementAt(attCount);
01788                 curAtt->set
01789                 (
01790                     0
01791                     , namePtr
01792                     , XMLUni::fgZeroLenString
01793                     , XMLUni::fgZeroLenString
01794                     , (attDef)?attDef->getType():XMLAttDef::CData
01795                 );
01796                 curAtt->setSpecified(true);
01797             }
01798             // reset namePtr so it refers to newly-allocated memory
01799             namePtr = (XMLCh *)curAtt->getName();
01800 
01801             if (!attDef)
01802             {
01803                 //  If there is a validation handler, then we are validating
01804                 //  so emit an error.
01805                 if (fValidate)
01806                 {
01807                     fValidator->emitError
01808                     (
01809                         XMLValid::AttNotDefinedForElement
01810                         , fAttNameBuf.getRawBuffer()
01811                         , elemDecl->getFullName()
01812                     );
01813                 }
                  // Undeclared attributes are tracked by name; a false return
                  // from putIfNotPresent() is reported as a duplicate.
01814                 if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0))
01815                 {
01816                     emitError
01817                     (
01818                         XMLErrs::AttrAlreadyUsedInSTag
01819                         , namePtr
01820                         , elemDecl->getFullName()
01821                      );
01822                 }
01823             }
01824             else
01825             {
01826                 // prepare for duplicate detection
                  // A missing entry, or a stored count below fElemCount, means
                  // this attDef has not been seen in the current start tag, so
                  // stamp it with fElemCount. Otherwise it is a duplicate.
01827                 unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
01828                 if(!curCountPtr)
01829                 {
01830                     curCountPtr = getNewUIntPtr();
01831                     *curCountPtr = fElemCount;
01832                     fAttDefRegistry->put(attDef, curCountPtr);
01833                 }
01834                 else if(*curCountPtr < fElemCount)
01835                     *curCountPtr = fElemCount;
01836                 else
01837                 {
01838                     emitError
01839                     (
01840                         XMLErrs::AttrAlreadyUsedInSTag
01841                         , attDef->getFullName()
01842                         , elemDecl->getFullName()
01843                     );
01844                 }
01845             }
01846 
01847             //  Skip any whitespace before the value and then scan the att
01848             //  value. This will come back normalized with entity refs and
01849             //  char refs expanded.
01850             fReaderMgr.skipPastSpaces();
01851             if (!scanAttValue(attDef, namePtr, fAttValueBuf))
01852             {
01853                 static const XMLCh tmpList[] =
01854                 {
01855                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
01856                 };
01857 
01858                 emitError(XMLErrs::ExpectedAttrValue);
01859 
01860                 //  It failed, so lets try to get synced back up. We skip
01861                 //  forward until we find some whitespace or one of the
01862                 //  chars in our list.
01863                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
01864 
01865                 if ((chFound == chCloseAngle)
01866                 ||  (chFound == chForwardSlash)
01867                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
01868                 {
01869                     //  Just fall through and process this attribute, though
01870                     //  the value will be "".
01871                 }
01872                 else if (chFound == chOpenAngle)
01873                 {
01874                     // Assume a malformed tag and that new one is starting
01875                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01876                     return false;
01877                 }
01878                 else
01879                 {
01880                     // Something went really wrong
01881                     return false;
01882                 }
01883             }
01884             // must set the newly-minted value on the XMLAttr:
01885             curAtt->setValue(fAttValueBuf.getRawBuffer());
01886 
01887             //  Now that it's all stretched out, lets look at its type and
01888             //  determine if it has a valid value. It will output any needed
01889             //  errors, but we just keep going. We only need to do this if
01890             //  we are validating.
01891             if (attDef)
01892             {
01893                 // Let the validator pass judgement on the attribute value
01894                 if (fValidate)
01895                 {
01896                     fValidator->validateAttrValue
01897                     (
01898                         attDef
01899                         , fAttValueBuf.getRawBuffer()
01900                         , false
01901                         , elemDecl
01902                     );
01903                 }
01904             }
01905 
01906             attCount++;
01907             // And jump back to the top of the loop
01908             continue;
01909         }
01910 
01911         //  It was some special case character so do all of the checks and
01912         //  deal with it.
01913         if (!nextCh)
01914             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
01915 
01916         if (nextCh == chForwardSlash)
01917         {
              // "/" must be followed by ">" to close an empty element; if it
              // is not, report it but still treat the tag as empty and ended.
01918             fReaderMgr.getNextChar();
01919             isEmpty = true;
01920             if (!fReaderMgr.skippedChar(chCloseAngle))
01921                 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01922             break;
01923         }
01924         else if (nextCh == chCloseAngle)
01925         {
01926             fReaderMgr.getNextChar();
01927             break;
01928         }
01929         else if (nextCh == chOpenAngle)
01930         {
01931             //  Check for this one specially, since it's going to be common
01932             //  and it is kind of auto-recovering since we've already hit the
01933             //  next open bracket, which is what we would have seeked to (and
01934             //  skipped this whole tag.)
01935             emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01936             break;
01937         }
01938         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
01939         {
01940             //  Check for this one specially, which is probably a missing
01941             //  attribute name, e.g. ="value". Just issue expected name
01942             //  error and eat the quoted string, then jump back to the
01943             //  top again.
01944             emitError(XMLErrs::ExpectedAttrName);
01945             fReaderMgr.getNextChar();
01946             fReaderMgr.skipQuotedString(nextCh);
01947             fReaderMgr.skipPastSpaces();
01948             continue;
01949         }
01950     }
01951 
01952     if(attCount)
01953     {
01954         // clean up after ourselves:
01955         // clear the map used to detect duplicate attributes
01956         fUndeclaredAttrRegistry->removeAll();
01957     }
01958 
01959     //  Ok, so lets get an enumerator for the attributes of this element
01960     //  and run through them for well formedness and validity checks. But
01961     //  make sure that we had any attributes before we do it, since the list
01962     //  would have gotten faulted in anyway.
01963     if (elemDecl->hasAttDefs())
01964     {
01965         // N.B.:  this assumes DTD validation.
01966         XMLAttDefList& attDefList = elemDecl->getAttDefList();
01967         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
01968         {
01969             // Get the current att def, for convenience and its def type
01970             const XMLAttDef& curDef = attDefList.getAttDef(i);
01971             const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
01972 
              // No entry, or a count below fElemCount, means this attribute
              // def was not stamped while scanning the current start tag.
01973             unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
01974             if (!attCountPtr || *attCountPtr < fElemCount)
01975             { // did not occur
01976                 if (fValidate)
01977                 {
01978                     // If we are validating and it's required, then an error
01979                     if (defType == XMLAttDef::Required)
01980                     {
01981                         fValidator->emitError
01982                         (
01983                             XMLValid::RequiredAttrNotProvided
01984                             , curDef.getFullName()
01985                         );
01986                     }
01987                     else if ((defType == XMLAttDef::Default) ||
01988                                        (defType == XMLAttDef::Fixed)  )
01989                     {
01990                         if (fStandalone && curDef.isExternal())
01991                         {
01992                             // XML 1.0 Section 2.9
01993                             // Document is standalone, so attributes must not be defaulted.
01994                             fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
01995 
01996                         }
01997                     }
01998                 }
01999 
02000                 // Fault in the value if needed, and bump the att count
02001                 if ((defType == XMLAttDef::Default)
02002                 ||  (defType == XMLAttDef::Fixed))
02003                 {
02004                     // Let the validator pass judgement on the attribute value
02005                     if (fValidate)
02006                     {
02007                         fValidator->validateAttrValue
02008                         (
02009                             &curDef
02010                             , curDef.getValue()
02011                             , false
02012                             , elemDecl
02013                         );
02014                     }
02015 
                      // Same reuse-or-allocate pattern as in the scan loop
                      // above, but the attribute is marked as not specified.
02016                     XMLAttr* curAtt;
02017                     if (attCount >= curAttListSize)
02018                     {
02019                         curAtt = new (fMemoryManager) XMLAttr
02020                         (
02021                             0
02022                             , curDef.getFullName()
02023                             , XMLUni::fgZeroLenString
02024                             , curDef.getValue()
02025                             , curDef.getType()
02026                             , false
02027                             , fMemoryManager
02028                         );
02029                         fAttrList->addElement(curAtt);
02030                         curAttListSize++;
02031                     }
02032                     else
02033                     {
02034                         curAtt = fAttrList->elementAt(attCount);
02035                         curAtt->set
02036                         (
02037                             0
02038                             , curDef.getFullName()
02039                             , XMLUni::fgZeroLenString
02040                             , curDef.getValue()
02041                             , curDef.getType()
02042                         );
02043                         curAtt->setSpecified(false);
02044                     }
02045                     attCount++;
02046                 }
02047             }
02048         }
02049     }
02050 
02051     //  If empty, validate content right now if we are validating and then
02052     //  pop the element stack top. Else, we have to update the current stack
02053     //  top's namespace mapping elements.
02054     if (isEmpty)
02055     {
02056         // If validating, then ensure that it's legal to have no content
02057         if (fValidate)
02058         {
02059             XMLSize_t failure;
02060             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
02061             if (!res)
02062             {
02063                 fValidator->emitError
02064                 (
02065                     XMLValid::ElementNotValidForContent
02066                     , elemDecl->getFullName()
02067                     , elemDecl->getFormattedContentModel()
02068                 );
02069             }
02070         }
02071 
02072         // Pop the element stack back off since it'll never be used now
02073         fElemStack.popTop();
02074 
02075         // If the elem stack is empty, then it was an empty root
02076         if (isRoot)
02077             gotData = false;
02078         else {
02079             // Restore the validation flag
              // (read from the element stack after the pop above)
02080             fValidate = fElemStack.getValidationFlag();
02081         }
02082     }
02083 
02084     //  If we have a document handler, then tell it about this start tag. We
02085     //  don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
02086     //  any prefix since it's just one big name if we are not doing namespaces.
02087     if (fDocHandler)
02088     {
02089         fDocHandler->startElement
02090         (
02091             *elemDecl
02092             , fEmptyNamespaceId
02093             , 0
02094             , *fAttrList
02095             , attCount
02096             , isEmpty
02097             , isRoot
02098         );
02099     }
02100 
02101     return true;
02102 }
02103 
02104 
02105 //  This method is called to scan a start tag when we are processing
02106 //  namespaces. There are two different versions of this method, one for
02107 //  namespace aware processing and one for non-namespace aware processing.
02108 //
02109 //  This method is called after we've scanned the < of a start tag. So we
02110 //  have to get the element name, then scan the attributes, after which
02111 //  we are either going to see >, />, or attributes followed by one of those
02112 //  sequences.
02113 bool IGXMLScanner::scanStartTagNS(bool& gotData)
02114 {
02115     //  Assume we will still have data until proven otherwise. It will only
02116     //  ever be false if this is the root and its empty.
02117     gotData = true;
02118 
02119     // Reset element content buffer
02120     fContent.reset();
02121 
02122     //  The current position is after the open bracket, so we need to read in
02123     //  in the element name.
02124     int prefixColonPos;
02125     if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
02126     {
02127         if (fQNameBuf.isEmpty())
02128             emitError(XMLErrs::ExpectedElementName);
02129         else
02130             emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
02131         fReaderMgr.skipToChar(chOpenAngle);
02132         return false;
02133     }
02134 
02135     // See if its the root element
02136     const bool isRoot = fElemStack.isEmpty();
02137 
02138     // Skip any whitespace after the name
02139     fReaderMgr.skipPastSpaces();
02140 
02141     //  First we have to do the rawest attribute scan. We don't do any
02142     //  normalization of them at all, since we don't know yet what type they
02143     //  might be (since we need the element decl in order to do that.)
02144     bool isEmpty;
02145     XMLSize_t attCount = rawAttrScan
02146     (
02147         fQNameBuf.getRawBuffer()
02148         , *fRawAttrList
02149         , isEmpty
02150     );
02151 
02152     // save the contentleafname and currentscope before addlevel, for later use
02153     ContentLeafNameTypeVector* cv = 0;
02154     XMLContentModel* cm = 0;
02155     unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
02156     bool laxThisOne = false;
02157 
02158     if (!isRoot && fGrammarType == Grammar::SchemaGrammarType)
02159     {
02160         // schema validator will have correct type if validating
02161         SchemaElementDecl* tempElement = (SchemaElementDecl*)
02162             fElemStack.topElement()->fThisElement;
02163         SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
02164         ComplexTypeInfo *currType = 0;
02165 
02166         if (fValidate)
02167         {
02168             currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
02169             if (currType)
02170                 modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
02171             else // something must have gone wrong
02172                 modelType = SchemaElementDecl::Any;
02173         }
02174         else
02175         {
02176             currType = tempElement->getComplexTypeInfo();
02177         }
02178 
02179         if ((modelType == SchemaElementDecl::Mixed_Simple)
02180           ||  (modelType == SchemaElementDecl::Mixed_Complex)
02181           ||  (modelType == SchemaElementDecl::Children))
02182         {
02183             cm = currType->getContentModel();
02184             cv = cm->getContentLeafNameTypeVector();
02185             currentScope = fElemStack.getCurrentScope();
02186         }
02187         else if (modelType == SchemaElementDecl::Any) {
02188             laxThisOne = true;
02189         }
02190     }
02191 
02192     //  Now, since we might have to update the namespace map for this element,
02193     //  but we don't have the element decl yet, we just tell the element stack
02194     //  to expand up to get ready.
02195     XMLSize_t elemDepth = fElemStack.addLevel();
02196     fElemStack.setValidationFlag(fValidate);
02197     fElemStack.setPrefixColonPos(prefixColonPos);
02198 
02199     //  Check if there is any external schema location specified, and if we are at root,
02200     //  go through them first before scanning those specified in the instance document
02201     if (isRoot && fDoSchema
02202         && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
02203 
02204         if (fExternalSchemaLocation)
02205             parseSchemaLocation(fExternalSchemaLocation, true);
02206         if (fExternalNoNamespaceSchemaLocation)
02207             resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
02208     }
02209 
02210     //  Make an initial pass through the list and find any xmlns attributes or
02211     //  schema attributes.
02212     if (attCount) {
02213         scanRawAttrListforNameSpaces(attCount);
02214     }
02215 
02216     //  Also find any default or fixed xmlns attributes in DTD defined for
02217     //  this element.
02218     XMLElementDecl* elemDecl = 0;
02219     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
02220 
02221     if (fGrammarType == Grammar::DTDGrammarType) {
02222 
02223         if (!fSkipDTDValidation) {
02224             elemDecl = fGrammar->getElemDecl(
02225                 fEmptyNamespaceId, 0, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
02226             );
02227 
02228             if (elemDecl) {
02229                 if (elemDecl->hasAttDefs()) {
02230                     XMLAttDefList& attDefList = elemDecl->getAttDefList();
02231                     for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
02232                     {
02233                         // Get the current att def, for convenience and its def type
02234                         const XMLAttDef& curDef = attDefList.getAttDef(i);
02235                         const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
02236 
02237                         // update the NSMap if there are any default/fixed xmlns attributes
02238                         if ((defType == XMLAttDef::Default)
02239                         ||  (defType == XMLAttDef::Fixed))
02240                         {
02241                             const XMLCh* rawPtr = curDef.getFullName();
02242                             if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
02243                             ||  XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
02244                                 updateNSMap(rawPtr, curDef.getValue());
02245                         }
02246                     }
02247                 }
02248             }
02249         }
02250 
02251         if (!elemDecl) {
02252             elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
02253         }
02254     }
02255 
02256     //  Resolve the qualified name to a URI and name so that we can look up
02257     //  the element decl for this element. We have now update the prefix to
02258     //  namespace map so we should get the correct element now.
02259     unsigned int uriId = resolveQNameWithColon(
02260         qnameRawBuf, fPrefixBuf, ElemStack::Mode_Element, prefixColonPos
02261     );
02262 
02263     //if schema, check if we should lax or skip the validation of this element
02264     bool parentValidation = fValidate;
02265     if (cv) {
02266         QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
02267         // elementDepth will be > 0, as cv is only constructed if element is not
02268         // root.
02269         laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
02270     }
02271 
02272     //  Look up the element now in the grammar. This will get us back a
02273     //  generic element decl object. We tell him to fault one in if he does
02274     //  not find it.
02275     bool wasAdded = false;
02276     const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
02277 
02278     if (fDoSchema) {
02279 
02280         if (fGrammarType == Grammar::DTDGrammarType) {
02281             if (!switchGrammar(getURIText(uriId))) {
02282                 fValidator->emitError(
02283                     XMLValid::GrammarNotFound, getURIText(uriId)
02284                 );
02285             }
02286         }
02287 
02288         if (fGrammarType == Grammar::SchemaGrammarType) {
02289             elemDecl = fGrammar->getElemDecl(
02290                 uriId, nameRawBuf, qnameRawBuf, currentScope
02291             );
02292 
02293             // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
02294             if (!elemDecl) {
02295                 bool checkTopLevel = (currentScope != Grammar::TOP_LEVEL_SCOPE);
02296                 const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
02297                 unsigned int orgGrammarUri = fURIStringPool->getId(original_uriStr);
02298 
02299                 if (orgGrammarUri != uriId) {
02300                     if (switchGrammar(getURIText(uriId))) {
02301                         checkTopLevel = true;
02302                     }
02303                     else {
02304                         // the laxElementValidation routine (called above) will
02305                         // set fValidate to false for a "skipped" element
02306                         if (!laxThisOne && fValidate) {
02307                             fValidator->emitError(
02308                                 XMLValid::GrammarNotFound, getURIText(uriId)
02309                             );
02310                         }
02311                         checkTopLevel = false;
02312                     }
02313                 }
02314 
02315                 if (checkTopLevel) {
02316                     elemDecl = fGrammar->getElemDecl(
02317                         uriId, nameRawBuf, qnameRawBuf, Grammar::TOP_LEVEL_SCOPE
02318                     );
02319                 }
02320 
02321                 if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
02322 
02323                     if (orgGrammarUri == uriId) {
02324                         // still not found in specified uri
02325                         // try emptyNamespace see if element should be
02326                         // un-qualified.
02327                         // Use a temp variable until we decide this is the case
02328                         if (uriId != fEmptyNamespaceId) {
02329                             XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
02330                                 fEmptyNamespaceId, nameRawBuf, qnameRawBuf, currentScope
02331                             );
02332 
02333                             if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
02334                                 fValidator->emitError(
02335                                     XMLValid::ElementNotUnQualified, qnameRawBuf
02336                                 );
02337                                 elemDecl = tempElemDecl;
02338                             }
02339                         }
02340                     }
02341                     // still Not found in specified uri
02342                     // go to original Grammar again to see if element needs
02343                     // to be fully qualified.
02344                     // Use a temp variable until we decide this is the case
02345                     else if (uriId == fEmptyNamespaceId) {
02346 
02347                         if (switchGrammar(original_uriStr)) {
02348                             XMLElementDecl* tempElemDecl = fGrammar->getElemDecl(
02349                                 orgGrammarUri, nameRawBuf, qnameRawBuf, currentScope
02350                             );
02351                             if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
02352                                 fValidator->emitError(
02353                                     XMLValid::ElementNotQualified, qnameRawBuf
02354                                 );
02355                                 elemDecl = tempElemDecl;
02356                             }
02357                         }
02358                         else if (!laxThisOne && fValidate) {
02359                             fValidator->emitError(
02360                                 XMLValid::GrammarNotFound,original_uriStr
02361                             );
02362                         }
02363                     }
02364                 }
02365 
02366                 if (!elemDecl) {
02367                     // still not found
02368                     // switch back to original grammar first if necessary
02369                     if (orgGrammarUri != uriId) {
02370                         switchGrammar(original_uriStr);
02371                     }
02372 
02373                     // look in the list of undeclared elements, as would have been
02374                     // done before we made grammars stateless:
02375                     elemDecl = fSchemaElemNonDeclPool->getByKey(
02376                         nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE
02377                     );
02378                 }
02379             }
02380         }
02381     }
02382 
02383     if (!elemDecl) {
02384 
02385         if (fGrammarType == Grammar::DTDGrammarType) {
02386             elemDecl = new (fMemoryManager) DTDElementDecl(
02387                 qnameRawBuf, uriId, DTDElementDecl::Any, fMemoryManager
02388             );
02389             elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
02390         }
02391         else if (fGrammarType == Grammar::SchemaGrammarType)  {
02392             elemDecl = new (fMemoryManager) SchemaElementDecl(
02393                 fPrefixBuf.getRawBuffer(), nameRawBuf, uriId
02394                 , SchemaElementDecl::Any, Grammar::TOP_LEVEL_SCOPE
02395                 , fMemoryManager
02396             );
02397             elemDecl->setId(
02398                 fSchemaElemNonDeclPool->put((void*)elemDecl->getBaseName()
02399                 , uriId, (int)Grammar::TOP_LEVEL_SCOPE, (SchemaElementDecl*)elemDecl)
02400             );
02401         }
02402         wasAdded = true;
02403     }
02404 
02405     // this info needed for DOMTypeInfo
02406     fPSVIElemContext.fErrorOccurred = false;
02407 
02408     //  We do something different here according to whether we found the
02409     //  element or not.
02410     bool bXsiTypeSet= (fValidator && fGrammarType == Grammar::SchemaGrammarType)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
02411     if (wasAdded)
02412     {
02413         if (laxThisOne && !bXsiTypeSet) {
02414             fValidate = false;
02415             fElemStack.setValidationFlag(fValidate);
02416         }
02417         else if (fValidate)
02418         {
02419             // If validating then emit an error
02420 
02421             // This is to tell the reuse Validator that this element was
02422             // faulted-in, was not an element in the grammar pool originally
02423             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
02424 
02425             // xsi:type was specified, don't complain about missing definition
02426             if(!bXsiTypeSet)
02427             {
02428                 fValidator->emitError
02429                 (
02430                     XMLValid::ElementNotDefined
02431                     , elemDecl->getFullName()
02432                 );
02433 
02434                 if(fGrammarType == Grammar::SchemaGrammarType)
02435                 {
02436                     fPSVIElemContext.fErrorOccurred = true;
02437                 }
02438             }
02439         }
02440     }
02441     else
02442     {
02443         // If its not marked declared and validating, then emit an error
02444         if (!elemDecl->isDeclared()) {
02445             if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
02446                 if(!bXsiTypeSet && fGrammarType == Grammar::SchemaGrammarType) {
02447                     fPSVIElemContext.fErrorOccurred = true;
02448                 }
02449             }
02450 
02451             if (laxThisOne) {
02452                 fValidate = false;
02453                 fElemStack.setValidationFlag(fValidate);
02454             }
02455             else if (fValidate && !bXsiTypeSet)
02456             {
02457                 fValidator->emitError
02458                 (
02459                     XMLValid::ElementNotDefined
02460                     , elemDecl->getFullName()
02461                 );
02462             }
02463         }
02464     }
02465 
02466     //  Now we can update the element stack to set the current element
02467     //  decl. We expanded the stack above, but couldn't store the element
02468     //  decl because we didn't know it yet.
02469     fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
02470     fElemStack.setCurrentURI(uriId);
02471 
02472     if (isRoot)
02473     {
02474         fRootGrammar = fGrammar;
02475         if (fGrammarType == Grammar::SchemaGrammarType && !fRootElemName)
02476             fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
02477     }
02478 
02479     if (fGrammarType == Grammar::SchemaGrammarType && fPSVIHandler)
02480     {
02481 
02482         fPSVIElemContext.fElemDepth++;
02483         if (elemDecl->isDeclared())
02484         {
02485             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
02486         }
02487         else
02488         {
02489             fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
02490 
02491             /******
02492              * While we report an error for historical reasons, this should
02493              * actually result in lax assessment - NG.
02494             if (isRoot && fValidate)
02495                 fPSVIElemContext.fErrorOccurred = true;
02496             *****/
02497         }
02498     }
02499 
02500     //  Validate the element
02501     if (fValidate)
02502     {
02503         fValidator->validateElement(elemDecl);
02504         if (fValidator->handlesSchema())
02505         {
02506             if (((SchemaValidator*) fValidator)->getErrorOccurred())
02507                 fPSVIElemContext.fErrorOccurred = true;
02508         }
02509     }
02510 
02511     if (fGrammarType == Grammar::SchemaGrammarType) {
02512 
02513         // squirrel away the element's QName, so that we can do an efficient
02514         // end-tag match
02515         fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
02516 
02517         ComplexTypeInfo* typeinfo = (fValidate)
02518             ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
02519             : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
02520 
02521         if (typeinfo) {
02522             currentScope = typeinfo->getScopeDefined();
02523 
02524             // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
02525             XMLCh* typeName = typeinfo->getTypeName();
02526             const int comma = XMLString::indexOf(typeName, chComma);
02527             if (comma > 0) {
02528                 XMLBuffer prefixBuf(comma+1, fMemoryManager);
02529                 prefixBuf.append(typeName, comma);
02530                 const XMLCh* uriStr = prefixBuf.getRawBuffer();
02531 
02532                 bool errorCondition = !switchGrammar(uriStr) && fValidate;
02533                 if (errorCondition && !laxThisOne)
02534                 {
02535                     fValidator->emitError
02536                     (
02537                         XMLValid::GrammarNotFound
02538                         , prefixBuf.getRawBuffer()
02539                     );
02540                 }
02541             }
02542             else if (comma == 0) {
02543                 bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
02544                 if (errorCondition && !laxThisOne)
02545                 {
02546                     fValidator->emitError
02547                     (
02548                         XMLValid::GrammarNotFound
02549                         , XMLUni::fgZeroLenString
02550                     );
02551                 }
02552             }
02553         }
02554         fElemStack.setCurrentScope(currentScope);
02555 
02556         // Set element next state
02557         if (elemDepth >= fElemStateSize) {
02558             resizeElemState();
02559         }
02560 
02561         fElemState[elemDepth] = 0;
02562         fElemLoopState[elemDepth] = 0;
02563     }
02564 
02565     fElemStack.setCurrentGrammar(fGrammar);
02566 
02567     //  If this is the first element and we are validating, check the root
02568     //  element.
02569     if (isRoot)
02570     {
02571         if (fValidate)
02572         {
02573             //  If a DocType exists, then check if it matches the root name there.
02574             if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
02575                 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
02576         }
02577     }
02578     else if (parentValidation)
02579     {
02580         //  If the element stack is not empty, then add this element as a
02581         //  child of the previous top element. If its empty, this is the root
02582         //  elem and is not the child of anything.
02583         fElemStack.addChild(elemDecl->getElementName(), true);
02584     }
02585 
02586     // PSVI handling:  even if it turns out there are
02587     // no attributes, we need to reset this list...
02588     if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType )
02589         fPSVIAttrList->reset();
02590 
02591     //  Now lets get the fAttrList filled in. This involves faulting in any
02592     //  defaulted and fixed attributes and normalizing the values of any that
02593     //  we got explicitly.
02594     //
02595     //  We update the attCount value with the total number of attributes, but
02596     //  it goes in with the number of values we got during the raw scan of
02597     //  explicitly provided attrs above.
02598     attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
02599     if(attCount)
02600     {
02601         // clean up after ourselves:
02602         // clear the map used to detect duplicate attributes
02603         fUndeclaredAttrRegistry->removeAll();
02604     }
02605 
02606     // activate identity constraints
02607     if (fGrammar  &&
02608         fGrammarType == Grammar::SchemaGrammarType &&
02609         toCheckIdentityConstraint())
02610     {
02611         fICHandler->activateIdentityConstraint
02612                         (
02613                           (SchemaElementDecl*) elemDecl
02614                         , (int) elemDepth
02615                         , uriId
02616                         , fPrefixBuf.getRawBuffer()
02617                         , *fAttrList
02618                         , attCount
02619                         , fValidationContext
02620                         );
02621     }
02622 
02623     // Since the element may have default values, call start tag now regardless if it is empty or not
02624     // If we have a document handler, then tell it about this start tag
02625     if (fDocHandler)
02626     {
02627         fDocHandler->startElement
02628         (
02629             *elemDecl
02630             , uriId
02631             , fPrefixBuf.getRawBuffer()
02632             , *fAttrList
02633             , attCount
02634             , false
02635             , isRoot
02636         );
02637     }
02638 
02639     // if we have a PSVIHandler, now's the time to call
02640     // its handleAttributesPSVI method:
02641     if(fPSVIHandler && fGrammarType == Grammar::SchemaGrammarType)
02642     {
02643         QName *eName = elemDecl->getElementName();
02644         fPSVIHandler->handleAttributesPSVI
02645         (
02646             eName->getLocalPart()
02647             , fURIStringPool->getValueForId(eName->getURI())
02648             , fPSVIAttrList
02649         );
02650     }
02651 
02652     //  If empty, validate content right now if we are validating and then
02653     //  pop the element stack top. Else, we have to update the current stack
02654     //  top's namespace mapping elements.
02655     if (isEmpty)
02656     {
02657         // Pop the element stack back off since it'll never be used now
02658         fElemStack.popTop();
02659 
02660         // reset current type info
02661         DatatypeValidator* psviMemberType = 0;
02662         if (fGrammarType == Grammar::SchemaGrammarType)
02663         {
02664             if (fValidate && elemDecl->isDeclared())
02665             {
02666                 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
02667                 if(!fPSVIElemContext.fCurrentTypeInfo)
02668                     fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
02669                 else
02670                     fPSVIElemContext.fCurrentDV = 0;
02671                 if(fPSVIHandler)
02672                 {
02673                     fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
02674 
02675                     if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
02676                         fPSVIElemContext.fNormalizedValue = 0;
02677                 }
02678             }
02679             else
02680             {
02681                 fPSVIElemContext.fCurrentDV = 0;
02682                 fPSVIElemContext.fCurrentTypeInfo = 0;
02683                 fPSVIElemContext.fNormalizedValue = 0;
02684             }
02685         }
02686 
02687         // If validating, then insure that its legal to have no content
02688         if (fValidate)
02689         {
02690             XMLSize_t failure;
02691             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
02692             if (!res)
02693             {
02694                 fValidator->emitError
02695                 (
02696                     XMLValid::ElementNotValidForContent
02697                     , elemDecl->getFullName()
02698                     , elemDecl->getFormattedContentModel()
02699                 );
02700             }
02701 
02702             if (fGrammarType == Grammar::SchemaGrammarType) {
02703 
02704                 if (((SchemaValidator*) fValidator)->getErrorOccurred())
02705                 {
02706                     fPSVIElemContext.fErrorOccurred = true;
02707                 }
02708                 else
02709                 {
02710                     if (fPSVIHandler)
02711                     {
02712                         fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
02713                         if(fPSVIElemContext.fIsSpecified)
02714                             fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
02715                     }
02716                     // note that if we're empty, won't be a current DV
02717                     if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
02718                         psviMemberType = fValidationContext->getValidatingMemberType();
02719                 }
02720 
02721                 // call matchers and de-activate context
02722                 if (toCheckIdentityConstraint())
02723                 {
02724                     fICHandler->deactivateContext
02725                                    (
02726                                     (SchemaElementDecl *) elemDecl
02727                                   , fContent.getRawBuffer()
02728                                   , fValidationContext
02729                                   , fPSVIElemContext.fCurrentDV
02730                                    );
02731                 }
02732 
02733             }
02734         }
02735         else if (fGrammarType == Grammar::SchemaGrammarType) {
02736             ((SchemaValidator*)fValidator)->resetNillable();
02737         }
02738 
02739         if (fGrammarType == Grammar::SchemaGrammarType)
02740         {
02741             if (fPSVIHandler)
02742             {
02743                 endElementPSVI((SchemaElementDecl*)elemDecl, psviMemberType);
02744             }
02745         }
02746 
02747         // If we have a doc handler, tell it about the end tag
02748         if (fDocHandler)
02749         {
02750             fDocHandler->endElement
02751             (
02752                 *elemDecl
02753                 , uriId
02754                 , isRoot
02755                 , fPrefixBuf.getRawBuffer()
02756             );
02757         }
02758 
02759         // If the elem stack is empty, then it was an empty root
02760         if (isRoot)
02761             gotData = false;
02762         else
02763         {
02764             // Restore the grammar
02765             fGrammar = fElemStack.getCurrentGrammar();
02766             fGrammarType = fGrammar->getGrammarType();
02767             if (fGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) {
02768                 if (fValidatorFromUser)
02769                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
02770                 else {
02771                     fValidator = fSchemaValidator;
02772                 }
02773             }
02774             else if (fGrammarType == Grammar::DTDGrammarType && !fValidator->handlesDTD()) {
02775                 if (fValidatorFromUser)
02776                     ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
02777                 else {
02778                     fValidator = fDTDValidator;
02779                 }
02780             }
02781 
02782             fValidator->setGrammar(fGrammar);
02783 
02784             // Restore the validation flag
02785             fValidate = fElemStack.getValidationFlag();
02786         }
02787     }
02788     else if (fGrammarType == Grammar::SchemaGrammarType)
02789     {
02790         // send a partial element psvi
02791         if (fPSVIHandler)
02792         {
02793 
02794             ComplexTypeInfo*   curTypeInfo = 0;
02795             DatatypeValidator* curDV = 0;
02796             XSTypeDefinition*  typeDef = 0;
02797 
02798             if (fValidate && elemDecl->isDeclared())
02799             {
02800                 curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
02801 
02802                 if (curTypeInfo)
02803                 {
02804                     typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
02805                 }
02806                 else
02807                 {
02808                     curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
02809 
02810                     if (curDV)
02811                     {
02812                         typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
02813                     }
02814                 }
02815             }
02816 
02817             fPSVIElement->reset
02818                 (
02819                   PSVIElement::VALIDITY_NOTKNOWN
02820                 , PSVIElement::VALIDATION_NONE
02821                 , fRootElemName
02822                 , ((SchemaValidator*) fValidator)->getIsElemSpecified()
02823                 , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
02824                 , typeDef
02825                 , 0 //memberType
02826                 , fModel
02827                 , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
02828                 , 0
02829                 , 0
02830                 , 0
02831                 );
02832 
02833 
02834             fPSVIHandler->handlePartialElementPSVI
02835                 (
02836                   elemDecl->getBaseName()
02837                 , fURIStringPool->getValueForId(elemDecl->getURI())
02838                 , fPSVIElement
02839                 );
02840 
02841         }
02842 
02843         // not empty
02844         fErrorStack->push(fPSVIElemContext.fErrorOccurred);
02845     }
02846 
02847     return true;
02848 }
02849 
02850 
02851 // ---------------------------------------------------------------------------
02852 //  IGXMLScanner: Helper methods
02853 // ---------------------------------------------------------------------------
02854 void IGXMLScanner::resizeElemState() {
02855 
02856     unsigned int newSize = fElemStateSize * 2;
02857     unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
02858     (
02859         newSize * sizeof(unsigned int)
02860     ); //new unsigned int[newSize];
02861     unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate
02862     (
02863         newSize * sizeof(unsigned int)
02864     ); //new unsigned int[newSize];
02865 
02866     // Copy the existing values
02867     unsigned int index = 0;
02868     for (; index < fElemStateSize; index++)
02869     {
02870         newElemState[index] = fElemState[index];
02871         newElemLoopState[index] = fElemLoopState[index];
02872     }
02873 
02874     for (; index < newSize; index++)
02875         newElemLoopState[index] = newElemState[index] = 0;
02876 
02877     // Delete the old array and udpate our members
02878     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
02879     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemState;
02880     fElemState = newElemState;
02881     fElemLoopState = newElemLoopState;
02882     fElemStateSize = newSize;
02883 }
02884 
02885 void IGXMLScanner::resizeRawAttrColonList() {
02886 
02887     unsigned int newSize = fRawAttrColonListSize * 2;
02888     int* newRawAttrColonList = (int*) fMemoryManager->allocate
02889     (
02890         newSize * sizeof(int)
02891     ); //new int[newSize];
02892 
02893     // Copy the existing values
02894     unsigned int index = 0;
02895     for (; index < fRawAttrColonListSize; index++)
02896         newRawAttrColonList[index] = fRawAttrColonList[index];
02897 
02898     // Delete the old array and udpate our members
02899     fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList;
02900     fRawAttrColonList = newRawAttrColonList;
02901     fRawAttrColonListSize = newSize;
02902 }
02903 
02904 // ---------------------------------------------------------------------------
02905 //  IGXMLScanner: Grammar preparsing
02906 // ---------------------------------------------------------------------------
02907 Grammar* IGXMLScanner::loadGrammar(const   InputSource& src
02908                                    , const short        grammarType
02909                                    , const bool         toCache)
02910 {
02911     Grammar* loadedGrammar = 0;
02912 
02913     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
02914 
02915     try
02916     {
02917         fGrammarResolver->cacheGrammarFromParse(false);
02918                 // if the new grammar has to be cached, better use the already cached
02919                 // grammars, or the an exception will be thrown when caching an already
02920                 // cached grammar
02921         fGrammarResolver->useCachedGrammarInParse(toCache);
02922         fRootGrammar = 0;
02923 
02924         if (fValScheme == Val_Auto) {
02925             fValidate = true;
02926         }
02927 
02928         // Reset some status flags
02929         fInException = false;
02930         fStandalone = false;
02931         fErrorCount = 0;
02932         fHasNoDTD = true;
02933         fSeeXsi = false;
02934 
02935         if (grammarType == Grammar::SchemaGrammarType) {
02936             loadedGrammar = loadXMLSchemaGrammar(src, toCache);
02937         }
02938         else if (grammarType == Grammar::DTDGrammarType) {
02939             loadedGrammar = loadDTDGrammar(src, toCache);
02940         }
02941     }
02942     //  NOTE:
02943     //
02944     //  In all of the error processing below, the emitError() call MUST come
02945     //  before the flush of the reader mgr, or it will fail because it tries
02946     //  to find out the position in the XML source of the error.
02947     catch(const XMLErrs::Codes)
02948     {
02949         // This is a 'first fatal error' type exit, so fall through
02950     }
02951     catch(const XMLValid::Codes)
02952     {
02953         // This is a 'first fatal error' type exit, so fall through
02954     }
02955     catch(const XMLException& excToCatch)
02956     {
02957         //  Emit the error and catch any user exception thrown from here. Make
02958         //  sure in all cases we flush the reader manager.
02959         fInException = true;
02960         try
02961         {
02962             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
02963                 emitError
02964                 (
02965                     XMLErrs::XMLException_Warning
02966                     , excToCatch.getCode()
02967                     , excToCatch.getMessage()
02968                 );
02969             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
02970                 emitError
02971                 (
02972                     XMLErrs::XMLException_Fatal
02973                     , excToCatch.getCode()
02974                     , excToCatch.getMessage()
02975                 );
02976             else
02977                 emitError
02978                 (
02979                     XMLErrs::XMLException_Error
02980                     , excToCatch.getCode()
02981                     , excToCatch.getMessage()
02982                 );
02983         }
02984         catch(const OutOfMemoryException&)
02985         {
02986             // This is a special case for out-of-memory
02987             // conditions, because resetting the ReaderMgr
02988             // can be problematic.
02989             resetReaderMgr.release();
02990 
02991             throw;
02992         }
02993     }
02994     catch(const OutOfMemoryException&)
02995     {
02996         // This is a special case for out-of-memory
02997         // conditions, because resetting the ReaderMgr
02998         // can be problematic.
02999         resetReaderMgr.release();
03000 
03001         throw;
03002     }
03003 
03004     return loadedGrammar;
03005 }
03006 
03007 void IGXMLScanner::resetCachedGrammar ()
03008 {
03009   fCachedSchemaInfoList->removeAll ();
03010 }
03011 
03012 Grammar* IGXMLScanner::loadDTDGrammar(const InputSource& src,
03013                                       const bool toCache)
03014 {
03015     // Reset the validators
03016     fDTDValidator->reset();
03017     if (fValidatorFromUser)
03018         fValidator->reset();
03019 
03020     if (!fValidator->handlesDTD()) {
03021         if (fValidatorFromUser && fValidate)
03022             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
03023         else {
03024             fValidator = fDTDValidator;
03025         }
03026     }
03027 
03028     fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(XMLUni::fgDTDEntityString);
03029 
03030     if (fDTDGrammar) {
03031         fDTDGrammar->reset();
03032     }
03033     else {
03034         fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
03035         fGrammarResolver->putGrammar(fDTDGrammar);
03036     }
03037 
03038     fGrammar = fDTDGrammar;
03039     fGrammarType = fGrammar->getGrammarType();
03040     fValidator->setGrammar(fGrammar);
03041 
03042     //  And for all installed handlers, send reset events. This gives them
03043     //  a chance to flush any cached data.
03044     if (fDocHandler)
03045         fDocHandler->resetDocument();
03046     if (fEntityHandler)
03047         fEntityHandler->resetEntities();
03048     if (fErrorReporter)
03049         fErrorReporter->resetErrors();
03050 
03051     // Clear out the id reference list
03052     resetValidationContext();
03053     // and clear out the darned undeclared DTD element pool...
03054     fDTDElemNonDeclPool->removeAll();
03055 
03056     if (toCache) {
03057 
03058         unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
03059         const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
03060 
03061         fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
03062         ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
03063         fGrammarResolver->putGrammar(fGrammar);
03064     }
03065 
03066     //  Handle the creation of the XML reader object for this input source.
03067     //  This will provide us with transcoding and basic lexing services.
03068     XMLReader* newReader = fReaderMgr.createReader
03069     (
03070         src
03071         , false
03072         , XMLReader::RefFrom_NonLiteral
03073         , XMLReader::Type_General
03074         , XMLReader::Source_External
03075         , fCalculateSrcOfs
03076         , fLowWaterMark
03077     );
03078     if (!newReader) {
03079         if (src.getIssueFatalErrorIfNotFound())
03080             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
03081         else
03082             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
03083     }
03084 
03085     //  In order to make the processing work consistently, we have to
03086     //  make this look like an external entity. So create an entity
03087     //  decl and fill it in and push it with the reader, as happens
03088     //  with an external entity. Put a janitor on it to insure it gets
03089     //  cleaned up. The reader manager does not adopt them.
03090     const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
03091     DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
03092     declDTD->setSystemId(src.getSystemId());
03093     declDTD->setIsExternal(true);
03094     Janitor<DTDEntityDecl> janDecl(declDTD);
03095 
03096     // Mark this one as a throw at end
03097     newReader->setThrowAtEnd(true);
03098 
03099     // And push it onto the stack, with its pseudo name
03100     fReaderMgr.pushReader(newReader, declDTD);
03101 
03102     //  If we have a doc type handler and advanced callbacks are enabled,
03103     //  call the doctype event.
03104     if (fDocTypeHandler) {
03105 
03106         // Create a dummy root
03107         DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
03108         (
03109             gDTDStr
03110             , fEmptyNamespaceId
03111             , DTDElementDecl::Any
03112             , fGrammarPoolMemoryManager
03113         );
03114         rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
03115         rootDecl->setExternalElemDeclaration(true);
03116         Janitor<DTDElementDecl> janSrc(rootDecl);
03117 
03118         fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
03119     }
03120 
03121     // Create DTDScanner
03122     DTDScanner dtdScanner
03123     (
03124         (DTDGrammar*) fGrammar
03125         , fDocTypeHandler
03126         , fGrammarPoolMemoryManager
03127         , fMemoryManager
03128     );
03129     dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
03130 
03131     // Tell it its not in an include section
03132     dtdScanner.scanExtSubsetDecl(false, true);
03133 
03134     if (fValidate) {
03135         //  validate the DTD scan so far
03136         fValidator->preContentValidation(false, true);
03137     }
03138 
03139     if (toCache)
03140         fGrammarResolver->cacheGrammars();
03141 
03142     return fDTDGrammar;
03143 }
03144 
03145 // ---------------------------------------------------------------------------
03146 //  IGXMLScanner: Helper methods
03147 // ---------------------------------------------------------------------------
03148 void IGXMLScanner::processSchemaLocation(XMLCh* const schemaLoc)
03149 {
03150     XMLCh* locStr = schemaLoc;
03151     XMLReader* curReader = fReaderMgr.getCurrentReader();
03152 
03153     fLocationPairs->removeAllElements();
03154     while (*locStr)
03155     {
03156         do {
03157             // Do we have an escaped character ?
03158             if (*locStr == 0xFFFF)
03159                 continue;
03160 
03161             if (!curReader->isWhitespace(*locStr))
03162                break;
03163 
03164             *locStr = chNull;
03165         } while (*++locStr);
03166 
03167         if (*locStr) {
03168 
03169             fLocationPairs->addElement(locStr);
03170 
03171             while (*++locStr) {
03172                 // Do we have an escaped character ?
03173                 if (*locStr == 0xFFFF)
03174                     continue;
03175                 if (curReader->isWhitespace(*locStr))
03176                     break;
03177             }
03178         }
03179     }
03180 }
03181 
03182 void IGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
03183                                   DatatypeValidator* const memberDV)
03184 {
03185     PSVIElement::ASSESSMENT_TYPE validationAttempted;
03186     PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN;
03187 
03188     if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
03189         validationAttempted = PSVIElement::VALIDATION_FULL;
03190     else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
03191         validationAttempted = PSVIElement::VALIDATION_NONE;
03192     else
03193     {
03194         validationAttempted  = PSVIElement::VALIDATION_PARTIAL;
03195                 fPSVIElemContext.fFullValidationDepth =
03196             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
03197     }
03198 
03199     if (fValidate && elemDecl->isDeclared())
03200     {
03201         validity = (fPSVIElemContext.fErrorOccurred)
03202             ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID;
03203     }
03204 
03205     XSTypeDefinition* typeDef = 0;
03206     bool isMixed = false;
03207     if (fPSVIElemContext.fCurrentTypeInfo)
03208     {
03209         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
03210         SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
03211         isMixed = (modelType == SchemaElementDecl::Mixed_Simple
03212                 || modelType == SchemaElementDecl::Mixed_Complex);
03213     }
03214     else if (fPSVIElemContext.fCurrentDV)
03215         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
03216 
03217     XMLCh* canonicalValue = 0;
03218     if (fPSVIElemContext.fNormalizedValue && !isMixed &&
03219             validity == PSVIElement::VALIDITY_VALID)
03220     {
03221         if (memberDV)
03222             canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
03223         else if (fPSVIElemContext.fCurrentDV)
03224             canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
03225     }
03226 
03227     fPSVIElement->reset
03228     (
03229         validity
03230         , validationAttempted
03231         , fRootElemName
03232         , fPSVIElemContext.fIsSpecified
03233         , (elemDecl->isDeclared())
03234             ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
03235         , typeDef
03236         , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
03237         , fModel
03238         , elemDecl->getDefaultValue()
03239         , fPSVIElemContext.fNormalizedValue
03240         , canonicalValue
03241     );
03242 
03243     fPSVIHandler->handleElementPSVI
03244     (
03245         elemDecl->getBaseName()
03246         , fURIStringPool->getValueForId(elemDecl->getURI())
03247         , fPSVIElement
03248     );
03249 
03250     // decrease element depth
03251     fPSVIElemContext.fElemDepth--;
03252 
03253 }
03254 
03255 void IGXMLScanner::resetPSVIElemContext()
03256 {
03257     fPSVIElemContext.fIsSpecified = false;
03258     fPSVIElemContext.fErrorOccurred = false;
03259     fPSVIElemContext.fElemDepth = -1;
03260     fPSVIElemContext.fFullValidationDepth = -1;
03261     fPSVIElemContext.fNoneValidationDepth = -1;
03262     fPSVIElemContext.fCurrentDV = 0;
03263     fPSVIElemContext.fCurrentTypeInfo = 0;
03264     fPSVIElemContext.fNormalizedValue = 0;
03265 }
03266 
03267 XERCES_CPP_NAMESPACE_END