GME  13
SGXMLScanner.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: SGXMLScanner.cpp 925236 2010-03-19 14:29:47Z borisk $
00020  */
00021 
00022 
00023 // ---------------------------------------------------------------------------
00024 //  Includes
00025 // ---------------------------------------------------------------------------
00026 #include <xercesc/internal/SGXMLScanner.hpp>
00027 #include <xercesc/util/RuntimeException.hpp>
00028 #include <xercesc/util/UnexpectedEOFException.hpp>
00029 #include <xercesc/util/XMLUri.hpp>
00030 #include <xercesc/framework/LocalFileInputSource.hpp>
00031 #include <xercesc/framework/URLInputSource.hpp>
00032 #include <xercesc/framework/XMLDocumentHandler.hpp>
00033 #include <xercesc/framework/XMLEntityHandler.hpp>
00034 #include <xercesc/framework/XMLPScanToken.hpp>
00035 #include <xercesc/framework/MemoryManager.hpp>
00036 #include <xercesc/framework/XMLGrammarPool.hpp>
00037 #include <xercesc/framework/psvi/PSVIElement.hpp>
00038 #include <xercesc/framework/psvi/PSVIHandler.hpp>
00039 #include <xercesc/framework/psvi/PSVIAttributeList.hpp>
00040 #include <xercesc/framework/psvi/XSAnnotation.hpp>
00041 #include <xercesc/internal/EndOfEntityException.hpp>
00042 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
00043 #include <xercesc/validators/schema/SchemaValidator.hpp>
00044 #include <xercesc/validators/schema/TraverseSchema.hpp>
00045 #include <xercesc/validators/schema/XSDDOMParser.hpp>
00046 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
00047 #include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
00048 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
00049 #include <xercesc/validators/schema/identity/IC_Selector.hpp>
00050 #include <xercesc/validators/schema/identity/ValueStore.hpp>
00051 #include <xercesc/util/OutOfMemoryException.hpp>
00052 #include <xercesc/util/XMLStringTokenizer.hpp>
00053 
00054 XERCES_CPP_NAMESPACE_BEGIN
00055 
// Forward declaration of a file-level helper that takes a complex type and an
// element declaration and yields an attribute definition list. Its definition
// is not part of this section of the file.
00056 inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl);
00057 
00058 
// Janitor types that pair an object with one of its member functions.
// CleanupType is used with SGXMLScanner::cleanUp in the constructors, and
// ReaderMgrResetType with ReaderMgr::reset in scanDocument() and scanNext().
// NOTE(review): presumably the bound member function runs when the janitor
// goes out of scope unless release() has been called - confirm against
// JanitorMemFunCall.
00059 typedef JanitorMemFunCall<SGXMLScanner> CleanupType;
00060 typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;
00061 
00062 
00063 // ---------------------------------------------------------------------------
00064 //  SGXMLScanner: Constructors and Destructor
00065 // ---------------------------------------------------------------------------
00066 SGXMLScanner::SGXMLScanner( XMLValidator* const valToAdopt
00067                           , GrammarResolver* const grammarResolver
00068                           , MemoryManager* const manager) :
00069 
00070     XMLScanner(valToAdopt, grammarResolver, manager)
00071     , fSeeXsi(false)
00072     , fGrammarType(Grammar::UnKnown)
00073     , fElemStateSize(16)
00074     , fElemState(0)
00075     , fElemLoopState(0)
00076     , fContent(1023, manager)
00077     , fEntityTable(0)
00078     , fRawAttrList(0)
00079     , fRawAttrColonListSize(32)
00080     , fRawAttrColonList(0)
00081     , fSchemaGrammar(0)
00082     , fSchemaValidator(0)
00083     , fICHandler(0)
00084     , fElemNonDeclPool(0)
00085     , fElemCount(0)
00086     , fAttDefRegistry(0)
00087     , fUndeclaredAttrRegistry(0)
00088     , fPSVIAttrList(0)
00089     , fModel(0)
00090     , fPSVIElement(0)
00091     , fErrorStack(0)
00092     , fSchemaInfoList(0)
00093     , fCachedSchemaInfoList(0)
00094 {
00095     CleanupType cleanup(this, &SGXMLScanner::cleanUp);
00096 
00097     try
00098     {
00099          commonInit();
00100     }
00101     catch(const OutOfMemoryException&)
00102     {
00103         // Don't cleanup when out of memory, since executing the
00104         // code can cause problems.
00105         cleanup.release();
00106 
00107         throw;
00108     }
00109 
00110     cleanup.release();
00111 }
00112 
00113 SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler
00114                           , DocTypeHandler* const     docTypeHandler
00115                           , XMLEntityHandler* const   entityHandler
00116                           , XMLErrorReporter* const   errHandler
00117                           , XMLValidator* const       valToAdopt
00118                           , GrammarResolver* const    grammarResolver
00119                           , MemoryManager* const      manager) :
00120 
00121     XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
00122     , fSeeXsi(false)
00123     , fGrammarType(Grammar::UnKnown)
00124     , fElemStateSize(16)
00125     , fElemState(0)
00126     , fElemLoopState(0)
00127     , fContent(1023, manager)
00128     , fEntityTable(0)
00129     , fRawAttrList(0)
00130     , fRawAttrColonListSize(32)
00131     , fRawAttrColonList(0)
00132     , fSchemaGrammar(0)
00133     , fSchemaValidator(0)
00134     , fICHandler(0)
00135     , fElemNonDeclPool(0)
00136     , fElemCount(0)
00137     , fAttDefRegistry(0)
00138     , fUndeclaredAttrRegistry(0)
00139     , fPSVIAttrList(0)
00140     , fModel(0)
00141     , fPSVIElement(0)
00142     , fErrorStack(0)
00143     , fSchemaInfoList(0)
00144     , fCachedSchemaInfoList(0)
00145 {
00146     CleanupType cleanup(this, &SGXMLScanner::cleanUp);
00147 
00148     try
00149     {
00150         commonInit();
00151     }
00152     catch(const OutOfMemoryException&)
00153     {
00154         // Don't cleanup when out of memory, since executing the
00155         // code can cause problems.
00156         cleanup.release();
00157 
00158         throw;
00159     }
00160 
00161     cleanup.release();
00162 }
00163 
00164 SGXMLScanner::~SGXMLScanner()
00165 {
00166     cleanUp();
00167 }
00168 
00169 // ---------------------------------------------------------------------------
00170 //  SGXMLScanner: Getter methods
00171 // ---------------------------------------------------------------------------
00172 NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool()
00173 {
00174     return 0;
00175 }
00176 
00177 const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const
00178 {
00179     return 0;
00180 }
00181 
00182 // ---------------------------------------------------------------------------
00183 //  SGXMLScanner: Main entry point to scan a document
00184 // ---------------------------------------------------------------------------
00185 void SGXMLScanner::scanDocument(const InputSource& src)
00186 {
00187     //  Bump up the sequence id for this parser instance. This will invalidate
00188     //  any previous progressive scan tokens.
00189     fSequenceId++;
00190 
00191     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
00192 
00193     try
00194     {
00195         //  Reset the scanner and its plugged in stuff for a new run. This
00196         //  resets all the data structures, creates the initial reader and
00197         //  pushes it on the stack, and sets up the base document path.
00198         scanReset(src);
00199 
00200         // If we have a document handler, then call the start document
00201         if (fDocHandler)
00202             fDocHandler->startDocument();
00203 
00204         //  Scan the prolog part, which is everything before the root element
00205         //  including the DTD subsets.
00206         scanProlog();
00207 
00208         //  If we got to the end of input, then its not a valid XML file.
00209         //  Else, go on to scan the content.
00210         if (fReaderMgr.atEOF())
00211         {
00212             emitError(XMLErrs::EmptyMainEntity);
00213         }
00214         else
00215         {
00216             // Scan content, and tell it its not an external entity
00217             if (scanContent())
00218             {
00219                 // Do post-parse validation if required
00220                 if (fValidate)
00221                 {
00222                     //  We handle ID reference semantics at this level since
00223                     //  its required by XML 1.0.
00224                     checkIDRefs();
00225 
00226                     // Then allow the validator to do any extra stuff it wants
00227 //                    fValidator->postParseValidation();
00228                 }
00229 
00230                 // That went ok, so scan for any miscellaneous stuff
00231                 if (!fReaderMgr.atEOF())
00232                     scanMiscellaneous();
00233             }
00234         }
00235 
00236         // If we have a document handler, then call the end document
00237         if (fDocHandler)
00238             fDocHandler->endDocument();
00239     }
00240     //  NOTE:
00241     //
00242     //  In all of the error processing below, the emitError() call MUST come
00243     //  before the flush of the reader mgr, or it will fail because it tries
00244     //  to find out the position in the XML source of the error.
00245     catch(const XMLErrs::Codes)
00246     {
00247         // This is a 'first failure' exception, so fall through
00248     }
00249     catch(const XMLValid::Codes)
00250     {
00251         // This is a 'first fatal error' type exit, so fall through
00252     }
00253     catch(const XMLException& excToCatch)
00254     {
00255         //  Emit the error and catch any user exception thrown from here. Make
00256         //  sure in all cases we flush the reader manager.
00257         fInException = true;
00258         try
00259         {
00260             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
00261                 emitError
00262                 (
00263                     XMLErrs::XMLException_Warning
00264                     , excToCatch.getCode()
00265                     , excToCatch.getMessage()
00266                 );
00267             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
00268                 emitError
00269                 (
00270                     XMLErrs::XMLException_Fatal
00271                     , excToCatch.getCode()
00272                     , excToCatch.getMessage()
00273                 );
00274             else
00275                 emitError
00276                 (
00277                     XMLErrs::XMLException_Error
00278                     , excToCatch.getCode()
00279                     , excToCatch.getMessage()
00280                 );
00281         }
00282         catch(const OutOfMemoryException&)
00283         {
00284             // This is a special case for out-of-memory
00285             // conditions, because resetting the ReaderMgr
00286             // can be problematic.
00287             resetReaderMgr.release();
00288 
00289             throw;
00290         }
00291     }
00292     catch(const OutOfMemoryException&)
00293     {
00294         // This is a special case for out-of-memory
00295         // conditions, because resetting the ReaderMgr
00296         // can be problematic.
00297         resetReaderMgr.release();
00298 
00299         throw;
00300     }
00301 }
00302 
00303 
//  Progressive scan step: senses the next top level token and processes it.
//
//  token   The progressive scan token; it is checked with isLegalToken() and
//          a RuntimeException (Scan_BadPScanToken) is thrown if it fails.
//
//  Returns true when the step completed without reaching the end-of-input
//  token, in which case the ReaderMgr reset janitor is released. Returns
//  false when the end-of-input token was seen, or when an XMLErrs::Codes,
//  XMLValid::Codes or XMLException was caught; in those cases the janitor is
//  left armed. An OutOfMemoryException is rethrown after releasing the
//  janitor.
//
//  When a start or end tag clears 'gotData', the end-of-document work
//  (ID reference check, miscellaneous scan, identity constraint and document
//  handler endDocument calls) is done here and true is still returned.
00304 bool SGXMLScanner::scanNext(XMLPScanToken& token)
00305 {
00306     // Make sure this token is still legal
00307     if (!isLegalToken(token))
00308         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
00309 
00310     // Find the next token and remember the reader id
00311     XMLSize_t orgReader;
00312     XMLTokens curToken;
00313 
00314     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
00315 
00316     bool retVal = true;
00317 
00318     try
00319     {
00320         while (true)
00321         {
00322             //  We have to handle any end of entity exceptions that happen here.
00323             //  We could be at the end of X nested entities, each of which will
00324             //  generate an end of entity exception as we try to move forward.
00325             try
00326             {
00327                 curToken = senseNextToken(orgReader);
00328                 break;
00329             }
00330             catch(const EndOfEntityException& toCatch)
00331             {
00332                 // Send an end of entity reference event
00333                 if (fDocHandler)
00334                     fDocHandler->endEntityReference(toCatch.getEntity());
00335             }
00336         }
00337 
00338         if (curToken == Token_CharData)
00339         {
00340             scanCharData(fCDataBuf);
00341         }
00342         else if (curToken == Token_EOF)
00343         {
                  //  Input ended; any element still on the stack was never
                  //  closed, so report the top one.
00344             if (!fElemStack.isEmpty())
00345             {
00346                 const ElemStack::StackElem* topElem = fElemStack.popTop();
00347                 emitError
00348                 (
00349                     XMLErrs::EndedWithTagsOnStack
00350                     , topElem->fThisElement->getFullName()
00351                 );
00352             }
00353 
00354             retVal = false;
00355         }
00356         else
00357         {
00358             // It's some sort of markup
00359             bool gotData = true;
00360             switch(curToken)
00361             {
00362                 case Token_CData :
00363                     // Make sure we are within content
00364                     if (fElemStack.isEmpty())
00365                         emitError(XMLErrs::CDATAOutsideOfContent);
00366                     scanCDSection();
00367                     break;
00368 
00369                 case Token_Comment :
00370                     scanComment();
00371                     break;
00372 
00373                 case Token_EndTag :
00374                     scanEndTag(gotData);
00375                     break;
00376 
00377                 case Token_PI :
00378                     scanPI();
00379                     break;
00380 
00381                 case Token_StartTag :
00382                     scanStartTag(gotData);
00383                     break;
00384 
00385                 default :
00386                     fReaderMgr.skipToChar(chOpenAngle);
00387                     break;
00388             }
00389 
                  //  The markup must end in the same reader it started in
00390             if (orgReader != fReaderMgr.getCurrentReaderNum())
00391                 emitError(XMLErrs::PartialMarkupInEntity);
00392 
00393             // If we hit the end, then do the miscellaneous part
00394             if (!gotData)
00395             {
00396                 // Do post-parse validation if required
00397                 if (fValidate)
00398                 {
00399                     //  We handle ID reference semantics at this level since
00400                     //  it's required by XML 1.0.
00401                     checkIDRefs();
00402 
00403                     // Then allow the validator to do any extra stuff it wants
00404 //                    fValidator->postParseValidation();
00405                 }
00406 
00407                 // That went ok, so scan for any miscellaneous stuff
00408                 scanMiscellaneous();
00409 
00410                 if (toCheckIdentityConstraint())
00411                     fICHandler->endDocument();
00412 
00413                 if (fDocHandler)
00414                     fDocHandler->endDocument();
00415             }
00416         }
00417     }
00418     //  NOTE:
00419     //
00420     //  In all of the error processing below, the emitError() call MUST come
00421     //  before the flush of the reader mgr, or it will fail because it tries
00422     //  to find out the position in the XML source of the error.
00423     catch(const XMLErrs::Codes)
00424     {
00425         // This is a 'first failure' exception, so return failure
00426         retVal = false;
00427     }
00428     catch(const XMLValid::Codes)
00429     {
00430         // This is a 'first fatal error' type exit, so return failure
00431         retVal = false;
00432     }
00433     catch(const XMLException& excToCatch)
00434     {
00435         //  Emit the error and catch any user exception thrown from here. Make
00436         //  sure in all cases we flush the reader manager.
00437         fInException = true;
00438         try
00439         {
                  //  Map the exception's error type onto the matching
                  //  XMLException_* error code: warning, fatal, or plain error.
00440             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
00441                 emitError
00442                 (
00443                     XMLErrs::XMLException_Warning
00444                     , excToCatch.getCode()
00445                     , excToCatch.getMessage()
00446                 );
00447             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
00448                 emitError
00449                 (
00450                     XMLErrs::XMLException_Fatal
00451                     , excToCatch.getCode()
00452                     , excToCatch.getMessage()
00453                 );
00454             else
00455                 emitError
00456                 (
00457                     XMLErrs::XMLException_Error
00458                     , excToCatch.getCode()
00459                     , excToCatch.getMessage()
00460                 );
00461         }
00462         catch(const OutOfMemoryException&)
00463         {
00464             // This is a special case for out-of-memory
00465             // conditions, because resetting the ReaderMgr
00466             // can be problematic.
00467             resetReaderMgr.release();
00468 
00469             throw;
00470         }
00471 
00472         retVal = false;
00473     }
00474     catch(const OutOfMemoryException&)
00475     {
00476         // This is a special case for out-of-memory
00477         // conditions, because resetting the ReaderMgr
00478         // can be problematic.
00479         resetReaderMgr.release();
00480 
00481         throw;
00482     }
00483 
00484     // If we are not at the end, release the object that will
00485     // reset the ReaderMgr.
00486     if (retVal)
00487         resetReaderMgr.release();
00488 
00489     return retVal;
00490 }
00491 
00492 // ---------------------------------------------------------------------------
00493 //  SGXMLScanner: Private scanning methods
00494 // ---------------------------------------------------------------------------
00495 
00496 //  This method is called from scanStartTag() to handle the very raw initial
00497 //  scan of the attributes. It just fills in the passed collection with
00498 //  key/value pairs for each attribute. No processing is done on them at all.
//
//  elemName  Name of the element being scanned; only used in the
//            UnterminatedStartTag error messages.
//  toFill    Receives one name/value pair per attribute. Existing entries
//            are overwritten first and new ones are appended once they run
//            out.
//  isEmpty   Set to true when the tag is closed with a forward slash,
//            false otherwise.
//
//  Returns the number of attributes stored. For each stored attribute the
//  colon position reported by getQName() is recorded at the same index in
//  fRawAttrColonList. Several error recovery paths return early with the
//  count collected so far. Throws UnexpectedEOFException when a null
//  character is found where the tag should continue.
00499 XMLSize_t
00500 SGXMLScanner::rawAttrScan(const   XMLCh* const                elemName
00501                           ,       RefVectorOf<KVStringPair>&  toFill
00502                           ,       bool&                       isEmpty)
00503 {
00504     //  Keep up with how many attributes we've seen so far, and how many
00505     //  elements are available in the vector. This way we can reuse old
00506     //  elements until we run out and then expand it.
00507     XMLSize_t attCount = 0;
00508     XMLSize_t curVecSize = toFill.size();
00509 
00510     // Assume it is not empty
00511     isEmpty = false;
00512 
00513     //  We loop until we either see a /> or >, handling key/value pairs until
00514     //  we get there. We place them in the passed vector, which we will expand
00515     //  as required to hold them.
00516     while (true)
00517     {
00518         // Get the next character, which should be non-space
00519         XMLCh nextCh = fReaderMgr.peekNextChar();
00520 
00521         //  If the next character is not a slash or closed angle bracket,
00522         //  then it must be whitespace, since whitespace is required
00523         //  between the end of the last attribute and the name of the next
00524         //  one.
00525         //
              //  This check only applies once at least one attribute has been
              //  stored.
00526         if (attCount)
00527         {
00528             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
00529             {
00530                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
00531                 {
00532                     // Ok, skip by them and get another char
00533                     fReaderMgr.getNextChar();
00534                     fReaderMgr.skipPastSpaces();
00535                     nextCh = fReaderMgr.peekNextChar();
00536                 }
00537                  else
00538                 {
00539                     // Emit the error but keep on going
00540                     emitError(XMLErrs::ExpectedWhitespace);
00541                 }
00542             }
00543         }
00544 
00545         //  Ok, here we first check for any of the special case characters.
00546         //  If it's not one, then we do the normal case processing, which
00547         //  assumes that we've hit an attribute value. Otherwise, we do all
00548         //  the special case checks.
00549         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
00550         {
00551             //  Assume it's going to be an attribute, so get a name from
00552             //  the input.
00553             int colonPosition;
00554             if (!fReaderMgr.getQName(fAttNameBuf, &colonPosition))
00555             {
00556                 if (fAttNameBuf.isEmpty())
00557                     emitError(XMLErrs::ExpectedAttrName);
00558                 else
00559                     emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
                      //  Give up on this tag: skip past its closing bracket and
                      //  return what has been collected so far.
00560                 fReaderMgr.skipPastChar(chCloseAngle);
00561                 return attCount;
00562             }
00563 
00564             const XMLCh* curAttNameBuf = fAttNameBuf.getRawBuffer();
00565 
00566             // And next must be an equal sign
00567             if (!scanEq())
00568             {
00569                 static const XMLCh tmpList[] =
00570                 {
00571                     chSingleQuote, chDoubleQuote, chCloseAngle
00572                     , chOpenAngle, chForwardSlash, chNull
00573                 };
00574 
00575                 emitError(XMLErrs::ExpectedEqSign);
00576 
00577                 //  Try to sync back up by skipping forward until we
00578                 //  hit something meaningful.
00579                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
00580 
00581                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
00582                 {
00583                     // Jump back to top for normal processing of these
00584                     continue;
00585                 }
00586                 else if ((chFound == chSingleQuote)
00587                       ||  (chFound == chDoubleQuote)
00588                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
00589                 {
00590                     // Just fall through assuming that the value is to follow
00591                 }
00592                 else if (chFound == chOpenAngle)
00593                 {
00594                     // Assume a malformed tag and that new one is starting
00595                     emitError(XMLErrs::UnterminatedStartTag, elemName);
00596                     return attCount;
00597                 }
00598                 else
00599                 {
00600                     // Something went really wrong
00601                     return attCount;
00602                 }
00603             }
00604 
00605             //  Next should be the quoted attribute value. We just do a simple
00606             //  and stupid scan of this value. The only thing we do here
00607             //  is to expand entity references.
00608             if (!basicAttrValueScan(curAttNameBuf, fAttValueBuf))
00609             {
00610                 static const XMLCh tmpList[] =
00611                 {
00612                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
00613                 };
00614 
00615                 emitError(XMLErrs::ExpectedAttrValue);
00616 
00617                 //  It failed, so let's try to get synced back up. We skip
00618                 //  forward until we find some whitespace or one of the
00619                 //  chars in our list.
00620                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
00621 
00622                 if ((chFound == chCloseAngle)
00623                 ||  (chFound == chForwardSlash)
00624                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
00625                 {
00626                     //  Just fall through and process this attribute, though
00627                     //  the value will be "".
00628                 }
00629                 else if (chFound == chOpenAngle)
00630                 {
00631                     // Assume a malformed tag and that new one is starting
00632                     emitError(XMLErrs::UnterminatedStartTag, elemName);
00633                     return attCount;
00634                 }
00635                 else
00636                 {
00637                     // Something went really wrong
00638                     return attCount;
00639                 }
00640             }
00641 
00642             //  And now let's add it to the passed collection. If we have not
00643             //  filled it up yet, then we use the next element. Else we add
00644             //  a new one.
00645             KVStringPair* curPair = 0;
00646             if (attCount >= curVecSize)
00647             {
00648                 curPair = new (fMemoryManager) KVStringPair
00649                 (
00650                     curAttNameBuf
00651                     , fAttNameBuf.getLen()
00652                     , fAttValueBuf.getRawBuffer()
00653                     , fAttValueBuf.getLen()
00654                     , fMemoryManager
00655                 );
00656                 toFill.addElement(curPair);
00657             }
00658              else
00659             {
00660                 curPair = toFill.elementAt(attCount);
00661                 curPair->set
00662                 (
00663                     curAttNameBuf
00664                     , fAttNameBuf.getLen()
00665                     , fAttValueBuf.getRawBuffer()
00666                     , fAttValueBuf.getLen()
00667                 );
00668             }
                  //  Remember where the colon sits in this attribute's name,
                  //  growing the position list first if it has no slot left.
00669             if (attCount >= fRawAttrColonListSize) {
00670                 resizeRawAttrColonList();
00671             }
00672             fRawAttrColonList[attCount] = colonPosition;
00673 
00674             // And bump the count of attributes we've gotten
00675             attCount++;
00676 
00677             // And go to the top again for another attribute
00678             continue;
00679         }
00680 
00681         //  It was some special case character so do all of the checks and
00682         //  deal with it.
              //  A null character here is treated as an unexpected end of input.
00683         if (!nextCh)
00684             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
00685 
00686         if (nextCh == chForwardSlash)
00687         {
                  //  Empty element form: the slash must be followed by '>'
00688             fReaderMgr.getNextChar();
00689             isEmpty = true;
00690             if (!fReaderMgr.skippedChar(chCloseAngle))
00691                 emitError(XMLErrs::UnterminatedStartTag, elemName);
00692             break;
00693         }
00694         else if (nextCh == chCloseAngle)
00695         {
00696             fReaderMgr.getNextChar();
00697             break;
00698         }
00699         else if (nextCh == chOpenAngle)
00700         {
00701             //  Check for this one specially, since it's going to be common
00702             //  and it is kind of auto-recovering since we've already hit the
00703             //  next open bracket, which is what we would have seeked to (and
00704             //  skipped this whole tag.)
00705             emitError(XMLErrs::UnterminatedStartTag, elemName);
00706             break;
00707         }
00708         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
00709         {
00710             //  Check for this one specially, which is probably a missing
00711             //  attribute name, e.g. ="value". Just issue expected name
00712             //  error and eat the quoted string, then jump back to the
00713             //  top again.
00714             emitError(XMLErrs::ExpectedAttrName);
00715             fReaderMgr.getNextChar();
00716             fReaderMgr.skipQuotedString(nextCh);
00717             fReaderMgr.skipPastSpaces();
00718             continue;
00719         }
00720     }
00721 
00722     return attCount;
00723 }
00724 
00725 
00726 //  This method will kick off the scanning of the primary content of the
00727 //  document, i.e. the elements.
00728 bool SGXMLScanner::scanContent()
00729 {
00730     //  Go into a loop until we hit the end of the root element, or we fall
00731     //  out because there is no root element.
00732     //
00733     //  We have to do kind of a deeply nested double loop here in order to
00734     //  avoid doing the setup/teardown of the exception handler on each
00735     //  round. Doing it this way we only do it when an exception actually
00736     //  occurs.
00737     bool gotData = true;
00738     bool inMarkup = false;
00739     while (gotData)
00740     {
00741         try
00742         {
00743             while (gotData)
00744             {
00745                 //  Sense what the next top level token is. According to what
00746                 //  this tells us, we will call something to handle that kind
00747                 //  of thing.
00748                 XMLSize_t orgReader;
00749                 const XMLTokens curToken = senseNextToken(orgReader);
00750 
00751                 //  Handle character data and end of file specially. Char data
00752                 //  is not markup so we don't want to handle it in the loop
00753                 //  below.
00754                 if (curToken == Token_CharData)
00755                 {
00756                     //  Scan the character data and call appropriate events. Let
00757                     //  him use our local character data buffer for efficiency.
00758                     scanCharData(fCDataBuf);
00759                     continue;
00760                 }
00761                 else if (curToken == Token_EOF)
00762                 {
00763                     //  The element stack better be empty at this point or we
00764                     //  ended prematurely before all elements were closed.
00765                     if (!fElemStack.isEmpty())
00766                     {
00767                         const ElemStack::StackElem* topElem = fElemStack.popTop();
00768                         emitError
00769                         (
00770                             XMLErrs::EndedWithTagsOnStack
00771                             , topElem->fThisElement->getFullName()
00772                         );
00773                     }
00774 
00775                     // Its the end of file, so clear the got data flag
00776                     gotData = false;
00777                     continue;
00778                 }
00779 
00780                 // We are in some sort of markup now
00781                 inMarkup = true;
00782 
00783                 //  According to the token we got, call the appropriate
00784                 //  scanning method.
00785                 switch(curToken)
00786                 {
00787                     case Token_CData :
00788                         // Make sure we are within content
00789                         if (fElemStack.isEmpty())
00790                             emitError(XMLErrs::CDATAOutsideOfContent);
00791                         scanCDSection();
00792                         break;
00793 
00794                     case Token_Comment :
00795                         scanComment();
00796                         break;
00797 
00798                     case Token_EndTag :
00799                         scanEndTag(gotData);
00800                         break;
00801 
00802                     case Token_PI :
00803                         scanPI();
00804                         break;
00805 
00806                     case Token_StartTag :
00807                         scanStartTag(gotData);
00808                         break;
00809 
00810                     default :
00811                         fReaderMgr.skipToChar(chOpenAngle);
00812                         break;
00813                 }
00814 
00815                 if (orgReader != fReaderMgr.getCurrentReaderNum())
00816                     emitError(XMLErrs::PartialMarkupInEntity);
00817 
00818                 // And we are back out of markup again
00819                 inMarkup = false;
00820             }
00821         }
00822         catch(const EndOfEntityException& toCatch)
00823         {
00824             //  If we were in some markup when this happened, then its a
00825             //  partial markup error.
00826             if (inMarkup)
00827                 emitError(XMLErrs::PartialMarkupInEntity);
00828 
00829             // Send an end of entity reference event
00830             if (fDocHandler)
00831                 fDocHandler->endEntityReference(toCatch.getEntity());
00832 
00833             inMarkup = false;
00834         }
00835     }
00836 
00837     // It went ok, so return success
00838     return true;
00839 }
00840 //  Scans an end tag: checks it against the element on top of fElemStack, runs
00841 //  validation, pops that element and notifies the PSVI and document handlers.
00842 void SGXMLScanner::scanEndTag(bool& gotData)
00843 {
00844     //  Assume we will still have data until proven otherwise. It will only
00845     //  ever be false if this is the end of the root element.
00846     gotData = true;
00847 
00848     //  An empty element stack means an unbalanced end tag (more ends than
00849     //  starts, perhaps because bad text caused one to be skipped): report it,
00850     //  skip past chCloseAngle, then raise RuntimeException via ThrowXMLwithMemMgr.
00851     if (fElemStack.isEmpty())
00852     {
00853         emitError(XMLErrs::MoreEndThanStartTags);
00854         fReaderMgr.skipPastChar(chCloseAngle);
00855         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
00856     }
00857 
00858     //  URI id reported for this element (fEmptyNamespaceId if namespaces are
00859     //  off). Note that nothing has been popped off the element stack yet.
00860     unsigned int uriId = (fDoNamespaces)
00861         ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
00862     //  We don't own the stack entry; the stack just keeps them and reuses them.
00863     // Make sure that it's the end of the element that we expect
00864     const XMLCh *elemName = fElemStack.getCurrentSchemaElemName();
00865     const ElemStack::StackElem* topElem = fElemStack.topElement();
00866     if (!fReaderMgr.skippedStringLong(elemName))
00867     {
00868         emitError
00869         (
00870             XMLErrs::ExpectedEndOfTagX
00871             , elemName
00872         );
00873         fReaderMgr.skipPastChar(chCloseAngle);
00874         fElemStack.popTop();
00875         return;     // gotData stays true and fErrorStack is not popped on this path
00876     }
00877     //  Take the topmost entry off fErrorStack as this element's PSVI error flag.
00878     fPSVIElemContext.fErrorOccurred = fErrorStack->pop();
00879 
00880     // Make sure we are back on the same reader as where we started
00881     if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
00882         emitError(XMLErrs::PartialTagMarkupError);
00883 
00884     // Skip optional whitespace
00885     fReaderMgr.skipPastSpaces();
00886 
00887     // Make sure we find the closing bracket
00888     if (!fReaderMgr.skippedChar(chCloseAngle))
00889     {
00890         emitError
00891         (
00892             XMLErrs::UnterminatedEndTag
00893             , topElem->fThisElement->getFullName()
00894         );
00895     }
00896     //  PSVI type info comes from the validator only for a validated, declared element.
00897     if (fValidate && topElem->fThisElement->isDeclared())
00898     {
00899         fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
00900         if(!fPSVIElemContext.fCurrentTypeInfo)
00901             fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
00902         else
00903             fPSVIElemContext.fCurrentDV = 0;
00904         if (fPSVIHandler)
00905         {
00906             fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
00907             //  A zero-length normalized value is recorded as null.
00908             if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
00909                 fPSVIElemContext.fNormalizedValue = 0;
00910 
00911         }
00912     }
00913     else
00914     {
00915         fPSVIElemContext.fCurrentDV = 0;
00916         fPSVIElemContext.fCurrentTypeInfo = 0;
00917         fPSVIElemContext.fNormalizedValue = 0;
00918     }
00919 
00920     //  If validation is enabled, then let's pass the validator the list of
00921     //  children and this element and let it validate the content.
00922     DatatypeValidator* psviMemberType = 0;
00923     if (fValidate)
00924     {
00925         XMLSize_t failure;
00926         bool res = fValidator->checkContent
00927         (
00928             topElem->fThisElement
00929             , topElem->fChildren
00930             , topElem->fChildCount
00931             , &failure
00932         );
00933 
00934         if (!res)
00935         {
00936             //  One of the elements is not valid for the content. NOTE that
00937             //  if no children were provided but the content model requires
00938             //  them, failure comes back with a zero value. But we cannot use
00939             //  that to index the child array in this case, and have to put out
00940             //  a special message. An index past the children has its own message.
00941             if (!topElem->fChildCount)
00942             {
00943                 fValidator->emitError
00944                 (
00945                     XMLValid::EmptyNotValidForContent
00946                     , topElem->fThisElement->getFormattedContentModel()
00947                 );
00948             }
00949             else if (failure >= topElem->fChildCount)
00950             {
00951                 fValidator->emitError
00952                 (
00953                     XMLValid::NotEnoughElemsForCM
00954                     , topElem->fThisElement->getFormattedContentModel()
00955                 );
00956             }
00957             else
00958             {
00959                 fValidator->emitError
00960                 (
00961                     XMLValid::ElementNotValidForContent
00962                     , topElem->fChildren[failure]->getRawName()
00963                     , topElem->fThisElement->getFormattedContentModel()
00964                 );
00965             }
00966 
00967         }
00968 
00969         // update PSVI info: flag validator errors, else fetch the member type of a Union DV
00970         if (((SchemaValidator*) fValidator)->getErrorOccurred())
00971             fPSVIElemContext.fErrorOccurred = true;
00972         else if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
00973             psviMemberType = fValidationContext->getValidatingMemberType();
00974         if (fPSVIHandler)
00975         {
00976             fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
00977             if(fPSVIElemContext.fIsSpecified)
00978                 fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)topElem->fThisElement)->getDefaultValue();
00979         }
00980 
00981         // call matchers and de-activate context (only when toCheckIdentityConstraint() says so)
00982         if (toCheckIdentityConstraint())
00983         {
00984             fICHandler->deactivateContext
00985                         (
00986                          (SchemaElementDecl *) topElem->fThisElement
00987                        , fContent.getRawBuffer()
00988                        , fValidationContext
00989                        , fPSVIElemContext.fCurrentDV
00990                         );
00991         }
00992 
00993     }
00994 
00995     // Popped only now: the QName dv needed topElem to resolve URIs on the checkContent
00996     fElemStack.popTop();
00997 
00998     // See if it was the root element (stack now empty), to avoid multiple calls below
00999     const bool isRoot = fElemStack.isEmpty();
01000 
01001     if (fPSVIHandler)
01002     {
01003         endElementPSVI
01004         (
01005             (SchemaElementDecl*)topElem->fThisElement, psviMemberType
01006         );
01007     }
01008     // now we can reset the datatype buffer, since the
01009     // application has had a chance to copy the characters somewhere else
01010     ((SchemaValidator *)fValidator)->clearDatatypeBuffer();
01011 
01012     // If we have a doc handler, tell it about the end tag
01013     if (fDocHandler)
01014     {
01015         if (fGrammarType == Grammar::SchemaGrammarType) {
01016             if (topElem->fPrefixColonPos != -1)
01017                 fPrefixBuf.set(elemName, topElem->fPrefixColonPos);
01018             else
01019                 fPrefixBuf.reset();
01020         }
01021         else {
01022             fPrefixBuf.set(topElem->fThisElement->getElementName()->getPrefix());
01023         }
01024         fDocHandler->endElement
01025         (
01026             *topElem->fThisElement
01027             , uriId
01028             , isRoot
01029             , fPrefixBuf.getRawBuffer()
01030         );
01031     }
01032 
01033     if (!isRoot)
01034     {
01035         // update error information: OR this element's flag into the entry now on top
01036         fErrorStack->push((fErrorStack->size() && fErrorStack->pop()) || fPSVIElemContext.fErrorOccurred);
01037     }
01038 
01039     // If this was the root, then done with content
01040     gotData = !isRoot;
01041 
01042     if (gotData) {
01043 
01044         // Restore the grammar held by the element stack's current level
01045         fGrammar = fElemStack.getCurrentGrammar();
01046         fGrammarType = fGrammar->getGrammarType();
01047         fValidator->setGrammar(fGrammar);
01048 
01049         // Restore the validation flag
01050         fValidate = fElemStack.getValidationFlag();
01051     }
01052 }
01053 
01054 
01055 //  Skips over a DOCTYPE declaration without interpreting it. No DTD scanner
01056 //  is invoked from this method; it only asks the reader manager to skip
01057 //  ahead, so neither an internal nor an external subset is processed here.
01058 //
01059 //  When we get here the '<!DOCTYPE' part has already been scanned, which is
01060 //  what told us that we had a doc type decl to step over.
01061 void SGXMLScanner::scanDocTypeDecl()
01062 {
01063     // Nothing in the declaration is interpreted here; it is only skipped.
01064     // REVISIT: Should we issue a warning
01065     static const XMLCh stopSet[] = { chOpenSquare, chCloseAngle, chNull };
01066 
01067     //  Hand the reader manager the stop set and note whether the character
01068     //  it reports back is the open square bracket.
01069     const bool hitOpenSquare = (fReaderMgr.skipUntilIn(stopSet) == chOpenSquare);
01070 
01071     //  In that case go past the close square bracket first; either way the
01072     //  last step is to go past the close angle bracket.
01073     if (hitOpenSquare) { fReaderMgr.skipPastChar(chCloseSquare); }
01074     fReaderMgr.skipPastChar(chCloseAngle);
01075 }
01076 
01077 //  This method is called to scan a start tag when we are processing
01078 //  namespaces. This method is called after we've scanned the < of a
01079 //  start tag. So we have to get the element name, then scan the attributes,
01080 //  after which we are either going to see >, />, or attributes followed
01081 //  by one of those sequences.
01082 bool SGXMLScanner::scanStartTag(bool& gotData)
01083 {
01084     //  Assume we will still have data until proven otherwise. It will only
01085     //  ever be false if this is the root and its empty.
01086     gotData = true;
01087 
01088     // Reset element content
01089     fContent.reset();
01090 
01091     //  The current position is after the open bracket, so we need to read in
01092     //  in the element name.
01093     int prefixColonPos;
01094     if (!fReaderMgr.getQName(fQNameBuf, &prefixColonPos))
01095     {
01096         if (fQNameBuf.isEmpty())
01097             emitError(XMLErrs::ExpectedElementName);
01098         else
01099             emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
01100         fReaderMgr.skipToChar(chOpenAngle);
01101         return false;
01102     }
01103 
01104     // See if its the root element
01105     const bool isRoot = fElemStack.isEmpty();
01106 
01107     // Skip any whitespace after the name
01108     fReaderMgr.skipPastSpaces();
01109 
01110     //  First we have to do the rawest attribute scan. We don't do any
01111     //  normalization of them at all, since we don't know yet what type they
01112     //  might be (since we need the element decl in order to do that.)
01113     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
01114     bool isEmpty;
01115     XMLSize_t attCount = rawAttrScan
01116     (
01117         qnameRawBuf
01118         , *fRawAttrList
01119         , isEmpty
01120     );
01121 
01122     // save the contentleafname and currentscope before addlevel, for later use
01123     ContentLeafNameTypeVector* cv = 0;
01124     XMLContentModel* cm = 0;
01125     unsigned int currentScope = Grammar::TOP_LEVEL_SCOPE;
01126     bool laxThisOne = false;
01127     if (!isRoot)
01128     {
01129         // schema validator will have correct type if validating
01130         SchemaElementDecl* tempElement = (SchemaElementDecl*)
01131             fElemStack.topElement()->fThisElement;
01132         SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
01133         ComplexTypeInfo *currType = 0;
01134 
01135         if (fValidate)
01136         {
01137             currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
01138             if (currType)
01139                 modelType = (SchemaElementDecl::ModelTypes)currType->getContentType();
01140             else // something must have gone wrong
01141                 modelType = SchemaElementDecl::Any;
01142         }
01143         else
01144         {
01145             currType = tempElement->getComplexTypeInfo();
01146         }
01147 
01148         if ((modelType == SchemaElementDecl::Mixed_Simple)
01149           ||  (modelType == SchemaElementDecl::Mixed_Complex)
01150           ||  (modelType == SchemaElementDecl::Children))
01151         {
01152             cm = currType->getContentModel();
01153             cv = cm->getContentLeafNameTypeVector();
01154             currentScope = fElemStack.getCurrentScope();
01155         }
01156         else if (modelType == SchemaElementDecl::Any) {
01157             laxThisOne = true;
01158         }
01159     }
01160 
01161     //  Now, since we might have to update the namespace map for this element,
01162     //  but we don't have the element decl yet, we just tell the element stack
01163     //  to expand up to get ready.
01164     XMLSize_t elemDepth = fElemStack.addLevel();
01165     fElemStack.setValidationFlag(fValidate);
01166     fElemStack.setPrefixColonPos(prefixColonPos);
01167 
01168     //  Check if there is any external schema location specified, and if we are at root,
01169     //  go through them first before scanning those specified in the instance document
01170     if (isRoot
01171         && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
01172 
01173         if (fExternalSchemaLocation)
01174             parseSchemaLocation(fExternalSchemaLocation, true);
01175         if (fExternalNoNamespaceSchemaLocation)
01176             resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString, true);
01177     }
01178 
01179     //  Make an initial pass through the list and find any xmlns attributes or
01180     //  schema attributes.
01181     if (attCount)
01182         scanRawAttrListforNameSpaces(attCount);
01183 
01184     //  Resolve the qualified name to a URI and name so that we can look up
01185     //  the element decl for this element. We have now update the prefix to
01186     //  namespace map so we should get the correct element now.
01187     unsigned int uriId = resolveQNameWithColon
01188     (
01189         qnameRawBuf
01190         , fPrefixBuf
01191         , ElemStack::Mode_Element
01192         , prefixColonPos
01193     );
01194 
01195     //if schema, check if we should lax or skip the validation of this element
01196     bool parentValidation = fValidate;
01197     if (cv) {
01198         QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
01199         // elementDepth will be > 0, as cv is only constructed if element is not
01200         // root.
01201         laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
01202     }
01203 
01204     //  Look up the element now in the grammar. This will get us back a
01205     //  generic element decl object. We tell him to fault one in if he does
01206     //  not find it.
01207     XMLElementDecl* elemDecl = 0;
01208     bool wasAdded = false;
01209     const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
01210     const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
01211 
01212     if (uriId != fEmptyNamespaceId) {
01213 
01214         // Check in current grammar before switching if necessary
01215         elemDecl = fGrammar->getElemDecl
01216         (
01217           uriId
01218           , nameRawBuf
01219           , qnameRawBuf
01220           , currentScope
01221         );
01222         if(!elemDecl)
01223         {
01224             // look in the list of undeclared elements, as would have been done
01225             // before we made grammars stateless:
01226             elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
01227         }
01228         // this is initialized correctly only if there is
01229         // no element decl.  The other uses in this scope will only
01230         // be encountered if there continues to be no element decl--which
01231         // implies that this will have been initialized correctly.
01232         unsigned int orgGrammarUri = uriId;
01233         if (!elemDecl && ( orgGrammarUri = fURIStringPool->getId(original_uriStr)) != uriId) {
01234             // not found, switch to the specified grammar
01235             const XMLCh* uriStr = getURIText(uriId);
01236             bool errorCondition = !switchGrammar(uriStr) && fValidate;
01237             if (errorCondition && !laxThisOne)
01238             {
01239                 fValidator->emitError
01240                 (
01241                     XMLValid::GrammarNotFound
01242                     ,uriStr
01243                 );
01244             }
01245 
01246             elemDecl = fGrammar->getElemDecl
01247             (
01248               uriId
01249               , nameRawBuf
01250               , qnameRawBuf
01251               , currentScope
01252             );
01253         }
01254 
01255         if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
01256             // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
01257             elemDecl = fGrammar->getElemDecl
01258                        (
01259                            uriId
01260                            , nameRawBuf
01261                            , qnameRawBuf
01262                            , Grammar::TOP_LEVEL_SCOPE
01263                        );
01264             if(!elemDecl)
01265             {
01266                 // look in the list of undeclared elements, as would have been done
01267                 // before we made grammars stateless:
01268                 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
01269             }
01270             if(!elemDecl) {
01271                 // still not found in specified uri
01272                 // try emptyNamespace see if element should be un-qualified.
01273                 // Use a temp variable until we decide this is the case
01274                 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
01275                            (
01276                                fEmptyNamespaceId
01277                                , nameRawBuf
01278                                , qnameRawBuf
01279                                , currentScope
01280                            );
01281                 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
01282                     fValidator->emitError
01283                     (
01284                         XMLValid::ElementNotUnQualified
01285                         , qnameRawBuf
01286                     );
01287                     elemDecl = tempElemDecl;
01288                 }
01289             }
01290         }
01291 
01292         if (!elemDecl) {
01293             // still not found, fault this in and issue error later
01294             // switch back to original grammar first (if necessary)
01295             if(orgGrammarUri != uriId)
01296             {
01297                 switchGrammar(original_uriStr);
01298             }
01299             elemDecl = new (fMemoryManager) SchemaElementDecl
01300             (
01301                 fPrefixBuf.getRawBuffer()
01302                 , nameRawBuf
01303                 , uriId
01304                 , SchemaElementDecl::Any
01305                 , Grammar::TOP_LEVEL_SCOPE
01306                 , fMemoryManager
01307             );
01308             elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
01309             wasAdded = true;
01310         }
01311     }
01312     else if (!elemDecl)
01313     {
01314         //the element has no prefix,
01315         //thus it is either a non-qualified element defined in current targetNS
01316         //or an element that is defined in the globalNS
01317 
01318         //try unqualifed first
01319         elemDecl = fGrammar->getElemDecl
01320                    (
01321                       uriId
01322                     , nameRawBuf
01323                     , qnameRawBuf
01324                     , currentScope
01325                     );
01326         if(!elemDecl)
01327         {
01328             // look in the list of undeclared elements, as would have been done
01329             // before we made grammars stateless:
01330             elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, currentScope);
01331         }
01332         // this is initialized correctly only if there is
01333         // no element decl.  The other uses in this scope will only
01334         // be encountered if there continues to be no element decl--which
01335         // implies that this will have been initialized correctly.
01336         unsigned int orgGrammarUri = fEmptyNamespaceId;
01337         if (!elemDecl && (orgGrammarUri = fURIStringPool->getId(original_uriStr)) != fEmptyNamespaceId) {
01338             //not found, switch grammar and try globalNS
01339             bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
01340             if (errorCondition && !laxThisOne)
01341             {
01342                 fValidator->emitError
01343                 (
01344                     XMLValid::GrammarNotFound
01345                   , XMLUni::fgZeroLenString
01346                 );
01347             }
01348 
01349             elemDecl = fGrammar->getElemDecl
01350             (
01351               uriId
01352               , nameRawBuf
01353               , qnameRawBuf
01354               , currentScope
01355             );
01356         }
01357 
01358         if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
01359             // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
01360             elemDecl = fGrammar->getElemDecl
01361                        (
01362                            uriId
01363                            , nameRawBuf
01364                            , qnameRawBuf
01365                            , Grammar::TOP_LEVEL_SCOPE
01366                        );
01367             if(!elemDecl)
01368             {
01369                 // look in the list of undeclared elements, as would have been done
01370                 // before we made grammars stateless:
01371                 elemDecl = fElemNonDeclPool->getByKey(nameRawBuf, uriId, (int)Grammar::TOP_LEVEL_SCOPE);
01372             }
01373             if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
01374                 // still Not found in specified uri
01375                 // go to original Grammar again to see if element needs to be fully qualified.
01376                 bool errorCondition = !switchGrammar(original_uriStr) && fValidate;
01377                 if (errorCondition && !laxThisOne)
01378                 {
01379                     fValidator->emitError
01380                     (
01381                         XMLValid::GrammarNotFound
01382                         ,original_uriStr
01383                     );
01384                 }
01385 
01386                 // Use a temp variable until we decide this is the case
01387                 XMLElementDecl* tempElemDecl = fGrammar->getElemDecl
01388                            (
01389                                orgGrammarUri
01390                                , nameRawBuf
01391                                , qnameRawBuf
01392                                , currentScope
01393                            );
01394                 if (tempElemDecl && tempElemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
01395                     fValidator->emitError
01396                     (
01397                         XMLValid::ElementNotQualified
01398                         , qnameRawBuf
01399                     );
01400                     elemDecl=tempElemDecl;
01401                 }
01402             }
01403         }
01404 
01405         if (!elemDecl) {
01406             // still not found, fault this in and issue error later
01407             // switch back to original grammar first (if necessary)
01408             if(orgGrammarUri != fEmptyNamespaceId)
01409             {
01410                 switchGrammar(original_uriStr);
01411             }
01412             elemDecl = new (fMemoryManager) SchemaElementDecl
01413             (
01414                 fPrefixBuf.getRawBuffer()
01415                 , nameRawBuf
01416                 , uriId
01417                 , SchemaElementDecl::Any
01418                 , Grammar::TOP_LEVEL_SCOPE
01419                 , fMemoryManager
01420             );
01421             elemDecl->setId(fElemNonDeclPool->put((void*)elemDecl->getBaseName(), uriId, currentScope, (SchemaElementDecl*)elemDecl));
01422             wasAdded = true;
01423         }
01424     }
01425 
01426     // this info needed for DOMTypeInfo
01427     fPSVIElemContext.fErrorOccurred = false;
01428 
01429     //  We do something different here according to whether we found the
01430     //  element or not.
01431     bool bXsiTypeSet= (fValidator)?((SchemaValidator*)fValidator)->getIsXsiTypeSet():false;
01432     if (wasAdded)
01433     {
01434         if (laxThisOne && !bXsiTypeSet) {
01435             fValidate = false;
01436             fElemStack.setValidationFlag(fValidate);
01437         }
01438 
01439         // If validating then emit an error
01440         if (fValidate)
01441         {
01442             // This is to tell the reuse Validator that this element was
01443             // faulted-in, was not an element in the grammar pool originally
01444             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
01445 
01446             if(!bXsiTypeSet)
01447             {
01448                 fValidator->emitError
01449                 (
01450                     XMLValid::ElementNotDefined
01451                     , elemDecl->getFullName()
01452                 );
01453                 fPSVIElemContext.fErrorOccurred = true;
01454             }
01455         }
01456     }
01457     else
01458     {
01459         // If its not marked declared and validating, then emit an error
01460         if (!elemDecl->isDeclared()) {
01461             if(elemDecl->getCreateReason() == XMLElementDecl::NoReason) {
01462                 if(!bXsiTypeSet)
01463                     fPSVIElemContext.fErrorOccurred = true;
01464             }
01465             if (laxThisOne) {
01466                 fValidate = false;
01467                 fElemStack.setValidationFlag(fValidate);
01468             }
01469 
01470             if (fValidate && !bXsiTypeSet)
01471             {
01472                 fValidator->emitError
01473                 (
01474                     XMLValid::ElementNotDefined
01475                     , elemDecl->getFullName()
01476                 );
01477             }
01478         }
01479     }
01480 
01481 
01482     //  Now we can update the element stack to set the current element
01483     //  decl. We expanded the stack above, but couldn't store the element
01484     //  decl because we didn't know it yet.
01485     fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
01486     fElemStack.setCurrentURI(uriId);
01487 
01488     if (isRoot)
01489     {
01490         fRootGrammar = fGrammar;
01491         fRootElemName = XMLString::replicate(qnameRawBuf, fMemoryManager);
01492     }
01493 
01494     if (fPSVIHandler)
01495     {
01496         fPSVIElemContext.fElemDepth++;
01497 
01498         if (elemDecl->isDeclared())
01499         {
01500             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth;
01501         }
01502         else
01503         {
01504             fPSVIElemContext.fFullValidationDepth = fPSVIElemContext.fElemDepth;
01505 
01506             /******
01507              * While we report an error for historical reasons, this should
01508              * actually result in lax assessment - NG.
01509             if (isRoot && fValidate)
01510                 fPSVIElemContext.fErrorOccurred = true;
01511             ******/
01512         }
01513     }
01514 
01515     //  Validate the element
01516     if (fValidate)
01517     {
01518         fValidator->validateElement(elemDecl);
01519         if (((SchemaValidator*) fValidator)->getErrorOccurred())
01520             fPSVIElemContext.fErrorOccurred = true;
01521     }
01522 
01523     // squirrel away the element's QName, so that we can do an efficient
01524     // end-tag match
01525     fElemStack.setCurrentSchemaElemName(fQNameBuf.getRawBuffer());
01526 
01527     ComplexTypeInfo* typeinfo = (fValidate)
01528         ? ((SchemaValidator*)fValidator)->getCurrentTypeInfo()
01529         : ((SchemaElementDecl*) elemDecl)->getComplexTypeInfo();
01530 
01531     if (typeinfo) {
01532         currentScope = typeinfo->getScopeDefined();
01533 
01534         // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
01535         XMLCh* typeName = typeinfo->getTypeName();
01536         const int comma = XMLString::indexOf(typeName, chComma);
01537         if (comma > 0) {
01538             XMLBuffer prefixBuf(comma+1, fMemoryManager);
01539             prefixBuf.append(typeName, comma);
01540             const XMLCh* uriStr = prefixBuf.getRawBuffer();
01541 
01542             bool errorCondition = !switchGrammar(uriStr) && fValidate;
01543             if (errorCondition && !laxThisOne)
01544             {
01545                 fValidator->emitError
01546                 (
01547                     XMLValid::GrammarNotFound
01548                     , prefixBuf.getRawBuffer()
01549                 );
01550             }
01551         }
01552         else if (comma == 0) {
01553             bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
01554             if (errorCondition && !laxThisOne)
01555             {
01556                 fValidator->emitError
01557                 (
01558                     XMLValid::GrammarNotFound
01559                     , XMLUni::fgZeroLenString
01560                 );
01561             }
01562         }
01563     }
01564     fElemStack.setCurrentScope(currentScope);
01565 
01566     // Set element next state
01567     if (elemDepth >= fElemStateSize) {
01568         resizeElemState();
01569     }
01570 
01571     fElemState[elemDepth] = 0;
01572     fElemLoopState[elemDepth] = 0;
01573     fElemStack.setCurrentGrammar(fGrammar);
01574 
01575     //  If this is the first element and we are validating, check the root
01576     //  element.
01577     if (!isRoot && parentValidation)
01578     {
01579         //  If the element stack is not empty, then add this element as a
01580         //  child of the previous top element. If its empty, this is the root
01581         //  elem and is not the child of anything.
01582         fElemStack.addChild(elemDecl->getElementName(), true);
01583     }
01584 
01585     // PSVI handling:  must reset this, even if no attributes...
01586     if(getPSVIHandler())
01587         fPSVIAttrList->reset();
01588 
01589     //  Now lets get the fAttrList filled in. This involves faulting in any
01590     //  defaulted and fixed attributes and normalizing the values of any that
01591     //  we got explicitly.
01592     //
01593     //  We update the attCount value with the total number of attributes, but
01594     //  it goes in with the number of values we got during the raw scan of
01595     //  explictly provided attrs above.
01596     attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
01597 
01598     if(attCount)
01599     {
01600         // clean up after ourselves:
01601         // clear the map used to detect duplicate attributes
01602         fUndeclaredAttrRegistry->removeAll();
01603     }
01604 
01605     // activate identity constraints
01606     if (toCheckIdentityConstraint())
01607     {
01608         fICHandler->activateIdentityConstraint
01609                         (
01610                           (SchemaElementDecl*) elemDecl
01611                         , (int) elemDepth
01612                         , uriId
01613                         , fPrefixBuf.getRawBuffer()
01614                         , *fAttrList
01615                         , attCount
01616                         , fValidationContext
01617                         );
01618 
01619     }
01620 
01621     // Since the element may have default values, call start tag now regardless if it is empty or not
01622     // If we have a document handler, then tell it about this start tag
01623     if (fDocHandler)
01624     {
01625         fDocHandler->startElement
01626         (
01627             *elemDecl
01628             , uriId
01629             , fPrefixBuf.getRawBuffer()
01630             , *fAttrList
01631             , attCount
01632             , false
01633             , isRoot
01634         );
01635     } // may be where we output something...
01636 
01637     // if we have a PSVIHandler, now's the time to call
01638     // its handleAttributesPSVI method:
01639     if(fPSVIHandler)
01640     {
01641         QName *eName = elemDecl->getElementName();
01642         fPSVIHandler->handleAttributesPSVI
01643         (
01644             eName->getLocalPart()
01645             , fURIStringPool->getValueForId(eName->getURI())
01646             , fPSVIAttrList
01647         );
01648     }
01649 
01650     //  If empty, validate content right now if we are validating and then
01651     //  pop the element stack top. Else, we have to update the current stack
01652     //  top's namespace mapping elements.
01653     if (isEmpty)
01654     {
01655         // Pop the element stack back off since it'll never be used now
01656         fElemStack.popTop();
01657 
01658         // reset current type info
01659         DatatypeValidator* psviMemberType = 0;
01660         if (fGrammarType == Grammar::SchemaGrammarType)
01661         {
01662             if (fValidate && elemDecl->isDeclared())
01663             {
01664                 fPSVIElemContext.fCurrentTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
01665                 if(!fPSVIElemContext.fCurrentTypeInfo)
01666                     fPSVIElemContext.fCurrentDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
01667                 else
01668                     fPSVIElemContext.fCurrentDV = 0;
01669                 if(fPSVIHandler)
01670                 {
01671                     fPSVIElemContext.fNormalizedValue = ((SchemaValidator*) fValidator)->getNormalizedValue();
01672 
01673                     if (XMLString::equals(fPSVIElemContext.fNormalizedValue, XMLUni::fgZeroLenString))
01674                         fPSVIElemContext.fNormalizedValue = 0;
01675                 }
01676             }
01677             else
01678             {
01679                 fPSVIElemContext.fCurrentDV = 0;
01680                 fPSVIElemContext.fCurrentTypeInfo = 0;
01681                 fPSVIElemContext.fNormalizedValue = 0;
01682             }
01683         }
01684 
01685         // If validating, then insure that its legal to have no content
01686         if (fValidate)
01687         {
01688             XMLSize_t failure;
01689             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
01690             if (!res)
01691             {
01692                 // REVISIT:  in the case of xsi:type, this may
01693                 // return the wrong string...
01694                 fValidator->emitError
01695                 (
01696                     XMLValid::ElementNotValidForContent
01697                     , elemDecl->getFullName()
01698                     , elemDecl->getFormattedContentModel()
01699                 );
01700             }
01701 
01702             if (((SchemaValidator*) fValidator)->getErrorOccurred())
01703                 fPSVIElemContext.fErrorOccurred = true;
01704             // note that if we're empty, won't be a current DV
01705             else
01706             {
01707                 if (fPSVIHandler)
01708                 {
01709                     fPSVIElemContext.fIsSpecified = ((SchemaValidator*) fValidator)->getIsElemSpecified();
01710                     if(fPSVIElemContext.fIsSpecified)
01711                         fPSVIElemContext.fNormalizedValue = ((SchemaElementDecl *)elemDecl)->getDefaultValue();
01712                 }
01713                 if (fPSVIElemContext.fCurrentDV && fPSVIElemContext.fCurrentDV->getType() == DatatypeValidator::Union)
01714                     psviMemberType = fValidationContext->getValidatingMemberType();
01715             }
01716 
01717             // call matchers and de-activate context
01718             if (toCheckIdentityConstraint())
01719             {
01720                 fICHandler->deactivateContext
01721                        (
01722                         (SchemaElementDecl *) elemDecl
01723                       , fContent.getRawBuffer()
01724                       , fValidationContext
01725                       , fPSVIElemContext.fCurrentDV
01726                        );
01727             }
01728 
01729         }
01730         else if (fGrammarType == Grammar::SchemaGrammarType) {
01731             ((SchemaValidator*)fValidator)->resetNillable();
01732         }
01733 
01734         if (fPSVIHandler)
01735         {
01736             endElementPSVI
01737             (
01738                 (SchemaElementDecl*)elemDecl, psviMemberType
01739             );
01740         }
01741 
01742         // If we have a doc handler, tell it about the end tag
01743         if (fDocHandler)
01744         {
01745             fDocHandler->endElement
01746             (
01747                 *elemDecl
01748                 , uriId
01749                 , isRoot
01750                 , fPrefixBuf.getRawBuffer()
01751             );
01752         }
01753 
01754         // If the elem stack is empty, then it was an empty root
01755         if (isRoot)
01756             gotData = false;
01757         else
01758         {
01759             // Restore the grammar
01760             fGrammar = fElemStack.getCurrentGrammar();
01761             fGrammarType = fGrammar->getGrammarType();
01762             fValidator->setGrammar(fGrammar);
01763 
01764             // Restore the validation flag
01765             fValidate = fElemStack.getValidationFlag();
01766         }
01767     }
01768     else    // not empty
01769     {
01770 
01771         // send a partial element psvi
01772         if (fPSVIHandler)
01773         {
01774 
01775             ComplexTypeInfo*   curTypeInfo = 0;
01776             DatatypeValidator* curDV = 0;
01777             XSTypeDefinition*  typeDef = 0;
01778 
01779             if (fValidate && elemDecl->isDeclared())
01780             {
01781                 curTypeInfo = ((SchemaValidator*) fValidator)->getCurrentTypeInfo();
01782 
01783                 if (curTypeInfo)
01784                 {
01785                     typeDef = (XSTypeDefinition*) fModel->getXSObject(curTypeInfo);
01786                 }
01787                 else
01788                 {
01789                     curDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
01790 
01791                     if (curDV)
01792                     {
01793                         typeDef = (XSTypeDefinition*) fModel->getXSObject(curDV);
01794                     }
01795                 }
01796             }
01797 
01798             fPSVIElement->reset
01799                 (
01800                   PSVIElement::VALIDITY_NOTKNOWN
01801                 , PSVIElement::VALIDATION_NONE
01802                 , fRootElemName
01803                 , ((SchemaValidator*) fValidator)->getIsElemSpecified()
01804                 , (elemDecl->isDeclared()) ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
01805                 , typeDef
01806                 , 0 //memberType
01807                 , fModel
01808                 , ((SchemaElementDecl*)elemDecl)->getDefaultValue()
01809                 , 0
01810                 , 0
01811                 , 0
01812                 );
01813 
01814 
01815             fPSVIHandler->handlePartialElementPSVI
01816                 (
01817                   elemDecl->getBaseName()
01818                 , fURIStringPool->getValueForId(elemDecl->getURI())
01819                 , fPSVIElement
01820                 );
01821 
01822         }
01823 
01824         fErrorStack->push(fPSVIElemContext.fErrorOccurred);
01825     }
01826 
01827     return true;
01828 }
01829 
01830 
01831 // ---------------------------------------------------------------------------
01832 //  SGXMLScanner: Grammar preparsing
01833 // ---------------------------------------------------------------------------
01834 Grammar* SGXMLScanner::loadGrammar(const   InputSource& src
01835                                    , const short        grammarType
01836                                    , const bool         toCache)
01837 {
01838     Grammar* loadedGrammar = 0;
01839 
01840     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
01841 
01842     try
01843     {
01844         fGrammarResolver->cacheGrammarFromParse(false);
01845                 // if the new grammar has to be cached, better use the already cached
01846                 // grammars, or the an exception will be thrown when caching an already
01847                 // cached grammar
01848         fGrammarResolver->useCachedGrammarInParse(toCache);
01849         fRootGrammar = 0;
01850 
01851         if (fValScheme == Val_Auto) {
01852             fValidate = true;
01853         }
01854 
01855         // Reset some status flags
01856         fInException = false;
01857         fStandalone = false;
01858         fErrorCount = 0;
01859         fHasNoDTD = true;
01860         fSeeXsi = false;
01861 
01862         if (grammarType == Grammar::SchemaGrammarType) {
01863             loadedGrammar = loadXMLSchemaGrammar(src, toCache);
01864         }
01865     }
01866     //  NOTE:
01867     //
01868     //  In all of the error processing below, the emitError() call MUST come
01869     //  before the flush of the reader mgr, or it will fail because it tries
01870     //  to find out the position in the XML source of the error.
01871     catch(const XMLErrs::Codes)
01872     {
01873         // This is a 'first failure' exception, so fall through
01874     }
01875     catch(const XMLValid::Codes)
01876     {
01877         // This is a 'first fatal error' type exit, so fall through
01878     }
01879     catch(const XMLException& excToCatch)
01880     {
01881         //  Emit the error and catch any user exception thrown from here. Make
01882         //  sure in all cases we flush the reader manager.
01883         fInException = true;
01884         try
01885         {
01886             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
01887                 emitError
01888                 (
01889                     XMLErrs::XMLException_Warning
01890                     , excToCatch.getCode()
01891                     , excToCatch.getMessage()
01892                 );
01893             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
01894                 emitError
01895                 (
01896                     XMLErrs::XMLException_Fatal
01897                     , excToCatch.getCode()
01898                     , excToCatch.getMessage()
01899                 );
01900             else
01901                 emitError
01902                 (
01903                     XMLErrs::XMLException_Error
01904                     , excToCatch.getCode()
01905                     , excToCatch.getMessage()
01906                 );
01907         }
01908         catch(const OutOfMemoryException&)
01909         {
01910             // This is a special case for out-of-memory
01911             // conditions, because resetting the ReaderMgr
01912             // can be problematic.
01913             resetReaderMgr.release();
01914 
01915             throw;
01916         }
01917     }
01918     catch(const OutOfMemoryException&)
01919     {
01920         // This is a special case for out-of-memory
01921         // conditions, because resetting the ReaderMgr
01922         // can be problematic.
01923         resetReaderMgr.release();
01924 
01925         throw;
01926     }
01927 
01928     return loadedGrammar;
01929 }
01930 
01931 void SGXMLScanner::resetCachedGrammar ()
01932 {
01933   fCachedSchemaInfoList->removeAll ();
01934 }
01935 
01936 // ---------------------------------------------------------------------------
01937 //  SGXMLScanner: Private helper methods
01938 // ---------------------------------------------------------------------------
01939 //  This method handles the common initialization, to avoid having to do
01940 //  it redundantly in multiple constructors.
01941 void SGXMLScanner::commonInit()
01942 {
01943     //  Create the element state array
01944     fElemState = (unsigned int*) fMemoryManager->allocate
01945     (
01946         fElemStateSize * sizeof(unsigned int)
01947     ); //new unsigned int[fElemStateSize];
01948     fElemLoopState = (unsigned int*) fMemoryManager->allocate
01949     (
01950         fElemStateSize * sizeof(unsigned int)
01951     ); //new unsigned int[fElemStateSize];
01952 
01953     //  And we need one for the raw attribute scan. This just stores key/
01954     //  value string pairs (prior to any processing.)
01955     fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
01956     fRawAttrColonList = (int*) fMemoryManager->allocate
01957     (
01958         fRawAttrColonListSize * sizeof(int)
01959     );
01960 
01961     //  Create the Validator and init them
01962     fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
01963     initValidator(fSchemaValidator);
01964 
01965     // Create IdentityConstraint info
01966     fICHandler = new (fMemoryManager) IdentityConstraintHandler(this, fMemoryManager);
01967 
01968     //  Add the default entity entries for the character refs that must always
01969     //  be present.
01970     fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);
01971     fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
01972     fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
01973     fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
01974     fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
01975     fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
01976     fElemNonDeclPool = new (fMemoryManager) RefHash3KeysIdPool<SchemaElementDecl>(29, true, 128, fMemoryManager);
01977     fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
01978     (
01979         131, false, fMemoryManager
01980     );
01981     fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
01982     fPSVIAttrList = new (fMemoryManager) PSVIAttributeList(fMemoryManager);
01983 
01984     fSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
01985     fCachedSchemaInfoList = new (fMemoryManager) RefHash2KeysTableOf<SchemaInfo>(29, fMemoryManager);
01986 
01987     if (fValidator)
01988     {
01989         if (!fValidator->handlesSchema())
01990             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
01991     }
01992     else
01993     {
01994         fValidator = fSchemaValidator;
01995     }
01996 }
01997 
01998 void SGXMLScanner::cleanUp()
01999 {
02000     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
02001     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
02002     delete fSchemaGrammar;
02003     delete fEntityTable;
02004     delete fRawAttrList;
02005     fMemoryManager->deallocate(fRawAttrColonList);
02006     delete fSchemaValidator;
02007     delete fICHandler;
02008     delete fElemNonDeclPool;
02009     delete fAttDefRegistry;
02010     delete fUndeclaredAttrRegistry;
02011     delete fPSVIAttrList;
02012     if (fPSVIElement)
02013         delete fPSVIElement;
02014 
02015     if (fErrorStack)
02016         delete fErrorStack;
02017 
02018     delete fSchemaInfoList;
02019     delete fCachedSchemaInfoList;
02020 }
02021 
02022 void SGXMLScanner::resizeElemState() {
02023 
02024     unsigned int newSize = fElemStateSize * 2;
02025     unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
02026     (
02027         newSize * sizeof(unsigned int)
02028     ); //new unsigned int[newSize];
02029     unsigned int* newElemLoopState = (unsigned int*) fMemoryManager->allocate
02030     (
02031         newSize * sizeof(unsigned int)
02032     ); //new unsigned int[newSize];
02033 
02034     // Copy the existing values
02035     unsigned int index = 0;
02036     for (; index < fElemStateSize; index++)
02037     {
02038         newElemState[index] = fElemState[index];
02039         newElemLoopState[index] = fElemLoopState[index];
02040     }
02041 
02042     for (; index < newSize; index++)
02043         newElemLoopState[index] = newElemState[index] = 0;
02044 
02045     // Delete the old array and udpate our members
02046     fMemoryManager->deallocate(fElemState); //delete [] fElemState;
02047     fMemoryManager->deallocate(fElemLoopState); //delete [] fElemLoopState;
02048     fElemState = newElemState;
02049     fElemLoopState = newElemLoopState;
02050     fElemStateSize = newSize;
02051 }
02052 
02053 void SGXMLScanner::resizeRawAttrColonList() {
02054 
02055     unsigned int newSize = fRawAttrColonListSize * 2;
02056     int* newRawAttrColonList = (int*) fMemoryManager->allocate
02057     (
02058         newSize * sizeof(int)
02059     ); //new int[newSize];
02060 
02061     // Copy the existing values
02062     unsigned int index = 0;
02063     for (; index < fRawAttrColonListSize; index++)
02064         newRawAttrColonList[index] = fRawAttrColonList[index];
02065 
02066     // Delete the old array and udpate our members
02067     fMemoryManager->deallocate(fRawAttrColonList); //delete [] fRawAttrColonList;
02068     fRawAttrColonList = newRawAttrColonList;
02069     fRawAttrColonListSize = newSize;
02070 }
02071 
02072 //  This method is called from scanStartTag() to build up the list of
02073 //  XMLAttr objects that will be passed out in the start tag callout. We
02074 //  get the key/value pairs from the raw scan of explicitly provided attrs,
02075 //  which have not been normalized. And we get the element declaration from
02076 //  which we will get any defaulted or fixed attribute defs and add those
02077 //  in as well.
02078 XMLSize_t
02079 SGXMLScanner::buildAttList(const  RefVectorOf<KVStringPair>&  providedAttrs
02080                           , const XMLSize_t                   attCount
02081                           ,       XMLElementDecl*             elemDecl
02082                           ,       RefVectorOf<XMLAttr>&       toFill)
02083 {
02084     //  Ask the element to clear the 'provided' flag on all of the att defs
02085     //  that it owns, and to return us a boolean indicating whether it has
02086     //  any defs.
02087     DatatypeValidator *currDV = 0;
02088     ComplexTypeInfo *currType = 0;
02089 
02090     if (fValidate)
02091     {
02092         currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
02093         if (!currType) {
02094             currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator();
02095         }
02096     }
02097 
02098     const bool hasDefs = (currType && fValidate)
02099             ? currType->hasAttDefs()
02100             : elemDecl->hasAttDefs();
02101 
02102     fElemCount++;
02103 
02104     //  If there are no expliclitily provided attributes and there are no
02105     //  defined attributes for the element, the we don't have anything to do.
02106     //  So just return zero in this case.
02107     if (!hasDefs && !attCount)
02108         return 0;
02109 
02110     // Keep up with how many attrs we end up with total
02111     XMLSize_t retCount = 0;
02112 
02113     //  And get the current size of the output vector. This lets us use
02114     //  existing elements until we fill it, then start adding new ones.
02115     const XMLSize_t curAttListSize = toFill.size();
02116 
02117     //  We need a buffer into which raw scanned attribute values will be
02118     //  normalized.
02119     XMLBufBid bbNormal(&fBufMgr);
02120     XMLBuffer& normBuf = bbNormal.getBuffer();
02121 
02122     XMLBufBid bbPrefix(&fBufMgr);
02123     XMLBuffer& prefixBuf = bbPrefix.getBuffer();
02124 
02125     //  Loop through our explicitly provided attributes, which are in the raw
02126     //  scanned form, and build up XMLAttr objects.
02127     XMLSize_t index;
02128     const XMLCh* prefPtr, *suffPtr;
02129     for (index = 0; index < attCount; index++)
02130     {
02131         PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID;
02132         PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL;
02133         const KVStringPair* curPair = providedAttrs.elementAt(index);
02134 
02135         //  We have to split the name into its prefix and name parts. Then
02136         //  we map the prefix to its URI.
02137         const XMLCh* const namePtr = curPair->getKey();
02138 
02139         const int colonInd = fRawAttrColonList[index];
02140         unsigned int uriId;
02141         if (colonInd != -1)
02142         {
02143             prefixBuf.set(namePtr, colonInd);
02144             prefPtr = prefixBuf.getRawBuffer();
02145             suffPtr = namePtr + colonInd + 1;
02146             //  Map the prefix to a URI id
02147             uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
02148         }
02149         else
02150         {
02151             // No colon, so we just have a name with no prefix
02152             prefPtr = XMLUni::fgZeroLenString;
02153             suffPtr = namePtr;
02154             // an empty prefix is always the empty namespace, when dealing with attributes
02155             uriId = fEmptyNamespaceId;
02156         }
02157 
02158         //  If the uri comes back as the xmlns or xml URI or its just a name
02159         //  and that name is 'xmlns', then we handle it specially. So set a
02160         //  boolean flag that lets us quickly below know which we are dealing
02161         //  with.
02162         const bool isNSAttr = (uriId == fEmptyNamespaceId)?
02163                                 XMLString::equals(suffPtr, XMLUni::fgXMLNSString) :
02164                                 (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI));
02165 
02166         //  If its not a special case namespace attr of some sort, then we
02167         //  do normal checking and processing.
02168         XMLAttDef::AttTypes attType = XMLAttDef::CData;
02169         DatatypeValidator *attrValidator = 0;
02170         PSVIAttribute *psviAttr = 0;
02171         bool otherXSI = false;
02172 
02173         if (isNSAttr)
02174         {
02175             if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
02176             {
02177                 emitError
02178                 (
02179                     XMLErrs::AttrAlreadyUsedInSTag
02180                     , namePtr
02181                     , elemDecl->getFullName()
02182                 );
02183                 fPSVIElemContext.fErrorOccurred = true;
02184             }
02185             else
02186             {
02187                 bool ValueValidate = false;
02188                 bool tokenizeBuffer = false;
02189 
02190                 if (uriId == fXMLNSNamespaceId)
02191                 {
02192                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
02193                 }
02194                 else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
02195                 {
02196                     if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
02197                     {
02198                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
02199 
02200                         ValueValidate = true;
02201                     }
02202                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
02203                     {
02204                         // use anyURI as the validator
02205                         // tokenize the data and use the anyURI data for each piece
02206                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
02207                         //We should validate each value in the schema location however
02208                         //this lead to a performance degradation of around 4%.  Since
02209                         //the first value of each pair needs to match what is in the
02210                         //schema document and the second value needs to be valid in
02211                         //order to open the document we won't validate it.  Need to
02212                         //do performance analysis of the anyuri datatype.
02213                         //ValueValidate = true;
02214                         ValueValidate = false;
02215                         tokenizeBuffer = true;
02216                     }
02217                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
02218                     {
02219                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
02220                         //We should validate this value however
02221                         //this lead to a performance degradation of around 4%.  Since
02222                         //the value needs to be valid in
02223                         //order to open the document we won't validate it.  Need to
02224                         //do performance analysis of the anyuri datatype.
02225                         //ValueValidate = true;
02226                         ValueValidate = false;
02227                     }
02228                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
02229                     {
02230                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
02231 
02232                         ValueValidate = true;
02233                     }
02234                     else {
02235                         otherXSI = true;
02236                     }
02237                 }
02238 
02239                 if (!otherXSI) {
02240                     normalizeAttRawValue
02241                     (
02242                         namePtr
02243                         , curPair->getValue()
02244                         , normBuf
02245                     );
02246 
02247                     if (fValidate && attrValidator && ValueValidate)
02248                     {
02249                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true);
02250 
02251                         ValidationContext* const    theContext =
02252                             getValidationContext();
02253 
02254                         if (theContext)
02255                         {
02256                             try
02257                             {
02258                                 if (tokenizeBuffer) {
02259                                     XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager);
02260                                     while (tokenizer.hasMoreTokens()) {
02261                                         attrValidator->validate(
02262                                             tokenizer.nextToken(),
02263                                             theContext,
02264                                             fMemoryManager);
02265                                     }
02266                                 }
02267                                 else {
02268                                     attrValidator->validate(
02269                                         normBuf.getRawBuffer(),
02270                                         theContext,
02271                                         fMemoryManager);
02272                                 }
02273                             }
02274                             catch (const XMLException& idve)
02275                             {
02276                                 fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage());
02277                             }
02278                         }
02279                     }
02280 
02281                     if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
02282                     {
02283                             psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
02284                             XSSimpleTypeDefinition *validatingType = (attrValidator)
02285                             ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator)
02286                             : 0;
02287                         // no attribute declarations for these...
02288                             psviAttr->reset(
02289                                 fRootElemName
02290                                 , PSVIItem::VALIDITY_NOTKNOWN
02291                                 , PSVIItem::VALIDATION_NONE
02292                                 , validatingType
02293                                 , 0
02294                                 , 0
02295                             , false
02296                                 , 0
02297                             , attrValidator
02298                         );
02299                     }
02300                 }
02301             }
02302         }
02303 
02304         if (!isNSAttr || otherXSI)
02305         {
02306             // Some checking for attribute wild card first (for schema)
02307             bool laxThisOne = false;
02308             bool skipThisOne = false;
02309 
02310             XMLAttDef* attDefForWildCard = 0;
02311             XMLAttDef*  attDef = 0;
02312 
02313             if (fGrammarType == Grammar::SchemaGrammarType) {
02314 
02315                 //retrieve the att def
02316                 SchemaAttDef* attWildCard = 0;
02317                 if (currType) {
02318                     attDef = currType->getAttDef(suffPtr, uriId);
02319                     attWildCard = currType->getAttWildCard();
02320                 }
02321                 else if (!currDV) { // check explicitly-set wildcard
02322                     attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
02323                 }
02324 
02325                 // if not found or faulted in - check for a matching wildcard attribute
02326                 // if no matching wildcard attribute, check (un)qualified cases and flag
02327                 // appropriate errors
02328                 if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
02329 
02330                     if (attWildCard) {
02331                         //if schema, see if we should lax or skip the validation of this attribute
02332                         if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
02333 
02334                             if(!skipThisOne)
02335                             {
02336                                 SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
02337                                 if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
02338                                     RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
02339                                     if (attRegistry) {
02340                                         attDefForWildCard = attRegistry->get(suffPtr);
02341                                     }
02342                                 }
02343                             }
02344                         }
02345                     }
02346                     else if (currType) {
02347                         // not found, see if the attDef should be qualified or not
02348                         if (uriId == fEmptyNamespaceId) {
02349                             attDef = currType->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace()));
02350                             if (fValidate
02351                                 && attDef
02352                                 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
02353                                 // the attribute should be qualified
02354                                 fValidator->emitError
02355                                 (
02356                                     XMLValid::AttributeNotQualified
02357                                     , attDef->getFullName()
02358                                 );
02359                                 fPSVIElemContext.fErrorOccurred = true;
02360                                 if (getPSVIHandler())
02361                                 {
02362                                     attrValid = PSVIItem::VALIDITY_INVALID;
02363                                 }
02364                             }
02365                         }
02366                         else {
02367                             attDef = currType->getAttDef(suffPtr, fEmptyNamespaceId);
02368                             if (fValidate
02369                                 && attDef
02370                                 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
02371                                 // the attribute should be unqualified
02372                                 fValidator->emitError
02373                                 (
02374                                     XMLValid::AttributeNotUnQualified
02375                                     , attDef->getFullName()
02376                                 );
02377                                 fPSVIElemContext.fErrorOccurred = true;
02378                                 if (getPSVIHandler())
02379                                 {
02380                                     attrValid = PSVIItem::VALIDITY_INVALID;
02381                                 }
02382                             }
02383                         }
02384                     }
02385                 }
02386             }
02387 
02388             // now need to prepare for duplicate detection
02389             if(attDef)
02390             {
02391                 unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
02392                 if(!curCountPtr)
02393                 {
02394                     curCountPtr = getNewUIntPtr();
02395                     *curCountPtr = fElemCount;
02396                     fAttDefRegistry->put(attDef, curCountPtr);
02397                 }
02398                 else if(*curCountPtr < fElemCount)
02399                     *curCountPtr = fElemCount;
02400                 else
02401                 {
02402                     emitError
02403                     (
02404                         XMLErrs::AttrAlreadyUsedInSTag
02405                         , attDef->getFullName()
02406                         , elemDecl->getFullName()
02407                     );
02408                     fPSVIElemContext.fErrorOccurred = true;
02409                 }
02410             }
02411             else
02412             {
02413                 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
02414                 {
02415                     emitError
02416                     (
02417                         XMLErrs::AttrAlreadyUsedInSTag
02418                         , namePtr
02419                         , elemDecl->getFullName()
02420                     );
02421                     fPSVIElemContext.fErrorOccurred = true;
02422                 }
02423             }
02424 
02425             // if we've found either an attDef or an attDefForWildCard,
02426             // then we're doing full validation and it may still be valid.
02427             if(!attDef && !attDefForWildCard)
02428             {
02429                 if(!laxThisOne && !skipThisOne)
02430                 {
02431                     fPSVIElemContext.fErrorOccurred = true;
02432                 }
02433                 if(getPSVIHandler())
02434                 {
02435                     if(!laxThisOne && !skipThisOne)
02436                     {
02437                         attrValid = PSVIItem::VALIDITY_INVALID;
02438                     }
02439                     else if(laxThisOne)
02440                     {
02441                         attrValid = PSVIItem::VALIDITY_NOTKNOWN;
02442                         attrAssessed = PSVIItem::VALIDATION_PARTIAL;
02443                     }
02444                     else
02445                     {
02446                         attrValid = PSVIItem::VALIDITY_NOTKNOWN;
02447                         attrAssessed = PSVIItem::VALIDATION_NONE;
02448                     }
02449                 }
02450             }
02451 
02452             bool errorCondition = fValidate && !attDefForWildCard && !attDef;
02453             if (errorCondition && !skipThisOne && !laxThisOne)
02454             {
02455                 //
02456                 //  Its not valid for this element, so issue an error if we are
02457                 //  validating.
02458                 //
02459                 XMLBufBid bbMsg(&fBufMgr);
02460                 XMLBuffer& bufMsg = bbMsg.getBuffer();
02461                 if (uriId != fEmptyNamespaceId) {
02462                     XMLBufBid bbURI(&fBufMgr);
02463                     XMLBuffer& bufURI = bbURI.getBuffer();
02464 
02465                     getURIText(uriId, bufURI);
02466 
02467                     bufMsg.append(chOpenCurly);
02468                     bufMsg.append(bufURI.getRawBuffer());
02469                     bufMsg.append(chCloseCurly);
02470                 }
02471                 bufMsg.append(suffPtr);
02472                 fValidator->emitError
02473                 (
02474                     XMLValid::AttNotDefinedForElement
02475                     , bufMsg.getRawBuffer()
02476                     , elemDecl->getFullName()
02477                 );
02478             }
02479 
02480             //  Now normalize the raw value since we have the attribute type. We
02481             //  don't care about the return status here. If it failed, an error
02482             //  was issued, which is all we care about.
02483             if (attDefForWildCard) {
02484                 normalizeAttValue(
02485                     attDefForWildCard, namePtr, curPair->getValue(), normBuf
02486                 );
02487 
02488                 //  If we found an attdef for this one, then lets validate it.
02489                 const XMLCh* xsNormalized = normBuf.getRawBuffer();
02490                 DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
02491                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
02492                 {
02493                     // normalize the attribute according to schema whitespace facet
02494                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
02495                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
02496                     if (fNormalizeData && fValidate) {
02497                         normBuf.set(xsNormalized);
02498                     }
02499                 }
02500 
02501                 if (fValidate ) {
02502                     fValidator->validateAttrValue(
02503                         attDefForWildCard, xsNormalized, false, elemDecl
02504                     );
02505                     attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
02506                     if(((SchemaValidator *)fValidator)->getErrorOccurred())
02507                     {
02508                         fPSVIElemContext.fErrorOccurred = true;
02509                         if(getPSVIHandler())
02510                             attrValid = PSVIItem::VALIDITY_INVALID;
02511                     }
02512                 }
02513                 else { // no decl; default DOMTypeInfo to anySimpleType
02514                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
02515                 }
02516 
02517                 // Save the type for later use
02518                 attType = attDefForWildCard->getType();
02519             }
02520             else {
02521                 normalizeAttValue(
02522                     attDef, namePtr, curPair->getValue(), normBuf
02523                 );
02524 
02525                 //  If we found an attdef for this one, then lets validate it.
02526                 if (attDef)
02527                 {
02528                     const XMLCh* xsNormalized = normBuf.getRawBuffer();
02529                     if (fGrammarType == Grammar::SchemaGrammarType)
02530                     {
02531                         DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator();
02532                         if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
02533                         {
02534                             // normalize the attribute according to schema whitespace facet
02535                             ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
02536                             xsNormalized = fWSNormalizeBuf.getRawBuffer();
02537                             if (fNormalizeData && fValidate && !skipThisOne) {
02538                                 normBuf.set(xsNormalized);
02539                             }
02540                         }
02541                     }
02542 
02543                     if (fValidate && !skipThisOne)
02544                     {
02545                         fValidator->validateAttrValue(
02546                             attDef, xsNormalized, false, elemDecl
02547                         );
02548                         attrValidator = ((SchemaValidator *)fValidator)->getMostRecentAttrValidator();
02549                         if(((SchemaValidator *)fValidator)->getErrorOccurred())
02550                         {
02551                             fPSVIElemContext.fErrorOccurred = true;
02552                             if(getPSVIHandler())
02553                                 attrValid = PSVIItem::VALIDITY_INVALID;
02554                         }
02555                     }
02556                     else {
02557                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
02558                     }
02559                 }
02560                 else {
02561                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
02562                 }
02563 
02564                 // Save the type for later use
02565                 if (attDef)
02566                 {
02567                     attType = attDef->getType();
02568                 }
02569             }
02570 
02571             // now fill in the PSVIAttributes entry for this attribute:
02572                 if(getPSVIHandler())
02573                 {
02574                     psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
02575                     SchemaAttDef *actualAttDef = 0;
02576                     if(attDef)
02577                         actualAttDef = (SchemaAttDef *)attDef;
02578                     else if (attDefForWildCard)
02579                         actualAttDef = (SchemaAttDef *)attDefForWildCard;
02580                 if(actualAttDef)
02581                 {
02582                         XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef);
02583                     DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator();
02584                         XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType);
02585                         if(attrValid != PSVIItem::VALIDITY_VALID)
02586                         {
02587                             psviAttr->reset
02588                         (
02589                                 fRootElemName
02590                                 , attrValid
02591                                 , attrAssessed
02592                                 , validatingType
02593                                 , 0
02594                                 , actualAttDef->getValue()
02595                                 , false
02596                                 , attrDecl
02597                             , 0
02598                             );
02599                         }
02600                         else
02601                         {
02602                             XSSimpleTypeDefinition *memberType = 0;
02603                             if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
02604                                 memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator);
02605                             psviAttr->reset
02606                         (
02607                                 fRootElemName
02608                                 , attrValid
02609                                 , attrAssessed
02610                                 , validatingType
02611                                 , memberType
02612                                 , actualAttDef->getValue()
02613                                 , false
02614                                 , attrDecl
02615                             , (memberType)?attrValidator:attrDataType
02616                             );
02617                         }
02618                 }
02619                 else
02620                 {
02621                         psviAttr->reset
02622                     (
02623                             fRootElemName
02624                             , attrValid
02625                             , attrAssessed
02626                             , 0
02627                             , 0
02628                         , 0
02629                             , false
02630                             , 0
02631                         , 0
02632                         );
02633                 }
02634                 }
02635         }
02636 
02637         //  Add this attribute to the attribute list that we use to pass them
02638         //  to the handler. We reuse its existing elements but expand it as
02639         //  required.
02640         XMLAttr* curAttr;
02641         if (retCount >= curAttListSize)
02642         {
02643             curAttr = new (fMemoryManager) XMLAttr
02644             (
02645                 uriId
02646                 , suffPtr
02647                 , prefPtr
02648                 , normBuf.getRawBuffer()
02649                 , attType
02650                 , true
02651                 , fMemoryManager
02652             );
02653             toFill.addElement(curAttr);
02654         }
02655         else
02656         {
02657             curAttr = toFill.elementAt(retCount);
02658             curAttr->set
02659             (
02660                 uriId
02661                 , suffPtr
02662                 , prefPtr
02663                 , normBuf.getRawBuffer()
02664                 , attType
02665             );
02666             curAttr->setSpecified(true);
02667         }
02668         if(psviAttr)
02669             psviAttr->setValue(curAttr->getValue());
02670 
02671         // Bump the count of attrs in the list
02672         retCount++;
02673     }
02674 
02675     //  Now, if there are any attributes declared by this element, let's
02676     //  go through them and make sure that any required ones are provided,
02677     //  and fault in any fixed ones and defaulted ones that are not provided
02678     //  literally.
02679     if (hasDefs)
02680     {
02681         // Check after all specified attrs are scanned
02682         // (1) report error for REQUIRED attrs that are missing (V_TAGc)
02683         // (2) add default attrs if missing (FIXED and NOT_FIXED)
02684 
02685         XMLAttDefList& attDefList = getAttDefList(currType, elemDecl);
02686 
02687         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
02688         {
02689             // Get the current att def, for convenience and its def type
02690             XMLAttDef *curDef = &attDefList.getAttDef(i);
02691             const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
02692 
02693             unsigned int *attCountPtr = fAttDefRegistry->get(curDef);
02694             if (!attCountPtr || *attCountPtr < fElemCount)
02695             { // did not occur
02696                 // note that since there is no attribute information
02697                 // item present, there is no PSVI infoset to augment here *except*
02698                 // that the element is invalid
02699 
02700                 //the attribute is not provided
02701                 if (fValidate)
02702                 {
02703                     // If we are validating and its required, then an error
02704                     if ((defType == XMLAttDef::Required) ||
02705                         (defType == XMLAttDef::Required_And_Fixed)  )
02706 
02707                     {
02708                         fValidator->emitError
02709                         (
02710                             XMLValid::RequiredAttrNotProvided
02711                             , curDef->getFullName()
02712                         );
02713                         fPSVIElemContext.fErrorOccurred = true;
02714                     }
02715                     else if ((defType == XMLAttDef::Default) ||
02716                              (defType == XMLAttDef::Fixed)  )
02717                     {
02718                         if (fStandalone && curDef->isExternal())
02719                         {
02720                             // XML 1.0 Section 2.9
02721                             // Document is standalone, so attributes must not be defaulted.
02722                             fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName());
02723                         }
02724                     }
02725                 }
02726 
02727                 //  Fault in the value if needed, and bump the att count.
02728                 if ((defType == XMLAttDef::Default)
02729                     ||  (defType == XMLAttDef::Fixed))
02730                 {
02731                     // Let the validator pass judgement on the attribute value
02732                     if (fValidate)
02733                     {
02734                         fValidator->validateAttrValue
02735                         (
02736                             curDef
02737                             , curDef->getValue()
02738                             , false
02739                             , elemDecl
02740                         );
02741                     }
02742 
02743                     XMLAttr* curAtt;
02744                     if (retCount >= curAttListSize)
02745                     {
02746                         curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
02747                         fValidator->faultInAttr(*curAtt, *curDef);
02748                         fAttrList->addElement(curAtt);
02749                     }
02750                     else
02751                     {
02752                         curAtt = fAttrList->elementAt(retCount);
02753                         fValidator->faultInAttr(*curAtt, *curDef);
02754                     }
02755 
02756                     // Indicate it was not explicitly specified and bump count
02757                     curAtt->setSpecified(false);
02758                     retCount++;
02759                     if(getPSVIHandler())
02760                     {
02761                         QName *attName = ((SchemaAttDef *)curDef)->getAttName();
02762                         PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill
02763                         (
02764                             attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI())
02765                         );
02766                         XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef);
02767                         DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator();
02768                         XSSimpleTypeDefinition *defAttrType =
02769                             (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType);
02770                         // would have occurred during validation of default value
02771                         if(((SchemaValidator *)fValidator)->getErrorOccurred())
02772                         {
02773                             defAttrToFill->reset(
02774                                 fRootElemName
02775                                 , PSVIItem::VALIDITY_INVALID
02776                                 , PSVIItem::VALIDATION_FULL
02777                                 , defAttrType
02778                                 , 0
02779                                 , curDef->getValue()
02780                                 , true
02781                                 , defAttrDecl
02782                                 , 0
02783                             );
02784                         }
02785                         else
02786                         {
02787                             XSSimpleTypeDefinition *defAttrMemberType = 0;
02788                             if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
02789                             {
02790                                 defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject
02791                                 (
02792                                     ((SchemaValidator*)fValidator)->getMostRecentAttrValidator()
02793                                 );
02794                             }
02795                             defAttrToFill->reset
02796                             (
02797                                 fRootElemName
02798                                 , PSVIItem::VALIDITY_VALID
02799                                 , PSVIItem::VALIDATION_FULL
02800                                 , defAttrType
02801                                 , defAttrMemberType
02802                                 , curDef->getValue()
02803                                 , true
02804                                 , defAttrDecl
02805                                 , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType
02806                             );
02807                         }
02808                         defAttrToFill->setValue(curDef->getValue());
02809                     }
02810                 }
02811             }
02812             else if (attCountPtr)
02813             {
02814                 //attribute is provided
02815                 // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
02816                 if (defType == XMLAttDef::Prohibited && fValidate)
02817                 {
02818                     fValidator->emitError
02819                     (
02820                         XMLValid::ProhibitedAttributePresent
02821                         , curDef->getFullName()
02822                     );
02823                     fPSVIElemContext.fErrorOccurred = true;
02824                     if (getPSVIHandler())
02825                     {
02826                         QName *attQName = ((SchemaAttDef *)curDef)->getAttName();
02827                         // bad luck...
02828                         PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName
02829                         (
02830                             attQName->getLocalPart(),
02831                             fURIStringPool->getValueForId(attQName->getURI())
02832                         );
02833                         prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID);
02834                     }
02835                 }
02836             }
02837         }
02838     }
02839 
02840     return retCount;
02841 }
02842 
02843 
02844 //  This method will take a raw attribute value and normalize it according to
02845 //  the rules of the attribute type. It will put the resulting value into the
02846 //  passed buffer.
02847 //
02848 //  This code assumes that escaped characters in the original value (via char
02849 //  refs) are prefixed by a 0xFFFF character. This is because some characters
02850 //  are legal if escaped only. And some escape chars are not subject to
02851 //  normalization rules.
02852 bool SGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
02853                                       , const XMLCh* const        attName
02854                                       , const XMLCh* const        value
02855                                       ,       XMLBuffer&          toFill)
02856 {
02857     // A simple state value for a whitespace processing state machine
02858     enum States
02859     {
02860         InWhitespace
02861         , InContent
02862     };
02863 
02864     // Get the type and name
02865     const XMLAttDef::AttTypes type = (attDef)
02866                             ?attDef->getType()
02867                             :XMLAttDef::CData;
02868 
02869     // Assume its going to go fine, and empty the target buffer in preperation
02870     bool retVal = true;
02871     toFill.reset();
02872 
02873     // Get attribute def - to check to see if it's declared externally or not
02874     bool  isAttExternal = (attDef)
02875                         ?attDef->isExternal()
02876                         :false;
02877 
02878     //  Loop through the chars of the source value and normalize it according
02879     //  to the type.
02880     States curState = InContent;
02881     bool firstNonWS = false;
02882     XMLCh nextCh;
02883     const XMLCh* srcPtr = value;
02884 
02885     if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
02886         while (*srcPtr) {
02887             //  Get the next character from the source. We have to watch for
02888             //  escaped characters (which are indicated by a 0xFFFF value followed
02889             //  by the char that was escaped.)
02890             nextCh = *srcPtr;
02891 
02892             // Do we have an escaped character ?
02893             if (nextCh == 0xFFFF)
02894             {
02895                 nextCh = *++srcPtr;
02896             }
02897             else if ( (nextCh <= 0x0D) && (nextCh == 0x09 || nextCh == 0x0A || nextCh == 0x0D) ) {
02898                 // Check Validity Constraint for Standalone document declaration
02899                 // XML 1.0, Section 2.9
02900                 if (fStandalone && fValidate && isAttExternal)
02901                 {
02902                      // Can't have a standalone document declaration of "yes" if  attribute
02903                      // values are subject to normalisation
02904                      fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
02905                 }
02906                 nextCh = chSpace;
02907             }
02908             else if (nextCh == chOpenAngle) {
02909                 //  If its not escaped, then make sure its not a < character, which is
02910                 //  not allowed in attribute values.
02911                 emitError(XMLErrs::BracketInAttrValue, attName);
02912                 retVal = false;
02913             }
02914 
02915             // Add this char to the target buffer
02916             toFill.append(nextCh);
02917 
02918             // And move up to the next character in the source
02919             srcPtr++;
02920         }
02921     }
02922     else {
02923         while (*srcPtr)
02924         {
02925             //  Get the next character from the source. We have to watch for
02926             //  escaped characters (which are indicated by a 0xFFFF value followed
02927             //  by the char that was escaped.)
02928             nextCh = *srcPtr;
02929 
02930             // Do we have an escaped character ?
02931             if (nextCh == 0xFFFF)
02932             {
02933                 nextCh = *++srcPtr;
02934             }
02935             else if (nextCh == chOpenAngle) {
02936                 //  If its not escaped, then make sure its not a < character, which is
02937                 //  not allowed in attribute values.
02938                 emitError(XMLErrs::BracketInAttrValue, attName);
02939                 retVal = false;
02940             }
02941 
02942             if (curState == InWhitespace)
02943             {
02944                 if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
02945                 {
02946                     if (firstNonWS)
02947                         toFill.append(chSpace);
02948                     curState = InContent;
02949                     firstNonWS = true;
02950                 }
02951                 else
02952                 {
02953                     srcPtr++;
02954                     continue;
02955                 }
02956             }
02957             else if (curState == InContent)
02958             {
02959                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
02960                 {
02961                     curState = InWhitespace;
02962                     srcPtr++;
02963 
02964                     // Check Validity Constraint for Standalone document declaration
02965                     // XML 1.0, Section 2.9
02966                     if (fStandalone && fValidate && isAttExternal)
02967                     {
02968                         if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr))
02969                         {
02970                             // Can't have a standalone document declaration of "yes" if  attribute
02971                             // values are subject to normalisation
02972                             fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
02973                         }
02974                     }
02975                     continue;
02976                 }
02977                 firstNonWS = true;
02978             }
02979 
02980             // Add this char to the target buffer
02981             toFill.append(nextCh);
02982 
02983             // And move up to the next character in the source
02984             srcPtr++;
02985         }
02986     }
02987 
02988     return retVal;
02989 }
02990 
02991 //  This method will just normalize the input value as CDATA without
02992 //  any standalone checking.
02993 bool SGXMLScanner::normalizeAttRawValue( const   XMLCh* const        attrName
02994                                       , const XMLCh* const        value
02995                                       ,       XMLBuffer&          toFill)
02996 {
02997     // Assume its going to go fine, and empty the target buffer in preperation
02998     bool retVal = true;
02999     toFill.reset();
03000 
03001     //  Loop through the chars of the source value and normalize it according
03002     //  to the type.
03003     bool escaped;
03004     XMLCh nextCh;
03005     const XMLCh* srcPtr = value;
03006     while (*srcPtr)
03007     {
03008         //  Get the next character from the source. We have to watch for
03009         //  escaped characters (which are indicated by a 0xFFFF value followed
03010         //  by the char that was escaped.)
03011         nextCh = *srcPtr;
03012         escaped = (nextCh == 0xFFFF);
03013         if (escaped)
03014             nextCh = *++srcPtr;
03015 
03016         //  If its not escaped, then make sure its not a < character, which is
03017         //  not allowed in attribute values.
03018         if (!escaped && (*srcPtr == chOpenAngle))
03019         {
03020             emitError(XMLErrs::BracketInAttrValue, attrName);
03021             retVal = false;
03022         }
03023 
03024         if (!escaped)
03025         {
03026             //  NOTE: Yes this is a little redundant in that a 0x20 is
03027             //  replaced with an 0x20. But its faster to do this (I think)
03028             //  than checking for 9, A, and D separately.
03029             if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
03030                 nextCh = chSpace;
03031         }
03032 
03033         // Add this char to the target buffer
03034         toFill.append(nextCh);
03035 
03036         // And move up to the next character in the source
03037         srcPtr++;
03038     }
03039     return retVal;
03040 }
03041 
03042 //  This method will reset the scanner data structures, and related plugged
03043 //  in stuff, for a new scan session. We get the input source for the primary
03044 //  XML entity, create the reader for it, and push it on the stack so that
03045 //  upon successful return from here we are ready to go.
03046 void SGXMLScanner::scanReset(const InputSource& src)
03047 {
03048 
03049     //  This call implicitly tells us that we are going to reuse the scanner
03050     //  if it was previously used. So tell the validator to reset itself.
03051     //
03052     //  But, if the fUseCacheGrammar flag is set, then don't reset it.
03053     //
03054     //  NOTE:   The ReaderMgr is flushed on the way out, because that is
03055     //          required to insure that files are closed.
03056     fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
03057     fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
03058 
03059     // Clear transient schema info list.
03060     //
03061     fSchemaInfoList->removeAll ();
03062 
03063     // fModel may need updating, as fGrammarResolver could have cleaned it
03064     if(fModel && getPSVIHandler())
03065         fModel = fGrammarResolver->getXSModel();
03066 
03067     // Create dummy schema grammar
03068     if (!fSchemaGrammar) {
03069         fSchemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
03070     }
03071 
03072     fGrammar = fSchemaGrammar;
03073     fGrammarType = Grammar::DTDGrammarType;
03074     fRootGrammar = 0;
03075 
03076     fValidator->setGrammar(fGrammar);
03077     if (fValidatorFromUser) {
03078 
03079         ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter);
03080         ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver);
03081         ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal);
03082     }
03083 
03084     // Reset validation
03085     fValidate = (fValScheme == Val_Always) ? true : false;
03086 
03087     //  And for all installed handlers, send reset events. This gives them
03088     //  a chance to flush any cached data.
03089     if (fDocHandler)
03090         fDocHandler->resetDocument();
03091     if (fEntityHandler)
03092         fEntityHandler->resetEntities();
03093     if (fErrorReporter)
03094         fErrorReporter->resetErrors();
03095 
03096     // Clear out the id reference list
03097     resetValidationContext();
03098 
03099     // Reset the Root Element Name
03100     fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
03101     fRootElemName = 0;
03102 
03103     // Reset IdentityConstraints
03104     if (fICHandler)
03105         fICHandler->reset();
03106 
03107     //  Reset the element stack, and give it the latest ids for the special
03108     //  URIs it has to know about.
03109     fElemStack.reset
03110     (
03111         fEmptyNamespaceId
03112         , fUnknownNamespaceId
03113         , fXMLNamespaceId
03114         , fXMLNSNamespaceId
03115     );
03116 
03117     if (!fSchemaNamespaceId)
03118         fSchemaNamespaceId  = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);
03119 
03120     // Reset some status flags
03121     fInException = false;
03122     fStandalone = false;
03123     fErrorCount = 0;
03124     fHasNoDTD = true;
03125     fSeeXsi = false;
03126     fDoNamespaces = true;
03127     fDoSchema = true;
03128 
03129     // Reset PSVI context
03130     // Note that we always need this around for DOMTypeInfo
03131     if (!fPSVIElement)
03132         fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager);
03133 
03134     if (!fErrorStack)
03135     {
03136         fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager);
03137     }
03138     else
03139     {
03140         fErrorStack->removeAllElements();
03141     }
03142 
03143     resetPSVIElemContext();
03144 
03145     // Reset the validators
03146     fSchemaValidator->reset();
03147     fSchemaValidator->setErrorReporter(fErrorReporter);
03148     fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
03149     fSchemaValidator->setGrammarResolver(fGrammarResolver);
03150     if (fValidatorFromUser)
03151         fValidator->reset();
03152 
03153     //  Handle the creation of the XML reader object for this input source.
03154     //  This will provide us with transcoding and basic lexing services.
03155     XMLReader* newReader = fReaderMgr.createReader
03156     (
03157         src
03158         , true
03159         , XMLReader::RefFrom_NonLiteral
03160         , XMLReader::Type_General
03161         , XMLReader::Source_External
03162         , fCalculateSrcOfs
03163         , fLowWaterMark
03164     );
03165 
03166     if (!newReader) {
03167         if (src.getIssueFatalErrorIfNotFound())
03168             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
03169         else
03170             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
03171     }
03172 
03173     // Push this read onto the reader manager
03174     fReaderMgr.pushReader(newReader, 0);
03175 
03176     // and reset security-related things if necessary:
03177     if(fSecurityManager != 0)
03178     {
03179         fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
03180         fEntityExpansionCount = 0;
03181     }
03182     fElemCount = 0;
03183     if(fUIntPoolRowTotal >= 32)
03184     { // 8 KB tied up with validating attributes...
03185         fAttDefRegistry->removeAll();
03186         recreateUIntPool();
03187     }
03188     else
03189     {
03190         // note that this will implicitly reset the values of the hashtables,
03191         // though their buckets will still be tied up
03192         resetUIntPool();
03193     }
03194     fUndeclaredAttrRegistry->removeAll();
03195 }
03196 
03197 
03198 //  This method is called between markup in content. It scans for character
03199 //  data that is sent to the document handler. It watches for any markup
03200 //  characters that would indicate that the character data has ended. It also
03201 //  handles expansion of general and character entities.
03202 //
03203 //  sendData() is a local static helper for this method which handles some
03204 //  code that must be done in three different places here.
03205 void SGXMLScanner::sendCharData(XMLBuffer& toSend)
03206 {
03207     // If no data in the buffer, then nothing to do
03208     if (toSend.isEmpty())
03209         return;
03210 
03211     //  We do different things according to whether we are validating or
03212     //  not. If not, its always just characters; else, it depends on the
03213     //  current element's content model.
03214     if (fValidate)
03215     {
03216         // Get the raw data we need for the callback
03217         const XMLCh* rawBuf = toSend.getRawBuffer();
03218         const XMLSize_t len = toSend.getLen();
03219 
03220         // Get the character data opts for the current element
03221         XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
03222         // And see if the current element is a 'Children' style content model
03223         ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
03224         if(currType)
03225         {
03226             SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
03227             if(modelType == SchemaElementDecl::Children ||
03228                modelType == SchemaElementDecl::ElementOnlyEmpty)
03229                 charOpts = XMLElementDecl::SpacesOk;
03230             else if(modelType == SchemaElementDecl::Empty)
03231                 charOpts = XMLElementDecl::NoCharData;
03232         }
03233 
03234         // should not be necessary once PSVI method on element decls
03235         // are removed
03236         if (charOpts == XMLElementDecl::NoCharData)
03237         {
03238             // They definitely cannot handle any type of char data
03239             fValidator->emitError(XMLValid::NoCharDataInCM);
03240             if (getPSVIHandler())
03241             {
03242                 // REVISIT:
03243                 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
03244             }
03245         }
03246         else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
03247         {
03248             //  Its all spaces. So, if they can take spaces, then send it
03249             //  as ignorable whitespace. If they can handle any char data
03250             //  send it as characters.
03251             if (charOpts == XMLElementDecl::SpacesOk) {
03252                 if (fDocHandler)
03253                     fDocHandler->ignorableWhitespace(rawBuf, len, false);
03254             }
03255             else if (charOpts == XMLElementDecl::AllCharData)
03256             {
03257                 XMLSize_t xsLen;
03258                 const XMLCh* xsNormalized;
03259                 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
03260                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
03261                 {
03262                     // normalize the character according to schema whitespace facet
03263                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
03264                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
03265                     xsLen = fWSNormalizeBuf.getLen();
03266                 }
03267                 else {
03268                     xsNormalized = rawBuf;
03269                     xsLen = len;
03270                 }
03271 
03272                 // tell the schema validation about the character data for checkContent later
03273                 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
03274 
03275                 // call all active identity constraints
03276                 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
03277                     fContent.append(xsNormalized, xsLen);
03278                 }
03279 
03280                 if (fDocHandler) {
03281                     if (fNormalizeData) {
03282                         fDocHandler->docCharacters(xsNormalized, xsLen, false);
03283                     }
03284                     else {
03285                         fDocHandler->docCharacters(rawBuf, len, false);
03286                     }
03287                 }
03288             }
03289         }
03290         else
03291         {
03292             //  If they can take any char data, then send it. Otherwise, they
03293             //  can only handle whitespace and can't handle this stuff so
03294             //  issue an error.
03295             if (charOpts == XMLElementDecl::AllCharData)
03296             {
03297                 XMLSize_t xsLen;
03298                 const XMLCh *xsNormalized;
03299                 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
03300                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
03301                 {
03302                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
03303                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
03304                     xsLen = fWSNormalizeBuf.getLen();
03305                 }
03306                 else {
03307                     xsNormalized = rawBuf;
03308                     xsLen = len;
03309                 }
03310 
03311                 // tell the schema validation about the character data for checkContent later
03312                 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
03313 
03314                 // call all active identity constraints
03315                 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
03316                     fContent.append(xsNormalized, xsLen);
03317                 }
03318 
03319                 if (fDocHandler) {
03320                     if (fNormalizeData) {
03321                         fDocHandler->docCharacters(xsNormalized, xsLen, false);
03322                     }
03323                     else {
03324                         fDocHandler->docCharacters(rawBuf, len, false);
03325                     }
03326                 }
03327             }
03328             else
03329             {
03330                 fValidator->emitError(XMLValid::NoCharDataInCM);
03331                 if (getPSVIHandler())
03332                 {
03333                     // REVISIT:
03334                     // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
03335                 }
03336             }
03337         }
03338     }
03339     else
03340     {
03341         // call all active identity constraints
03342         if (toCheckIdentityConstraint() && fICHandler->getMatcherCount())
03343             fContent.append(toSend.getRawBuffer(), toSend.getLen());
03344 
03345         // Always assume its just char data if not validating
03346         if (fDocHandler)
03347             fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
03348     }
03349 
03350     // Reset buffer
03351     toSend.reset();
03352 }
03353 
03354 
03355 
03356 //  This method is called with a key/value string pair that represents an
03357 //  xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
03358 //  current top of the element stack based on this data. We know that when
03359 //  we get here, that it is one of these forms, so we don't bother confirming
03360 //  it.
03361 //
03362 //  But we have to ensure
03363 //      1. xxx is not xmlns
03364 //      2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
03365 //      3. yyy is not XMLUni::fgXMLNSURIName
03366 //      4. if xxx is not null, then yyy cannot be an empty string.
03367 void SGXMLScanner::updateNSMap(const  XMLCh* const    attrName
03368                               , const XMLCh* const    attrValue)
03369 {
03370     updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon));
03371 }
03372 
03373 void SGXMLScanner::updateNSMap(const  XMLCh* const    attrName
03374                               , const XMLCh* const    attrValue
03375                               , const int colonOfs)
03376 {
03377     // We need a buffer to normalize the attribute value into
03378     XMLBufBid bbNormal(&fBufMgr);
03379     XMLBuffer& normalBuf = bbNormal.getBuffer();
03380 
03381     //  Normalize the value into the passed buffer. In this case, we don't
03382     //  care about the return value. An error was issued for the error, which
03383     //  is all we care about here.
03384     normalizeAttRawValue(attrName, attrValue, normalBuf);
03385     XMLCh* namespaceURI = normalBuf.getRawBuffer();
03386 
03387     //  We either have the default prefix (""), or we point it into the attr
03388     //  name parameter. Note that the xmlns is not the prefix we care about
03389     //  here. To us, the 'prefix' is really the local part of the attrName
03390     //  parameter.
03391     //
03392     //  Check 1. xxx is not xmlns
03393     //        2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
03394     //        3. yyy is not XMLUni::fgXMLNSURIName
03395     //        4. if xxx is not null, then yyy cannot be an empty string.
03396     const XMLCh* prefPtr = XMLUni::fgZeroLenString;
03397     if (colonOfs != -1) {
03398         prefPtr = &attrName[colonOfs + 1];
03399 
03400         if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString))
03401             emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
03402         else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) {
03403             if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
03404                 emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
03405         }
03406 
03407         if (!namespaceURI)
03408             emitError(XMLErrs::NoEmptyStrNamespace, attrName);
03409         else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
03410             emitError(XMLErrs::NoEmptyStrNamespace, attrName);
03411     }
03412 
03413     if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
03414         emitError(XMLErrs::NoUseOfxmlnsURI);
03415     else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) {
03416         if (!XMLString::equals(prefPtr, XMLUni::fgXMLString))
03417             emitError(XMLErrs::XMLURINotMatchXMLPrefix);
03418     }
03419 
03420     //  Ok, we have to get the unique id for the attribute value, which is the
03421     //  URI that this value should be mapped to. The validator has the
03422     //  namespace string pool, so we ask him to find or add this new one. Then
03423     //  we ask the element stack to add this prefix to URI Id mapping.
03424     fElemStack.addPrefix
03425     (
03426         prefPtr
03427         , fURIStringPool->addOrFind(namespaceURI)
03428     );
03429 }
03430 
03431 void SGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
03432 {
03433     //  Make an initial pass through the list and find any xmlns attributes or
03434     //  schema attributes.
03435     //  When we find one, send it off to be used to update the element stack's
03436     //  namespace mappings.
03437     for (XMLSize_t index = 0; index < attCount; index++)
03438     {
03439         // each attribute has the prefix:suffix="value"
03440         const KVStringPair* curPair = fRawAttrList->elementAt(index);
03441         const XMLCh* rawPtr = curPair->getKey();
03442 
03443         //  If either the key begins with "xmlns:" or its just plain
03444         //  "xmlns", then use it to update the map.
03445         if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
03446         ||  XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
03447         {
03448             const XMLCh* valuePtr = curPair->getValue();
03449 
03450             updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]);
03451 
03452             // if the schema URI is seen in the the valuePtr, set the boolean seeXsi
03453             if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) {
03454                 fSeeXsi = true;
03455             }
03456         }
03457     }
03458 
03459     // walk through the list again to deal with "xsi:...."
03460     if (fSeeXsi)
03461     {
03462         //  Schema Xsi Type yyyy (e.g. xsi:type="yyyyy")
03463         XMLBufBid bbXsi(&fBufMgr);
03464         XMLBuffer& fXsiType = bbXsi.getBuffer();
03465 
03466         for (XMLSize_t index = 0; index < attCount; index++)
03467         {
03468             // each attribute has the prefix:suffix="value"
03469             const KVStringPair* curPair = fRawAttrList->elementAt(index);
03470             const XMLCh* rawPtr = curPair->getKey();
03471             const XMLCh* prefPtr;
03472 
03473             int   colonInd = fRawAttrColonList[index];
03474 
03475             if (colonInd != -1) {
03476                 fURIBuf.set(rawPtr, colonInd);
03477                 prefPtr = fURIBuf.getRawBuffer();
03478             }
03479             else {
03480                 prefPtr = XMLUni::fgZeroLenString;
03481             }
03482 
03483             // if schema URI has been seen, scan for the schema location and uri
03484             // and resolve the schema grammar; or scan for schema type
03485             if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
03486 
03487                 const XMLCh* valuePtr = curPair->getValue();
03488                 const XMLCh*  suffPtr = &rawPtr[colonInd + 1];
03489 
03490                 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
03491                     parseSchemaLocation(valuePtr);
03492                 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
03493                     resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString);
03494 
03495                 if( fValidator && fValidator->handlesSchema() )
03496                 {
03497                     if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
03498                     {
03499                         // normalize the attribute according to schema whitespace facet
03500                         DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
03501                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiType, true);
03502                     }
03503                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
03504                     {
03505                         // normalize the attribute according to schema whitespace facet
03506                         XMLBuffer& fXsiNil = fBufMgr.bidOnBuffer();
03507                         DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
03508                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiNil, true);
03509                         if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE))
03510                             ((SchemaValidator*)fValidator)->setNillable(true);
03511                         else if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE))
03512                             ((SchemaValidator*)fValidator)->setNillable(false);
03513                         else
03514                             emitError(XMLErrs::InvalidAttValue, fXsiNil.getRawBuffer(), valuePtr);
03515                         fBufMgr.releaseBuffer(fXsiNil);
03516                     }
03517                 }
03518             }
03519         }
03520 
03521         if (fValidator && fValidator->handlesSchema()) {
03522             if (!fXsiType.isEmpty()) {
03523                 int colonPos = -1;
03524                 unsigned int uriId = resolveQName (
03525                       fXsiType.getRawBuffer()
03526                     , fPrefixBuf
03527                     , ElemStack::Mode_Element
03528                     , colonPos
03529                 );
03530                 ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId);
03531             }
03532         }
03533     }
03534 }
03535 
03536 void SGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema)
03537 {
03538     BaseRefVectorOf<XMLCh>* schemaLocation = XMLString::tokenizeString(schemaLocationStr, fMemoryManager);
03539     Janitor<BaseRefVectorOf<XMLCh> > janLoc(schemaLocation);
03540 
03541     XMLSize_t size = schemaLocation->size();
03542     if (size % 2 != 0 ) {
03543         emitError(XMLErrs::BadSchemaLocation);
03544     } else {
03545         // We need a buffer to normalize the attribute value into
03546         XMLBuffer normalBuf(1023, fMemoryManager);
03547         for(XMLSize_t i=0; i<size; i=i+2) {
03548             normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, schemaLocation->elementAt(i), normalBuf);
03549             resolveSchemaGrammar(schemaLocation->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema);
03550         }
03551     }
03552 }
03553 
03554 void SGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) {
03555 
03556     Grammar* grammar = 0;
03557 
03558     {
03559         XMLSchemaDescriptionImpl    theSchemaDescription(uri, fMemoryManager);
03560         theSchemaDescription.setLocationHints(loc);
03561         grammar = fGrammarResolver->getGrammar(&theSchemaDescription);
03562     }
03563 
03564     // If multi-import is enabled, make sure the existing grammar came
03565     // from the import directive. Otherwise we may end up reloading
03566     // the same schema that came from the external grammar pool. Ideally,
03567     // we would move fSchemaInfoList to XMLGrammarPool so that it survives
03568     // the destruction of the scanner in which case we could rely on the
03569     // same logic we use to weed out duplicate schemas below.
03570     //
03571     if (!grammar || grammar->getGrammarType() == Grammar::DTDGrammarType ||
03572         (getHandleMultipleImports() &&
03573          ((XMLSchemaDescription*)grammar->getGrammarDescription())->
03574          getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
03575     {
03576       if (fLoadSchema || ignoreLoadSchema)
03577       {
03578         XSDDOMParser parser(0, fMemoryManager, 0);
03579 
03580         parser.setValidationScheme(XercesDOMParser::Val_Never);
03581         parser.setDoNamespaces(true);
03582         parser.setUserEntityHandler(fEntityHandler);
03583         parser.setUserErrorReporter(fErrorReporter);
03584 
03585         //Normalize sysId
03586         XMLBufBid nnSys(&fBufMgr);
03587         XMLBuffer& normalizedSysId = nnSys.getBuffer();
03588         XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
03589         const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
03590 
03591         // Create a buffer for expanding the system id
03592         XMLBufBid bbSys(&fBufMgr);
03593         XMLBuffer& expSysId = bbSys.getBuffer();
03594 
03595         //  Allow the entity handler to expand the system id if they choose
03596         //  to do so.
03597         InputSource* srcToFill = 0;
03598         if (fEntityHandler)
03599         {
03600             if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
03601                 expSysId.set(normalizedURI);
03602 
03603             ReaderMgr::LastExtEntityInfo lastInfo;
03604             fReaderMgr.getLastExtEntityInfo(lastInfo);
03605             XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
03606                             expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
03607                             &fReaderMgr);
03608             srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
03609         }
03610         else
03611         {
03612             expSysId.set(normalizedURI);
03613         }
03614 
03615         //  If they didn't create a source via the entity handler, then we
03616         //  have to create one on our own.
03617         if (!srcToFill)
03618         {
03619             if (fDisableDefaultEntityResolution)
03620                 return;
03621 
03622             ReaderMgr::LastExtEntityInfo lastInfo;
03623             fReaderMgr.getLastExtEntityInfo(lastInfo);
03624 
03625             XMLURL urlTmp(fMemoryManager);
03626             if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
03627                 (urlTmp.isRelative()))
03628             {
03629                 if (!fStandardUriConformant)
03630                 {
03631                     XMLBufBid  ddSys(&fBufMgr);
03632                     XMLBuffer& resolvedSysId = ddSys.getBuffer();
03633                     XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
03634 
03635                     srcToFill = new (fMemoryManager) LocalFileInputSource
03636                     (
03637                         lastInfo.systemId
03638                         , resolvedSysId.getRawBuffer()
03639                         , fMemoryManager
03640                     );
03641                 }
03642                 else
03643                     ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
03644             }
03645             else
03646             {
03647                 if (fStandardUriConformant && urlTmp.hasInvalidChar())
03648                     ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
03649 
03650                 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
03651             }
03652         }
03653 
03654         // Put a janitor on the input source
03655         Janitor<InputSource> janSrc(srcToFill);
03656 
03657         // Check if this exact schema has already been seen.
03658         //
03659         const XMLCh* sysId = srcToFill->getSystemId();
03660         unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
03661         SchemaInfo* importSchemaInfo = 0;
03662 
03663         if (fUseCachedGrammar)
03664           importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);
03665 
03666         if (!importSchemaInfo && !fToCacheGrammar)
03667           importSchemaInfo = fSchemaInfoList->get(sysId, uriId);
03668 
03669         if (importSchemaInfo)
03670         {
03671           // We haven't added any new grammars so it is safe to just
03672           // return.
03673           //
03674           return;
03675         }
03676 
03677         // Should just issue warning if the schema is not found
03678         bool flag = srcToFill->getIssueFatalErrorIfNotFound();
03679         srcToFill->setIssueFatalErrorIfNotFound(false);
03680 
03681         parser.parse(*srcToFill);
03682 
03683         // Reset the InputSource
03684         srcToFill->setIssueFatalErrorIfNotFound(flag);
03685 
03686         if (parser.getSawFatal() && fExitOnFirstFatal)
03687             emitError(XMLErrs::SchemaScanFatalError);
03688 
03689         DOMDocument* document = parser.getDocument(); //Our Grammar
03690 
03691         if (document != 0) {
03692 
03693             DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
03694             if (root != 0)
03695             {
03696                 const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
03697                 bool newGrammar = false;
03698                 if (!XMLString::equals(newUri, uri)) {
03699                     if (fValidate || fValScheme == Val_Auto) {
03700                         fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
03701                     }
03702 
03703                     grammar = fGrammarResolver->getGrammar(newUri);
03704                     newGrammar = true;
03705                 }
03706 
03707                 if (!grammar ||
03708                     grammar->getGrammarType() == Grammar::DTDGrammarType ||
03709                     (getHandleMultipleImports() &&
03710                      ((XMLSchemaDescription*) grammar->getGrammarDescription())->
03711                      getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
03712                 {
03713                     // If we switched namespace URI, recheck the schema info.
03714                     //
03715                     if (newGrammar)
03716                     {
03717                       unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;
03718 
03719                       if (fUseCachedGrammar)
03720                         importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);
03721 
03722                       if (!importSchemaInfo && !fToCacheGrammar)
03723                         importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);
03724 
03725                       if (importSchemaInfo)
03726                         return;
03727                     }
03728 
03729                     //  Since we have seen a grammar, set our validation flag
03730                     //  at this point if the validation scheme is auto
03731                     if (fValScheme == Val_Auto && !fValidate) {
03732                         fValidate = true;
03733                         fElemStack.setValidationFlag(fValidate);
03734                     }
03735 
03736                     bool grammarFound = grammar &&
03737                       grammar->getGrammarType() == Grammar::SchemaGrammarType;
03738 
03739                     SchemaGrammar* schemaGrammar;
03740 
03741                     if (grammarFound) {
03742                       schemaGrammar = (SchemaGrammar*) grammar;
03743                     }
03744                     else {
03745                       schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
03746                     }
03747 
03748                     XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
03749 
03750                     gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
03751                     gramDesc->setLocationHints(sysId);
03752 
03753                     TraverseSchema traverseSchema
03754                     (
03755                         root
03756                         , fURIStringPool
03757                         , schemaGrammar
03758                         , fGrammarResolver
03759                         , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
03760                         , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
03761                         , this
03762                         , sysId
03763                         , fEntityHandler
03764                         , fErrorReporter
03765                         , fMemoryManager
03766                         , grammarFound
03767                     );
03768 
03769                     // Reset the now invalid schema roots in the collected
03770                     // schema info entries.
03771                     //
03772                     {
03773                       RefHash2KeysTableOfEnumerator<SchemaInfo> i (
03774                         fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);
03775 
03776                       while (i.hasMoreElements ())
03777                         i.nextElement().resetRoot ();
03778                     }
03779 
03780                     if (fGrammarType == Grammar::DTDGrammarType) {
03781                         fGrammar = schemaGrammar;
03782                         fGrammarType = Grammar::SchemaGrammarType;
03783                         fValidator->setGrammar(fGrammar);
03784                     }
03785 
03786                     if (fValidate) {
03787                         //  validate the Schema scan so far
03788                         fValidator->preContentValidation(false);
03789                     }
03790                 }
03791             }
03792         }
03793       }
03794     }
03795     else
03796     {
03797         //  Since we have seen a grammar, set our validation flag
03798         //  at this point if the validation scheme is auto
03799         if (fValScheme == Val_Auto && !fValidate) {
03800             fValidate = true;
03801             fElemStack.setValidationFlag(fValidate);
03802         }
03803 
03804         // we have seen a schema, so set up the fValidator as fSchemaValidator
03805         if (fGrammarType == Grammar::DTDGrammarType) {
03806             fGrammar = grammar;
03807             fGrammarType = Grammar::SchemaGrammarType;
03808             fValidator->setGrammar(fGrammar);
03809         }
03810     }
03811     // update fModel; rely on the grammar resolver to do this
03812     // efficiently
03813     if(getPSVIHandler())
03814         fModel = fGrammarResolver->getXSModel();
03815 }
03816 
// Produces an InputSource for the entity named by sysId (and, optionally,
// pubId).
//
//   sysId - system id to resolve; it is first copied with 0xFFFF removed.
//   pubId - public id; only forwarded to the entity handler through the
//           XMLResourceIdentifier built below.
//
// Resolution order, as implemented below:
//   1. If an entity handler is installed it may expand the system id
//      (expandSystemId) and may supply the source itself (resolveEntity).
//   2. Otherwise, or if the handler supplied nothing, a source is built
//      here unless fDisableDefaultEntityResolution is set:
//        - URLInputSource when setURL() succeeds against the last external
//          entity's system id and the result is not relative;
//        - LocalFileInputSource in the remaining cases, but only when
//          fStandardUriConformant is off.
//
// Returns the source, or 0 when the handler supplied nothing and
// fDisableDefaultEntityResolution is set. Sources built here are allocated
// with fMemoryManager and are not released in this function.
// Throws MalformedURLException when fStandardUriConformant is on and the id
// is not usable as an absolute URL or urlTmp.hasInvalidChar() is true.
03817 InputSource* SGXMLScanner::resolveSystemId(const XMLCh* const sysId
03818                                           ,const XMLCh* const pubId)
03819 {
03820     // Normalize sysId: copy it into a scratch buffer with 0xFFFF removed.
03821     XMLBufBid nnSys(&fBufMgr);
03822     XMLBuffer& normalizedSysId = nnSys.getBuffer();
03823     XMLString::removeChar(sysId, 0xFFFF, normalizedSysId);
03824     const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
03825 
03826     // Create a buffer for expanding the system id
03827     XMLBufBid bbSys(&fBufMgr);
03828     XMLBuffer& expSysId = bbSys.getBuffer();
03829 
03830     //  Allow the entity handler to expand the system id if it chooses
03831     //  to do so, and then to provide the input source itself.
03832     InputSource* srcToFill = 0;
03833     if (fEntityHandler)
03834     {
          // A false return means the handler did not expand the id, so fall
          // back to the normalized id unchanged.
03835         if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
03836             expSysId.set(normalizedURI);
03837 
          // The last external entity's system id is handed to the handler
          // together with the expanded id and the public id.
03838         ReaderMgr::LastExtEntityInfo lastInfo;
03839         fReaderMgr.getLastExtEntityInfo(lastInfo);
03840         XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
03841                             expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId,
03842                             &fReaderMgr);
03843         srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
03844     }
03845     else
03846     {
03847         expSysId.set(normalizedURI);
03848     }
03849 
03850     //  If no source was created via the entity handler, then we
03851     //  have to create one on our own.
03852     if (!srcToFill)
03853     {
          // Default resolution has been switched off: report "no source".
03854         if (fDisableDefaultEntityResolution)
03855             return 0;
03856 
          // Queried again here because the earlier lastInfo is local to the
          // entity-handler branch above.
03857         ReaderMgr::LastExtEntityInfo lastInfo;
03858         fReaderMgr.getLastExtEntityInfo(lastInfo);
03859 
03860         XMLURL urlTmp(fMemoryManager);
03861         if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
03862             (urlTmp.isRelative()))
03863         {
              // Not usable as an absolute URL: treat it as a local file,
              // unless strict URI conformance forbids that fallback.
03864             if (!fStandardUriConformant)
03865             {
03866                 XMLBufBid  ddSys(&fBufMgr);
03867                 XMLBuffer& resolvedSysId = ddSys.getBuffer();
03868                 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
03869 
03870                 srcToFill = new (fMemoryManager) LocalFileInputSource
03871                 (
03872                     lastInfo.systemId
03873                     , resolvedSysId.getRawBuffer()
03874                     , fMemoryManager
03875                 );
03876             }
03877             else
03878                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
03879         }
03880         else
03881         {
              // Usable URL; under strict conformance it must also be free of
              // invalid characters.
03882             if (fStandardUriConformant && urlTmp.hasInvalidChar())
03883                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
03884             srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
03885         }
03886     }
03887 
03888     return srcToFill;
03889 }
03890 
03891 
03892 // ---------------------------------------------------------------------------
03893 //  SGXMLScanner: Private grammar preparsing methods
03894 // ---------------------------------------------------------------------------
// Preparses the XML Schema document read from 'src' and returns the grammar
// for its target namespace.
//
//   src     - input source of the schema document. Its
//             issue-fatal-error-if-not-found flag is cleared for the
//             duration of the parse (through a cast that drops const) and
//             put back afterwards.
//   toCache - when true, schema info is collected in fCachedSchemaInfoList
//             instead of fSchemaInfoList, and
//             fGrammarResolver->cacheGrammars() is called at the end.
//
// Returns the grammar (one already known to the grammar resolver, or a
// SchemaGrammar allocated here), or 0 when the parser yields no document or
// the document has no root element.
03895 Grammar* SGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
03896                                           const bool toCache)
03897 {
03898    // Reset the validators
03899     fSchemaValidator->reset();
03900     fSchemaValidator->setErrorReporter(fErrorReporter);
03901     fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
03902     fSchemaValidator->setGrammarResolver(fGrammarResolver);
03903 
03904     if (fValidatorFromUser)
03905         fValidator->reset();
03906 
      // The schema document itself is parsed into a DOM without validation,
      // with namespaces on, reusing this scanner's entity handler and error
      // reporter.
03907     XSDDOMParser parser(0, fMemoryManager, 0);
03908 
03909     parser.setValidationScheme(XercesDOMParser::Val_Never);
03910     parser.setDoNamespaces(true);
03911     parser.setUserEntityHandler(fEntityHandler);
03912     parser.setUserErrorReporter(fErrorReporter);
03913 
03914     // Should just issue warning if the schema is not found
      // NOTE(review): the flag is restored only on the normal path; if
      // parser.parse() throws, 'src' is left with the flag cleared.
03915     bool flag = src.getIssueFatalErrorIfNotFound();
03916     ((InputSource&) src).setIssueFatalErrorIfNotFound(false);
03917 
03918     parser.parse(src);
03919 
03920     // Reset the InputSource
03921     ((InputSource&) src).setIssueFatalErrorIfNotFound(flag);
03922 
03923     if (parser.getSawFatal() && fExitOnFirstFatal)
03924         emitError(XMLErrs::SchemaScanFatalError);
03925 
03926     DOMDocument* document = parser.getDocument(); //Our Grammar
03927 
03928     if (document != 0) {
03929 
03930         DOMElement* root = document->getDocumentElement();// This is what we pass to TraverseSchema
03931         if (root != 0)
03932         {
              // Look up any grammar already registered for the schema's
              // targetNamespace attribute value.
03933             const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
03934             Grammar* grammar = fGrammarResolver->getGrammar(nsUri);
03935 
03936             // Check if this exact schema has already been seen.
03937             //
03938             const XMLCh* sysId = src.getSystemId();
03939             SchemaInfo* importSchemaInfo = 0;
03940 
              // Only the cached schema info list is consulted, keyed by
              // system id plus namespace id (fEmptyNamespaceId when the
              // target namespace is absent or empty).
03941             if (grammar)
03942             {
03943               if (nsUri && *nsUri)
03944                 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri));
03945               else
03946                 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId);
03947             }
03948 
              // Not seen before: traverse the schema DOM. When it has been
              // seen, the existing 'grammar' is returned untouched.
03949             if (!importSchemaInfo)
03950             {
                // An existing grammar is extended in place only if it is a
                // schema grammar and multiple imports are enabled; in every
                // other case a fresh SchemaGrammar is allocated.
03951               bool grammarFound = grammar &&
03952                 grammar->getGrammarType() == Grammar::SchemaGrammarType &&
03953                 getHandleMultipleImports();
03954 
03955               SchemaGrammar* schemaGrammar;
03956 
03957               if (grammarFound)
03958                 schemaGrammar = (SchemaGrammar*) grammar;
03959               else
03960                 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
03961 
                // Mark the grammar description as coming from a preparse and
                // record where the schema was loaded from.
03962               XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
03963               gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
03964               gramDesc->setLocationHints(sysId);
03965 
                // The traversal is performed by constructing this object;
                // the variable is not referenced again afterwards.
03966               TraverseSchema traverseSchema
03967                 (
03968                   root
03969                   , fURIStringPool
03970                   , schemaGrammar
03971                   , fGrammarResolver
03972                   , fCachedSchemaInfoList
03973                   , toCache ? fCachedSchemaInfoList : fSchemaInfoList
03974                   , this
03975                   , sysId
03976                   , fEntityHandler
03977                   , fErrorReporter
03978                   , fMemoryManager
03979                   , grammarFound
03980                 );
03981 
03982               grammar = schemaGrammar;
03983 
03984               // Reset the now invalid schema roots in the collected
03985               // schema info entries.
03986               //
03987               {
03988                 RefHash2KeysTableOfEnumerator<SchemaInfo> i (
03989                   toCache ? fCachedSchemaInfoList : fSchemaInfoList);
03990 
03991                 while (i.hasMoreElements ())
03992                   i.nextElement().resetRoot ();
03993               }
03994             }
03995 
03996             if (fValidate) {
03997               //  validate the Schema scan so far
03998               fValidator->setGrammar(grammar);
03999               fValidator->preContentValidation(false);
04000             }
04001 
04002             if (toCache) {
04003               fGrammarResolver->cacheGrammars();
04004             }
04005 
              // Refresh fModel from the grammar resolver, but only when a
              // PSVI handler is installed.
04006             if(getPSVIHandler())
04007               fModel = fGrammarResolver->getXSModel();
04008 
04009             return grammar;
04010         }
04011     }
04012 
      // No document, or a document without a root element.
04013     return 0;
04014 }
04015 
04016 
04017 
04018 // ---------------------------------------------------------------------------
04019 //  SGXMLScanner: Private parsing methods
04020 // ---------------------------------------------------------------------------
04021 
04022 //  This method is called to do a raw scan of an attribute value. It does not
04023 //  do normalization (since we don't know their types yet.) It just scans the
04024 //  value and does entity expansion.
04025 //
04026 //  End-of-entity conditions must be dealt with here. During DTD scan, they
04027 //  can come from external entities. During content, they can come from any
04028 //  entity. We just eat the end of entity and continue with our scan until we
04029 //  come to the closing quote. If an unterminated value causes us to go through
04030 //  subsequent entities, that will cause errors back in the calling code,
04031 //  but there's little we can do about it here.
//
//    attrName - attribute name; used only in the invalid-character error.
//    toFill   - reset on entry, then receives the scanned value. A character
//               produced by an escaped reference is preceded by 0xFFFF.
//
//  Returns true when the closing quote is found in the reader the value
//  started in. Returns false when the next character is not a quote, or when
//  a matching quote turns up in a reader with a lower number than the
//  starting one (PartialMarkupInEntity is emitted first).
//  Throws UnexpectedEOFException when a 0 character is read.
04032 bool SGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
04033 {
04034     // Reset the target buffer
04035     toFill.reset();
04036 
04037     // Get the next char which must be a single or double quote
04038     XMLCh quoteCh;
04039     if (!fReaderMgr.skipIfQuote(quoteCh))
04040         return false;
04041 
04042     //  We have to get the current reader because we have to ignore closing
04043     //  quotes until we hit the same reader again.
04044     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
04045 
04046     //  Loop until we get the attribute value. Note that we use a double
04047     //  loop here to avoid the setup/teardown overhead of the exception
04048     //  handler on every round.
04049     while (true)
04050     {
04051         try
04052         {
04053             while(true)
04054             {
04055                 XMLCh nextCh = fReaderMgr.getNextChar();
04056 
04057                 if (nextCh != quoteCh)
04058                 {
04059                     if (nextCh != chAmpersand)
04060                     {
04061                         if ((nextCh < 0xD800) || (nextCh > 0xDFFF))
04062                         {
04063                             // It's got to at least be a valid XML character
04064                             if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
04065                             {
04066                                 if (nextCh == 0)
04067                                     ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
04068 
                                      // Report the offending character as
                                      // hexadecimal text. It is still appended
                                      // to the buffer at the bottom of the loop.
04069                                 XMLCh tmpBuf[9];
04070                                 XMLString::binToText
04071                                 (
04072                                     nextCh
04073                                     , tmpBuf
04074                                     , 8
04075                                     , 16
04076                                     , fMemoryManager
04077                                 );
04078                                 emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
04079                             }
04080                         } else // it's a surrogate
04081                         {
04082                             // Deal with surrogate pairs
04083 
04084                             //  we expect a leading surrogate.
04085                             if (nextCh <= 0xDBFF)
04086                             {
04087                                 toFill.append(nextCh);
04088 
04089                                 //  process the trailing surrogate
04090                                 nextCh = fReaderMgr.getNextChar();
04091 
04092                                 //  it should be a trailing surrogate; whatever
                                      //  it is, it gets appended at the bottom of
                                      //  the loop.
04093                                 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
04094                                 {
04095                                     emitError(XMLErrs::Expected2ndSurrogateChar);
04096                                 }
04097                             } else
04098                             {
04099                                 //  It's a trailing surrogate, but we are not expecting it
04100                                 emitError(XMLErrs::Unexpected2ndSurrogateChar);
04101                             }
04102                         }
04103                     } else // it's a chAmpersand
04104                     {
04105                         //  Check for an entity ref . We ignore the empty flag in
04106                         //  this one.
04107 
04108                         bool    escaped;
04109                         XMLCh   firstCh;
04110                         XMLCh   secondCh
04111                             ;
04112                         // Only a reference that was returned directly is appended
                              // here; in every case we then go around the loop again.
04113                         if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned)
04114                         {
04115                             //  If it was escaped, then put in a 0xFFFF value. This will
04116                             //  be used later during validation and normalization of the
04117                             //  value to know that the following character was via an
04118                             //  escape char.
04119                             if (escaped)
04120                                 toFill.append(0xFFFF);
04121 
04122                             toFill.append(firstCh);
04123                             if (secondCh)
04124                                 toFill.append(secondCh);
04125                         }
04126                         continue;
04127                     }
04128                 } else // it's a quoteCh
04129                 {
04130                     //  Check for our ending quote. It has to be in the same entity
04131                     //  as where we started. Quotes in nested entities are ignored.
04132 
04133                     if (curReader == fReaderMgr.getCurrentReaderNum())
04134                     {
04135                         return true;
04136                     }
04137 
04138                     // Watch for spillover into a previous entity
04139                     if (curReader > fReaderMgr.getCurrentReaderNum())
04140                     {
04141                         emitError(XMLErrs::PartialMarkupInEntity);
04142                         return false;
04143                     }
04144                 }
04145 
04146                 // add it to the buffer (a quote from a nested entity lands
                      // here too and becomes part of the value)
04147                 toFill.append(nextCh);
04148 
04149             }
04150         }
04151         catch(const EndOfEntityException&)
04152         {
04153             // Just eat it and continue.
04154         }
04155     }
      // Not reached: the outer loop is only left through the returns or an
      // exception above.
04156     return true;
04157 }
04158 
04159 
04160 //  This method scans a CDATA section. It collects the characters into one
04161 //  of the temp buffers and calls the document handler, if any, with the
04162 //  characters. It assumes that the <![CDATA string has been scanned before
04163 //  this call.
//
//  When validating, the collected text is also handed to the schema
//  validator (whitespace-normalized first if the current datatype validator's
//  whitespace facet is not PRESERVE). When identity constraints are being
//  checked and matchers are active, the text is appended to fContent.
//
//  Throws UnexpectedEOFException (after emitting UnterminatedCDATASection)
//  when a 0 character is read before the closing sequence.
04164 void SGXMLScanner::scanCDSection()
04165 {
      // "]>": matched after a ']' has already been read, so together they
      // form the "]]>" terminator.
04166     static const XMLCh CDataClose[] =
04167     {
04168             chCloseSquare, chCloseAngle, chNull
04169     };
04170 
04171     //  The next character should be the opening square bracket. If not
04172     //  issue an error, but then try to recover by skipping any whitespace
04173     //  and checking again.
04174     if (!fReaderMgr.skippedChar(chOpenSquare))
04175     {
04176         emitError(XMLErrs::ExpectedOpenSquareBracket);
04177         fReaderMgr.skipPastSpaces();
04178 
04179         // If we still don't find it, then give up, else keep going
04180         if (!fReaderMgr.skippedChar(chOpenSquare))
04181             return;
04182     }
04183 
04184     // Get a buffer for this
04185     XMLBufBid bbCData(&fBufMgr);
04186 
04187     //  We just scan forward until we hit the end of CDATA section sequence.
04188     //  CDATA is effectively a big escape mechanism so we don't treat markup
04189     //  characters specially here.
04190     bool            emittedError = false;
04191     bool    gotLeadingSurrogate = false;
04192 
04193     // Get the character data opts for the current element
04194     XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
04195     // And see if the current element is a 'Children' style content model
      // NOTE(review): fValidator is cast to SchemaValidator* without a check;
      // presumably this scanner only ever runs with a schema validator.
04196     ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
04197     if(currType)
04198     {
04199         SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
04200         if(modelType == SchemaElementDecl::Children ||
04201            modelType == SchemaElementDecl::ElementOnlyEmpty)
04202             charOpts = XMLElementDecl::SpacesOk;
04203         else if(modelType == SchemaElementDecl::Empty)
04204             charOpts = XMLElementDecl::NoCharData;
04205     }
04206 
04207     // should not be necessary when PSVI on element decl removed
04208     const ElemStack::StackElem* topElem = fElemStack.topElement();
04209 
04210     while (true)
04211     {
04212         const XMLCh nextCh = fReaderMgr.getNextChar();
04213 
04214         // Watch for unexpected end of file
04215         if (!nextCh)
04216         {
04217             emitError(XMLErrs::UnterminatedCDATASection);
04218             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
04219         }
04220 
04221         if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
04222         {
04223             // This document is standalone; this ignorable CDATA whitespace is forbidden.
04224             // XML 1.0, Section 2.9
04225             // Applies only when the current element's declaration is external.
04226             if (topElem->fThisElement->isExternal()) {
04227 
04228                 if (charOpts == XMLElementDecl::SpacesOk) // Element Content
04229                 {
04230                     // Error - standalone should have a value of "no" as whitespace detected in an
04231                     // element type with element content whose element declaration was external
04232                     fValidator->emitError(XMLValid::NoWSForStandalone);
04233                     if (getPSVIHandler())
04234                     {
04235                         // REVISIT:
04236                         // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
04237                     }
04238                 }
04239             }
04240         }
04241 
04242         //  If this is a close square bracket it could be our closing
04243         //  sequence.
04244         if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
04245         {
04246             //  make sure we were not expecting a trailing surrogate.
04247             if (gotLeadingSurrogate) {
04248                 emitError(XMLErrs::Expected2ndSurrogateChar);
04249             }
04250 
              // Start from the raw buffer; these two are redirected to the
              // normalized copy below when normalization is performed.
04251             XMLSize_t xsLen = bbCData.getLen();
04252             const XMLCh* xsNormalized = bbCData.getRawBuffer();
04253             if (fValidate) {
04254 
04255                 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
04256                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
04257                 {
04258                     // normalize the character according to schema whitespace facet
04259                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf);
04260                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
04261                     xsLen = fWSNormalizeBuf.getLen();
04262                 }
04263 
04264                 // tell the schema validation about the character data for checkContent later
04265                 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
04266 
                  // Raised for any CDATA section, whatever its content, when
                  // the content model is SpacesOk or NoCharData.
04267                 if (charOpts != XMLElementDecl::AllCharData)
04268                 {
04269                     // They definitely cannot handle any type of char data
04270                     fValidator->emitError(XMLValid::NoCharDataInCM);
04271                     if (getPSVIHandler())
04272                     {
04273                         // REVISIT:
04274                         // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
04275                     }
04276                 }
04277             }
04278 
04279             // call all active identity constraints
04280             if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
04281                 fContent.append(xsNormalized, xsLen);
04282             }
04283 
04284             // If we have a doc handler, call it
              // (with the possibly normalized text when fNormalizeData is set,
              // otherwise with the raw buffer; the trailing 'true' presumably
              // marks the text as CDATA - confirm against XMLDocumentHandler).
04285             if (fDocHandler)
04286             {
04287                 if (fNormalizeData) {
04288                     fDocHandler->docCharacters(xsNormalized, xsLen, true);
04289                 }
04290                 else {
04291                     fDocHandler->docCharacters(
04292                         bbCData.getRawBuffer(), bbCData.getLen(), true
04293                     );
04294                 }
04295             }
04296 
04297             // And we are done
04298             break;
04299         }
04300 
04301         //  Make sure it's a valid character. But if we've emitted an error
04302         //  already, don't bother with the overhead since we've already told
04303         //  them about it. Surrogate tracking stops at that point as well,
          //  because it lives inside this same guard.
04304         if (!emittedError)
04305         {
04306             // Deal with surrogate pairs
04307             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
04308             {
04309                 //  It's a leading surrogate. If we already got one, then
04310                 //  issue an error, else set leading flag to make sure that
04311                 //  we look for a trailing next time.
04312                 if (gotLeadingSurrogate)
04313                     emitError(XMLErrs::Expected2ndSurrogateChar);
04314                 else
04315                     gotLeadingSurrogate = true;
04316             }
04317             else
04318             {
04319                 //  If it's a trailing surrogate, make sure that we are
04320                 //  prepared for that. Else, it's just a regular char so make
04321                 //  sure that we were not expecting a trailing surrogate.
04322                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
04323                 {
04324                     // It's trailing, so make sure we were expecting it
04325                     if (!gotLeadingSurrogate)
04326                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
04327                 }
04328                 else
04329                 {
04330                     //  It's just a char, so make sure we were not expecting a
04331                     //  trailing surrogate.
04332                     if (gotLeadingSurrogate)
04333                         emitError(XMLErrs::Expected2ndSurrogateChar);
04334 
04335                     // It's got to at least be a valid XML character
04336                     else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
04337                     {
04338                         XMLCh tmpBuf[9];
04339                         XMLString::binToText
04340                         (
04341                             nextCh
04342                             , tmpBuf
04343                             , 8
04344                             , 16
04345                             , fMemoryManager
04346                         );
04347                         emitError(XMLErrs::InvalidCharacter, tmpBuf);
04348                         emittedError = true;
04349                     }
04350                 }
04351                 gotLeadingSurrogate = false;
04352             }
04353         }
04354 
04355         // Add it to the buffer (done even for characters reported above)
04356         bbCData.append(nextCh);
04357     }
04358 }
04359 
04360 
//  Scans a run of character content into toUse and reports it through
//  sendCharData(). Scanning ends when getNextCharIfNot(chOpenAngle, ...)
//  yields no character. Along the way '&' references are expanded via
//  scanEntityRef(), surrogate pairing and XML character validity are
//  checked, the illegal "]]>" sequence is flagged and, when validating a
//  standalone document, the XML 1.0 section 2.9 whitespace constraint is
//  checked before the final flush.
//
//  toUse - scratch buffer; it is reset on entry and handed to sendCharData()
//          whenever accumulated text has to be flushed.
04361 void SGXMLScanner::scanCharData(XMLBuffer& toUse)
04362 {
04363     //  We have to watch for the stupid ]]> sequence, which is illegal in
04364     //  character data. So this is a little state machine that handles that.
04365     enum States
04366     {
04367         State_Waiting
04368         , State_GotOne
04369         , State_GotTwo
04370     };
04371 
04372     // Reset the buffer before we start
04373     toUse.reset();
04374 
04375     // Turn on the 'throw at end' flag of the reader manager
04376     ThrowEOEJanitor jan(&fReaderMgr, true);
04377 
04378     //  In order to be more efficient we have to use kind of a deeply nested
04379     //  set of blocks here. The outer block puts on a try and catches end of
04380     //  entity exceptions. The inner loop is the per-character loop. If we
04381     //  put the try inside the inner loop, it would work but would require
04382     //  the exception handling code setup/teardown code to be invoked for
04383     //  each character.
          //
          //  All of the state below is declared outside the try block, so it
          //  keeps its value when an end-of-entity exception is caught and the
          //  outer loop goes around again.
04384     XMLCh   nextCh;
04385     XMLCh   secondCh = 0;
04386     States  curState = State_Waiting;
04387     bool    escaped = false;
04388     bool    gotLeadingSurrogate = false;
04389     bool    notDone = true;
04390     while (notDone)
04391     {
04392         try
04393         {
04394             while (true)
04395             {
04396                 //  Eat through as many plain content characters as possible without
04397                 //  needing special handling.  Moving most content characters here,
04398                 //  in this one call, rather than running the overall loop once
04399                 //  per content character, is a speed optimization.
                      //  It is only attempted when neither a ']' run nor a surrogate
                      //  pair is in progress.
04400                 if (curState == State_Waiting  &&  !gotLeadingSurrogate)
04401                 {
04402                      fReaderMgr.movePlainContentChars(toUse);
04403                 }
04404 
04405                 // Try to get another char from the source
04406                 //   The code from here on down covers all contingencies,
04407                 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
04408                 {
04409                     // If we were waiting for a trailing surrogate, it's an error
04410                     if (gotLeadingSurrogate)
04411                         emitError(XMLErrs::Expected2ndSurrogateChar);
04412 
                          // No character was returned, so this is the normal way out
                          // of both loops.
04413                     notDone = false;
04414                     break;
04415                 }
04416 
04417                 //  Watch for a reference. Note that the escapement mechanism
04418                 //  is ignored in this content.
04419                 escaped = false;
04420                 if (nextCh == chAmpersand)
04421                 {
                          // Flush what we have so far before the reference is scanned
04422                     sendCharData(toUse);
04423 
04424                     // Turn off the throwing at the end of entity during this
                          //  NOTE: this 'jan' shadows the function-level janitor above.
                          //  It stays alive until the end of this if-block, which
                          //  includes the 'continue' path below.
04425                     ThrowEOEJanitor jan(&fReaderMgr, false);
04426 
                          //  Anything other than EntityExp_Returned means there is no
                          //  character to append for this reference, so go back for the
                          //  next character. On EntityExp_Returned, nextCh (and possibly
                          //  secondCh) hold the expansion and fall through to the state
                          //  machine and the append below.
04427                     if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
04428                     {
04429                         gotLeadingSurrogate = false;
04430                         continue;
04431                     }
04432                 }
04433                 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
04434                 {
04435                     // Deal with surrogate pairs
04436                     //  It's a leading surrogate. If we already got one, then
04437                     //  issue an error, else set leading flag to make sure that
04438                     //  we look for a trailing next time.
04439                     if (gotLeadingSurrogate)
04440                         emitError(XMLErrs::Expected2ndSurrogateChar);
04441                     else
04442                         gotLeadingSurrogate = true;
04443                 }
04444                 else
04445                 {
04446                     //  If it's a trailing surrogate, make sure that we are
04447                     //  prepared for that. Else, it's just a regular char so make
04448                     //  sure that we were not expecting a trailing surrogate.
04449                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
04450                     {
04451                         // It's trailing, so make sure we were expecting it
04452                         if (!gotLeadingSurrogate)
04453                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
04454                     }
04455                     else
04456                     {
04457                         //  It's just a char, so make sure we were not expecting a
04458                         //  trailing surrogate.
04459                         if (gotLeadingSurrogate)
04460                             emitError(XMLErrs::Expected2ndSurrogateChar);
04461 
04462                         // Make sure the returned char is a valid XML char
                              //  (this is a separate 'if', so it is checked even when the
                              //  surrogate error above was just reported)
04463                         if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
04464                         {
                                  // Render the offending code unit in base 16 for the message
04465                             XMLCh tmpBuf[9];
04466                             XMLString::binToText
04467                             (
04468                                 nextCh
04469                                 , tmpBuf
04470                                 , 8
04471                                 , 16
04472                                 , fMemoryManager
04473                             );
04474                             emitError(XMLErrs::InvalidCharacter, tmpBuf);
04475                         }
04476                     }
04477                     gotLeadingSurrogate = false;
04478                 }
04479 
04480                 // Keep the state machine up to date
                      //  Only unescaped characters can advance towards "]]>"; a
                      //  character flagged as escaped by scanEntityRef() resets it.
04481                 if (!escaped)
04482                 {
04483                     if (nextCh == chCloseSquare)
04484                     {
                              //  A third or later ']' leaves the state at State_GotTwo
04485                         if (curState == State_Waiting)
04486                             curState = State_GotOne;
04487                         else if (curState == State_GotOne)
04488                             curState = State_GotTwo;
04489                     }
04490                     else if (nextCh == chCloseAngle)
04491                     {
04492                         if (curState == State_GotTwo)
04493                             emitError(XMLErrs::BadSequenceInCharData);
04494                         curState = State_Waiting;
04495                     }
04496                     else
04497                     {
04498                         curState = State_Waiting;
04499                     }
04500                 }
04501                 else
04502                 {
04503                     curState = State_Waiting;
04504                 }
04505 
04506                 // Add this char to the buffer
04507                 toUse.append(nextCh);
04508 
                      //  scanEntityRef() may also have filled in a second character;
                      //  append it and clear it so it is not appended again.
04509                 if (secondCh)
04510                 {
04511                     toUse.append(secondCh);
04512                     secondCh=0;
04513                 }
04514             }
04515         }
04516         catch(const EndOfEntityException& toCatch)
04517         {
04518             //  Some entity ended, so we have to send any accumulated
04519             //  chars and send an end of entity event.
                  //  notDone is still true here, so the outer loop re-enters the
                  //  try block and scanning carries on.
04520             sendCharData(toUse);
04521             gotLeadingSurrogate = false;
04522 
04523             if (fDocHandler)
04524                 fDocHandler->endEntityReference(toCatch.getEntity());
04525         }
04526     }
04527 
04528     // Check the validity constraints as per XML 1.0 Section 2.9
04529     if (fValidate && fStandalone)
04530     {
04531         // See if the text contains whitespace
04532         // Get the raw data we need for the check
04533         const XMLCh* rawBuf = toUse.getRawBuffer();
04534         const XMLSize_t len = toUse.getLen();
04535         const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
04536 
04537         if (isSpaces)
04538         {
04539             // Look at the element that is currently on top of the element stack
04540             const ElemStack::StackElem* topElem = fElemStack.topElement();
04541 
                  // The constraint only applies when that element's declaration is external
04542             if (topElem->fThisElement->isExternal()) {
04543 
04544                 // Get the character data opts for the current element
04545                 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
04546                 // And see if the current element is a 'Children' style content model
04547                 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
04548                 if(currType)
04549                 {
04550                     SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
04551                     if(modelType == SchemaElementDecl::Children ||
04552                        modelType == SchemaElementDecl::ElementOnlyEmpty)
04553                         charOpts = XMLElementDecl::SpacesOk;
04554                 }
04555 
04556                 if (charOpts == XMLElementDecl::SpacesOk)  // => Element Content
04557                 {
04558                     // Error - standalone should have a value of "no" as whitespace detected in an
04559                     // element type with element content whose element declaration was external
04560                     //
04561                     fValidator->emitError(XMLValid::NoWSForStandalone);
04562                     if (getPSVIHandler())
04563                     {
04564                         // REVISIT:
04565                         // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
04566                     }
04567                 }
04568             }
04569         }
04570     }
04571     // Send any char data that we accumulated into the buffer
04572     sendCharData(toUse);
04573 }
04574 
04575 
04576 //  This method will scan a general/character entity ref. It will either
04577 //  expand a char ref and return it directly, or push a reader for a general
04578 //  entity.
04579 //
04580 //  The return value indicates whether the char parameters hold the value
04581 //  or whether the value was pushed as a reader, or that it failed.
04582 //
04583 //  The escaped flag tells the caller whether the returned parameter resulted
04584 //  from a character reference, which escapes the character in some cases. It
04585 //  only makes any difference if the return value indicates the value was
04586 //  returned directly.
04587 SGXMLScanner::EntityExpRes
04588 SGXMLScanner::scanEntityRef(  const   bool
04589                             ,       XMLCh&  firstCh
04590                             ,       XMLCh&  secondCh
04591                             ,       bool&   escaped)
04592 {
04593     // Assume no escape
04594     secondCh = 0;
04595     escaped = false;
04596 
04597     // We have to insure that its all in one entity
04598     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
04599 
04600     //  If the next char is a pound, then its a character reference and we
04601     //  need to expand it always.
04602     if (fReaderMgr.skippedChar(chPound))
04603     {
04604         //  Its a character reference, so scan it and get back the numeric
04605         //  value it represents.
04606         if (!scanCharRef(firstCh, secondCh))
04607             return EntityExp_Failed;
04608 
04609         escaped = true;
04610 
04611         if (curReader != fReaderMgr.getCurrentReaderNum())
04612             emitError(XMLErrs::PartialMarkupInEntity);
04613 
04614         return EntityExp_Returned;
04615     }
04616 
04617     // Expand it since its a normal entity ref
04618     XMLBufBid bbName(&fBufMgr);
04619     int colonPosition;
04620     if (!fReaderMgr.getQName(bbName.getBuffer(), &colonPosition))
04621     {
04622         if (bbName.isEmpty())
04623             emitError(XMLErrs::ExpectedEntityRefName);
04624         else
04625             emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer());
04626         return EntityExp_Failed;
04627     }
04628 
04629     //  Next char must be a semi-colon. But if its not, just emit
04630     //  an error and try to continue.
04631     if (!fReaderMgr.skippedChar(chSemiColon))
04632         emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
04633 
04634     // Make sure we ended up on the same entity reader as the & char
04635     if (curReader != fReaderMgr.getCurrentReaderNum())
04636         emitError(XMLErrs::PartialMarkupInEntity);
04637 
04638     // Look up the name in the general entity pool
04639     // If it does not exist, then obviously an error
04640     if (!fEntityTable->containsKey(bbName.getRawBuffer()))
04641     {
04642         // XML 1.0 Section 4.1
04643         // Well-formedness Constraint for entity not found:
04644         //   In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
04645         //      or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
04646         //      or a parameter entity
04647         if (fStandalone || fHasNoDTD)
04648             emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
04649 
04650         return EntityExp_Failed;
04651     }
04652 
04653     // here's where we need to check if there's a SecurityManager,
04654     // how many entity references we've had
04655     if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
04656         XMLCh expLimStr[32];
04657         XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
04658         emitError
04659         (
04660             XMLErrs::EntityExpansionLimitExceeded
04661             , expLimStr
04662         );
04663         // there seems nothing better to be done than to reset the entity expansion limit
04664         fEntityExpansionCount = 0;
04665     }
04666 
04667     firstCh = fEntityTable->get(bbName.getRawBuffer());
04668     escaped = true;
04669     return EntityExp_Returned;
04670 }
04671 
04672 
04673 bool SGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
04674 {
04675     Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace);
04676 
04677     if (!tempGrammar) {
04678         tempGrammar = fSchemaGrammar;
04679     }
04680 
04681     if (!tempGrammar)
04682         return false;
04683     else {
04684         fGrammar = tempGrammar;
04685         fGrammarType = fGrammar->getGrammarType();
04686         if (fGrammarType == Grammar::DTDGrammarType) {
04687             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
04688         }
04689 
04690         fValidator->setGrammar(fGrammar);
04691         return true;
04692     }
04693 }
04694 
04695 // check if we should skip or lax the validation of the element
04696 // if skip - no validation
04697 // if lax - validate only if the element if found
04698 bool SGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
04699                                         const XMLContentModel* const cm,
04700                                         const XMLSize_t parentElemDepth)
04701 {
04702     bool skipThisOne = false;
04703     bool laxThisOne = false;
04704     unsigned int elementURI = element->getURI();
04705     unsigned int currState = fElemState[parentElemDepth];
04706     unsigned int currLoop = fElemLoopState[parentElemDepth];
04707 
04708     if (currState == XMLContentModel::gInvalidTrans) {
04709         return laxThisOne;
04710     }
04711 
04712     SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);
04713 
04714     if (cv) {
04715         XMLSize_t i = 0;
04716         XMLSize_t leafCount = cv->getLeafCount();
04717         unsigned int nextState = 0;
04718 
04719         for (; i < leafCount; i++) {
04720 
04721             QName* fElemMap = cv->getLeafNameAt(i);
04722             unsigned int uri = fElemMap->getURI();
04723             ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
04724 
04725             if (type == ContentSpecNode::Leaf) {
04726                 if (((uri == elementURI)
04727                       && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart()))
04728                     || comparator.isEquivalentTo(element, fElemMap)) {
04729 
04730                     nextState = cm->getNextState(currState, i);
04731 
04732                     if (nextState != XMLContentModel::gInvalidTrans)
04733                         break;
04734                 }
04735             } else if ((type & 0x0f) == ContentSpecNode::Any) {
04736                 nextState = cm->getNextState(currState, i);
04737                 if (nextState != XMLContentModel::gInvalidTrans)
04738                     break;
04739             }
04740             else if ((type & 0x0f) == ContentSpecNode::Any_Other) {
04741                 if (uri != elementURI && elementURI != fEmptyNamespaceId) {
04742                     nextState = cm->getNextState(currState, i);
04743                     if (nextState != XMLContentModel::gInvalidTrans)
04744                         break;
04745                 }
04746             }
04747             else if ((type & 0x0f) == ContentSpecNode::Any_NS) {
04748                 if (uri == elementURI) {
04749                     nextState = cm->getNextState(currState, i);
04750                     if (nextState != XMLContentModel::gInvalidTrans)
04751                         break;
04752                 }
04753             }
04754 
04755         } // for
04756 
04757         if (i == leafCount) { // no match
04758             fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
04759             fElemLoopState[parentElemDepth] = 0;
04760             return laxThisOne;
04761         }
04762 
04763         ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
04764         if ((type & 0x0f) == ContentSpecNode::Any ||
04765             (type & 0x0f) == ContentSpecNode::Any_Other ||
04766             (type & 0x0f) == ContentSpecNode::Any_NS)
04767         {
04768             if (type == ContentSpecNode::Any_Skip ||
04769                 type == ContentSpecNode::Any_NS_Skip ||
04770                 type == ContentSpecNode::Any_Other_Skip) {
04771                 skipThisOne = true;
04772             }
04773             else if (type == ContentSpecNode::Any_Lax ||
04774                      type == ContentSpecNode::Any_NS_Lax ||
04775                      type == ContentSpecNode::Any_Other_Lax) {
04776                 laxThisOne = true;
04777             }
04778         }
04779         fElemState[parentElemDepth] = nextState;
04780         fElemLoopState[parentElemDepth] = currLoop;
04781     } // if
04782 
04783     if (skipThisOne) {
04784         fValidate = false;
04785         fElemStack.setValidationFlag(fValidate);
04786     }
04787 
04788     return laxThisOne;
04789 }
04790 
04791 
04792 // check if there is an AnyAttribute, and if so, see if we should lax or skip
04793 // if skip - no validation
04794 // if lax - validate only if the attribute if found
04795 bool SGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
04796 {
04797     XMLAttDef::AttTypes wildCardType = attWildCard->getType();
04798     bool anyEncountered = false;
04799     skipThisOne = false;
04800     laxThisOne = false;
04801     if (wildCardType == XMLAttDef::Any_Any)
04802         anyEncountered = true;
04803     else if (wildCardType == XMLAttDef::Any_Other) {
04804         if (attWildCard->getAttName()->getURI() != uriId
04805             && uriId != fEmptyNamespaceId)
04806             anyEncountered = true;
04807     }
04808     else if (wildCardType == XMLAttDef::Any_List) {
04809         ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList();
04810         XMLSize_t listSize = (nameURIList) ? nameURIList->size() : 0;
04811 
04812         if (listSize) {
04813             for (XMLSize_t i=0; i < listSize; i++) {
04814                 if (nameURIList->elementAt(i) == uriId)
04815                     anyEncountered = true;
04816             }
04817         }
04818     }
04819 
04820     if (anyEncountered) {
04821         XMLAttDef::DefAttTypes   defType   = attWildCard->getDefaultType();
04822         if (defType == XMLAttDef::ProcessContents_Skip) {
04823             // attribute should just be bypassed,
04824             skipThisOne = true;
04825             if (getPSVIHandler())
04826             {
04827                 // REVISIT:
04828                 // PSVIAttribute->setValidationAttempted(PSVIItem::VALIDATION_NONE);
04829             }
04830         }
04831         else if (defType == XMLAttDef::ProcessContents_Lax) {
04832             laxThisOne = true;
04833         }
04834     }
04835 
04836     return anyEncountered;
04837 }
04838 
04839 inline XMLAttDefList& getAttDefList(ComplexTypeInfo* currType, XMLElementDecl* elemDecl)
04840 {
04841     if (currType)
04842         return currType->getAttDefList();
04843     else
04844         return elemDecl->getAttDefList();
04845 }
04846 
04847 void SGXMLScanner::endElementPSVI(SchemaElementDecl* const elemDecl,
04848                                   DatatypeValidator* const memberDV)
04849 {
04850     PSVIElement::ASSESSMENT_TYPE validationAttempted;
04851     PSVIElement::VALIDITY_STATE validity = PSVIElement::VALIDITY_NOTKNOWN;
04852 
04853     if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fFullValidationDepth)
04854         validationAttempted = PSVIElement::VALIDATION_FULL;
04855     else if (fPSVIElemContext.fElemDepth > fPSVIElemContext.fNoneValidationDepth)
04856         validationAttempted = PSVIElement::VALIDATION_NONE;
04857     else
04858     {
04859         validationAttempted  = PSVIElement::VALIDATION_PARTIAL;
04860                 fPSVIElemContext.fFullValidationDepth =
04861             fPSVIElemContext.fNoneValidationDepth = fPSVIElemContext.fElemDepth - 1;
04862     }
04863 
04864     if (fValidate && elemDecl->isDeclared())
04865     {
04866         validity = (fPSVIElemContext.fErrorOccurred)
04867             ? PSVIElement::VALIDITY_INVALID : PSVIElement::VALIDITY_VALID;
04868     }
04869 
04870     XSTypeDefinition* typeDef = 0;
04871     bool isMixed = false;
04872     if (fPSVIElemContext.fCurrentTypeInfo)
04873     {
04874         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentTypeInfo);
04875         SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes)fPSVIElemContext.fCurrentTypeInfo->getContentType();
04876         isMixed = (modelType == SchemaElementDecl::Mixed_Simple
04877                 || modelType == SchemaElementDecl::Mixed_Complex);
04878     }
04879     else if (fPSVIElemContext.fCurrentDV)
04880         typeDef = (XSTypeDefinition*) fModel->getXSObject(fPSVIElemContext.fCurrentDV);
04881 
04882     XMLCh* canonicalValue = 0;
04883     if (fPSVIElemContext.fNormalizedValue && !isMixed &&
04884             validity == PSVIElement::VALIDITY_VALID)
04885     {
04886         if (memberDV)
04887             canonicalValue = (XMLCh*) memberDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
04888         else if (fPSVIElemContext.fCurrentDV)
04889             canonicalValue = (XMLCh*) fPSVIElemContext.fCurrentDV->getCanonicalRepresentation(fPSVIElemContext.fNormalizedValue, fMemoryManager);
04890     }
04891 
04892     fPSVIElement->reset
04893     (
04894         validity
04895         , validationAttempted
04896         , fRootElemName
04897         , fPSVIElemContext.fIsSpecified
04898         , (elemDecl->isDeclared())
04899             ? (XSElementDeclaration*) fModel->getXSObject(elemDecl) : 0
04900         , typeDef
04901         , (memberDV) ? (XSSimpleTypeDefinition*) fModel->getXSObject(memberDV) : 0
04902         , fModel
04903         , elemDecl->getDefaultValue()
04904         , fPSVIElemContext.fNormalizedValue
04905         , canonicalValue
04906     );
04907 
04908     fPSVIHandler->handleElementPSVI
04909     (
04910         elemDecl->getBaseName()
04911         , fURIStringPool->getValueForId(elemDecl->getURI())
04912         , fPSVIElement
04913     );
04914 
04915     // decrease element depth
04916     fPSVIElemContext.fElemDepth--;
04917 
04918 }
04919 
04920 void SGXMLScanner::resetPSVIElemContext()
04921 {
04922     fPSVIElemContext.fIsSpecified = false;
04923     fPSVIElemContext.fErrorOccurred = false;
04924     fPSVIElemContext.fElemDepth = -1;
04925     fPSVIElemContext.fFullValidationDepth = -1;
04926     fPSVIElemContext.fNoneValidationDepth = -1;
04927     fPSVIElemContext.fCurrentDV = 0;
04928     fPSVIElemContext.fCurrentTypeInfo = 0;
04929     fPSVIElemContext.fNormalizedValue = 0;
04930 }
04931 
04932 XERCES_CPP_NAMESPACE_END