GME: DGXMLScanner.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: DGXMLScanner.cpp 833045 2009-11-05 13:21:27Z borisk $
00020  */
00021 
00022 
00023 // ---------------------------------------------------------------------------
00024 //  Includes
00025 // ---------------------------------------------------------------------------
00026 #include <xercesc/internal/DGXMLScanner.hpp>
00027 #include <xercesc/util/Janitor.hpp>
00028 #include <xercesc/util/RuntimeException.hpp>
00029 #include <xercesc/util/UnexpectedEOFException.hpp>
00030 #include <xercesc/util/XMLUri.hpp>
00031 #include <xercesc/framework/URLInputSource.hpp>
00032 #include <xercesc/framework/LocalFileInputSource.hpp>
00033 #include <xercesc/framework/XMLDocumentHandler.hpp>
00034 #include <xercesc/framework/XMLEntityHandler.hpp>
00035 #include <xercesc/framework/XMLPScanToken.hpp>
00036 #include <xercesc/framework/XMLGrammarPool.hpp>
00037 #include <xercesc/framework/XMLDTDDescription.hpp>
00038 #include <xercesc/internal/EndOfEntityException.hpp>
00039 #include <xercesc/validators/common/GrammarResolver.hpp>
00040 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
00041 #include <xercesc/validators/DTD/DTDScanner.hpp>
00042 #include <xercesc/validators/DTD/DTDValidator.hpp>
00043 #include <xercesc/util/OutOfMemoryException.hpp>
00044 #include <xercesc/util/XMLResourceIdentifier.hpp>
00045 
00046 XERCES_CPP_NAMESPACE_BEGIN
00047 
00048 
// Guard types built on JanitorMemFunCall. In this file each one is constructed
// from an object pointer plus a member-function pointer (cleanUp() for the
// scanner, reset() for the ReaderMgr) and release() is called on it on the
// paths where that call must not be made.
// NOTE(review): presumably the bound member function is invoked when the guard
// goes out of scope without release() -- confirm against Janitor.hpp.
00049 typedef JanitorMemFunCall<DGXMLScanner> CleanupType;
00050 typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;
00051 
00052 
00053 // ---------------------------------------------------------------------------
00054 //  DGXMLScanner: Constructors and Destructor
00055 // ---------------------------------------------------------------------------
// Builds a scanner from a validator to adopt, a grammar resolver and a memory
// manager; all three are forwarded unchanged to the XMLScanner base class.
// Every member in the initializer list starts out as zero, and the body then
// calls commonInit().
00056 DGXMLScanner::DGXMLScanner(XMLValidator* const valToAdopt
00057                          , GrammarResolver* const grammarResolver
00058                          , MemoryManager* const manager) :
00059 
00060     XMLScanner(valToAdopt, grammarResolver, manager)
00061     , fAttrNSList(0)
00062     , fDTDValidator(0)
00063     , fDTDGrammar(0)
00064     , fDTDElemNonDeclPool(0)
00065     , fElemCount(0)
00066     , fAttDefRegistry(0)
00067     , fUndeclaredAttrRegistry(0)
00068 {
          // Guard bound to cleanUp(). It is released on success and on
          // out-of-memory below, so it stays armed only when commonInit()
          // exits with some other exception.
00069     CleanupType cleanup(this, &DGXMLScanner::cleanUp);
00070 
00071     try
00072     {
00073         commonInit();
00074     }
00075     catch(const OutOfMemoryException&)
00076     {
00077         // Don't clean up when out of memory, since executing the
00078         // cleanup code can cause problems.
00079         cleanup.release();
00080 
00081         throw;
00082     }
00083 
          // Initialization succeeded, so the guard must not fire.
00084     cleanup.release();
00085 }
00086 
// Builds a scanner with the handlers supplied up front. The document,
// doctype, entity and error handlers, the validator to adopt, the grammar
// resolver and the memory manager are all forwarded unchanged to the
// XMLScanner base class. Every member in the initializer list starts out as
// zero, and the body then calls commonInit().
00087 DGXMLScanner::DGXMLScanner( XMLDocumentHandler* const docHandler
00088                           , DocTypeHandler* const     docTypeHandler
00089                           , XMLEntityHandler* const   entityHandler
00090                           , XMLErrorReporter* const   errHandler
00091                           , XMLValidator* const       valToAdopt
00092                           , GrammarResolver* const    grammarResolver
00093                           , MemoryManager* const      manager) :
00094 
00095     XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
00096     , fAttrNSList(0)
00097     , fDTDValidator(0)
00098     , fDTDGrammar(0)
00099     , fDTDElemNonDeclPool(0)
00100     , fElemCount(0)
00101     , fAttDefRegistry(0)
00102     , fUndeclaredAttrRegistry(0)
00103 {
          // Guard bound to cleanUp(). It is released on success and on
          // out-of-memory below, so it stays armed only when commonInit()
          // exits with some other exception.
00104     CleanupType cleanup(this, &DGXMLScanner::cleanUp);
00105 
00106     try
00107     {
00108         commonInit();
00109     }
00110     catch(const OutOfMemoryException&)
00111     {
00112         // Don't clean up when out of memory, since executing the
00113         // cleanup code can cause problems.
00114         cleanup.release();
00115 
00116         throw;
00117     }
00118 
          // Initialization succeeded, so the guard must not fire.
00119     cleanup.release();
00120 }
00121 
// Destructor. All teardown is delegated to cleanUp(), the same routine the
// constructors bind to their cleanup guard.
00122 DGXMLScanner::~DGXMLScanner()
00123 {
00124     cleanUp();
00125 }
00126 
00127 // ---------------------------------------------------------------------------
00128 //  DGXMLScanner: Getter methods
00129 // ---------------------------------------------------------------------------
// Returns the entity declaration pool of the current grammar, or 0 when no
// grammar is set.
// NOTE(review): fGrammar is cast to DTDGrammar without any type check --
// presumably this scanner only ever holds DTD grammars; confirm.
00130 NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool()
00131 {
00132     if(!fGrammar)
00133         return 0;
00134     return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
00135 }
00136 
// Const overload: returns the entity declaration pool of the current grammar
// as a pointer-to-const, or 0 when no grammar is set.
// NOTE(review): fGrammar is cast to DTDGrammar without any type check --
// presumably this scanner only ever holds DTD grammars; confirm.
00137 const NameIdPool<DTDEntityDecl>* DGXMLScanner::getEntityDeclPool() const
00138 {
00139     if(!fGrammar)
00140         return 0;
00141     return ((DTDGrammar*)fGrammar)->getEntityDeclPool();
00142 }
00143 
00144 // ---------------------------------------------------------------------------
00145 //  DGXMLScanner: Main entry point to scan a document
00146 // ---------------------------------------------------------------------------
// Scans the complete document supplied by `src` in a single call.
//
// Order of work: bump fSequenceId, scanReset(src), startDocument(),
// scanProlog(), then either report EmptyMainEntity (input is already at EOF
// after the prolog) or run scanContent(). When scanContent() returns true,
// checkIDRefs() runs if fValidate is set and scanMiscellaneous() runs if input
// remains. endDocument() is the last step inside the try block, so it is not
// reached when an exception leaves the steps before it.
//
// Exceptions: XMLErrs::Codes and XMLValid::Codes are swallowed; an
// XMLException is reported through emitError() and not rethrown; an
// OutOfMemoryException is rethrown after the ReaderMgr reset guard has been
// released.
00147 void DGXMLScanner::scanDocument(const InputSource& src)
00148 {
00149     //  Bump up the sequence id for this parser instance. This will invalidate
00150     //  any previous progressive scan tokens.
00151     fSequenceId++;
00152 
          //  Guard bound to fReaderMgr.reset(). It is released only on the
          //  out-of-memory paths below.
00153     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
00154 
00155     try
00156     {
00157         //  Reset the scanner and its plugged in stuff for a new run. This
00158         //  resets all the data structures, creates the initial reader and
00159         //  pushes it on the stack, and sets up the base document path.
00160         scanReset(src);
00161 
00162         // If we have a document handler, then call the start document
00163         if (fDocHandler)
00164             fDocHandler->startDocument();
00165 
00166         //  Scan the prolog part, which is everything before the root element
00167         //  including the DTD subsets.
00168         scanProlog();
00169 
00170         //  If we got to the end of input, then it's not a valid XML file.
00171         //  Else, go on to scan the content.
00172         if (fReaderMgr.atEOF())
00173         {
00174             emitError(XMLErrs::EmptyMainEntity);
00175         }
00176         else
00177         {
00178             // Scan the content; a false return skips the post-parse steps
00179             if (scanContent())
00180             {
00181                 // Do post-parse validation if required
00182                 if (fValidate)
00183                 {
00184                     //  We handle ID reference semantics at this level since
00185                     //  it's required by XML 1.0.
00186                     checkIDRefs();
00187 
00188                     // Then allow the validator to do any extra stuff it wants
00189 //                    fValidator->postParseValidation();
00190                 }
00191 
00192                 // That went ok, so scan for any miscellaneous stuff
00193                 if (!fReaderMgr.atEOF())
00194                     scanMiscellaneous();
00195             }
00196         }
00197 
00198         // If we have a document handler, then call the end document
00199         if (fDocHandler)
00200             fDocHandler->endDocument();
00201     }
00202     //  NOTE:
00203     //
00204     //  In all of the error processing below, the emitError() call MUST come
00205     //  before the flush of the reader mgr, or it will fail because it tries
00206     //  to find out the position in the XML source of the error.
00207     catch(const XMLErrs::Codes)
00208     {
00209         // This is a 'first failure' exception, so fall through
00210     }
00211     catch(const XMLValid::Codes)
00212     {
00213         // This is a 'first fatal error' type exit, so fall through
00214     }
00215     catch(const XMLException& excToCatch)
00216     {
00217         //  Emit the error and catch any user exception thrown from here. Make
00218         //  sure in all cases we flush the reader manager.
              //  fInException is set here and is not cleared in this function.
00219         fInException = true;
00220         try
00221         {
                  //  Map the exception's error type onto the matching
                  //  warning / fatal / error code, passing its code and
                  //  message along.
00222             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
00223                 emitError
00224                 (
00225                     XMLErrs::XMLException_Warning
00226                     , excToCatch.getCode()
00227                     , excToCatch.getMessage()
00228                 );
00229             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
00230                 emitError
00231                 (
00232                     XMLErrs::XMLException_Fatal
00233                     , excToCatch.getCode()
00234                     , excToCatch.getMessage()
00235                 );
00236             else
00237                 emitError
00238                 (
00239                     XMLErrs::XMLException_Error
00240                     , excToCatch.getCode()
00241                     , excToCatch.getMessage()
00242                 );
00243         }
00244         catch(const OutOfMemoryException&)
00245         {
00246             // This is a special case for out-of-memory
00247             // conditions, because resetting the ReaderMgr
00248             // can be problematic.
00249             resetReaderMgr.release();
00250 
00251             throw;
00252         }
00253     }
00254     catch(const OutOfMemoryException&)
00255     {
00256         // This is a special case for out-of-memory
00257         // conditions, because resetting the ReaderMgr
00258         // can be problematic.
00259         resetReaderMgr.release();
00260 
00261         throw;
00262     }
00263 }
00264 
00265 
// Progressive-scan step: senses the next top-level token and scans it.
//
// `token` must pass isLegalToken(); otherwise a RuntimeException with code
// Scan_BadPScanToken is thrown before anything is scanned.
//
// Returns false when the token is Token_EOF or when an XMLErrs::Codes,
// XMLValid::Codes or XMLException is caught; returns true otherwise. Note
// that closing the root element still returns true: in that call the trailing
// miscellaneous content is scanned and endDocument() is sent.
// OutOfMemoryException is rethrown after the ReaderMgr reset guard has been
// released.
00266 bool DGXMLScanner::scanNext(XMLPScanToken& token)
00267 {
00268     // Make sure this token is still legal
00269     if (!isLegalToken(token))
00270         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);
00271 
00272     // Find the next token and remember the reader id
          // (both are assigned by senseNextToken() in the loop below)
00273     XMLSize_t orgReader;
00274     XMLTokens curToken;
00275 
          //  Guard bound to fReaderMgr.reset(). It is released on the
          //  out-of-memory paths and, at the bottom, whenever we return true.
00276     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
00277 
00278     bool retVal = true;
00279 
00280     try
00281     {
00282         while (true)
00283         {
00284             //  We have to handle any end of entity exceptions that happen here.
00285             //  We could be at the end of X nested entities, each of which will
00286             //  generate an end of entity exception as we try to move forward.
00287             try
00288             {
00289                 curToken = senseNextToken(orgReader);
00290                 break;
00291             }
00292             catch(const EndOfEntityException& toCatch)
00293             {
00294                 // Send an end of entity reference event
00295                 if (fDocHandler)
00296                     fDocHandler->endEntityReference(toCatch.getEntity());
00297             }
00298         }
00299 
00300         if (curToken == Token_CharData)
00301         {
00302             scanCharData(fCDataBuf);
00303         }
00304         else if (curToken == Token_EOF)
00305         {
                  //  Elements still on the stack at EOF were never closed;
                  //  report the top one.
00306             if (!fElemStack.isEmpty())
00307             {
00308                 const ElemStack::StackElem* topElem = fElemStack.popTop();
00309                 emitError
00310                 (
00311                     XMLErrs::EndedWithTagsOnStack
00312                     , topElem->fThisElement->getFullName()
00313                 );
00314             }
00315 
00316             retVal = false;
00317         }
00318         else
00319         {
00320             // It's some sort of markup
                  // (gotData is cleared by scanEndTag() when the root element
                  // is closed; the start-tag scanners receive it as well)
00321             bool gotData = true;
00322             switch(curToken)
00323             {
00324                 case Token_CData :
00325                     // Make sure we are within content
00326                     if (fElemStack.isEmpty())
00327                         emitError(XMLErrs::CDATAOutsideOfContent);
00328                     scanCDSection();
00329                     break;
00330 
00331                 case Token_Comment :
00332                     scanComment();
00333                     break;
00334 
00335                 case Token_EndTag :
00336                     scanEndTag(gotData);
00337                     break;
00338 
00339                 case Token_PI :
00340                     scanPI();
00341                     break;
00342 
00343                 case Token_StartTag :
00344                     if (fDoNamespaces)
00345                         scanStartTagNS(gotData);
00346                     else
00347                         scanStartTag(gotData);
00348                     break;
00349 
00350                 default :
                          // Unrecognized token: skip ahead to the next '<'
00351                     fReaderMgr.skipToChar(chOpenAngle);
00352                     break;
00353             }
00354 
                  //  The markup must end in the same reader it started in
00355             if (orgReader != fReaderMgr.getCurrentReaderNum())
00356                 emitError(XMLErrs::PartialMarkupInEntity);
00357 
00358             // If we hit the end, then do the miscellaneous part
00359             if (!gotData)
00360             {
00361                 // Do post-parse validation if required
00362                 if (fValidate)
00363                 {
00364                     //  We handle ID reference semantics at this level since
00365                     //  it's required by XML 1.0.
00366                     checkIDRefs();
00367 
00368                     // Then allow the validator to do any extra stuff it wants
00369 //                    fValidator->postParseValidation();
00370                 }
00371 
00372                 // That went ok, so scan for any miscellaneous stuff
00373                 scanMiscellaneous();
00374 
00375                 if (fDocHandler)
00376                     fDocHandler->endDocument();
00377             }
00378         }
00379     }
00380     //  NOTE:
00381     //
00382     //  In all of the error processing below, the emitError() call MUST come
00383     //  before the flush of the reader mgr, or it will fail because it tries
00384     //  to find out the position in the XML source of the error.
00385     catch(const XMLErrs::Codes)
00386     {
00387         // This is a 'first failure' exception, so return failure
00388         retVal = false;
00389     }
00390     catch(const XMLValid::Codes)
00391     {
00392         // This is a 'first fatal error' type exit, so return failure
00393         retVal = false;
00394     }
00395     catch(const XMLException& excToCatch)
00396     {
00397         //  Emit the error and catch any user exception thrown from here. Make
00398         //  sure in all cases we flush the reader manager.
              //  fInException is set here and is not cleared in this function.
00399         fInException = true;
00400         try
00401         {
                  //  Map the exception's error type onto the matching
                  //  warning / fatal / error code, passing its code and
                  //  message along.
00402             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
00403                 emitError
00404                 (
00405                     XMLErrs::XMLException_Warning
00406                     , excToCatch.getCode()
00407                     , excToCatch.getMessage()
00408                 );
00409             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
00410                 emitError
00411                 (
00412                     XMLErrs::XMLException_Fatal
00413                     , excToCatch.getCode()
00414                     , excToCatch.getMessage()
00415                 );
00416             else
00417                 emitError
00418                 (
00419                     XMLErrs::XMLException_Error
00420                     , excToCatch.getCode()
00421                     , excToCatch.getMessage()
00422                 );
00423         }
00424         catch(const OutOfMemoryException&)
00425         {
00426             // This is a special case for out-of-memory
00427             // conditions, because resetting the ReaderMgr
00428             // can be problematic.
00429             resetReaderMgr.release();
00430 
00431             throw;
00432         }
00433 
00434         retVal = false;
00435     }
00436     catch(const OutOfMemoryException&)
00437     {
00438         // This is a special case for out-of-memory
00439         // conditions, because resetting the ReaderMgr
00440         // can be problematic.
00441         resetReaderMgr.release();
00442 
00443         throw;
00444     }
00445 
00446     // If we are not at the end, release the object that will
00447     // reset the ReaderMgr.
00448     if (retVal)
00449         resetReaderMgr.release();
00450 
00451     return retVal;
00452 }
00453 
00454 
00455 // ---------------------------------------------------------------------------
00456 //  DGXMLScanner: Private scanning methods
00457 // ---------------------------------------------------------------------------
00458 
00459 //  This method will kick off the scanning of the primary content of the
00460 //  document, i.e. the elements.
      //
      //  It loops until gotData is cleared, which happens at Token_EOF or when
      //  one of the tag scanners clears it through its bool& argument.
      //  EndOfEntityException is handled here and scanning resumes; any other
      //  exception propagates to the caller. The function always returns true
      //  when it returns normally.
00461 bool DGXMLScanner::scanContent()
00462 {
00463     //  Go into a loop until we hit the end of the root element, or we fall
00464     //  out because there is no root element.
00465     //
00466     //  We have to do kind of a deeply nested double loop here in order to
00467     //  avoid doing the setup/teardown of the exception handler on each
00468     //  round. Doing it this way we only do it when an exception actually
00469     //  occurs.
00470     bool gotData = true;
00471     bool inMarkup = false;
00472     while (gotData)
00473     {
00474         try
00475         {
00476             while (gotData)
00477             {
00478                 //  Sense what the next top level token is. According to what
00479                 //  this tells us, we will call something to handle that kind
00480                 //  of thing.
00481                 XMLSize_t orgReader;
00482                 const XMLTokens curToken = senseNextToken(orgReader);
00483 
00484                 //  Handle character data and end of file specially. Char data
00485                 //  is not markup so we don't want to handle it in the loop
00486                 //  below.
00487                 if (curToken == Token_CharData)
00488                 {
00489                     //  Scan the character data and call appropriate events. Let
00490                     //  him use our local character data buffer for efficiency.
00491                     scanCharData(fCDataBuf);
00492                     continue;
00493                 }
00494                 else if (curToken == Token_EOF)
00495                 {
00496                     //  The element stack better be empty at this point or we
00497                     //  ended prematurely before all elements were closed.
00498                     if (!fElemStack.isEmpty())
00499                     {
00500                         const ElemStack::StackElem* topElem = fElemStack.popTop();
00501                         emitError
00502                         (
00503                             XMLErrs::EndedWithTagsOnStack
00504                             , topElem->fThisElement->getFullName()
00505                         );
00506                     }
00507 
00508                     // It's the end of file, so clear the got data flag
00509                     gotData = false;
00510                     continue;
00511                 }
00512 
00513                 // We are in some sort of markup now
00514                 inMarkup = true;
00515 
00516                 //  According to the token we got, call the appropriate
00517                 //  scanning method.
00518                 switch(curToken)
00519                 {
00520                     case Token_CData :
00521                         // Make sure we are within content
00522                         if (fElemStack.isEmpty())
00523                             emitError(XMLErrs::CDATAOutsideOfContent);
00524                         scanCDSection();
00525                         break;
00526 
00527                     case Token_Comment :
00528                         scanComment();
00529                         break;
00530 
00531                     case Token_EndTag :
00532                         scanEndTag(gotData);
00533                         break;
00534 
00535                     case Token_PI :
00536                         scanPI();
00537                         break;
00538 
00539                     case Token_StartTag :
00540                         if (fDoNamespaces)
00541                             scanStartTagNS(gotData);
00542                         else
00543                             scanStartTag(gotData);
00544                         break;
00545 
00546                     default :
                              // Unrecognized token: skip ahead to the next '<'
00547                         fReaderMgr.skipToChar(chOpenAngle);
00548                         break;
00549                 }
00550 
                      //  The markup must end in the same reader it started in
00551                 if (orgReader != fReaderMgr.getCurrentReaderNum())
00552                     emitError(XMLErrs::PartialMarkupInEntity);
00553 
00554                 // And we are back out of markup again
00555                 inMarkup = false;
00556             }
00557         }
00558         catch(const EndOfEntityException& toCatch)
00559         {
00560             //  If we were in some markup when this happened, then it's a
00561             //  partial markup error.
00562             if (inMarkup)
00563                 emitError(XMLErrs::PartialMarkupInEntity);
00564 
00565             // Send an end of entity reference event
00566             if (fDocHandler)
00567                 fDocHandler->endEntityReference(toCatch.getEntity());
00568 
                  //  Clear the flag so the outer loop re-enters the try block
                  //  in a clean state.
00569             inMarkup = false;
00570         }
00571     }
00572 
00573     // It went ok, so return success
00574     return true;
00575 }
00576 
00577 
// Scans an end tag and closes the element on top of the element stack.
//
// `gotData` is an out-parameter: it is set to false only when the element
// being closed was the last one on the stack (the root); on every other
// return path it is true.
//
// If the element stack is already empty, MoreEndThanStartTags is emitted, the
// input is skipped past '>' and a RuntimeException with code
// Scan_UnbalancedStartEnd is thrown. If the tag name does not match the
// element on top of the stack, ExpectedEndOfTagX is emitted and the function
// returns early: the element has already been popped, but no validation is
// run and endElement() is not sent.
00578 void DGXMLScanner::scanEndTag(bool& gotData)
00579 {
00580     //  Assume we will still have data until proven otherwise. It will only
00581     //  ever be false if this is the end of the root element.
00582     gotData = true;
00583 
00584     //  Check if the element stack is empty. If so, then this is an unbalanced
00585     //  element (i.e. more ends than starts, perhaps because of bad text
00586     //  causing one to be skipped.)
00587     if (fElemStack.isEmpty())
00588     {
00589         emitError(XMLErrs::MoreEndThanStartTags);
00590         fReaderMgr.skipPastChar(chCloseAngle);
00591         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
00592     }
00593 
00594     //  Capture the URI id for the endElement() callback before the element
00595     //  is popped; fEmptyNamespaceId is used when namespaces are off.
00596     unsigned int uriId = (fDoNamespaces)
00597         ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
00598 
00599     //  Pop the stack of the element we are supposed to be ending. Remember
00600     //  that we don't own this. The stack just keeps them and reuses them.
00601     const ElemStack::StackElem* topElem = fElemStack.popTop();
00602     XMLElementDecl *tempElement = topElem->fThisElement;
00603 
00604     // See if it was the root element, to avoid multiple calls below
00605     const bool isRoot = fElemStack.isEmpty();
00606 
00607     // Make sure that it's the end of the element that we expect
00608     if (!fReaderMgr.skippedStringLong(tempElement->getFullName()))
00609     {
00610         emitError
00611         (
00612             XMLErrs::ExpectedEndOfTagX
00613             , tempElement->getFullName()
00614         );
00615         fReaderMgr.skipPastChar(chCloseAngle);
00616         return;
00617     }
00618 
00619     // Make sure we are back on the same reader as where we started
00620     if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
00621         emitError(XMLErrs::PartialTagMarkupError);
00622 
00623     // Skip optional whitespace
00624     fReaderMgr.skipPastSpaces();
00625 
00626     // Make sure we find the closing bracket
          // (a missing '>' is reported, but processing continues)
00627     if (!fReaderMgr.skippedChar(chCloseAngle))
00628     {
00629         emitError
00630         (
00631             XMLErrs::UnterminatedEndTag
00632             , topElem->fThisElement->getFullName()
00633         );
00634     }
00635 
00636     //  If validation is enabled, then let's pass him the list of children and
00637     //  this element and let him validate it.
00638     if (fValidate)
00639     {
00640 
00641        //
00642        // XML1.0-3rd
00643        // Validity Constraint:
00644        // The declaration matches EMPTY and the element has no content (not even
00645        // entity references, comments, PIs or white space).
00646        //
00647        if ( (topElem->fCommentOrPISeen)               &&
00648             (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Empty))
00649        {
00650            fValidator->emitError
00651                (
00652                XMLValid::EmptyElemHasContent
00653                , topElem->fThisElement->getFullName()
00654                );
00655        }
00656 
00657        //
00658        // XML1.0-3rd
00659        // Validity Constraint:
00660        //
00661        // The declaration matches children and the sequence of child elements
00662        // belongs to the language generated by the regular expression in the
00663        // content model, with optional white space, comments and PIs
00664        // (i.e. markup matching production [27] Misc) between the start-tag and
00665        // the first child element, between child elements, or between the last
00666        // child element and the end-tag.
00667        //
00668        // Note that
00669        //    a CDATA section containing only white space or
00670        //    a reference to an entity whose replacement text is character references
00671        //       expanding to white space do not match the nonterminal S, and hence
00672        //       cannot appear in these positions; however,
00673        //    a reference to an internal entity with a literal value consisting
00674        //       of character references expanding to white space does match S,
00675        //       since its replacement text is the white space resulting from expansion
00676        //       of the character references.
00677        //
00678        if ( (topElem->fReferenceEscaped)               &&
00679             (((DTDElementDecl*) topElem->fThisElement)->getModelType() == DTDElementDecl::Children))
00680        {
00681            fValidator->emitError
00682                (
00683                XMLValid::ElemChildrenHasInvalidWS
00684                , topElem->fThisElement->getFullName()
00685                );
00686        }
00687 
              //  Check the recorded children against the content model;
              //  `failure` is passed by address so the validator can fill it in.
00688         XMLSize_t failure;
00689         bool res = fValidator->checkContent
00690         (
00691             topElem->fThisElement
00692             , topElem->fChildren
00693             , topElem->fChildCount
00694             , &failure
00695         );
00696 
00697         if (!res)
00698         {
00699             //  One of the elements is not valid for the content. NOTE that
00700             //  if no children were provided but the content model requires
00701             //  them, it comes back with a zero value. But we cannot use that
00702             //  to index the child array in this case, and have to put out a
00703             //  special message.
00704             if (!topElem->fChildCount)
00705             {
00706                 fValidator->emitError
00707                 (
00708                     XMLValid::EmptyNotValidForContent
00709                     , topElem->fThisElement->getFormattedContentModel()
00710                 );
00711             }
00712             else if (failure >= topElem->fChildCount)
00713             {
                      //  The reported index lies past the last child, so there
                      //  is no child to name in the message.
00714                 fValidator->emitError
00715                 (
00716                     XMLValid::NotEnoughElemsForCM
00717                     , topElem->fThisElement->getFormattedContentModel()
00718                 );
00719             }
00720             else
00721             {
00722                 fValidator->emitError
00723                 (
00724                     XMLValid::ElementNotValidForContent
00725                     , topElem->fChildren[failure]->getRawName()
00726                     , topElem->fThisElement->getFormattedContentModel()
00727                 );
00728             }
00729         }
00730     }
00731 
00732     // If we have a doc handler, tell it about the end tag
          // (the prefix is only looked up when namespaces are enabled)
00733     if (fDocHandler)
00734     {
00735         fDocHandler->endElement
00736         (
00737             *topElem->fThisElement
00738             , uriId
00739             , isRoot
00740             , (fDoNamespaces)
00741                 ? topElem->fThisElement->getElementName()->getPrefix()
00742                 : XMLUni::fgZeroLenString
00743         );
00744     }
00745 
00746     // If this was the root, then done with content
00747     gotData = !isRoot;
00748 }
00749 
00750 
00751 //  This method handles the high level logic of scanning the DOCType
00752 //  declaration. This calls the DTDScanner and kicks off both the scanning of
00753 //  the internal subset and the scanning of the external subset, if any.
00754 //
00755 //  When we get here the '<!DOCTYPE' part has already been scanned, which is
00756 //  what told us that we had a doc type decl to parse.
      //
      //  Scans the remainder of the declaration in this order: mandatory
      //  whitespace, the root element name, an optional external id, an
      //  optional internal subset and the closing '>'. When an external id was
      //  given and either fLoadExternalDTD or fValidate is set, the external
      //  subset is then read through a new reader pushed onto fReaderMgr
      //  (unless a cached grammar is found for it first).
      //
      //  Side effects visible here: records the root name via setRootElemName(),
      //  registers a DTDElementDecl for the root, may switch fValidate on (when
      //  fValScheme is Val_Auto), clears fHasNoDTD when an external id is
      //  present, and may replace fGrammar/fDTDGrammar with a cached grammar.
      //  fDocTypeHandler, if set, receives resetDocType(), doctypeDecl() and,
      //  on the cached-grammar path, startExtSubset()/endExtSubset().
      //
      //  Error handling: syntax problems are reported with emitError() and the
      //  function recovers by skipping past the next '>' and returning. A
      //  RuntimeException (Gen_CouldNotOpenDTD) is thrown when no reader can be
      //  created for the external subset.
00757 void DGXMLScanner::scanDocTypeDecl()
00758 {
00759     if (fDocTypeHandler)
00760         fDocTypeHandler->resetDocType();
00761
00762     // There must be some space after DOCTYPE
00763     bool skippedSomething;
00764     fReaderMgr.skipPastSpaces(skippedSomething);
00765     if (!skippedSomething)
00766     {
00767         emitError(XMLErrs::ExpectedWhitespace);
00768
00769         // Just skip the Doctype declaration and return
00770         fReaderMgr.skipPastChar(chCloseAngle);
00771         return;
00772     }
00773
00774     // Get a buffer for the root element
00775     XMLBufBid bbRootName(&fBufMgr);
00776
00777     //  Get a name from the input, which should be the name of the root
00778     //  element of the upcoming content.
          //  colonPosition is only passed to getQName() and is not read again
          //  in this function.
00779     int  colonPosition;
00780     bool validName = fDoNamespaces ? fReaderMgr.getQName(bbRootName.getBuffer(), &colonPosition) :
00781                                      fReaderMgr.getName(bbRootName.getBuffer());
00782     if (!validName)
00783     {
              //  An empty buffer means no name was found at all; otherwise
              //  some text was collected but it is not a valid name.
00784         if (bbRootName.isEmpty())
00785             emitError(XMLErrs::NoRootElemInDOCTYPE);
00786         else
00787             emitError(XMLErrs::InvalidRootElemInDOCTYPE, bbRootName.getRawBuffer());
00788         fReaderMgr.skipPastChar(chCloseAngle);
00789         return;
00790     }
00791
00792     //  Store the root element name for later check
00793     setRootElemName(bbRootName.getRawBuffer());
00794
00795     //  This element obviously is not going to exist in the element decl
00796     //  pool yet, but we need to call docTypeDecl. So force it into
00797     //  the element decl pool, marked as being there because it was in
00798     //  the DOCTYPE. Later, when its declared, the status will be updated.
00799     //
00800     //  Only do this if we are not reusing the validator! If we are reusing,
00801     //  then look it up instead. It has to exist!
          //
          //  The memory manager follows the destination of the decl: with a
          //  cached grammar it ends up in fDTDElemNonDeclPool (fMemoryManager),
          //  otherwise in fGrammar (fGrammarPoolMemoryManager).
00802     MemoryManager* const  rootDeclMgr =
00803         fUseCachedGrammar ? fMemoryManager : fGrammarPoolMemoryManager;
00804
00805     DTDElementDecl* rootDecl = new (rootDeclMgr) DTDElementDecl
00806     (
00807         bbRootName.getRawBuffer()
00808         , fEmptyNamespaceId
00809         , DTDElementDecl::Any
00810         , rootDeclMgr
00811     );
00812
          //  The janitor guards rootDecl until it has been handed to one of the
          //  pools below; release() is called right after each hand-off.
00813     Janitor<DTDElementDecl> rootDeclJanitor(rootDecl);
00814     rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
00815     rootDecl->setExternalElemDeclaration(true);
00816     if(!fUseCachedGrammar)
00817     {
00818         fGrammar->putElemDecl(rootDecl);
00819         rootDeclJanitor.release();
00820     } else
00821     {
00822         // put this in the undeclared pool so it gets deleted...
00823         XMLElementDecl* elemDecl = fDTDElemNonDeclPool->getByKey(bbRootName.getRawBuffer());
00824         if (elemDecl)
00825         {
                  //  An entry with this name already exists: only borrow its
                  //  id. rootDecl is not put in the pool here, so it stays
                  //  under the janitor's control.
00826             rootDecl->setId(elemDecl->getId());
00827         }
00828         else
00829         {
00830             rootDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)rootDecl));
00831             rootDeclJanitor.release();
00832         }
00833     }
00834
00835     // Skip any spaces after the name
00836     fReaderMgr.skipPastSpaces();
00837
00838     //  And now if we are looking at a >, then we are done. It is not
00839     //  required to have an internal or external subset, though why you
00840     //  would not escapes me.
00841     if (fReaderMgr.skippedChar(chCloseAngle)) {
00842
00843         //  If we have a doc type handler and advanced callbacks are enabled,
00844         //  call the doctype event.
              //  No public/system id and no internal subset in this case.
00845         if (fDocTypeHandler)
00846             fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
00847         return;
00848     }
00849
00850     // either internal/external subset
          //  With the Val_Auto scheme, the presence of a subset is what turns
          //  validation on.
00851     if (fValScheme == Val_Auto && !fValidate)
00852         fValidate = true;
00853
00854     bool    hasIntSubset = false;
00855     bool    hasExtSubset = false;
00856     XMLCh*  sysId = 0;
00857     XMLCh*  pubId = 0;
00858
00859     DTDScanner dtdScanner
00860     (
00861         (DTDGrammar*) fGrammar
00862         , fDocTypeHandler
00863         , fGrammarPoolMemoryManager
00864         , fMemoryManager
00865     );
00866     dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
00867
00868     //  If the next character is '[' then we have no external subset cause
00869     //  there is no system id, just the opening character of the internal
00870     //  subset. Else, has to be an id.
00871     //
00872     // Just look at the next char, don't eat it.
00873     if (fReaderMgr.peekNextChar() == chOpenSquare)
00874     {
00875         hasIntSubset = true;
00876     }
00877     else
00878     {
00879         // Indicate we have an external subset
00880         hasExtSubset = true;
00881         fHasNoDTD = false;
00882
00883         // Get buffers for the ids
00884         XMLBufBid bbPubId(&fBufMgr);
00885         XMLBufBid bbSysId(&fBufMgr);
00886
00887         // Get the external subset id
              //  On failure nothing has been replicated yet, so the early
              //  return has no id copies to clean up.
00888         if (!dtdScanner.scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), DTDScanner::IDType_External))
00889         {
00890             fReaderMgr.skipPastChar(chCloseAngle);
00891             return;
00892         }
00893
00894         // Get copies of the ids we got
              //  The buffers above go out of scope with this block, so the
              //  ids are copied using fMemoryManager; the janitors declared
              //  after this block take charge of the copies.
00895         pubId = XMLString::replicate(bbPubId.getRawBuffer(), fMemoryManager);
00896         sysId = XMLString::replicate(bbSysId.getRawBuffer(), fMemoryManager);
00897
00898         // Skip spaces and check again for the opening of an internal subset
00899         fReaderMgr.skipPastSpaces();
00900
00901         // Just look at the next char, don't eat it.
00902         if (fReaderMgr.peekNextChar() == chOpenSquare) {
00903             hasIntSubset = true;
00904         }
00905     }
00906
00907     // Ensure that the ids get cleaned up, if they got allocated
00908     ArrayJanitor<XMLCh> janSysId(sysId, fMemoryManager);
00909     ArrayJanitor<XMLCh> janPubId(pubId, fMemoryManager);
00910
00911     //  If we have a doc type handler and advanced callbacks are enabled,
00912     //  call the doctype event.
00913     if (fDocTypeHandler)
00914         fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset, hasExtSubset);
00915
00916     //  Ok, if we had an internal subset, the next character is the [ (it
00917     //  was only peeked above) and we need to parse that subset first.
00918     if (hasIntSubset)
00919     {
00920         // Eat the opening square bracket
00921         fReaderMgr.getNextChar();
00922
              //  checkInternalDTD() is defined elsewhere; it is given the
              //  external-subset flag and both ids.
00923         checkInternalDTD(hasExtSubset, sysId, pubId);
00924
00925         //  And try to scan the internal subset. If we fail, try to recover
00926         //  by skipping forward to the close angle and returning.
00927         if (!dtdScanner.scanInternalSubset())
00928         {
00929             fReaderMgr.skipPastChar(chCloseAngle);
00930             return;
00931         }
00932
00933         //  Do a sanity check that some expanded PE did not propagate out of
00934         //  the doctype. This could happen if it was terminated early by bad
00935         //  syntax.
00936         if (fReaderMgr.getReaderDepth() > 1)
00937         {
00938             emitError(XMLErrs::PEPropogated);
00939
00940             // Ask the reader manager to pop back down to the main level
00941             fReaderMgr.cleanStackBackTo(1);
00942         }
00943
00944         fReaderMgr.skipPastSpaces();
00945     }
00946
00947     // And that should leave us at the closing > of the DOCTYPE line
00948     if (!fReaderMgr.skippedChar(chCloseAngle))
00949     {
00950         //  Do a special check for the common scenario of an extra ] char at
00951         //  the end. This is easy to recover from.
              //  The && short-circuits: when a ']' is skipped but no '>'
              //  follows, the else branch runs with the ']' already consumed.
00952         if (fReaderMgr.skippedChar(chCloseSquare)
00953         &&  fReaderMgr.skippedChar(chCloseAngle))
00954         {
00955             emitError(XMLErrs::ExtraCloseSquare);
00956         }
00957          else
00958         {
00959             emitError(XMLErrs::UnterminatedDOCTYPE);
00960             fReaderMgr.skipPastChar(chCloseAngle);
00961         }
00962     }
00963
00964     //  If we had an external subset, then we need to deal with that one
00965     //  next. If we are reusing the validator, then don't scan it.
00966     if (hasExtSubset) {
00967
              //  janSrc starts out guarding a null pointer; it is pointed at
              //  the real source with reset() once one has been obtained.
00968         InputSource* srcUsed=0;
00969         Janitor<InputSource> janSrc(srcUsed);
00970         // If we had an internal subset and we're using the cached grammar, it
00971         // means that the ignoreCachedDTD is set, so we ignore the cached
00972         // grammar
00973         if (fUseCachedGrammar && !hasIntSubset)
00974         {
00975             srcUsed = resolveSystemId(sysId, pubId);
00976             if (srcUsed) {
00977                 janSrc.reset(srcUsed);
                      //  The cached grammar is looked up by the system id of
                      //  the resolved source, not by the raw sysId text.
00978                 Grammar* grammar = fGrammarResolver->getGrammar(srcUsed->getSystemId());
00979
00980                 if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType) {
00981
00982                     fDTDGrammar = (DTDGrammar*) grammar;
00983                     fGrammar = fDTDGrammar;
00984                     fValidator->setGrammar(fGrammar);
00985                     // If we don't report at least the external subset boundaries,
00986                     // an advanced document handler cannot know when the DTD ends,
00987                     // since we've already sent a doctype decl that indicates
00988                     // there's an external subset.
00989                     if (fDocTypeHandler)
00990                     {
00991                         fDocTypeHandler->startExtSubset();
00992                         fDocTypeHandler->endExtSubset();
00993                     }
00994
                          //  The cached grammar stands in for the external
                          //  subset, so nothing is read from the source.
00995                     return;
00996                 }
00997             }
00998         }
00999
              //  The external subset is only read when loading it was
              //  requested or when validating.
01000         if (fLoadExternalDTD || fValidate)
01001         {
01002             // And now create a reader to read this entity
01003             XMLReader* reader;
                  //  A source already resolved above is read directly;
                  //  otherwise the reader manager is given the ids.
01004             if(srcUsed) {
01005                 reader = fReaderMgr.createReader
01006                         (
01007                             *srcUsed
01008                             , false
01009                             , XMLReader::RefFrom_NonLiteral
01010                             , XMLReader::Type_General
01011                             , XMLReader::Source_External
01012                             , fCalculateSrcOfs
01013                             , fLowWaterMark
01014                         );
01015             }
01016             else {
                      //  srcUsed is null going in and is put under janSrc's
                      //  control afterwards; presumably this createReader()
                      //  overload fills it in -- confirm against ReaderMgr.
01017                 reader = fReaderMgr.createReader
01018                         (
01019                             sysId
01020                             , pubId
01021                             , false
01022                             , XMLReader::RefFrom_NonLiteral
01023                             , XMLReader::Type_General
01024                             , XMLReader::Source_External
01025                             , srcUsed
01026                             , fCalculateSrcOfs
01027                             , fLowWaterMark
01028                             , fDisableDefaultEntityResolution
01029                         );
01030                 janSrc.reset(srcUsed);
01031             }
01032             //  If it failed then throw an exception
01033             if (!reader)
01034                 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed ? srcUsed->getSystemId() : sysId, fMemoryManager);
01035
                  //  When caching, re-register fGrammar with the external
                  //  subset's system id in its description: the id string is
                  //  run through the resolver's string pool, the grammar
                  //  under XMLUni::fgDTDEntityString is orphaned, and
                  //  fGrammar is put back.
                  //  NOTE(review): srcUsed is dereferenced without a null
                  //  check; this relies on it being non-null whenever a
                  //  reader was created -- confirm.
01036             if (fToCacheGrammar) {
01037
01038                 unsigned int stringId = fGrammarResolver->getStringPool()->addOrFind(srcUsed->getSystemId());
01039                 const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(stringId);
01040
01041                 fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
01042                 ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
01043                 fGrammarResolver->putGrammar(fGrammar);
01044             }
01045
01046             //  In order to make the processing work consistently, we have to
01047             //  make this look like an external entity. So create an entity
01048             //  decl and fill it in and push it with the reader, as happens
01049             //  with an external entity. Put a janitor on it to ensure it gets
01050             //  cleaned up. The reader manager does not adopt them.
                  //  gDTDStr spells the pseudo entity name "DTD".
                  //  NOTE(review): the janitor is attached only after the two
                  //  setters; if either can throw, declDTD would not be
                  //  cleaned up -- confirm.
01051             const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
01052             DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
01053             declDTD->setSystemId(sysId);
01054             declDTD->setIsExternal(true);
01055             Janitor<DTDEntityDecl> janDecl(declDTD);
01056
01057             // Mark this one as a throw at end
01058             reader->setThrowAtEnd(true);
01059
01060             // And push it onto the stack, with its pseudo name
01061             fReaderMgr.pushReader(reader, declDTD);
01062
01063             // Tell it its not in an include section
01064             dtdScanner.scanExtSubsetDecl(false, true);
01065         }
01066     }
01067 }
01068
      //  Scans a start tag when namespace processing is not in use: the element
      //  name is read with getName() and kept as one undivided QName.
      //
      //  The element declaration is looked up in fGrammar, then in
      //  fDTDElemNonDeclPool, and is created in that pool if found in neither.
      //  The element is pushed on fElemStack, its attributes are scanned into
      //  fAttrList (checking for duplicates), buildAttList() completes the list,
      //  and fDocHandler->startElement() is called if a document handler is set.
      //  An empty tag ('/>') is content-checked (when validating) and popped
      //  from fElemStack again before returning.
      //
      //  gotData  Set to true on entry; set to false only when the tag scanned
      //           is an empty root element.
      //
      //  Returns false after a syntax error from which this function does not
      //  resume scanning the tag (an error has been emitted first); true
      //  otherwise. Throws UnexpectedEOFException (Gen_UnexpectedEOF) when the
      //  peeked character is null in the special-character handling.
01069 bool DGXMLScanner::scanStartTag(bool& gotData)
01070 {
01071     //  Assume we will still have data until proven otherwise. It will only
01072     //  ever be false if this is the root and its empty.
01073     gotData = true;
01074
01075     //  Get the QName. In this case, we are not doing namespaces, so we just
01076     //  use it as is and don't have to break it into parts.
01077
01078     bool validName = fReaderMgr.getName(fQNameBuf);
01079     if (!validName)
01080     {
01081         if (fQNameBuf.isEmpty())
01082             emitError(XMLErrs::ExpectedElementName);
01083         else
01084             emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
              //  Recovery here seeks to the next '<' (skipToChar), unlike the
              //  attribute-name error below, which uses skipPastChar on '>'.
01085         fReaderMgr.skipToChar(chOpenAngle);
01086         return false;
01087     }
01088
01089     // Assume it won't be an empty tag
01090     bool isEmpty = false;
01091
01092     // See if its the root element
          //  Must be sampled before addLevel() below pushes this element.
01093     const bool isRoot = fElemStack.isEmpty();
01094
01095     //  Lets try to look up the element in the validator's element decl pool
01096     //  We can pass bogus values for the URI id and the base name. We know that
01097     //  this can only be called if we are doing a DTD style validator and that
01098     //  he will only look at the QName.
01099     //
01100     //  We *do not* tell him to fault in a decl if he does not find one - NG.
01101     bool wasAdded = false;
01102     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
01103
01104     XMLElementDecl* elemDecl = fGrammar->getElemDecl
01105     (
01106         fEmptyNamespaceId
01107         , 0
01108         , qnameRawBuf
01109         , Grammar::TOP_LEVEL_SCOPE
01110     );
01111     // look in the undeclared pool:
01112     if(!elemDecl)
01113     {
01114         elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
01115     }
          //  Found in neither place: create a decl with content model Any in
          //  the undeclared pool, taking the id the pool returns.
01116     if(!elemDecl)
01117     {
01118         wasAdded = true;
01119         elemDecl = new (fMemoryManager) DTDElementDecl
01120         (
01121             qnameRawBuf
01122             , fEmptyNamespaceId
01123             , DTDElementDecl::Any
01124             , fMemoryManager
01125         );
01126         elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
01127     }
01128
01129     if (fValidate) {
01130
              //  Both branches report ElementNotDefined; only a decl created
              //  just above is additionally marked JustFaultIn.
01131         if (wasAdded)
01132         {
01133             // This is to tell the reuse Validator that this element was
01134             // faulted-in, was not an element in the validator pool originally
01135             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
01136
01137             fValidator->emitError
01138             (
01139                 XMLValid::ElementNotDefined
01140                 , qnameRawBuf
01141             );
01142         }
01143         // If its not marked declared, then emit an error
01144         else if (!elemDecl->isDeclared())
01145         {
01146             fValidator->emitError
01147             (
01148                 XMLValid::ElementNotDefined
01149                 , qnameRawBuf
01150             );
01151         }
01152
01153
01154         fValidator->validateElement(elemDecl);
01155     }
01156
01157     // Expand the element stack and add the new element
01158     fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
01159
01160     //  If this is the first element and we are validating, check the root
01161     //  element.
01162     if (isRoot)
01163     {
01164         fRootGrammar = fGrammar;
01165
01166         if (fValidate)
01167         {
01168             //  If a DocType exists, then check if it matches the root name there.
01169             if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
01170                 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
01171         }
01172     }
01173     else if (fValidate)
01174     {
01175         //  If the element stack is not empty, then add this element as a
01176         //  child of the previous top element. If its empty, this is the root
01177         //  elem and is not the child of anything.
01178         fElemStack.addChild(elemDecl->getElementName(), true);
01179     }
01180
01181     // Skip any whitespace after the name
01182     fReaderMgr.skipPastSpaces();
01183
01184     //  We loop until we either see a /> or >, handling attribute/value
01185     //  pairs until we get there.
          //
          //  attCount counts the attributes given explicitly in the tag.
          //  curAttListSize is a snapshot of fAttrList's size: slots below it
          //  hold existing XMLAttr objects that are reused, anything beyond is
          //  appended. wasAdded is reset here but not read again in this
          //  function. fElemCount serves as the per-tag stamp for duplicate
          //  detection through fAttDefRegistry.
01186     XMLSize_t    attCount = 0;
01187     XMLSize_t    curAttListSize = fAttrList->size();
01188     wasAdded = false;
01189
01190     fElemCount++;
01191
01192     while (true)
01193     {
01194         // And get the next non-space character
01195         XMLCh nextCh = fReaderMgr.peekNextChar();
01196
01197         //  If the next character is not a slash or closed angle bracket,
01198         //  then it must be whitespace, since whitespace is required
01199         //  between the end of the last attribute and the name of the next
01200         //  one.
01201         if (attCount)
01202         {
01203             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
01204             {
01205                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
01206                 {
01207                     // Ok, skip by them and peek another char
01208                     fReaderMgr.skipPastSpaces();
01209                     nextCh = fReaderMgr.peekNextChar();
01210                 }
01211                  else
01212                 {
01213                     // Emit the error but keep on going
01214                     emitError(XMLErrs::ExpectedWhitespace);
01215                 }
01216             }
01217         }
01218
01219         //  Ok, here we first check for any of the special case characters.
01220         //  If its not one, then we do the normal case processing, which
01221         //  assumes that we've hit an attribute value, Otherwise, we do all
01222         //  the special case checks.
01223         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
01224         {
01225             //  Assume its going to be an attribute, so get a name from
01226             //  the input.
01227
01228             validName = fReaderMgr.getName(fAttNameBuf);
01229             if (!validName)
01230             {
01231                 if (fAttNameBuf.isEmpty())
01232                     emitError(XMLErrs::ExpectedAttrName);
01233                 else
01234                     emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
01235                 fReaderMgr.skipPastChar(chCloseAngle);
                      //  NOTE(review): this and the other 'return false'
                      //  exits inside the loop leave the level added by
                      //  fElemStack.addLevel() in place and skip the
                      //  fUndeclaredAttrRegistry->removeAll() after the loop
                      //  -- confirm the caller copes with that.
01236                 return false;
01237             }
01238
01239             // And next must be an equal sign
01240             if (!scanEq())
01241             {
01242                 static const XMLCh tmpList[] =
01243                 {
01244                     chSingleQuote, chDoubleQuote, chCloseAngle
01245                     , chOpenAngle, chForwardSlash, chNull
01246                 };
01247
01248                 emitError(XMLErrs::ExpectedEqSign);
01249
01250                 //  Try to sync back up by skipping forward until we either
01251                 //  hit something meaningful.
01252                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
01253
01254                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
01255                 {
01256                     // Jump back to top for normal processing of these
                          //  The attribute is dropped: nothing is added to
                          //  fAttrList and attCount is not incremented.
01257                     continue;
01258                 }
01259                 else if ((chFound == chSingleQuote)
01260                       ||  (chFound == chDoubleQuote)
01261                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
01262                 {
01263                     // Just fall through assuming that the value is to follow
01264                 }
01265                 else if (chFound == chOpenAngle)
01266                 {
01267                     // Assume a malformed tag and that new one is starting
01268                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01269                     return false;
01270                 }
01271                 else
01272                 {
01273                     // Something went really wrong
01274                     return false;
01275                 }
01276             }
01277
01278             //  See if this attribute is declared for this element. If we are
01279             //  not validating of course it will not be at first, but we will
01280             //  fault it into the pool (to avoid lots of redundant errors.)
                  //  attDef is null when no definition is found; every later
                  //  use in this loop tests it first.
01281             XMLCh * namePtr = fAttNameBuf.getRawBuffer();
01282             XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr);
01283
01284             //  Skip any whitespace before the value and then scan the att
01285             //  value. This will come back normalized with entity refs and
01286             //  char refs expanded.
01287             fReaderMgr.skipPastSpaces();
01288             if (!scanAttValue(attDef, namePtr, fAttValueBuf))
01289             {
01290                 static const XMLCh tmpList[] =
01291                 {
01292                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
01293                 };
01294
01295                 emitError(XMLErrs::ExpectedAttrValue);
01296
01297                 //  It failed, so lets try to get synced back up. We skip
01298                 //  forward until we find some whitespace or one of the
01299                 //  chars in our list.
01300                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
01301
01302                 if ((chFound == chCloseAngle)
01303                 ||  (chFound == chForwardSlash)
01304                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
01305                 {
01306                     //  Just fall through and process this attribute, though
01307                     //  the value will be "".
01308                 }
01309                 else if (chFound == chOpenAngle)
01310                 {
01311                     // Assume a malformed tag and that new one is starting
01312                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01313                     return false;
01314                 }
01315                 else
01316                 {
01317                     // Something went really wrong
01318                     return false;
01319                 }
01320             }
01321
01322             //  Add this attribute to the attribute list that we use to
01323             //  pass them to the handler. We reuse its existing elements
01324             //  but expand it as required.
01325             // Note that we want to do this first since this will
01326             // make a copy of the namePtr; we can then make use of
01327             // that copy in the hashtable lookup that checks
01328             // for duplicates.  This will mean we may have to update
01329             // the type of the XMLAttr later.
01330             XMLAttr* curAtt;
01331             const XMLCh* attrValue = fAttValueBuf.getRawBuffer();
01332
                  //  Reuse the slot at index attCount when the list already
                  //  had one on entry; otherwise append a fresh XMLAttr.
01333             if (attCount >= curAttListSize) {
01334                 curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
01335                 fAttrList->addElement(curAtt);
01336             }
01337             else {
01338                 curAtt = fAttrList->elementAt(attCount);
01339             }
01340
01341             curAtt->setSpecified(true);
01342
01343             // NO NAMESPACE CODE
01344             {
                      //  The attribute type is taken from the definition when
                      //  there is one and defaults to CData otherwise.
01345                 curAtt->set(
01346                     0, namePtr, XMLUni::fgZeroLenString, XMLUni::fgZeroLenString
01347                     , (attDef)?attDef->getType():XMLAttDef::CData
01348                 );
01349
01350                 // now need to prepare for duplicate detection
                      //  Declared attributes: fAttDefRegistry maps the attDef
                      //  to the fElemCount value at which it was last seen. A
                      //  missing or older stamp means first use in this tag
                      //  (the stamp is updated); any other value is reported
                      //  as a duplicate.
01351                 if (attDef) {
01352                     unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
01353                     if (!curCountPtr) {
01354                         curCountPtr = getNewUIntPtr();
01355                         *curCountPtr = fElemCount;
01356                         fAttDefRegistry->put(attDef, curCountPtr);
01357                     }
01358                     else if (*curCountPtr < fElemCount) {
01359                         *curCountPtr = fElemCount;
01360                     }
01361                     else {
01362                         emitError(
01363                             XMLErrs::AttrAlreadyUsedInSTag
01364                             , attDef->getFullName(), elemDecl->getFullName()
01365                         );
01366                     }
01367                 }
01368                 else
01369                 {
                          //  Undeclared attributes: a false result from
                          //  putIfNotPresent() is reported as a duplicate.
01370                     // reset namePtr so it refers to newly-allocated memory
01371                     namePtr = (XMLCh *)curAtt->getQName();
01372                     if (!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0))
01373                     {
01374                         emitError(
01375                             XMLErrs::AttrAlreadyUsedInSTag
01376                             , namePtr, elemDecl->getFullName()
01377                         );
01378                     }
01379                 }
01380             }
01381
01382             if (fValidate)
01383             {
01384                 if (attDef) {
01385                     // Let the validator pass judgement on the attribute value
01386                     fValidator->validateAttrValue(
01387                         attDef, fAttValueBuf.getRawBuffer(), false, elemDecl
01388                     );
01389                 }
01390                 else
01391                 {
01392                     fValidator->emitError
01393                     (
01394                         XMLValid::AttNotDefinedForElement
01395                         , fAttNameBuf.getRawBuffer(), qnameRawBuf
01396                     );
01397                 }
01398             }
01399
01400             // must set the newly-minted value on the XMLAttr:
                  //  attrValue is the pointer taken from fAttValueBuf before
                  //  the duplicate check and validation above.
01401             curAtt->setValue(attrValue);
01402             attCount++;
01403
01404             // And jump back to the top of the loop
01405             continue;
01406         }
01407
01408         //  It was some special case character so do all of the checks and
01409         //  deal with it.
01410         if (!nextCh)
01411             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
01412
01413         if (nextCh == chForwardSlash)
01414         {
                  //  '/' must be followed directly by '>'; if it is not, the
                  //  error is emitted but the tag is still treated as empty.
01415             fReaderMgr.getNextChar();
01416             isEmpty = true;
01417             if (!fReaderMgr.skippedChar(chCloseAngle))
01418                 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01419             break;
01420         }
01421         else if (nextCh == chCloseAngle)
01422         {
01423             fReaderMgr.getNextChar();
01424             break;
01425         }
01426         else if (nextCh == chOpenAngle)
01427         {
01428             //  Check for this one specially, since its going to be common
01429             //  and it is kind of auto-recovering since we've already hit the
01430             //  next open bracket, which is what we would have seeked to (and
01431             //  skipped this whole tag.)
                  //  The '<' was only peeked, so it is left in the input.
01432             emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01433             break;
01434         }
01435         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
01436         {
01437             //  Check for this one specially, which is probably a missing
01438             //  attribute name, e.g. ="value". Just issue expected name
01439             //  error and eat the quoted string, then jump back to the
01440             //  top again.
01441             emitError(XMLErrs::ExpectedAttrName);
01442             fReaderMgr.getNextChar();
01443             fReaderMgr.skipQuotedString(nextCh);
01444             fReaderMgr.skipPastSpaces();
01445             continue;
01446         }
01447     }
01448
          //  The registry is only cleared when at least one attribute was
          //  counted for this tag.
01449     if(attCount)
01450     {
01451         // clean up after ourselves:
01452         // clear the map used to detect duplicate attributes
01453         fUndeclaredAttrRegistry->removeAll();
01454     }
01455
01456     //  Now lets get the fAttrList filled in. This involves faulting in any
01457     //  defaulted and fixed attributes and normalizing the values of any that
01458     //  we got explicitly.
01459     //
01460     //  We update the attCount value with the total number of attributes, but
01461     //  it goes in with the number of values we got during the raw scan of
01462     //  explicitly provided attrs above.
01463     attCount = buildAttList(attCount, elemDecl, *fAttrList);
01464
01465     //  If we have a document handler, then tell it about this start tag. We
01466     //  don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
01467     //  any prefix since its just one big name if we are not doing namespaces.
01468     unsigned int uriId = fEmptyNamespaceId;
01469     if (fDocHandler)
01470     {
01471         fDocHandler->startElement
01472         (
01473             *elemDecl
01474             , uriId
01475             , 0
01476             , *fAttrList
01477             , attCount
01478             , isEmpty
01479             , isRoot
01480         );
01481     }
01482
01483     //  If empty, validate content right now if we are validating and then
01484     //  pop the element stack top. Else, we have to update the current stack
01485     //  top's namespace mapping elements.
01486     if (isEmpty)
01487     {
01488         // If validating, then ensure that its legal to have no content
01489         if (fValidate)
01490         {
                  //  Checked with a null child list and a child count of
                  //  zero; 'failure' is passed in but not read afterwards.
01491             XMLSize_t failure;
01492             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
01493             if (!res)
01494             {
01495                 fValidator->emitError
01496                 (
01497                     XMLValid::ElementNotValidForContent
01498                     , qnameRawBuf
01499                     , elemDecl->getFormattedContentModel()
01500                 );
01501             }
01502         }
01503
01504         // Pop the element stack back off since it'll never be used now
01505         fElemStack.popTop();
01506
01507         // If the elem stack is empty, then it was an empty root
01508         if (isRoot)
01509             gotData = false;
01510     }
01511
01512     return true;
01513 }
01514 
01515 
01516 bool DGXMLScanner::scanStartTagNS(bool& gotData)
01517 {
01518     //  Assume we will still have data until proven otherwise. It will only
01519     //  ever be false if this is the root and its empty.
01520     gotData = true;
01521 
01522     //  Get the QName. In this case, we are not doing namespaces, so we just
01523     //  use it as is and don't have to break it into parts.
01524 
01525     int  colonPosition;
01526     bool validName = fReaderMgr.getQName(fQNameBuf, &colonPosition);
01527     if (!validName)
01528     {
01529         if (fQNameBuf.isEmpty())
01530             emitError(XMLErrs::ExpectedElementName);
01531         else
01532             emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
01533         fReaderMgr.skipToChar(chOpenAngle);
01534         return false;
01535     }
01536 
01537     // Assume it won't be an empty tag
01538     bool isEmpty = false;
01539 
01540     // See if its the root element
01541     const bool isRoot = fElemStack.isEmpty();
01542 
01543     //  Lets try to look up the element in the validator's element decl pool
01544     //  We can pass bogus values for the URI id and the base name. We know that
01545     //  this can only be called if we are doing a DTD style validator and that
01546     //  he will only look at the QName.
01547     //
01548     //  We *do not* tell him to fault in a decl if he does not find one - NG.
01549     bool wasAdded = false;
01550     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
01551 
01552     XMLElementDecl* elemDecl = fGrammar->getElemDecl
01553     (
01554         fEmptyNamespaceId
01555         , 0
01556         , qnameRawBuf
01557         , Grammar::TOP_LEVEL_SCOPE
01558     );
01559     // look in the undeclared pool:
01560     if(!elemDecl)
01561     {
01562         elemDecl = fDTDElemNonDeclPool->getByKey(qnameRawBuf);
01563     }
01564     if(!elemDecl)
01565     {
01566         wasAdded = true;
01567         elemDecl = new (fMemoryManager) DTDElementDecl
01568         (
01569             qnameRawBuf
01570             , fEmptyNamespaceId
01571             , DTDElementDecl::Any
01572             , fMemoryManager
01573         );
01574         elemDecl->setId(fDTDElemNonDeclPool->put((DTDElementDecl*)elemDecl));
01575     }
01576 
01577     if (fValidate) {
01578 
01579         if (wasAdded)
01580         {
01581             // This is to tell the reuse Validator that this element was
01582             // faulted-in, was not an element in the validator pool originally
01583             elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
01584 
01585             fValidator->emitError
01586             (
01587                 XMLValid::ElementNotDefined
01588                 , qnameRawBuf
01589             );
01590         }
01591         // If its not marked declared, then emit an error
01592         else if (!elemDecl->isDeclared())
01593         {
01594             fValidator->emitError
01595             (
01596                 XMLValid::ElementNotDefined
01597                 , qnameRawBuf
01598             );
01599         }
01600 
01601 
01602         fValidator->validateElement(elemDecl);
01603     }
01604 
01605     // Expand the element stack and add the new element
01606     fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());
01607 
01608     //  If this is the first element and we are validating, check the root
01609     //  element.
01610     if (isRoot)
01611     {
01612         fRootGrammar = fGrammar;
01613 
01614         if (fValidate)
01615         {
01616             //  If a DocType exists, then check if it matches the root name there.
01617             if (fRootElemName && !XMLString::equals(qnameRawBuf, fRootElemName))
01618                 fValidator->emitError(XMLValid::RootElemNotLikeDocType);
01619         }
01620     }
01621     else if (fValidate)
01622     {
01623         //  If the element stack is not empty, then add this element as a
01624         //  child of the previous top element. If its empty, this is the root
01625         //  elem and is not the child of anything.
01626         fElemStack.addChild(elemDecl->getElementName(), true);
01627     }
01628 
01629     // Skip any whitespace after the name
01630     fReaderMgr.skipPastSpaces();
01631 
01632     //  We loop until we either see a /> or >, handling attribute/value
01633     //  pairs until we get there.
01634     XMLSize_t    attCount = 0;
01635     XMLSize_t    curAttListSize = fAttrList->size();
01636     wasAdded = false;
01637 
01638     fElemCount++;
01639 
01640     while (true)
01641     {
01642         // And get the next non-space character
01643         XMLCh nextCh = fReaderMgr.peekNextChar();
01644 
01645         //  If the next character is not a slash or closed angle bracket,
01646         //  then it must be whitespace, since whitespace is required
01647         //  between the end of the last attribute and the name of the next
01648         //  one.
01649         if (attCount)
01650         {
01651             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
01652             {
01653                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
01654                 {
01655                     // Ok, skip by them and peek another char
01656                     fReaderMgr.skipPastSpaces();
01657                     nextCh = fReaderMgr.peekNextChar();
01658                 }
01659                  else
01660                 {
01661                     // Emit the error but keep on going
01662                     emitError(XMLErrs::ExpectedWhitespace);
01663                 }
01664             }
01665         }
01666 
01667         //  Ok, here we first check for any of the special case characters.
01668         //  If its not one, then we do the normal case processing, which
01669         //  assumes that we've hit an attribute value, Otherwise, we do all
01670         //  the special case checks.
01671         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
01672         {
01673             //  Assume its going to be an attribute, so get a name from
01674             //  the input.
01675 
01676             validName = fReaderMgr.getQName(fAttNameBuf, &colonPosition);
01677             if (!validName)
01678             {
01679                 if (fAttNameBuf.isEmpty())
01680                     emitError(XMLErrs::ExpectedAttrName);
01681                 else
01682                     emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
01683                 fReaderMgr.skipPastChar(chCloseAngle);
01684                 return false;
01685             }
01686 
01687             // And next must be an equal sign
01688             if (!scanEq())
01689             {
01690                 static const XMLCh tmpList[] =
01691                 {
01692                     chSingleQuote, chDoubleQuote, chCloseAngle
01693                     , chOpenAngle, chForwardSlash, chNull
01694                 };
01695 
01696                 emitError(XMLErrs::ExpectedEqSign);
01697 
01698                 //  Try to sync back up by skipping forward until we either
01699                 //  hit something meaningful.
01700                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
01701 
01702                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
01703                 {
01704                     // Jump back to top for normal processing of these
01705                     continue;
01706                 }
01707                 else if ((chFound == chSingleQuote)
01708                       ||  (chFound == chDoubleQuote)
01709                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
01710                 {
01711                     // Just fall through assuming that the value is to follow
01712                 }
01713                 else if (chFound == chOpenAngle)
01714                 {
01715                     // Assume a malformed tag and that new one is starting
01716                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01717                     return false;
01718                 }
01719                 else
01720                 {
01721                     // Something went really wrong
01722                     return false;
01723                 }
01724             }
01725 
01726             //  See if this attribute is declared for this element. If we are
01727             //  not validating of course it will not be at first, but we will
01728             //  fault it into the pool (to avoid lots of redundant errors.)
01729             XMLCh * namePtr = fAttNameBuf.getRawBuffer();
01730             XMLAttDef* attDef = ((DTDElementDecl *)elemDecl)->getAttDef(namePtr);
01731 
01732             //  Skip any whitespace before the value and then scan the att
01733             //  value. This will come back normalized with entity refs and
01734             //  char refs expanded.
01735             fReaderMgr.skipPastSpaces();
01736             if (!scanAttValue(attDef, namePtr, fAttValueBuf))
01737             {
01738                 static const XMLCh tmpList[] =
01739                 {
01740                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
01741                 };
01742 
01743                 emitError(XMLErrs::ExpectedAttrValue);
01744 
01745                 //  It failed, so lets try to get synced back up. We skip
01746                 //  forward until we find some whitespace or one of the
01747                 //  chars in our list.
01748                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
01749 
01750                 if ((chFound == chCloseAngle)
01751                 ||  (chFound == chForwardSlash)
01752                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
01753                 {
01754                     //  Just fall through and process this attribute, though
01755                     //  the value will be "".
01756                 }
01757                 else if (chFound == chOpenAngle)
01758                 {
01759                     // Assume a malformed tag and that new one is starting
01760                     emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01761                     return false;
01762                 }
01763                 else
01764                 {
01765                     // Something went really wrong
01766                     return false;
01767                 }
01768             }
01769 
01770             //  Add this attribute to the attribute list that we use to
01771             //  pass them to the handler. We reuse its existing elements
01772             //  but expand it as required.
01773             // Note that we want to this first since this will
01774             // make a copy of the namePtr; we can then make use of
01775             // that copy in the hashtable lookup that checks
01776             // for duplicates.  This will mean we may have to update
01777             // the type of the XMLAttr later.
01778             XMLAttr* curAtt;
01779             const XMLCh* attrValue = fAttValueBuf.getRawBuffer();
01780 
01781             if (attCount >= curAttListSize) {
01782                 curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
01783                 fAttrList->addElement(curAtt);
01784             }
01785             else {
01786                 curAtt = fAttrList->elementAt(attCount);
01787             }
01788 
01789             curAtt->setSpecified(true);
01790             // DO NAMESPACES
01791             {
01792                 curAtt->set(
01793                     fEmptyNamespaceId, namePtr, XMLUni::fgZeroLenString
01794                     , (attDef)? attDef->getType() : XMLAttDef::CData
01795                 );
01796 
01797                 // each attribute has the prefix:suffix="value"
01798                 const XMLCh* attPrefix = curAtt->getPrefix();
01799                 const XMLCh* attLocalName = curAtt->getName();
01800 
01801                 if (attPrefix && *attPrefix) {
01802                     if (XMLString::equals(attPrefix, XMLUni::fgXMLString)) {
01803                         curAtt->setURIId(fXMLNamespaceId);
01804                     }
01805                     else if (XMLString::equals(attPrefix, XMLUni::fgXMLNSString)) {
01806                         curAtt->setURIId(fXMLNSNamespaceId);
01807                         updateNSMap(attPrefix, attLocalName, attrValue);
01808                     }
01809                     else {
01810                         fAttrNSList->addElement(curAtt);
01811                     }
01812                 }
01813                 else if (XMLString::equals(XMLUni::fgXMLNSString, attLocalName))
01814                 {
01815                     updateNSMap(attPrefix, XMLUni::fgZeroLenString, attrValue);
01816                 }
01817 
01818                 // NOTE: duplicate attribute check will be done, when we map
01819                 //       namespaces to all attributes
01820                 if (attDef) {
01821                     unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
01822                     if (!curCountPtr) {
01823                         curCountPtr = getNewUIntPtr();
01824                         *curCountPtr = fElemCount;
01825                         fAttDefRegistry->put(attDef, curCountPtr);
01826                    }
01827                     else if (*curCountPtr < fElemCount) {
01828                         *curCountPtr = fElemCount;
01829                     }
01830                 }
01831             }
01832 
01833             if (fValidate)
01834             {
01835                 if (attDef) {
01836                     // Let the validator pass judgement on the attribute value
01837                     fValidator->validateAttrValue(
01838                         attDef, fAttValueBuf.getRawBuffer(), false, elemDecl
01839                     );
01840                 }
01841                 else
01842                 {
01843                     fValidator->emitError
01844                     (
01845                         XMLValid::AttNotDefinedForElement
01846                         , fAttNameBuf.getRawBuffer(), qnameRawBuf
01847                     );
01848                 }
01849             }
01850 
01851             // must set the newly-minted value on the XMLAttr:
01852             curAtt->setValue(attrValue);
01853             attCount++;
01854 
01855             // And jump back to the top of the loop
01856             continue;
01857         }
01858 
01859         //  It was some special case character so do all of the checks and
01860         //  deal with it.
01861         if (!nextCh)
01862             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
01863 
01864         if (nextCh == chForwardSlash)
01865         {
01866             fReaderMgr.getNextChar();
01867             isEmpty = true;
01868             if (!fReaderMgr.skippedChar(chCloseAngle))
01869                 emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01870             break;
01871         }
01872         else if (nextCh == chCloseAngle)
01873         {
01874             fReaderMgr.getNextChar();
01875             break;
01876         }
01877         else if (nextCh == chOpenAngle)
01878         {
01879             //  Check for this one specially, since its going to be common
01880             //  and it is kind of auto-recovering since we've already hit the
01881             //  next open bracket, which is what we would have seeked to (and
01882             //  skipped this whole tag.)
01883             emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
01884             break;
01885         }
01886         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
01887         {
01888             //  Check for this one specially, which is probably a missing
01889             //  attribute name, e.g. ="value". Just issue expected name
01890             //  error and eat the quoted string, then jump back to the
01891             //  top again.
01892             emitError(XMLErrs::ExpectedAttrName);
01893             fReaderMgr.getNextChar();
01894             fReaderMgr.skipQuotedString(nextCh);
01895             fReaderMgr.skipPastSpaces();
01896             continue;
01897         }
01898     }
01899 
01900     //  Make an initial pass through the list and find any xmlns attributes.
01901     if (attCount)
01902       scanAttrListforNameSpaces(fAttrList, attCount, elemDecl);
01903 
01904     if(attCount)
01905     {
01906         // clean up after ourselves:
01907         // clear the map used to detect duplicate attributes
01908         fUndeclaredAttrRegistry->removeAll();
01909     }
01910 
01911     //  Now lets get the fAttrList filled in. This involves faulting in any
01912     //  defaulted and fixed attributes and normalizing the values of any that
01913     //  we got explicitly.
01914     //
01915     //  We update the attCount value with the total number of attributes, but
01916     //  it goes in with the number of values we got during the raw scan of
01917     //  explictly provided attrs above.
01918     attCount = buildAttList(attCount, elemDecl, *fAttrList);
01919 
01920     //  If we have a document handler, then tell it about this start tag. We
01921     //  don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
01922     //  any prefix since its just one big name if we are not doing namespaces.
01923     if (fDocHandler)
01924     {
01925         unsigned int uriId = resolvePrefix
01926             (
01927                 elemDecl->getElementName()->getPrefix()
01928                 , ElemStack::Mode_Element
01929             );
01930 
01931         fDocHandler->startElement
01932         (
01933             *elemDecl
01934             , uriId
01935             , elemDecl->getElementName()->getPrefix()
01936             , *fAttrList
01937             , attCount
01938             , isEmpty
01939             , isRoot
01940         );
01941     }
01942 
01943     //  If empty, validate content right now if we are validating and then
01944     //  pop the element stack top. Else, we have to update the current stack
01945     //  top's namespace mapping elements.
01946     if (isEmpty)
01947     {
01948         // If validating, then insure that its legal to have no content
01949         if (fValidate)
01950         {
01951             XMLSize_t failure;
01952             bool res = fValidator->checkContent(elemDecl, 0, 0, &failure);
01953             if (!res)
01954             {
01955                 fValidator->emitError
01956                 (
01957                     XMLValid::ElementNotValidForContent
01958                     , qnameRawBuf
01959                     , elemDecl->getFormattedContentModel()
01960                 );
01961             }
01962         }
01963 
01964         // Pop the element stack back off since it'll never be used now
01965         fElemStack.popTop();
01966 
01967         // If the elem stack is empty, then it was an empty root
01968         if (isRoot)
01969             gotData = false;
01970     }
01971 
01972     return true;
01973 }
01974 
01975 // ---------------------------------------------------------------------------
01976 //  DGXMLScanner: Grammar preparsing
01977 // ---------------------------------------------------------------------------
01978 Grammar* DGXMLScanner::loadGrammar(const   InputSource& src
01979                                    , const short        grammarType
01980                                    , const bool         toCache)
01981 {
01982     Grammar* loadedGrammar = 0;
01983 
01984     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);
01985 
01986     try
01987     {
01988         fGrammarResolver->cacheGrammarFromParse(false);
01989         fGrammarResolver->useCachedGrammarInParse(false);
01990         fRootGrammar = 0;
01991 
01992         if (fValScheme == Val_Auto) {
01993             fValidate = true;
01994         }
01995 
01996         // Reset some status flags
01997         fInException = false;
01998         fStandalone = false;
01999         fErrorCount = 0;
02000         fHasNoDTD = true;
02001 
02002         if (grammarType == Grammar::DTDGrammarType) {
02003             loadedGrammar = loadDTDGrammar(src, toCache);
02004         }
02005     }
02006     //  NOTE:
02007     //
02008     //  In all of the error processing below, the emitError() call MUST come
02009     //  before the flush of the reader mgr, or it will fail because it tries
02010     //  to find out the position in the XML source of the error.
02011     catch(const XMLErrs::Codes)
02012     {
02013         // This is a 'first failure' exception, so fall through
02014     }
02015     catch(const XMLValid::Codes)
02016     {
02017         // This is a 'first fatal error' type exit, so fall through
02018     }
02019     catch(const XMLException& excToCatch)
02020     {
02021         //  Emit the error and catch any user exception thrown from here. Make
02022         //  sure in all cases we flush the reader manager.
02023         fInException = true;
02024         try
02025         {
02026             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
02027                 emitError
02028                 (
02029                     XMLErrs::XMLException_Warning
02030                     , excToCatch.getCode()
02031                     , excToCatch.getMessage()
02032                 );
02033             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
02034                 emitError
02035                 (
02036                     XMLErrs::XMLException_Fatal
02037                     , excToCatch.getCode()
02038                     , excToCatch.getMessage()
02039                 );
02040             else
02041                 emitError
02042                 (
02043                     XMLErrs::XMLException_Error
02044                     , excToCatch.getCode()
02045                     , excToCatch.getMessage()
02046                 );
02047         }
02048         catch(const OutOfMemoryException&)
02049         {
02050             // This is a special case for out-of-memory
02051             // conditions, because resetting the ReaderMgr
02052             // can be problematic.
02053             resetReaderMgr.release();
02054 
02055             throw;
02056         }
02057     }
02058     catch(const OutOfMemoryException&)
02059     {
02060         // This is a special case for out-of-memory
02061         // conditions, because resetting the ReaderMgr
02062         // can be problematic.
02063         resetReaderMgr.release();
02064 
02065         throw;
02066     }
02067 
02068     return loadedGrammar;
02069 }
02070 
02071 Grammar* DGXMLScanner::loadDTDGrammar(const InputSource& src,
02072                                       const bool toCache)
02073 {
02074     // Reset the validators
02075     fDTDValidator->reset();
02076     if (fValidatorFromUser)
02077         fValidator->reset();
02078 
02079     fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
02080     fGrammarResolver->putGrammar(fDTDGrammar);
02081     fGrammar = fDTDGrammar;
02082     fValidator->setGrammar(fGrammar);
02083 
02084     //  And for all installed handlers, send reset events. This gives them
02085     //  a chance to flush any cached data.
02086     if (fDocHandler)
02087         fDocHandler->resetDocument();
02088     if (fEntityHandler)
02089         fEntityHandler->resetEntities();
02090     if (fErrorReporter)
02091         fErrorReporter->resetErrors();
02092 
02093     // Clear out the id reference list
02094     resetValidationContext();
02095 
02096     if (toCache) {
02097 
02098         unsigned int sysId = fGrammarResolver->getStringPool()->addOrFind(src.getSystemId());
02099         const XMLCh* sysIdStr = fGrammarResolver->getStringPool()->getValueForId(sysId);
02100 
02101         fGrammarResolver->orphanGrammar(XMLUni::fgDTDEntityString);
02102         ((XMLDTDDescription*) (fGrammar->getGrammarDescription()))->setSystemId(sysIdStr);
02103         fGrammarResolver->putGrammar(fGrammar);
02104     }
02105 
02106     //  Handle the creation of the XML reader object for this input source.
02107     //  This will provide us with transcoding and basic lexing services.
02108     XMLReader* newReader = fReaderMgr.createReader
02109     (
02110         src
02111         , false
02112         , XMLReader::RefFrom_NonLiteral
02113         , XMLReader::Type_General
02114         , XMLReader::Source_External
02115         , fCalculateSrcOfs
02116         , fLowWaterMark
02117     );
02118     if (!newReader) {
02119         if (src.getIssueFatalErrorIfNotFound())
02120             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
02121         else
02122             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
02123     }
02124 
02125     //  In order to make the processing work consistently, we have to
02126     //  make this look like an external entity. So create an entity
02127     //  decl and fill it in and push it with the reader, as happens
02128     //  with an external entity. Put a janitor on it to insure it gets
02129     //  cleaned up. The reader manager does not adopt them.
02130     const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
02131     DTDEntityDecl* declDTD = new (fMemoryManager) DTDEntityDecl(gDTDStr, false, fMemoryManager);
02132     declDTD->setSystemId(src.getSystemId());
02133     declDTD->setIsExternal(true);
02134     Janitor<DTDEntityDecl> janDecl(declDTD);
02135 
02136     // Mark this one as a throw at end
02137     newReader->setThrowAtEnd(true);
02138 
02139     // And push it onto the stack, with its pseudo name
02140     fReaderMgr.pushReader(newReader, declDTD);
02141 
02142     //  If we have a doc type handler and advanced callbacks are enabled,
02143     //  call the doctype event.
02144     if (fDocTypeHandler) {
02145 
02146         // Create a dummy root
02147         DTDElementDecl* rootDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
02148         (
02149             gDTDStr
02150             , fEmptyNamespaceId
02151             , DTDElementDecl::Any
02152             , fGrammarPoolMemoryManager
02153         );
02154         rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
02155         rootDecl->setExternalElemDeclaration(true);
02156         Janitor<DTDElementDecl> janSrc(rootDecl);
02157 
02158         fDocTypeHandler->doctypeDecl(*rootDecl, src.getPublicId(), src.getSystemId(), false, true);
02159     }
02160 
02161     // Create DTDScanner
02162     DTDScanner dtdScanner
02163     (
02164         (DTDGrammar*)fGrammar
02165         , fDocTypeHandler
02166         , fGrammarPoolMemoryManager
02167         , fMemoryManager
02168     );
02169     dtdScanner.setScannerInfo(this, &fReaderMgr, &fBufMgr);
02170 
02171     // Tell it its not in an include section
02172     dtdScanner.scanExtSubsetDecl(false, true);
02173 
02174     if (fValidate) {
02175         //  validate the DTD scan so far
02176         fValidator->preContentValidation(false, true);
02177     }
02178 
02179     if (toCache)
02180         fGrammarResolver->cacheGrammars();
02181 
02182     return fDTDGrammar;
02183 }
02184 
02185 
02186 // ---------------------------------------------------------------------------
02187 //  DGXMLScanner: Private helper methods
02188 // ---------------------------------------------------------------------------
02189 //  This method handles the common initialization, to avoid having to do
02190 //  it redundantly in multiple constructors.
02191 void DGXMLScanner::commonInit()
02192 {
02193     //  And we need one for the raw attribute scan. This just stores key/
02194     //  value string pairs (prior to any processing.)
02195     fAttrNSList = new (fMemoryManager) ValueVectorOf<XMLAttr*>(8, fMemoryManager);
02196 
02197     //  Create the Validator and init them
02198     fDTDValidator = new (fMemoryManager) DTDValidator();
02199     initValidator(fDTDValidator);
02200     fDTDElemNonDeclPool = new (fMemoryManager) NameIdPool<DTDElementDecl>(29, 128, fMemoryManager);
02201     fAttDefRegistry = new (fMemoryManager) RefHashTableOf<unsigned int, PtrHasher>
02202     (
02203         131, false, fMemoryManager
02204     );
02205     fUndeclaredAttrRegistry = new (fMemoryManager) Hash2KeysSetOf<StringHasher>(7, fMemoryManager);
02206 
02207     if (fValidator)
02208     {
02209         if (!fValidator->handlesDTD())
02210            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
02211     }
02212     else
02213     {
02214         fValidator = fDTDValidator;
02215     }
02216 }
02217 
02218 void DGXMLScanner::cleanUp()
02219 {
02220     delete fAttrNSList;
02221     delete fDTDValidator;
02222     delete fDTDElemNonDeclPool;
02223     delete fAttDefRegistry;
02224     delete fUndeclaredAttrRegistry;
02225 }
02226 
02227 
02228 //  This method is called from scanStartTagNS() to build up the list of
02229 //  XMLAttr objects that will be passed out in the start tag callout. We
02230 //  get the key/value pairs from the raw scan of explicitly provided attrs,
02231 //  which have not been normalized. And we get the element declaration from
02232 //  which we will get any defaulted or fixed attribute defs and add those
02233 //  in as well.
02234 XMLSize_t
02235 DGXMLScanner::buildAttList(const XMLSize_t              attCount
02236                           ,       XMLElementDecl*       elemDecl
02237                           ,       RefVectorOf<XMLAttr>& toFill)
02238 {
02239     //  Ask the element to clear the 'provided' flag on all of the att defs
02240     //  that it owns, and to return us a boolean indicating whether it has
02241     //  any defs.
02242     const bool hasDefs = elemDecl->hasAttDefs();
02243 
02244     //  If there are no expliclitily provided attributes and there are no
02245     //  defined attributes for the element, the we don't have anything to do.
02246     //  So just return zero in this case.
02247     if (!hasDefs && !attCount)
02248         return 0;
02249 
02250     // Keep up with how many attrs we end up with total
02251     XMLSize_t retCount = attCount;
02252 
02253     //  And get the current size of the output vector. This lets us use
02254     //  existing elements until we fill it, then start adding new ones.
02255     const XMLSize_t curAttListSize = toFill.size();
02256 
02257     //  Ok, so lets get an enumerator for the attributes of this element
02258     //  and run through them for well formedness and validity checks. But
02259     //  make sure that we had any attributes before we do it, since the list
02260     //  would have have gotten faulted in anyway.
02261     if (hasDefs)
02262     {
02263         XMLAttDefList& attDefList = elemDecl->getAttDefList();
02264         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
02265         {
02266             // Get the current att def, for convenience and its def type
02267             XMLAttDef& curDef = attDefList.getAttDef(i);
02268 
02269             unsigned int *attCountPtr = fAttDefRegistry->get(&curDef);
02270             if (!attCountPtr || *attCountPtr < fElemCount)
02271             { // did not occur
02272                 const XMLAttDef::DefAttTypes defType = curDef.getDefaultType();
02273 
02274                 if (fValidate)
02275                 {
02276                     // If we are validating and its required, then an error
02277                     if (defType == XMLAttDef::Required)
02278                     {
02279                         fValidator->emitError
02280                         (
02281                             XMLValid::RequiredAttrNotProvided
02282                             , curDef.getFullName()
02283                         );
02284                     }
02285                     else if ((defType == XMLAttDef::Default) ||
02286                                        (defType == XMLAttDef::Fixed)  )
02287                     {
02288                         if (fStandalone && curDef.isExternal())
02289                         {
02290                             // XML 1.0 Section 2.9
02291                             // Document is standalone, so attributes must not be defaulted.
02292                             fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef.getFullName(), elemDecl->getFullName());
02293                         }
02294                     }
02295                 }
02296 
02297                 // Fault in the value if needed, and bump the att count
02298                 if ((defType == XMLAttDef::Default)
02299                 ||  (defType == XMLAttDef::Fixed))
02300                 {
02301                     // Let the validator pass judgement on the attribute value
02302                     if (fValidate)
02303                     {
02304                         fValidator->validateAttrValue
02305                         (
02306                             &curDef
02307                             , curDef.getValue()
02308                             , false
02309                             , elemDecl
02310                         );
02311                     }
02312 
02313                     XMLAttr* curAtt;
02314                     if (retCount >= curAttListSize)
02315                     {
02316                         if (fDoNamespaces)
02317                         {
02318                             curAtt = new (fMemoryManager) XMLAttr
02319                             (
02320                                 fEmptyNamespaceId
02321                                 , curDef.getFullName()
02322                                 , curDef.getValue()
02323                                 , curDef.getType()
02324                                 , false
02325                                 , fMemoryManager
02326                             );
02327                         }
02328                         else
02329                         {
02330                             curAtt = new (fMemoryManager) XMLAttr
02331                             (
02332                                 0
02333                                 , curDef.getFullName()
02334                                 , XMLUni::fgZeroLenString
02335                                 , curDef.getValue()
02336                                 , curDef.getType()
02337                                 , false
02338                                 , fMemoryManager
02339                             );
02340                         }
02341 
02342                         fAttrList->addElement(curAtt);
02343                     }
02344                     else
02345                     {
02346                         curAtt = fAttrList->elementAt(retCount);
02347                         if (fDoNamespaces)
02348                         {
02349                             curAtt->set
02350                             (
02351                                 fEmptyNamespaceId
02352                                 , curDef.getFullName()
02353                                 , curDef.getValue()
02354                                 , curDef.getType()
02355                             );
02356                         }
02357                         else
02358                         {
02359                             curAtt->set
02360                             (
02361                                 0
02362                                 , curDef.getFullName()
02363                                 , XMLUni::fgZeroLenString
02364                                 , curDef.getValue()
02365                                 , curDef.getType()
02366                             );
02367                         }
02368                         curAtt->setSpecified(false);
02369                     }
02370 
02371                     if (fDoNamespaces)
02372                     {
02373                         //  Map the new attribute's prefix to a URI id and store
02374                         //  that in the attribute object.
02375                         const XMLCh* attPrefix = curAtt->getPrefix();
02376                         if (attPrefix && *attPrefix) {
02377                             curAtt->setURIId
02378                             (
02379                                 resolvePrefix(attPrefix, ElemStack::Mode_Attribute)
02380                             );
02381                         }
02382                     }
02383 
02384                     retCount++;
02385                 }
02386             }
02387         }
02388     }
02389 
02390     return retCount;
02391 }
02392 
02393 
02394 //  This method will reset the scanner data structures, and related plugged
02395 //  in stuff, for a new scan session. We get the input source for the primary
02396 //  XML entity, create the reader for it, and push it on the stack so that
02397 //  upon successful return from here we are ready to go.
02398 void DGXMLScanner::scanReset(const InputSource& src)
02399 {
02400 
02401     //  This call implicitly tells us that we are going to reuse the scanner
02402     //  if it was previously used. So tell the validator to reset itself.
02403     //
02404     //  But, if the fUseCacheGrammar flag is set, then don't reset it.
02405     //
02406     //  NOTE:   The ReaderMgr is flushed on the way out, because that is
02407     //          required to insure that files are closed.
02408     fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
02409     fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
02410 
02411     fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
02412     fGrammarResolver->putGrammar(fDTDGrammar);
02413     fGrammar = fDTDGrammar;
02414     fRootGrammar = 0;
02415     fValidator->setGrammar(fGrammar);
02416 
02417     // Reset validation
02418     fValidate = (fValScheme == Val_Always) ? true : false;
02419 
02420     //  And for all installed handlers, send reset events. This gives them
02421     //  a chance to flush any cached data.
02422     if (fDocHandler)
02423         fDocHandler->resetDocument();
02424     if (fEntityHandler)
02425         fEntityHandler->resetEntities();
02426     if (fErrorReporter)
02427         fErrorReporter->resetErrors();
02428 
02429     // Clear out the id reference list
02430     resetValidationContext();
02431 
02432     // Reset the Root Element Name
02433     fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
02434     fRootElemName = 0;
02435 
02436     //  Reset the element stack, and give it the latest ids for the special
02437     //  URIs it has to know about.
02438     fElemStack.reset
02439     (
02440         fEmptyNamespaceId
02441         , fUnknownNamespaceId
02442         , fXMLNamespaceId
02443         , fXMLNSNamespaceId
02444     );
02445 
02446     // Reset some status flags
02447     fInException = false;
02448     fStandalone = false;
02449     fErrorCount = 0;
02450     fHasNoDTD = true;
02451 
02452     // Reset the validators
02453     fDTDValidator->reset();
02454     fDTDValidator->setErrorReporter(fErrorReporter);
02455     if (fValidatorFromUser)
02456         fValidator->reset();
02457 
02458     //  Handle the creation of the XML reader object for this input source.
02459     //  This will provide us with transcoding and basic lexing services.
02460     XMLReader* newReader = fReaderMgr.createReader
02461     (
02462         src
02463         , true
02464         , XMLReader::RefFrom_NonLiteral
02465         , XMLReader::Type_General
02466         , XMLReader::Source_External
02467         , fCalculateSrcOfs
02468         , fLowWaterMark
02469     );
02470 
02471     if (!newReader) {
02472         if (src.getIssueFatalErrorIfNotFound())
02473             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
02474         else
02475             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
02476     }
02477 
02478     // Push this read onto the reader manager
02479     fReaderMgr.pushReader(newReader, 0);
02480 
02481     // and reset security-related things if necessary:
02482     if(fSecurityManager != 0)
02483     {
02484         fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
02485         fEntityExpansionCount = 0;
02486     }
02487     if(fUIntPoolRowTotal >= 32)
02488     { // 8 KB tied up with validating attributes...
02489         fAttDefRegistry->removeAll();
02490         recreateUIntPool();
02491     }
02492     else
02493     {
02494         // note that this will implicitly reset the values of the hashtables,
02495         // though their buckets will still be tied up
02496         resetUIntPool();
02497     }
02498     fUndeclaredAttrRegistry->removeAll();
02499     fAttrNSList->removeAllElements();
02500 }
02501 
02502 
02503 //  This method is called between markup in content. It scans for character
02504 //  data that is sent to the document handler. It watches for any markup
02505 //  characters that would indicate that the character data has ended. It also
02506 //  handles expansion of general and character entities.
02507 //
02508 //  sendData() is a local static helper for this method which handles some
02509 //  code that must be done in three different places here.
02510 void DGXMLScanner::sendCharData(XMLBuffer& toSend)
02511 {
02512     // If no data in the buffer, then nothing to do
02513     if (toSend.isEmpty())
02514         return;
02515 
02516     //  We do different things according to whether we are validating or
02517     //  not. If not, its always just characters; else, it depends on the
02518     //  current element's content model.
02519     if (fValidate)
02520     {
02521         // Get the raw data we need for the callback
02522         const XMLCh* const rawBuf = toSend.getRawBuffer();
02523         const XMLSize_t len = toSend.getLen();
02524 
02525         // And see if the current element is a 'Children' style content model
02526         const ElemStack::StackElem* topElem = fElemStack.topElement();
02527 
02528         // Get the character data opts for the current element
02529         XMLElementDecl::CharDataOpts charOpts = topElem->fThisElement->getCharDataOpts();
02530 
02531         if (charOpts == XMLElementDecl::NoCharData)
02532         {
02533             // They definitely cannot handle any type of char data
02534             fValidator->emitError(XMLValid::NoCharDataInCM);
02535         }
02536         else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
02537         {
02538             //  Its all spaces. So, if they can take spaces, then send it
02539             //  as ignorable whitespace. If they can handle any char data
02540             //  send it as characters.
02541             if (charOpts == XMLElementDecl::SpacesOk) {
02542                 if (fDocHandler)
02543                     fDocHandler->ignorableWhitespace(rawBuf, len, false);
02544             }
02545             else if (charOpts == XMLElementDecl::AllCharData)
02546             {
02547                 if (fDocHandler)
02548                     fDocHandler->docCharacters(rawBuf, len, false);
02549             }
02550         }
02551         else
02552         {
02553             //  If they can take any char data, then send it. Otherwise, they
02554             //  can only handle whitespace and can't handle this stuff so
02555             //  issue an error.
02556             if (charOpts == XMLElementDecl::AllCharData)
02557             {
02558                 if (fDocHandler)
02559                     fDocHandler->docCharacters(rawBuf, len, false);
02560             }
02561             else
02562             {
02563                 fValidator->emitError(XMLValid::NoCharDataInCM);
02564             }
02565         }
02566     }
02567     else
02568     {
02569         // Always assume its just char data if not validating
02570         if (fDocHandler)
02571             fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
02572     }
02573 
02574     // Reset buffer
02575     toSend.reset();
02576 }
02577 
02578 
02579 
02580 //  This method is called with a key/value string pair that represents an
02581 //  xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
02582 //  current top of the element stack based on this data. We know that when
02583 //  we get here, that it is one of these forms, so we don't bother confirming
02584 //  it.
02585 //
02586 //  But we have to ensure
02587 //      1. xxx is not xmlns
02588 //      2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
02589 //      3. yyy is not XMLUni::fgXMLNSURIName
02590 //      4. if xxx is not null, then yyy cannot be an empty string.
02591 void DGXMLScanner::updateNSMap(const    XMLCh* const attrPrefix
02592                                , const  XMLCh* const attrLocalName
02593                                , const  XMLCh* const attrValue)
02594 {
02595     //  We either have the default prefix (""), or we point it into the attr
02596     //  name parameter. Note that the xmlns is not the prefix we care about
02597     //  here. To us, the 'prefix' is really the local part of the attrName
02598     //  parameter.
02599     //
02600     //  Check 1. xxx is not xmlns
02601     //        2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
02602     //        3. yyy is not XMLUni::fgXMLNSURIName
02603     //        4. if xxx is not null, then yyy cannot be an empty string.
02604     if (attrPrefix && *attrPrefix) {
02605 
02606         if (XMLString::equals(attrLocalName, XMLUni::fgXMLNSString))
02607             emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
02608         else if (XMLString::equals(attrLocalName, XMLUni::fgXMLString)) {
02609             if (!XMLString::equals(attrValue, XMLUni::fgXMLURIName))
02610                 emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
02611         }
02612 
02613         if (!attrValue)
02614             emitError(XMLErrs::NoEmptyStrNamespace, attrLocalName);
02615         else if(!*attrValue && fXMLVersion == XMLReader::XMLV1_0)
02616             emitError(XMLErrs::NoEmptyStrNamespace, attrLocalName);
02617     }
02618 
02619     if (XMLString::equals(attrValue, XMLUni::fgXMLNSURIName))
02620         emitError(XMLErrs::NoUseOfxmlnsURI);
02621     else if (XMLString::equals(attrValue, XMLUni::fgXMLURIName)) {
02622         if (!XMLString::equals(attrLocalName, XMLUni::fgXMLString))
02623             emitError(XMLErrs::XMLURINotMatchXMLPrefix);
02624     }
02625 
02626     //  Ok, we have to get the unique id for the attribute value, which is the
02627     //  URI that this value should be mapped to. The validator has the
02628     //  namespace string pool, so we ask him to find or add this new one. Then
02629     //  we ask the element stack to add this prefix to URI Id mapping.
02630     fElemStack.addPrefix
02631     (
02632         attrLocalName
02633         , fURIStringPool->addOrFind(attrValue)
02634     );
02635 }
02636 
02637 void DGXMLScanner::scanAttrListforNameSpaces(RefVectorOf<XMLAttr>* theAttrList, XMLSize_t attCount,
02638                                                 XMLElementDecl*       elemDecl)
02639 {
02640     // Map prefixes to uris
02641     for (XMLSize_t i=0; i < fAttrNSList->size(); i++) {
02642         XMLAttr* providedAttr = fAttrNSList->elementAt(i);
02643         providedAttr->setURIId(
02644             resolvePrefix(providedAttr->getPrefix(), ElemStack::Mode_Attribute)
02645         );
02646     }
02647 
02648     fAttrNSList->removeAllElements();
02649 
02650      // Decide if to use hash table to do duplicate checking
02651     bool toUseHashTable = false;
02652 
02653         setAttrDupChkRegistry(attCount, toUseHashTable);
02654     for (XMLSize_t index = 0; index < attCount; index++)
02655     {
02656         // check for duplicate namespace attributes:
02657         // by checking for qualified names with the same local part and with prefixes
02658         // which have been bound to namespace names that are identical.
02659         XMLAttr* curAttr = theAttrList->elementAt(index);
02660         if (!toUseHashTable)
02661         {
02662             XMLAttr* loopAttr;
02663             for (XMLSize_t attrIndex=0; attrIndex < index; attrIndex++) {
02664                 loopAttr = theAttrList->elementAt(attrIndex);
02665                 if (loopAttr->getURIId() == curAttr->getURIId() &&
02666                     XMLString::equals(loopAttr->getName(), curAttr->getName())) {
02667                     emitError(
02668                         XMLErrs::AttrAlreadyUsedInSTag, curAttr->getName()
02669                         , elemDecl->getFullName()
02670                     );
02671                 }
02672             }
02673         }
02674         else
02675         {
02676             if (fAttrDupChkRegistry->containsKey((void*)curAttr->getName(), curAttr->getURIId()))
02677             {
02678                 emitError(
02679                     XMLErrs::AttrAlreadyUsedInSTag
02680                     , curAttr->getName(), elemDecl->getFullName()
02681                 );
02682             }
02683 
02684             fAttrDupChkRegistry->put((void*)curAttr->getName(), curAttr->getURIId(), curAttr);
02685         }
02686     }
02687 }
02688 
02689 InputSource* DGXMLScanner::resolveSystemId(const XMLCh* const sysId
02690                                           ,const XMLCh* const pubId)
02691 {
02692     //Normalize sysId
02693     XMLBufBid nnSys(&fBufMgr);
02694     XMLBuffer& normalizedSysId = nnSys.getBuffer();
02695     XMLString::removeChar(sysId, 0xFFFF, normalizedSysId);
02696     const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
02697 
02698     // Create a buffer for expanding the normalized system id
02699     XMLBufBid bbSys(&fBufMgr);
02700     XMLBuffer& expSysId = bbSys.getBuffer();
02701 
02702     //  Allow the entity handler to expand the system id if they choose
02703     //  to do so.
02704     InputSource* srcToFill = 0;
02705     if (fEntityHandler)
02706     {
02707         if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
02708             expSysId.set(normalizedURI);
02709 
02710         ReaderMgr::LastExtEntityInfo lastInfo;
02711         fReaderMgr.getLastExtEntityInfo(lastInfo);
02712         XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
02713                             expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId,
02714                             &fReaderMgr);
02715         srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
02716     }
02717     else
02718     {
02719         expSysId.set(normalizedURI);
02720     }
02721 
02722     //  If they didn't create a source via the entity handler, then we
02723     //  have to create one on our own.
02724     if (!srcToFill)
02725     {
02726         if (fDisableDefaultEntityResolution)
02727             return srcToFill;
02728 
02729         ReaderMgr::LastExtEntityInfo lastInfo;
02730         fReaderMgr.getLastExtEntityInfo(lastInfo);
02731 
02732         XMLURL urlTmp(fMemoryManager);
02733         if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
02734             (urlTmp.isRelative()))
02735         {
02736             if (!fStandardUriConformant)
02737             {
02738                 XMLBufBid  ddSys(&fBufMgr);
02739                 XMLBuffer& resolvedSysId = ddSys.getBuffer();
02740                 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
02741 
02742                 srcToFill = new (fMemoryManager) LocalFileInputSource
02743                 (
02744                     lastInfo.systemId
02745                     , resolvedSysId.getRawBuffer()
02746                     , fMemoryManager
02747                 );
02748             }
02749             else
02750                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
02751         }
02752         else
02753         {
02754             if (fStandardUriConformant && urlTmp.hasInvalidChar())
02755                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
02756             srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
02757         }
02758     }
02759 
02760     return srcToFill;
02761 }
02762 
02763 // ---------------------------------------------------------------------------
02764 //  DGXMLScanner: Private parsing methods
02765 // ---------------------------------------------------------------------------
02766 bool DGXMLScanner::scanAttValue(  const   XMLAttDef* const    attDef
02767                                   , const XMLCh *const attrName
02768                                   ,       XMLBuffer&          toFill)
02769 {
02770     enum States
02771     {
02772         InWhitespace
02773         , InContent
02774     };
02775 
02776     // Get the type and name
02777     const XMLAttDef::AttTypes type = (attDef)
02778                         ?attDef->getType()
02779                         :XMLAttDef::CData;
02780 
02781     // Reset the target buffer
02782     toFill.reset();
02783 
02784     // Get the next char which must be a single or double quote
02785     XMLCh quoteCh;
02786     if (!fReaderMgr.skipIfQuote(quoteCh))
02787         return false;
02788 
02789     //  We have to get the current reader because we have to ignore closing
02790     //  quotes until we hit the same reader again.
02791     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
02792 
02793     // Get attribute def - to check to see if it's declared externally or not
02794     bool  isAttExternal = (attDef)
02795                         ?attDef->isExternal()
02796                         :false;
02797 
02798     //  Loop until we get the attribute value. Note that we use a double
02799     //  loop here to avoid the setup/teardown overhead of the exception
02800     //  handler on every round.
02801     XMLCh   nextCh;
02802     XMLCh   secondCh = 0;
02803     States  curState = InContent;
02804     bool    firstNonWS = false;
02805     bool    gotLeadingSurrogate = false;
02806     bool    escaped;
02807     while (true)
02808     {
02809     try
02810     {
02811         while(true)
02812         {
02813             nextCh = fReaderMgr.getNextChar();
02814 
02815             if (!nextCh)
02816                 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
02817 
02818             // Check for our ending quote in the same entity
02819             if (nextCh == quoteCh)
02820             {
02821                 if (curReader == fReaderMgr.getCurrentReaderNum())
02822                     return true;
02823 
02824                 // Watch for spillover into a previous entity
02825                 if (curReader > fReaderMgr.getCurrentReaderNum())
02826                 {
02827                     emitError(XMLErrs::PartialMarkupInEntity);
02828                     return false;
02829                 }
02830             }
02831 
02832             //  Check for an entity ref now, before we let it affect our
02833             //  whitespace normalization logic below. We ignore the empty flag
02834             //  in this one.
02835             escaped = false;
02836             if (nextCh == chAmpersand)
02837             {
02838                 if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
02839                 {
02840                     gotLeadingSurrogate = false;
02841                     continue;
02842                 }
02843             }
02844             else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
02845             {
02846                 // Deal with surrogate pairs
02847                 //  Its a leading surrogate. If we already got one, then
02848                 //  issue an error, else set leading flag to make sure that
02849                 //  we look for a trailing next time.
02850                 if (gotLeadingSurrogate)
02851                     emitError(XMLErrs::Expected2ndSurrogateChar);
02852                 else
02853                     gotLeadingSurrogate = true;
02854             }
02855             else
02856             {
02857                 //  If its a trailing surrogate, make sure that we are
02858                 //  prepared for that. Else, its just a regular char so make
02859                 //  sure that we were not expected a trailing surrogate.
02860                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
02861                 {
02862                     // Its trailing, so make sure we were expecting it
02863                     if (!gotLeadingSurrogate)
02864                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
02865                 }
02866                 else
02867                 {
02868                     //  Its just a char, so make sure we were not expecting a
02869                     //  trailing surrogate.
02870                     if (gotLeadingSurrogate)
02871                         emitError(XMLErrs::Expected2ndSurrogateChar);
02872 
02873                     // Its got to at least be a valid XML character
02874                     if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
02875                     {
02876                         XMLCh tmpBuf[9];
02877                         XMLString::binToText
02878                         (
02879                             nextCh
02880                             , tmpBuf
02881                             , 8
02882                             , 16
02883                             , fMemoryManager
02884                         );
02885                         emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
02886                     }
02887                 }
02888                 gotLeadingSurrogate = false;
02889             }
02890 
02891             //  If its not escaped, then make sure its not a < character, which
02892             //  is not allowed in attribute values.
02893             if (!escaped && (nextCh == chOpenAngle))
02894                 emitError(XMLErrs::BracketInAttrValue, attrName);
02895 
02896             //  If the attribute is a CDATA type we do simple replacement of
02897             //  tabs and new lines with spaces, if the character is not escaped
02898             //  by way of a char ref.
02899             //
02900             //  Otherwise, we do the standard non-CDATA normalization of
02901             //  compressing whitespace to single spaces and getting rid of leading
02902             //  and trailing whitespace.
02903             if (type == XMLAttDef::CData)
02904             {
02905                 if (!escaped)
02906                 {
02907                     if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
02908                     {
02909                         // Check Validity Constraint for Standalone document declaration
02910                         // XML 1.0, Section 2.9
02911                         if (fStandalone && fValidate && isAttExternal)
02912                         {
02913                              // Can't have a standalone document declaration of "yes" if  attribute
02914                              // values are subject to normalisation
02915                              fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
02916                         }
02917                         nextCh = chSpace;
02918                     }
02919                 }
02920             }
02921             else
02922             {
02923                 if (curState == InWhitespace)
02924                 {
02925                     if ((escaped && nextCh != chSpace) || !fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
02926                     {
02927                         if (firstNonWS)
02928                             toFill.append(chSpace);
02929                         curState = InContent;
02930                         firstNonWS = true;
02931                     }
02932                     else
02933                     {
02934                         continue;
02935                     }
02936                 }
02937                 else if (curState == InContent)
02938                 {
02939                     if ((nextCh == chSpace) ||
02940                         (fReaderMgr.getCurrentReader()->isWhitespace(nextCh) && !escaped))
02941                     {
02942                         curState = InWhitespace;
02943 
02944                         // Check Validity Constraint for Standalone document declaration
02945                         // XML 1.0, Section 2.9
02946                         if (fStandalone && fValidate && isAttExternal)
02947                         {
02948                             if (!firstNonWS || (nextCh != chSpace) || (fReaderMgr.lookingAtSpace()))
02949                             {
02950                                  // Can't have a standalone document declaration of "yes" if  attribute
02951                                  // values are subject to normalisation
02952                                  fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
02953                             }
02954                         }
02955                         continue;
02956                     }
02957                     firstNonWS = true;
02958                 }
02959             }
02960 
02961             // Else add it to the buffer
02962             toFill.append(nextCh);
02963 
02964             if (secondCh)
02965             {
02966                 toFill.append(secondCh);
02967                 secondCh=0;
02968             }
02969         }
02970     }
02971     catch(const EndOfEntityException&)
02972     {
02973         // Just eat it and continue.
02974         gotLeadingSurrogate = false;
02975         escaped = false;
02976     }
02977     }
02978     return true;
02979 }
02980 
02981 
02982 //  This method scans a CDATA section. It collects the character into one
02983 //  of the temp buffers and calls the document handler, if any, with the
02984 //  characters. It assumes that the <![CDATA string has been scanned before
02985 //  this call.
02986 void DGXMLScanner::scanCDSection()
02987 {
02988     static const XMLCh CDataClose[] =
02989     {
02990             chCloseSquare, chCloseAngle, chNull
02991     };
02992 
02993     //  The next character should be the opening square bracket. If not
02994     //  issue an error, but then try to recover by skipping any whitespace
02995     //  and checking again.
02996     if (!fReaderMgr.skippedChar(chOpenSquare))
02997     {
02998         emitError(XMLErrs::ExpectedOpenSquareBracket);
02999         fReaderMgr.skipPastSpaces();
03000 
03001         // If we still don't find it, then give up, else keep going
03002         if (!fReaderMgr.skippedChar(chOpenSquare))
03003             return;
03004     }
03005 
03006     // Get a buffer for this
03007     XMLBufBid bbCData(&fBufMgr);
03008 
03009     //  We just scan forward until we hit the end of CDATA section sequence.
03010     //  CDATA is effectively a big escape mechanism so we don't treat markup
03011     //  characters specially here.
03012     bool            emittedError = false;
03013     bool     gotLeadingSurrogate = false;
03014 
03015     // Get the character data opts for the current element
03016     const ElemStack::StackElem* topElem = fElemStack.topElement();
03017     XMLElementDecl::CharDataOpts charOpts =  topElem->fThisElement->getCharDataOpts();
03018 
03019     while (true)
03020     {
03021         const XMLCh nextCh = fReaderMgr.getNextChar();
03022 
03023         // Watch for unexpected end of file
03024         if (!nextCh)
03025         {
03026             emitError(XMLErrs::UnterminatedCDATASection);
03027             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
03028         }
03029 
03030         if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
03031         {
03032             // This document is standalone; this ignorable CDATA whitespace is forbidden.
03033             // XML 1.0, Section 2.9
03034             // And see if the current element is a 'Children' style content model
03035             if (topElem->fThisElement->isExternal()) {
03036 
03037                 if (charOpts == XMLElementDecl::SpacesOk) // Element Content
03038                 {
03039                     // Error - standalone should have a value of "no" as whitespace detected in an
03040                     // element type with element content whose element declaration was external
03041                     fValidator->emitError(XMLValid::NoWSForStandalone);
03042                 }
03043             }
03044         }
03045 
03046         //  If this is a close square bracket it could be our closing
03047         //  sequence.
03048         if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
03049         {
03050             //  make sure we were not expecting a trailing surrogate.
03051             if (gotLeadingSurrogate)
03052                 emitError(XMLErrs::Expected2ndSurrogateChar);
03053 
03054             if (fValidate) {
03055 
03056                 if (charOpts != XMLElementDecl::AllCharData)
03057                 {
03058                     // They definitely cannot handle any type of char data
03059                     fValidator->emitError(XMLValid::NoCharDataInCM);
03060                 }
03061             }
03062 
03063             // If we have a doc handler, call it
03064             if (fDocHandler)
03065             {
03066                 fDocHandler->docCharacters
03067                     (
03068                     bbCData.getRawBuffer()
03069                     , bbCData.getLen()
03070                     , true
03071                     );
03072             }
03073 
03074             // And we are done
03075             break;
03076         }
03077 
03078         //  Make sure its a valid character. But if we've emitted an error
03079         //  already, don't bother with the overhead since we've already told
03080         //  them about it.
03081         if (!emittedError)
03082         {
03083             // Deal with surrogate pairs
03084             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
03085             {
03086                 //  Its a leading surrogate. If we already got one, then
03087                 //  issue an error, else set leading flag to make sure that
03088                 //  we look for a trailing next time.
03089                 if (gotLeadingSurrogate)
03090                     emitError(XMLErrs::Expected2ndSurrogateChar);
03091                 else
03092                     gotLeadingSurrogate = true;
03093             }
03094             else
03095             {
03096                 //  If its a trailing surrogate, make sure that we are
03097                 //  prepared for that. Else, its just a regular char so make
03098                 //  sure that we were not expected a trailing surrogate.
03099                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
03100                 {
03101                     // Its trailing, so make sure we were expecting it
03102                     if (!gotLeadingSurrogate)
03103                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
03104                 }
03105                 else
03106                 {
03107                     //  Its just a char, so make sure we were not expecting a
03108                     //  trailing surrogate.
03109                     if (gotLeadingSurrogate)
03110                         emitError(XMLErrs::Expected2ndSurrogateChar);
03111 
03112                     // Its got to at least be a valid XML character
03113                     else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
03114                     {
03115                         XMLCh tmpBuf[9];
03116                         XMLString::binToText
03117                         (
03118                             nextCh
03119                             , tmpBuf
03120                             , 8
03121                             , 16
03122                             , fMemoryManager
03123                         );
03124                         emitError(XMLErrs::InvalidCharacter, tmpBuf);
03125                         emittedError = true;
03126                     }
03127                 }
03128                 gotLeadingSurrogate = false;
03129             }
03130         }
03131 
03132         // Add it to the buffer
03133         bbCData.append(nextCh);
03134     }
03135 }
03136 
03137 
03138 void DGXMLScanner::scanCharData(XMLBuffer& toUse)
03139 {
03140     //  We have to watch for the stupid ]]> sequence, which is illegal in
03141     //  character data. So this is a little state machine that handles that.
03142     enum States
03143     {
03144         State_Waiting
03145         , State_GotOne
03146         , State_GotTwo
03147     };
03148 
03149     // Reset the buffer before we start
03150     toUse.reset();
03151 
03152     // Turn on the 'throw at end' flag of the reader manager
03153     ThrowEOEJanitor jan(&fReaderMgr, true);
03154 
03155     //  In order to be more efficient we have to use kind of a deeply nested
03156     //  set of blocks here. The outer block puts on a try and catches end of
03157     //  entity exceptions. The inner loop is the per-character loop. If we
03158     //  put the try inside the inner loop, it would work but would require
03159     //  the exception handling code setup/teardown code to be invoked for
03160     //  each character.
03161     XMLCh   nextCh;
03162     XMLCh   secondCh = 0;
03163     States  curState = State_Waiting;
03164     bool    escaped = false;
03165     bool    gotLeadingSurrogate = false;
03166     bool    notDone = true;
03167     while (notDone)
03168     {
03169         try
03170         {
03171             while (true)
03172             {
03173                 //  Eat through as many plain content characters as possible without
03174                 //  needing special handling.  Moving most content characters here,
03175                 //  in this one call, rather than running the overall loop once
03176                 //  per content character, is a speed optimization.
03177                 if (curState == State_Waiting  &&  !gotLeadingSurrogate)
03178                 {
03179                      fReaderMgr.movePlainContentChars(toUse);
03180                 }
03181 
03182                 // Try to get another char from the source
03183                 //   The code from here on down covers all contengencies,
03184                 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
03185                 {
03186                     // If we were waiting for a trailing surrogate, its an error
03187                     if (gotLeadingSurrogate)
03188                         emitError(XMLErrs::Expected2ndSurrogateChar);
03189 
03190                     notDone = false;
03191                     break;
03192                 }
03193 
03194                 //  Watch for a reference. Note that the escapement mechanism
03195                 //  is ignored in this content.
03196                 escaped = false;
03197                 if (nextCh == chAmpersand)
03198                 {
03199                     sendCharData(toUse);
03200 
03201                     // Turn off the throwing at the end of entity during this
03202                     ThrowEOEJanitor jan(&fReaderMgr, false);
03203 
03204                     if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
03205                     {
03206                         gotLeadingSurrogate = false;
03207                         continue;
03208                     }
03209                     else
03210                     {
03211                         if (escaped && !fElemStack.isEmpty())
03212                             fElemStack.setReferenceEscaped();
03213                     }
03214                 }
03215                 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
03216                 {
03217                     // Deal with surrogate pairs
03218                     //  Its a leading surrogate. If we already got one, then
03219                     //  issue an error, else set leading flag to make sure that
03220                     //  we look for a trailing next time.
03221                     if (gotLeadingSurrogate)
03222                         emitError(XMLErrs::Expected2ndSurrogateChar);
03223                     else
03224                         gotLeadingSurrogate = true;
03225                 }
03226                 else
03227                 {
03228                     //  If its a trailing surrogate, make sure that we are
03229                     //  prepared for that. Else, its just a regular char so make
03230                     //  sure that we were not expected a trailing surrogate.
03231                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
03232                     {
03233                         // Its trailing, so make sure we were expecting it
03234                         if (!gotLeadingSurrogate)
03235                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
03236                     }
03237                     else
03238                     {
03239                         //  Its just a char, so make sure we were not expecting a
03240                         //  trailing surrogate.
03241                         if (gotLeadingSurrogate)
03242                             emitError(XMLErrs::Expected2ndSurrogateChar);
03243 
03244                         // Make sure the returned char is a valid XML char
03245                         if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
03246                         {
03247                             XMLCh tmpBuf[9];
03248                             XMLString::binToText
03249                             (
03250                                 nextCh
03251                                 , tmpBuf
03252                                 , 8
03253                                 , 16
03254                                 , fMemoryManager
03255                             );
03256                             emitError(XMLErrs::InvalidCharacter, tmpBuf);
03257                         }
03258                     }
03259                     gotLeadingSurrogate = false;
03260                 }
03261 
03262                  // Keep the state machine up to date
03263                 if (!escaped)
03264                 {
03265                     if (nextCh == chCloseSquare)
03266                     {
03267                         if (curState == State_Waiting)
03268                             curState = State_GotOne;
03269                         else if (curState == State_GotOne)
03270                             curState = State_GotTwo;
03271                     }
03272                     else if (nextCh == chCloseAngle)
03273                     {
03274                         if (curState == State_GotTwo)
03275                             emitError(XMLErrs::BadSequenceInCharData);
03276                         curState = State_Waiting;
03277                     }
03278                     else
03279                     {
03280                         curState = State_Waiting;
03281                     }
03282                 }
03283                 else
03284                 {
03285                     curState = State_Waiting;
03286                 }
03287 
03288                 // Add this char to the buffer
03289                 toUse.append(nextCh);
03290 
03291                 if (secondCh)
03292                 {
03293                     toUse.append(secondCh);
03294                     secondCh=0;
03295                 }
03296             }
03297         }
03298         catch(const EndOfEntityException& toCatch)
03299         {
03300             //  Some entity ended, so we have to send any accumulated
03301             //  chars and send an end of entity event.
03302             sendCharData(toUse);
03303             gotLeadingSurrogate = false;
03304 
03305             if (fDocHandler)
03306                 fDocHandler->endEntityReference(toCatch.getEntity());
03307         }
03308     }
03309 
03310     // Check the validity constraints as per XML 1.0 Section 2.9
03311     if (fValidate && fStandalone)
03312     {
03313         // See if the text contains whitespace
03314         // Get the raw data we need for the callback
03315         const XMLCh* rawBuf = toUse.getRawBuffer();
03316         const XMLSize_t len = toUse.getLen();
03317         const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
03318 
03319         if (isSpaces)
03320         {
03321             // And see if the current element is a 'Children' style content model
03322             const ElemStack::StackElem* topElem = fElemStack.topElement();
03323 
03324             if (topElem->fThisElement->isExternal()) {
03325 
03326                 // Get the character data opts for the current element
03327                 XMLElementDecl::CharDataOpts charOpts =  topElem->fThisElement->getCharDataOpts();
03328 
03329                 if (charOpts == XMLElementDecl::SpacesOk)  // => Element Content
03330                 {
03331                     // Error - standalone should have a value of "no" as whitespace detected in an
03332                     // element type with element content whose element declaration was external
03333                     //
03334                     fValidator->emitError(XMLValid::NoWSForStandalone);
03335                 }
03336             }
03337         }
03338     }
03339     // Send any char data that we accumulated into the buffer
03340     sendCharData(toUse);
03341 }
03342 
03343 
03344 //  This method will scan a general/character entity ref. It will either
03345 //  expand a char ref and return it directly, or push a reader for a general
03346 //  entity.
03347 //
03348 //  The return value indicates whether the char parameters hold the value
03349 //  or whether the value was pushed as a reader, or that it failed.
03350 //
03351 //  The escaped flag tells the caller whether the returned parameter resulted
03352 //  from a character reference, which escapes the character in some cases. It
03353 //  only makes any difference if the return value indicates the value was
03354 //  returned directly.
03355 DGXMLScanner::EntityExpRes
03356 DGXMLScanner::scanEntityRef(  const   bool    inAttVal
03357                             ,       XMLCh&  firstCh
03358                             ,       XMLCh&  secondCh
03359                             ,       bool&   escaped)
03360 {
03361     // Assume no escape
03362     secondCh = 0;
03363     escaped = false;
03364 
03365     // We have to insure that its all in one entity
03366     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
03367 
03368     //  If the next char is a pound, then its a character reference and we
03369     //  need to expand it always.
03370     if (fReaderMgr.skippedChar(chPound))
03371     {
03372         //  Its a character reference, so scan it and get back the numeric
03373         //  value it represents.
03374         if (!scanCharRef(firstCh, secondCh))
03375             return EntityExp_Failed;
03376 
03377         escaped = true;
03378 
03379         if (curReader != fReaderMgr.getCurrentReaderNum())
03380             emitError(XMLErrs::PartialMarkupInEntity);
03381 
03382         return EntityExp_Returned;
03383     }
03384 
03385     // Expand it since its a normal entity ref
03386     XMLBufBid bbName(&fBufMgr);
03387 
03388     int  colonPosition;
03389     bool validName = fDoNamespaces ? fReaderMgr.getQName(bbName.getBuffer(), &colonPosition) :
03390                                      fReaderMgr.getName(bbName.getBuffer());
03391     if (!validName)
03392     {
03393         if (bbName.isEmpty())
03394             emitError(XMLErrs::ExpectedEntityRefName);
03395         else
03396             emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer());
03397         return EntityExp_Failed;
03398     }
03399 
03400     //  Next char must be a semi-colon. But if its not, just emit
03401     //  an error and try to continue.
03402     if (!fReaderMgr.skippedChar(chSemiColon))
03403         emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
03404 
03405     // Make sure we ended up on the same entity reader as the & char
03406     if (curReader != fReaderMgr.getCurrentReaderNum())
03407         emitError(XMLErrs::PartialMarkupInEntity);
03408 
03409     // Look up the name in the general entity pool
03410     XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer());
03411 
03412     // If it does not exist, then obviously an error
03413     if (!decl)
03414     {
03415         // XML 1.0 Section 4.1
03416         // Well-formedness Constraint for entity not found:
03417         //   In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
03418         //      or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
03419         //      or a parameter entity
03420         //
03421         // Else it's Validity Constraint
03422         if (fStandalone || fHasNoDTD)
03423             emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
03424         else {
03425             if (fValidate)
03426                 fValidator->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
03427         }
03428 
03429         return EntityExp_Failed;
03430     }
03431 
03432     // XML 1.0 Section 4.1
03433     //  If we are a standalone document, then it has to have been declared
03434     //  in the internal subset.
03435     if (fStandalone && !decl->getDeclaredInIntSubset())
03436         emitError(XMLErrs::IllegalRefInStandalone, bbName.getRawBuffer());
03437 
03438     if (decl->isExternal())
03439     {
03440         // If its unparsed, then its not valid here
03441         if (decl->isUnparsed())
03442         {
03443             emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer());
03444             return EntityExp_Failed;
03445         }
03446 
03447         // If we are in an attribute value, then not valid but keep going
03448         if (inAttVal)
03449             emitError(XMLErrs::NoExtRefsInAttValue);
03450 
03451         // And now create a reader to read this entity
03452         InputSource* srcUsed;
03453         XMLReader* reader = fReaderMgr.createReader
03454         (
03455             decl->getBaseURI()
03456             , decl->getSystemId()
03457             , decl->getPublicId()
03458             , false
03459             , XMLReader::RefFrom_NonLiteral
03460             , XMLReader::Type_General
03461             , XMLReader::Source_External
03462             , srcUsed
03463             , fCalculateSrcOfs
03464             , fLowWaterMark
03465             , fDisableDefaultEntityResolution
03466         );
03467 
03468         // Put a janitor on the source so it gets cleaned up on exit
03469         Janitor<InputSource> janSrc(srcUsed);
03470 
03471         //  If the creation failed, and its not because the source was empty,
03472         //  then emit an error and return.
03473         if (!reader)
03474             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : decl->getSystemId(), fMemoryManager);
03475 
03476         //  Push the reader. If its a recursive expansion, then emit an error
03477         //  and return an failure.
03478         if (!fReaderMgr.pushReader(reader, decl))
03479         {
03480             emitError(XMLErrs::RecursiveEntity, decl->getName());
03481             return EntityExp_Failed;
03482         }
03483 
03484         // here's where we need to check if there's a SecurityManager,
03485         // how many entity references we've had
03486         if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
03487             XMLCh expLimStr[32];
03488             XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
03489             emitError
03490             (
03491                 XMLErrs::EntityExpansionLimitExceeded
03492                 , expLimStr
03493             );
03494             // there seems nothing better to do than reset the entity expansion counter
03495             fEntityExpansionCount = 0;
03496         }
03497 
03498         //  Do a start entity reference event.
03499         //
03500         //  <TBD> For now, we supress them in att values. Later, when
03501         //  the stuff is in place to correctly allow DOM to handle them
03502         //  we'll turn this back on.
03503         if (fDocHandler && !inAttVal)
03504             fDocHandler->startEntityReference(*decl);
03505 
03506         // If it starts with the XML string, then parse a text decl
03507         if (checkXMLDecl(true))
03508             scanXMLDecl(Decl_Text);
03509     }
03510     else
03511     {
03512         //  If its one of the special char references, then we can return
03513         //  it as a character, and its considered escaped.
03514         if (decl->getIsSpecialChar())
03515         {
03516             firstCh = decl->getValue()[0];
03517             escaped = true;
03518             return EntityExp_Returned;
03519         }
03520 
03521         //  Create a reader over a memory stream over the entity value
03522         //  We force it to assume UTF-16 by passing in an encoding
03523         //  string. This way it won't both trying to predecode the
03524         //  first line, looking for an XML/TextDecl.
03525         XMLReader* valueReader = fReaderMgr.createIntEntReader
03526         (
03527             decl->getName()
03528             , XMLReader::RefFrom_NonLiteral
03529             , XMLReader::Type_General
03530             , decl->getValue()
03531             , decl->getValueLen()
03532             , false
03533         );
03534 
03535         //  Try to push the entity reader onto the reader manager stack,
03536         //  where it will become the subsequent input. If it fails, that
03537         //  means the entity is recursive, so issue an error. The reader
03538         //  will have just been discarded, but we just keep going.
03539         if (!fReaderMgr.pushReader(valueReader, decl))
03540             emitError(XMLErrs::RecursiveEntity, decl->getName());
03541 
03542         // here's where we need to check if there's a SecurityManager,
03543         // how many entity references we've had
03544         if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
03545             XMLCh expLimStr[32];
03546             XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
03547             emitError
03548             (
03549                 XMLErrs::EntityExpansionLimitExceeded
03550                 , expLimStr
03551             );
03552         }
03553 
03554         //  Do a start entity reference event.
03555         //
03556         //  <TBD> For now, we supress them in att values. Later, when
03557         //  the stuff is in place to correctly allow DOM to handle them
03558         //  we'll turn this back on.
03559         if (fDocHandler && !inAttVal)
03560             fDocHandler->startEntityReference(*decl);
03561 
03562         // If it starts with the XML string, then it's an error
03563         if (checkXMLDecl(true)) {
03564             emitError(XMLErrs::TextDeclNotLegalHere);
03565             fReaderMgr.skipPastChar(chCloseAngle);
03566         }
03567     }
03568     return EntityExp_Pushed;
03569 }
03570 
03571 
03572 XERCES_CPP_NAMESPACE_END