GME  13
IGXMLScanner2.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: IGXMLScanner2.cpp 925236 2010-03-19 14:29:47Z borisk $
00020  */
00021 
00022 // ---------------------------------------------------------------------------
00023 //  This file holds some of the grunt work methods of IGXMLScanner.cpp to keep
00024 //  it a little more readable.
00025 // ---------------------------------------------------------------------------
00026 
00027 // ---------------------------------------------------------------------------
00028 //  Includes
00029 // ---------------------------------------------------------------------------
00030 #include <xercesc/internal/IGXMLScanner.hpp>
00031 #include <xercesc/internal/EndOfEntityException.hpp>
00032 #include <xercesc/util/UnexpectedEOFException.hpp>
00033 #include <xercesc/util/XMLUri.hpp>
00034 #include <xercesc/framework/LocalFileInputSource.hpp>
00035 #include <xercesc/framework/URLInputSource.hpp>
00036 #include <xercesc/framework/XMLDocumentHandler.hpp>
00037 #include <xercesc/framework/XMLEntityHandler.hpp>
00038 #include <xercesc/framework/XMLPScanToken.hpp>
00039 #include <xercesc/framework/XMLRefInfo.hpp>
00040 #include <xercesc/framework/XMLGrammarPool.hpp>
00041 #include <xercesc/framework/psvi/PSVIAttributeList.hpp>
00042 #include <xercesc/framework/psvi/PSVIElement.hpp>
00043 #include <xercesc/framework/psvi/XSAnnotation.hpp>
00044 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
00045 #include <xercesc/validators/DTD/DTDGrammar.hpp>
00046 #include <xercesc/validators/DTD/DTDValidator.hpp>
00047 #include <xercesc/validators/DTD/XMLDTDDescriptionImpl.hpp>
00048 #include <xercesc/validators/datatype/DatatypeValidator.hpp>
00049 #include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp>
00050 #include <xercesc/validators/schema/SchemaGrammar.hpp>
00051 #include <xercesc/validators/schema/SchemaValidator.hpp>
00052 #include <xercesc/validators/schema/TraverseSchema.hpp>
00053 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
00054 #include <xercesc/validators/schema/XSDDOMParser.hpp>
00055 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp>
00056 #include <xercesc/validators/schema/identity/ValueStore.hpp>
00057 #include <xercesc/util/XMLStringTokenizer.hpp>
00058 
00059 XERCES_CPP_NAMESPACE_BEGIN
00060 
00061 inline XMLAttDefList& getAttDefList(bool              isSchemaGrammar   // forward declaration of a file-level helper; its definition is not visible in this part of the file
00062                                   , ComplexTypeInfo*  currType          // NOTE(review): presumably the schema complex type to take the list from -- confirm against the definition
00063                                   , XMLElementDecl*   elemDecl);        // NOTE(review): presumably the element declaration used otherwise -- confirm against the definition
00064 
00065 // ---------------------------------------------------------------------------
00066 //  IGXMLScanner: Private helper methods
00067 // ---------------------------------------------------------------------------
00068 
00069 //  This method is called from scanStartTagNS() to build up the list of
00070 //  XMLAttr objects that will be passed out in the start tag callout. We
00071 //  get the key/value pairs from the raw scan of explicitly provided attrs,
00072 //  which have not been normalized. And we get the element declaration from
00073 //  which we will get any defaulted or fixed attribute defs and add those
00074 //  in as well.
00075 XMLSize_t
00076 IGXMLScanner::buildAttList(const  RefVectorOf<KVStringPair>&  providedAttrs
00077                           , const XMLSize_t                   attCount
00078                           ,       XMLElementDecl*             elemDecl
00079                           ,       RefVectorOf<XMLAttr>&       toFill)
00080 {
00081     //  If doing DTDs, ask the element to clear the 'provided' flag on all of the att defs
00082     //  that it owns, and to return us a boolean indicating whether it has
00083     //  any defs.  If schemas are being validated, the complexType
00084     // at the top of the SchemaValidator's stack will
00085     // know what's best.  REVISIT:  don't modify grammar at all; eliminate
00086     // this step...
00087     ComplexTypeInfo *currType = 0;
00088     DatatypeValidator *currDV = 0;
00089     if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType && fValidate)
00090     {
00091         currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
00092         if (!currType) {
00093             currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator();
00094         }
00095     }
00096 
00097     const bool hasDefs = (currType && fValidate)
00098             ? currType->hasAttDefs()
00099             : elemDecl->hasAttDefs();
00100 
00101     // another set of attributes; increment element counter
00102     fElemCount++;
00103 
00104     //  If there are no explicitly provided attributes and there are no
00105     //  defined attributes for the element, then we don't have anything to do.
00106     //  So just return zero in this case.
00107     if (!hasDefs && !attCount)
00108         return 0;
00109 
00110     // Keep up with how many attrs we end up with total
00111     XMLSize_t retCount = 0;
00112 
00113     //  And get the current size of the output vector. This lets us use
00114     //  existing elements until we fill it, then start adding new ones.
00115     const XMLSize_t curAttListSize = toFill.size();
00116 
00117     //  We need a buffer into which raw scanned attribute values will be
00118     //  normalized.
00119     XMLBufBid bbNormal(&fBufMgr);
00120     XMLBuffer& normBuf = bbNormal.getBuffer();
00121 
00122     //
00123     // Decide if to use hash table to do duplicate checking
00124     //
00125     bool toUseHashTable = false;
00126     if (fGrammarType == Grammar::DTDGrammarType)
00127     {
00128         setAttrDupChkRegistry(attCount, toUseHashTable);
00129     }
00130 
00131     XMLBufBid bbPrefix(&fBufMgr);
00132     XMLBuffer& prefixBuf = bbPrefix.getBuffer();
00133 
00134     //  Loop through our explicitly provided attributes, which are in the raw
00135     //  scanned form, and build up XMLAttr objects.
00136     XMLSize_t index;
00137     const XMLCh* prefPtr, *suffPtr;
00138     for (index = 0; index < attCount; index++)
00139     {
00140         PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID;
00141         PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL;
00142         const KVStringPair* curPair = providedAttrs.elementAt(index);
00143 
00144         //  We have to split the name into its prefix and name parts. Then
00145         //  we map the prefix to its URI.
00146         const XMLCh* const namePtr = curPair->getKey();
00147 
00148         const int colonInd = fRawAttrColonList[index];
00149         unsigned int uriId;
00150         if (colonInd != -1)
00151         {
00152             prefixBuf.set(namePtr, colonInd);
00153             prefPtr = prefixBuf.getRawBuffer();
00154             suffPtr = namePtr + colonInd + 1;
00155             //  Map the prefix to a URI id
00156             uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
00157         }
00158         else
00159         {
00160             // No colon, so we just have a name with no prefix
00161             prefPtr = XMLUni::fgZeroLenString;
00162             suffPtr = namePtr;
00163             // an empty prefix is always the empty namespace, when dealing with attributes
00164             uriId = fEmptyNamespaceId;
00165         }
00166 
00167         //  If the uri comes back as the xmlns URI or the schema instance (XSI)
00168         //  URI, or the attribute is in the empty namespace and its name is
00169         //  'xmlns', then we handle it specially. So set a boolean flag that
00170         //  lets us quickly tell below which kind we are dealing with.
00171         const bool isNSAttr = (uriId == fEmptyNamespaceId)?
00172                                 XMLString::equals(suffPtr, XMLUni::fgXMLNSString) :
00173                                 (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI));
00174 
00175 
00176         //  If it's not a special case namespace attr of some sort, then we
00177         //  do normal checking and processing.
00178         XMLAttDef::AttTypes attType = XMLAttDef::CData;
00179         DatatypeValidator *attrValidator = 0;
00180         PSVIAttribute *psviAttr = 0;
00181         bool otherXSI = false;
00182 
00183         if (isNSAttr && fGrammarType == Grammar::SchemaGrammarType)
00184         {
00185             if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
00186             {
00187                 emitError
00188                 (
00189                     XMLErrs::AttrAlreadyUsedInSTag
00190                     , namePtr
00191                     , elemDecl->getFullName()
00192                 );
00193                 fPSVIElemContext.fErrorOccurred = true;
00194             }
00195             else
00196             {
00197                 bool ValueValidate = false;
00198                 bool tokenizeBuffer = false;
00199 
00200                 if (uriId == fXMLNSNamespaceId)
00201                 {
00202                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
00203                 }
00204                 else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI))
00205                 {
00206                     if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
00207                     {
00208                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
00209 
00210                         ValueValidate = true;
00211                     }
00212                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
00213                     {
00214                         // use anyURI as the validator
00215                         // tokenize the data and use the anyURI data for each piece
00216                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
00217                         //We should validate each value in the schema location; however,
00218                         //this led to a performance degradation of around 4%.  Since
00219                         //the first value of each pair needs to match what is in the
00220                         //schema document and the second value needs to be valid in
00221                         //order to open the document we won't validate it.  Need to
00222                         //do performance analysis of the anyuri datatype.
00223                         //ValueValidate = true;
00224                         ValueValidate = false;
00225                         tokenizeBuffer = true;
00226                     }
00227                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
00228                     {
00229                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI);
00230                         //We should validate this value; however,
00231                         //this led to a performance degradation of around 4%.  Since
00232                         //the value needs to be valid in
00233                         //order to open the document we won't validate it.  Need to
00234                         //do performance analysis of the anyuri datatype.
00235                         //ValueValidate = true;
00236                         ValueValidate = false;
00237                     }
00238                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
00239                     {
00240                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
00241 
00242                         ValueValidate = true;
00243                     }
00244                     else {
00245                         otherXSI = true;
00246                     }
00247                 }
00248 
00249                 if (!otherXSI) {
00250                     normalizeAttRawValue
00251                     (
00252                         namePtr
00253                         , curPair->getValue()
00254                         , normBuf
00255                     );
00256 
00257                     if (fValidate && attrValidator && ValueValidate)
00258                     {
00259                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true);
00260 
00261                         ValidationContext* const    theContext =
00262                             getValidationContext();
00263 
00264                         if (theContext)
00265                         {
00266                             try
00267                             {
00268                                 if (tokenizeBuffer) {
00269                                     XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager);
00270                                     while (tokenizer.hasMoreTokens()) {
00271                                         attrValidator->validate(
00272                                             tokenizer.nextToken(),
00273                                             theContext,
00274                                             fMemoryManager);
00275                                     }
00276                                 }
00277                                 else {
00278                                     attrValidator->validate(
00279                                         normBuf.getRawBuffer(),
00280                                         theContext,
00281                                         fMemoryManager);
00282                                 }
00283                             }
00284                             catch (const XMLException& idve)
00285                             {
00286                                 fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage());
00287                             }
00288                         }
00289                     }
00290 
00291                     if(getPSVIHandler())
00292                     {
00293                         psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
00294                         XSSimpleTypeDefinition *validatingType = (attrValidator)
00295                             ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator)
00296                             : 0;
00297                         // no attribute declarations for these...
00298                         psviAttr->reset(
00299                             fRootElemName
00300                             , PSVIItem::VALIDITY_NOTKNOWN
00301                             , PSVIItem::VALIDATION_NONE
00302                             , validatingType
00303                             , 0
00304                             , 0
00305                             , false
00306                             , 0
00307                             , attrValidator
00308                             );
00309                     }
00310                 }
00311             }
00312         }
00313 
00314         if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType || otherXSI)
00315         {
00316             // Some checking for attribute wild card first (for schema)
00317             bool laxThisOne = false;
00318             bool skipThisOne = false;
00319 
00320             XMLAttDef* attDefForWildCard = 0;
00321             XMLAttDef*  attDef = 0;
00322 
00323             if (fGrammarType == Grammar::SchemaGrammarType) {
00324 
00325                 //retrieve the att def
00326                 SchemaAttDef* attWildCard = 0;
00327                 if (currType) {
00328                     attDef = currType->getAttDef(suffPtr, uriId);
00329                     attWildCard = currType->getAttWildCard();
00330                 }
00331                 else if (!currDV) { // check explicitly-set wildcard
00332                     attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, uriId);
00333                     attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
00334                 }
00335 
00336                 // if not found or faulted in - check for a matching wildcard attribute
00337                 // if no matching wildcard attribute, check (un)qualified cases and flag
00338                 // appropriate errors
00339                 if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
00340 
00341                     if (attWildCard) {
00342                         //if schema, see if we should lax or skip the validation of this attribute
00343                         if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
00344 
00345                             if(!skipThisOne)
00346                             {
00347                                 SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
00348                                 if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
00349                                     RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
00350                                     if (attRegistry) {
00351                                         attDefForWildCard = attRegistry->get(suffPtr);
00352                                     }
00353                                 }
00354                             }
00355                         }
00356                     }
00357                     else if (currType) {
00358                         // not found, see if the attDef should be qualified or not
00359                         if (uriId == fEmptyNamespaceId) {
00360                             attDef = currType->getAttDef(suffPtr
00361                                             , fURIStringPool->getId(fGrammar->getTargetNamespace()));
00362                             if (fValidate
00363                                 && attDef
00364                                 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
00365                                 // the attribute should be qualified
00366                                 fValidator->emitError
00367                                 (
00368                                     XMLValid::AttributeNotQualified
00369                                     , attDef->getFullName()
00370                                 );
00371                                 if(fGrammarType == Grammar::SchemaGrammarType) {
00372                                     fPSVIElemContext.fErrorOccurred = true;
00373                                     if (getPSVIHandler())
00374                                     {
00375                                         attrValid = PSVIItem::VALIDITY_INVALID;
00376                                     }
00377                                 }
00378                             }
00379                         }
00380                         else {
00381                             attDef = currType->getAttDef(suffPtr
00382                                             , fEmptyNamespaceId);
00383                             if (fValidate
00384                                 && attDef
00385                                 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
00386                                 // the attribute should be unqualified
00387                                 fValidator->emitError
00388                                 (
00389                                     XMLValid::AttributeNotUnQualified
00390                                     , attDef->getFullName()
00391                                 );
00392                                 if(fGrammarType == Grammar::SchemaGrammarType) {
00393                                     fPSVIElemContext.fErrorOccurred = true;
00394                                     if (getPSVIHandler())
00395                                     {
00396                                         attrValid = PSVIItem::VALIDITY_INVALID;
00397                                     }
00398                                 }
00399                             }
00400                         }
00401                     }
00402                 }
00403             }
00404 
00405             //  Find this attribute within the parent element. We pass both
00406             //  the uriID/name and the raw QName buffer, since we don't know
00407             //  how the derived validator and its elements store attributes.
00408             else
00409             {
00410                 if(fGrammarType == Grammar::DTDGrammarType)
00411                     attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( namePtr);
00412             }
00413 
00414             // now need to prepare for duplicate detection
00415             if(attDef)
00416             {
00417                 unsigned int *curCountPtr = fAttDefRegistry->get(attDef);
00418                 if(!curCountPtr)
00419                 {
00420                     curCountPtr = getNewUIntPtr();
00421                     *curCountPtr = fElemCount;
00422                     fAttDefRegistry->put(attDef, curCountPtr);
00423                 }
00424                 else if(*curCountPtr < fElemCount)
00425                     *curCountPtr = fElemCount;
00426                 else
00427                 {
00428                     emitError
00429                     (
00430                         XMLErrs::AttrAlreadyUsedInSTag
00431                         , attDef->getFullName()
00432                         , elemDecl->getFullName()
00433                     );
00434                     fPSVIElemContext.fErrorOccurred = true;
00435                 }
00436             }
00437             else
00438             {
00439                 if(fGrammarType == Grammar::DTDGrammarType)
00440                 {
00441                     if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0))
00442                     {
00443                         emitError
00444                         (
00445                             XMLErrs::AttrAlreadyUsedInSTag
00446                             , namePtr
00447                             , elemDecl->getFullName()
00448                         );
00449                     }
00450                 }
00451                 else // schema grammar
00452                 {
00453                     if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId))
00454                     {
00455                         emitError
00456                         (
00457                             XMLErrs::AttrAlreadyUsedInSTag
00458                             , namePtr
00459                             , elemDecl->getFullName()
00460                         );
00461                         fPSVIElemContext.fErrorOccurred = true;
00462                     }
00463                 }
00464             }
00465 
00466             if(fGrammarType == Grammar::SchemaGrammarType )
00467             {
00468                 // if we've found either an attDef or an attDefForWildCard,
00469                 // then we're doing full validation and it may still be valid.
00470                 if(!attDef && !attDefForWildCard)
00471                 {
00472                     if(!laxThisOne && !skipThisOne)
00473                     {
00474                         fPSVIElemContext.fErrorOccurred = true;
00475                     }
00476                     if(getPSVIHandler())
00477                     {
00478                         if(!laxThisOne && !skipThisOne)
00479                         {
00480                             attrValid = PSVIItem::VALIDITY_INVALID;
00481                         }
00482                         else if(laxThisOne)
00483                         {
00484                             attrValid = PSVIItem::VALIDITY_NOTKNOWN;
00485                             attrAssessed = PSVIItem::VALIDATION_PARTIAL;
00486                         }
00487                         else
00488                         {
00489                             attrValid = PSVIItem::VALIDITY_NOTKNOWN;
00490                             attrAssessed = PSVIItem::VALIDATION_NONE;
00491                         }
00492                     }
00493                 }
00494             }
00495 
00496             bool errorCondition = fValidate && !attDefForWildCard && !attDef;
00497             if (errorCondition && !skipThisOne && !laxThisOne)
00498             {
00499                 //
00500                 //  It's not valid for this element, so issue an error if we are
00501                 //  validating.
00502                 //
00503                 XMLBufBid bbMsg(&fBufMgr);
00504                 XMLBuffer& bufMsg = bbMsg.getBuffer();
00505                 if (uriId != fEmptyNamespaceId) {
00506                     XMLBufBid bbURI(&fBufMgr);
00507                     XMLBuffer& bufURI = bbURI.getBuffer();
00508 
00509                     getURIText(uriId, bufURI);
00510 
00511                     bufMsg.append(chOpenCurly);
00512                     bufMsg.append(bufURI.getRawBuffer());
00513                     bufMsg.append(chCloseCurly);
00514                 }
00515                 bufMsg.append(suffPtr);
00516                 fValidator->emitError
00517                 (
00518                     XMLValid::AttNotDefinedForElement
00519                     , bufMsg.getRawBuffer()
00520                     , elemDecl->getFullName()
00521                 );
00522             }
00523 
00524             //  Now normalize the raw value since we have the attribute type. We
00525             //  don't care about the return status here. If it failed, an error
00526             //  was issued, which is all we care about.
00527             if (attDefForWildCard) {
00528                 normalizeAttValue(
00529                     attDefForWildCard, namePtr, curPair->getValue(), normBuf
00530                 );
00531 
00532                 //  If we found an attdef for this one, then let's validate it.
00533                 const XMLCh* xsNormalized = normBuf.getRawBuffer();
00534                 DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator();
00535                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
00536                 {
00537                     // normalize the attribute according to schema whitespace facet
00538                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
00539                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
00540 
00541                     if (fNormalizeData && fValidate) {
00542                         normBuf.set(xsNormalized);
00543                     }
00544                 }
00545 
00546                 if (fValidate ) {
00547                     fValidator->validateAttrValue(
00548                         attDefForWildCard, xsNormalized, false, elemDecl
00549                     );
00550                     attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
00551                     if(((SchemaValidator *)fValidator)->getErrorOccurred())
00552                     {
00553                         fPSVIElemContext.fErrorOccurred = true;
00554                         if(getPSVIHandler())
00555                             attrValid = PSVIItem::VALIDITY_INVALID;
00556                     }
00557                 }
00558                 else { // no decl; default DOMTypeInfo to anySimpleType
00559                     attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
00560                 }
00561 
00562                 // Save the type for later use
00563                 attType = attDefForWildCard->getType();
00564             }
00565             else {
00566                 normalizeAttValue(
00567                     attDef, namePtr, curPair->getValue(), normBuf
00568                 );
00569 
00570                 //  If we found an attdef for this one, then let's validate it.
00571                 if (attDef)
00572                 {
00573                     const XMLCh* xsNormalized = normBuf.getRawBuffer();
00574                     if (fGrammarType == Grammar::SchemaGrammarType)
00575                     {
00576                         DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator();
00577                         if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
00578                         {
00579                             // normalize the attribute according to schema whitespace facet
00580                             ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true);
00581                             xsNormalized = fWSNormalizeBuf.getRawBuffer();
00582                             if (fNormalizeData && fValidate && !skipThisOne) {
00583                                 normBuf.set(xsNormalized);
00584                             }
00585                         }
00586                     }
00587 
00588                     if (fValidate && !skipThisOne)
00589                     {
00590                         fValidator->validateAttrValue(
00591                             attDef, xsNormalized, false, elemDecl
00592                         );
00593 
00594                         if(fGrammarType == Grammar::SchemaGrammarType)
00595                         {
00596                             attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator();
00597                             if(((SchemaValidator *)fValidator)->getErrorOccurred())
00598                             {
00599                                 fPSVIElemContext.fErrorOccurred = true;
00600                                 if (getPSVIHandler())
00601                                     attrValid = PSVIItem::VALIDITY_INVALID;
00602                             }
00603                         }
00604                     }
00605                     else if(fGrammarType == Grammar::SchemaGrammarType) {
00606                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
00607                     }
00608                 }
00609                 else // no attDef at all; default to anySimpleType
00610                 {
00611                     if(fGrammarType == Grammar::SchemaGrammarType) {
00612                         attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE);
00613                     }
00614                 }
00615 
00616                 // Save the type for later use
00617                 if (attDef)
00618                 {
00619                     attType = attDef->getType();
00620                 }
00621             }
00622 
00623             // now fill in the PSVIAttributes entry for this attribute:
00624             if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
00625             {
00626                 psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId));
00627                 SchemaAttDef *actualAttDef = 0;
00628                 if(attDef)
00629                     actualAttDef = (SchemaAttDef *)attDef;
00630                 else if (attDefForWildCard)
00631                     actualAttDef = (SchemaAttDef *)attDefForWildCard;
00632                 if(actualAttDef)
00633                 {
00634                     XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef);
00635                     DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator();
00636                     XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType);
00637                     if(attrValid != PSVIItem::VALIDITY_VALID)
00638                     {
00639                         psviAttr->reset
00640                         (
00641                             fRootElemName
00642                             , attrValid
00643                             , attrAssessed
00644                             , validatingType
00645                             , 0
00646                             , actualAttDef->getValue()
00647                             , false
00648                             , attrDecl
00649                             , 0
00650                         );
00651                     }
00652                     else
00653                     {
00654                         XSSimpleTypeDefinition *memberType = 0;
00655                         if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
00656                             memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator);
00657                         psviAttr->reset
00658                         (
00659                             fRootElemName
00660                             , attrValid
00661                             , attrAssessed
00662                             , validatingType
00663                             , memberType
00664                             , actualAttDef->getValue()
00665                             , false
00666                             , attrDecl
00667                             , (memberType)?attrValidator:attrDataType
00668                         );
00669                     }
00670                 }
00671                 else
00672                 {
00673                     psviAttr->reset
00674                     (
00675                         fRootElemName
00676                         , attrValid
00677                         , attrAssessed
00678                         , 0
00679                         , 0
00680                         , 0
00681                         , false
00682                         , 0
00683                         , 0
00684                     );
00685                 }
00686             }
00687         }
00688 
00689         //  Add this attribute to the attribute list that we use to pass them
00690         //  to the handler. We reuse its existing elements but expand it as
00691         //  required.
00692         XMLAttr* curAttr;
00693 
00694         // check for duplicate namespace attributes:
00695         // by checking for qualified names with the same local part and with prefixes
00696         // which have been bound to namespace names that are identical.
00697         if (fGrammarType == Grammar::DTDGrammarType) {
00698             if (!toUseHashTable)
00699             {
00700                 for (XMLSize_t attrIndex=0; attrIndex < retCount; attrIndex++) {
00701                     curAttr = toFill.elementAt(attrIndex);
00702                     if (uriId == curAttr->getURIId() &&
00703                         XMLString::equals(suffPtr, curAttr->getName())) {
00704                         emitError
00705                         (
00706 
00707                          XMLErrs::AttrAlreadyUsedInSTag
00708                         , curAttr->getName()
00709                         , elemDecl->getFullName()
00710                         );
00711                     }
00712                 }
00713             }
00714             else
00715             {
00716                 if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId))
00717                 {
00718                     emitError
00719                         (
00720                         XMLErrs::AttrAlreadyUsedInSTag
00721                         , suffPtr
00722                         , elemDecl->getFullName()
00723                         );
00724                 }
00725             }
00726         }
00727 
00728         if (retCount >= curAttListSize)
00729         {
00730             curAttr = new (fMemoryManager) XMLAttr
00731             (
00732                 uriId
00733                 , suffPtr
00734                 , prefPtr
00735                 , normBuf.getRawBuffer()
00736                 , attType
00737                 , true
00738                 , fMemoryManager
00739             );
00740             toFill.addElement(curAttr);
00741         }
00742         else
00743         {
00744             curAttr = toFill.elementAt(retCount);
00745             curAttr->set
00746             (
00747                 uriId
00748                 , suffPtr
00749                 , prefPtr
00750                 , normBuf.getRawBuffer()
00751                 , attType
00752             );
00753             curAttr->setSpecified(true);
00754         }
00755 
00756         if (toUseHashTable)
00757         {
00758             fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr);
00759         }
00760 
00761         if(psviAttr)
00762             psviAttr->setValue(curAttr->getValue());
00763 
00764         // Bump the count of attrs in the list
00765         retCount++;
00766     }
00767 
00768     //  Now, if there are any attributes declared by this element, let's
00769     //  go through them and make sure that any required ones are provided,
00770     //  and fault in any fixed ones and defaulted ones that are not provided
00771     //  literally.
00772     if (hasDefs)
00773     {
00774         // Check after all specified attrs are scanned
00775         // (1) report error for REQUIRED attrs that are missing (V_TAGc)
00776         // (2) add default attrs if missing (FIXED and NOT_FIXED)
00777 
00778 
00779         XMLAttDefList &attDefList = getAttDefList(fGrammarType == Grammar::SchemaGrammarType, currType, elemDecl);
00780 
00781         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
00782         {
00783             // Get the current att def, for convenience and its def type
00784             const XMLAttDef *curDef = &attDefList.getAttDef(i);
00785             const XMLAttDef::DefAttTypes defType = curDef->getDefaultType();
00786             unsigned int *attCountPtr = fAttDefRegistry->get((void *)curDef);
00787             if (!attCountPtr || *attCountPtr < fElemCount)
00788             { // did not occur
00789                 // note that since there is no attribute information
00790                 // item present, there is no PSVI infoset to augment here *except*
00791                 // that the element is invalid
00792 
00793                 //the attribute is not provided
00794                 if (fValidate)
00795                 {
00796                     // If we are validating and its required, then an error
00797                     if ((defType == XMLAttDef::Required) ||
00798                         (defType == XMLAttDef::Required_And_Fixed)  )
00799 
00800                     {
00801                         fValidator->emitError
00802                         (
00803                             XMLValid::RequiredAttrNotProvided
00804                             , curDef->getFullName()
00805                         );
00806                         if(fGrammarType == Grammar::SchemaGrammarType)
00807                         {
00808                             fPSVIElemContext.fErrorOccurred = true;
00809                         }
00810                     }
00811                     else if ((defType == XMLAttDef::Default) ||
00812                             (defType == XMLAttDef::Fixed)  )
00813                     {
00814                         if (fStandalone && curDef->isExternal())
00815                         {
00816                             // XML 1.0 Section 2.9
00817                             // Document is standalone, so attributes must not be defaulted.
00818                             fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName());
00819                             if(fGrammarType == Grammar::SchemaGrammarType)
00820                             {
00821                                 fPSVIElemContext.fErrorOccurred = true;
00822                             }
00823                         }
00824                     }
00825                 }
00826 
00827                 //  Fault in the value if needed, and bump the att count.
00828                 if ((defType == XMLAttDef::Default)
00829                     ||  (defType == XMLAttDef::Fixed))
00830                 {
00831                     // Let the validator pass judgement on the attribute value
00832                     if (fValidate)
00833                     {
00834                         fValidator->validateAttrValue
00835                         (
00836                             curDef
00837                             , curDef->getValue()
00838                             , false
00839                             , elemDecl
00840                         );
00841                     }
00842 
00843                     XMLAttr* curAtt;
00844                     if (retCount >= curAttListSize)
00845                     {
00846                         curAtt = new (fMemoryManager) XMLAttr(fMemoryManager);
00847                         fValidator->faultInAttr(*curAtt, *curDef);
00848                         fAttrList->addElement(curAtt);
00849                     }
00850                     else
00851                     {
00852                         curAtt = fAttrList->elementAt(retCount);
00853                         fValidator->faultInAttr(*curAtt, *curDef);
00854                     }
00855 
00856                     if (fGrammarType == Grammar::DTDGrammarType)
00857                     {
00858                         //  Map the new attribute's prefix to a URI id and store
00859                         //  that in the attribute object.
00860                         curAtt->setURIId
00861                         (
00862                             resolvePrefix(curAtt->getPrefix(), ElemStack::Mode_Attribute)
00863                         );
00864                     }
00865 
00866                     // Indicate it was not explicitly specified and bump count
00867                     curAtt->setSpecified(false);
00868                     retCount++;
00869                     if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType)
00870                     {
00871                         QName *attName = ((SchemaAttDef *)curDef)->getAttName();
00872                         PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill
00873                         (
00874                             attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI())
00875                         );
00876                         XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef);
00877                         DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator();
00878                         XSSimpleTypeDefinition *defAttrType =
00879                             (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType);
00880                         // would have occurred during validation of default value
00881                         if(((SchemaValidator *)fValidator)->getErrorOccurred())
00882                         {
00883                             defAttrToFill->reset(
00884                                 fRootElemName
00885                                 , PSVIItem::VALIDITY_INVALID
00886                                 , PSVIItem::VALIDATION_FULL
00887                                 , defAttrType
00888                                 , 0
00889                                 , curDef->getValue()
00890                                 , true
00891                                 , defAttrDecl
00892                                 , 0
00893                             );
00894                         }
00895                         else
00896                         {
00897                             XSSimpleTypeDefinition *defAttrMemberType = 0;
00898                             if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION)
00899                             {
00900                                 defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject
00901                                 (
00902                                     ((SchemaValidator*)fValidator)->getMostRecentAttrValidator()
00903                                 );
00904                             }
00905                             defAttrToFill->reset(
00906                                 fRootElemName
00907                                 , PSVIItem::VALIDITY_VALID
00908                                 , PSVIItem::VALIDATION_FULL
00909                                 , defAttrType
00910                                 , defAttrMemberType
00911                                 , curDef->getValue()
00912                                 , true
00913                                 , defAttrDecl
00914                                 , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType
00915                             );
00916                         }
00917                         defAttrToFill->setValue(curDef->getValue());
00918                     }
00919                 }
00920             }
00921             else if(attCountPtr)
00922             {
00923                 //attribute is provided
00924                 // (schema) report error for PROHIBITED attrs that are present (V_TAGc)
00925                 if (defType == XMLAttDef::Prohibited && fValidate)
00926                 {
00927                     fValidator->emitError
00928                     (
00929                         XMLValid::ProhibitedAttributePresent
00930                         , curDef->getFullName()
00931                     );
00932                     if(fGrammarType == Grammar::SchemaGrammarType)
00933                     {
00934                         fPSVIElemContext.fErrorOccurred = true;
00935                         if (getPSVIHandler())
00936                         {
00937                             QName *attQName = ((SchemaAttDef *)curDef)->getAttName();
00938                             // bad luck...
00939                             PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName
00940                             (
00941                                 attQName->getLocalPart(),
00942                                 fURIStringPool->getValueForId(attQName->getURI())
00943                             );
00944                             prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID);
00945                         }
00946                     }
00947                 }
00948             }
00949         }
00950     }
00951     return retCount;
00952 }
00953 
00954 
00955 //  This method will take a raw attribute value and normalize it according to
00956 //  the rules of the attribute type. It will put the resulting value into the
00957 //  passed buffer.
00958 //
00959 //  This code assumes that escaped characters in the original value (via char
00960 //  refs) are prefixed by a 0xFFFF character. This is because some characters
00961 //  are legal if escaped only. And some escape chars are not subject to
00962 //  normalization rules.
00963 bool IGXMLScanner::normalizeAttValue( const   XMLAttDef* const    attDef
00964                                       , const XMLCh* const        attName
00965                                       , const XMLCh* const        value
00966                                       ,       XMLBuffer&          toFill)
00967 {
00968     // A simple state value for a whitespace processing state machine
00969     enum States
00970     {
00971         InWhitespace
00972         , InContent
00973     };
00974 
00975     // Get the type and name
00976     const XMLAttDef::AttTypes type = (attDef)?attDef->getType():XMLAttDef::CData;
00977 
00978     // Assume its going to go fine, and empty the target buffer in preperation
00979     bool retVal = true;
00980     toFill.reset();
00981 
00982     //  Loop through the chars of the source value and normalize it according
00983     //  to the type.
00984     XMLCh nextCh;
00985     const XMLCh* srcPtr = value;
00986 
00987     if (type == XMLAttDef::CData || type > XMLAttDef::Notation) {
00988         //  Get the next character from the source. We have to watch for
00989         //  escaped characters (which are indicated by a 0xFFFF value followed
00990         //  by the char that was escaped.)
00991         while ((nextCh = *srcPtr++)!=0)
00992         {
00993             switch(nextCh)
00994             {
00995             // Do we have an escaped character ?
00996             case 0xFFFF:
00997                 nextCh = *srcPtr++;
00998                 break;
00999             case 0x09:
01000             case 0x0A:
01001             case 0x0D:
01002                 // Check Validity Constraint for Standalone document declaration
01003                 // XML 1.0, Section 2.9
01004                 if (fStandalone && fValidate && attDef && attDef->isExternal())
01005                 {
01006                      // Can't have a standalone document declaration of "yes" if  attribute
01007                      // values are subject to normalisation
01008                      fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
01009                 }
01010                 nextCh = chSpace;
01011                 break;
01012             case chOpenAngle:
01013                 //  If its not escaped, then make sure its not a < character, which is
01014                 //  not allowed in attribute values.
01015                 emitError(XMLErrs::BracketInAttrValue, attName);
01016                 retVal = false;
01017                 break;
01018             }
01019 
01020             // Add this char to the target buffer
01021             toFill.append(nextCh);
01022         }
01023     }
01024     else {
01025         States curState = InContent;
01026         bool firstNonWS = false;
01027         //  Get the next character from the source. We have to watch for
01028         //  escaped characters (which are indicated by a 0xFFFF value followed
01029         //  by the char that was escaped.)
01030         while ((nextCh = *srcPtr)!=0)
01031         {
01032             // Do we have an escaped character ?
01033             if (nextCh == 0xFFFF)
01034             {
01035                 nextCh = *++srcPtr;
01036             }
01037             else if (nextCh == chOpenAngle) {
01038                 //  If its not escaped, then make sure its not a < character, which is
01039                 //  not allowed in attribute values.
01040                 emitError(XMLErrs::BracketInAttrValue, attName);
01041                 retVal = false;
01042             }
01043 
01044             if (curState == InWhitespace)
01045             {
01046                 if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
01047                 {
01048                     if (firstNonWS)
01049                         toFill.append(chSpace);
01050                     curState = InContent;
01051                     firstNonWS = true;
01052                 }
01053                 else
01054                 {
01055                     srcPtr++;
01056                     continue;
01057                 }
01058             }
01059             else if (curState == InContent)
01060             {
01061                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
01062                 {
01063                     curState = InWhitespace;
01064                     srcPtr++;
01065 
01066                     // Check Validity Constraint for Standalone document declaration
01067                     // XML 1.0, Section 2.9
01068                     if (fStandalone && fValidate && attDef && attDef->isExternal())
01069                     {
01070                         if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr))
01071                         {
01072                             // Can't have a standalone document declaration of "yes" if  attribute
01073                             // values are subject to normalisation
01074                             fValidator->emitError(XMLValid::NoAttNormForStandalone, attName);
01075                         }
01076                     }
01077                     continue;
01078                 }
01079                 firstNonWS = true;
01080             }
01081 
01082             // Add this char to the target buffer
01083             toFill.append(nextCh);
01084 
01085             // And move up to the next character in the source
01086             srcPtr++;
01087         }
01088     }
01089 
01090     return retVal;
01091 }
01092 
01093 //  This method will just normalize the input value as CDATA without
01094 //  any standalone checking.
01095 bool IGXMLScanner::normalizeAttRawValue( const   XMLCh* const        attrName
01096                                       , const XMLCh* const        value
01097                                       ,       XMLBuffer&          toFill)
01098 {
01099     // Assume its going to go fine, and empty the target buffer in preperation
01100     bool retVal = true;
01101     toFill.reset();
01102 
01103     //  Loop through the chars of the source value and normalize it according
01104     //  to the type.
01105     bool escaped;
01106     XMLCh nextCh;
01107     const XMLCh* srcPtr = value;
01108     while (*srcPtr)
01109     {
01110         //  Get the next character from the source. We have to watch for
01111         //  escaped characters (which are indicated by a 0xFFFF value followed
01112         //  by the char that was escaped.)
01113         nextCh = *srcPtr;
01114         escaped = (nextCh == 0xFFFF);
01115         if (escaped)
01116             nextCh = *++srcPtr;
01117 
01118         //  If its not escaped, then make sure its not a < character, which is
01119         //  not allowed in attribute values.
01120         if (!escaped && (*srcPtr == chOpenAngle))
01121         {
01122             emitError(XMLErrs::BracketInAttrValue, attrName);
01123             retVal = false;
01124         }
01125 
01126         if (!escaped)
01127         {
01128             //  NOTE: Yes this is a little redundant in that a 0x20 is
01129             //  replaced with an 0x20. But its faster to do this (I think)
01130             //  than checking for 9, A, and D separately.
01131             if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
01132                 nextCh = chSpace;
01133         }
01134 
01135         // Add this char to the target buffer
01136         toFill.append(nextCh);
01137 
01138         // And move up to the next character in the source
01139         srcPtr++;
01140     }
01141     return retVal;
01142 }
01143 
01144 //  This method will reset the scanner data structures, and related plugged
01145 //  in stuff, for a new scan session. We get the input source for the primary
01146 //  XML entity, create the reader for it, and push it on the stack so that
01147 //  upon successful return from here we are ready to go.
01148 void IGXMLScanner::scanReset(const InputSource& src)
01149 {
01150     //  This call implicitly tells us that we are going to reuse the scanner
01151     //  if it was previously used. So tell the validator to reset itself.
01152     //
01153     //  But, if the fUseCacheGrammar flag is set, then don't reset it.
01154     //
01155     //  NOTE:   The ReaderMgr is flushed on the way out, because that is
01156     //          required to insure that files are closed.
01157     fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar);
01158     fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar);
01159 
01160     // Clear transient schema info list.
01161     //
01162     fSchemaInfoList->removeAll ();
01163 
01164     // fModel may need updating, as fGrammarResolver could have cleaned it
01165     if(fModel && getPSVIHandler())
01166         fModel = fGrammarResolver->getXSModel();
01167 
01168     {
01169         XMLDTDDescriptionImpl   theDTDDescription(XMLUni::fgDTDEntityString, fMemoryManager);
01170         fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(&theDTDDescription);
01171     }
01172 
01173     if (!fDTDGrammar) {
01174 
01175         fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
01176         fGrammarResolver->putGrammar(fDTDGrammar);
01177     }
01178     else
01179         fDTDGrammar->reset();
01180 
01181     fGrammar = fDTDGrammar;
01182     fGrammarType = fGrammar->getGrammarType();
01183     fRootGrammar = 0;
01184 
01185     if (fValidatorFromUser) {
01186         if (fValidator->handlesDTD())
01187             fValidator->setGrammar(fGrammar);
01188         else if (fValidator->handlesSchema()) {
01189 
01190             ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter);
01191             ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver);
01192             ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal);
01193         }
01194     }
01195     else {
01196         // set fValidator as fDTDValidator
01197         fValidator = fDTDValidator;
01198         fValidator->setGrammar(fGrammar);
01199     }
01200 
01201     // Reset validation
01202     fValidate = (fValScheme == Val_Always) ? true : false;
01203 
01204     // Ignore skipDTDValidation flag if no schema processing is taking place */
01205     fSkipDTDValidation = fSkipDTDValidation && fDoSchema;
01206 
01207     //  And for all installed handlers, send reset events. This gives them
01208     //  a chance to flush any cached data.
01209     if (fDocHandler)
01210         fDocHandler->resetDocument();
01211     if (fEntityHandler)
01212         fEntityHandler->resetEntities();
01213     if (fErrorReporter)
01214         fErrorReporter->resetErrors();
01215 
01216     // Clear out the id reference list
01217     resetValidationContext();
01218 
01219     // Reset the Root Element Name
01220     fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
01221     fRootElemName = 0;
01222 
01223     // Reset IdentityConstraints
01224     if (fICHandler)
01225         fICHandler->reset();
01226 
01227     //  Reset the element stack, and give it the latest ids for the special
01228     //  URIs it has to know about.
01229     fElemStack.reset
01230     (
01231         fEmptyNamespaceId
01232         , fUnknownNamespaceId
01233         , fXMLNamespaceId
01234         , fXMLNSNamespaceId
01235     );
01236 
01237     if (!fSchemaNamespaceId)
01238         fSchemaNamespaceId  = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);
01239 
01240     // Reset some status flags
01241     fInException = false;
01242     fStandalone = false;
01243     fErrorCount = 0;
01244     fHasNoDTD = true;
01245     fSeeXsi = false;
01246 
01247     // Reset PSVI context
01248     // note that we always need this around for DOMTypeInfo
01249     if (!fPSVIElement)
01250         fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager);
01251 
01252     if (!fErrorStack)
01253     {
01254         fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager);
01255     }
01256     else
01257     {
01258         fErrorStack->removeAllElements();
01259     }
01260 
01261     resetPSVIElemContext();
01262 
01263     // Reset the validators
01264     fDTDValidator->reset();
01265     fDTDValidator->setErrorReporter(fErrorReporter);
01266     fSchemaValidator->reset();
01267     fSchemaValidator->setErrorReporter(fErrorReporter);
01268     fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
01269     fSchemaValidator->setGrammarResolver(fGrammarResolver);
01270     if (fValidatorFromUser)
01271         fValidator->reset();
01272 
01273     //  Handle the creation of the XML reader object for this input source.
01274     //  This will provide us with transcoding and basic lexing services.
01275     XMLReader* newReader = fReaderMgr.createReader
01276     (
01277         src
01278         , true
01279         , XMLReader::RefFrom_NonLiteral
01280         , XMLReader::Type_General
01281         , XMLReader::Source_External
01282         , fCalculateSrcOfs
01283         , fLowWaterMark
01284     );
01285 
01286     if (!newReader) {
01287         if (src.getIssueFatalErrorIfNotFound())
01288             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
01289         else
01290             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
01291     }
01292 
01293     // Push this read onto the reader manager
01294     fReaderMgr.pushReader(newReader, 0);
01295 
01296     // and reset security-related things if necessary:
01297     if(fSecurityManager != 0)
01298     {
01299         fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
01300         fEntityExpansionCount = 0;
01301     }
01302     fElemCount = 0;
01303     if(fUIntPoolRowTotal >= 32)
01304     { // 8 KB tied up with validating attributes...
01305         fAttDefRegistry->removeAll();
01306         recreateUIntPool();
01307     }
01308     else
01309     {
01310         // note that this will implicitly reset the values of the hashtables,
01311         // though their buckets will still be tied up
01312         resetUIntPool();
01313     }
01314     fUndeclaredAttrRegistry->removeAll();
01315     fDTDElemNonDeclPool->removeAll();
01316 }
01317 
01318 
01319 //  This method is called between markup in content. It scans for character
01320 //  data that is sent to the document handler. It watches for any markup
01321 //  characters that would indicate that the character data has ended. It also
01322 //  handles expansion of general and character entities.
01323 //
01324 //  sendData() is a local static helper for this method which handles some
01325 //  code that must be done in three different places here.
01326 void IGXMLScanner::sendCharData(XMLBuffer& toSend)
01327 {
01328     // If no data in the buffer, then nothing to do
01329     if (toSend.isEmpty())
01330         return;
01331 
01332     //  We do different things according to whether we are validating or
01333     //  not. If not, its always just characters; else, it depends on the
01334     //  current element's content model.
01335     if (fValidate)
01336     {
01337         // Get the raw data we need for the callback
01338         const XMLCh* rawBuf = toSend.getRawBuffer();
01339         XMLSize_t len = toSend.getLen();
01340 
01341         // And see if the current element is a 'Children' style content model
01342         const ElemStack::StackElem* topElem = fElemStack.topElement();
01343 
01344         // Get the character data opts for the current element
01345         XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
01346         if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
01347         {
01348             // And see if the current element is a 'Children' style content model
01349             ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
01350             if(currType)
01351             {
01352                 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
01353                 if(modelType == SchemaElementDecl::Children ||
01354                    modelType == SchemaElementDecl::ElementOnlyEmpty)
01355                     charOpts = XMLElementDecl::SpacesOk;
01356                 else if(modelType == SchemaElementDecl::Empty)
01357                     charOpts = XMLElementDecl::NoCharData;
01358             }
01359         } else // DTD grammar
01360             charOpts = topElem->fThisElement->getCharDataOpts();
01361 
01362         if (charOpts == XMLElementDecl::NoCharData)
01363         {
01364             // They definitely cannot handle any type of char data
01365             fValidator->emitError(XMLValid::NoCharDataInCM);
01366             //if(fGrammarType == Grammar::SchemaGrammarType)
01367             //{
01368               //  if (getPSVIHandler())
01369               //  {
01370                     // REVISIT:
01371                     // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
01372               //  }
01373            // }
01374         }
01375         else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
01376         {
01377             //  Its all spaces. So, if they can take spaces, then send it
01378             //  as ignorable whitespace. If they can handle any char data
01379             //  send it as characters.
01380             if (charOpts == XMLElementDecl::SpacesOk) {
01381                 if (fDocHandler)
01382                     fDocHandler->ignorableWhitespace(rawBuf, len, false);
01383             }
01384             else if (charOpts == XMLElementDecl::AllCharData)
01385             {
01386                 if (fGrammarType != Grammar::SchemaGrammarType)
01387                 {
01388                     if (fDocHandler)
01389                         fDocHandler->docCharacters(rawBuf, len, false);
01390                 }
01391                 else
01392                 {
01393                     XMLSize_t xsLen;
01394                     const XMLCh* xsNormalized;
01395                     SchemaValidator *schemaValidator = (SchemaValidator *)fValidator;
01396                     DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
01397                     if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
01398                     {
01399                         // normalize the character according to schema whitespace facet
01400                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
01401                         xsNormalized = fWSNormalizeBuf.getRawBuffer();
01402                         xsLen = fWSNormalizeBuf.getLen();
01403                     }
01404                     else {
01405                         xsNormalized = rawBuf;
01406                         xsLen = len ;
01407                     }
01408 
01409                     // tell the schema validation about the character data for checkContent later
01410                     schemaValidator->setDatatypeBuffer(xsNormalized);
01411 
01412                     // call all active identity constraints
01413                     if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
01414                         fContent.append(xsNormalized, xsLen);
01415                     }
01416 
01417                     if (fDocHandler) {
01418                         if (fNormalizeData) {
01419                            fDocHandler->docCharacters(xsNormalized, xsLen, false);
01420                         }
01421                         else {
01422                             fDocHandler->docCharacters(rawBuf, len, false);
01423                         }
01424                     }
01425                 }
01426             }
01427         }
01428         else
01429         {
01430             //  If they can take any char data, then send it. Otherwise, they
01431             //  can only handle whitespace and can't handle this stuff so
01432             //  issue an error.
01433             if (charOpts == XMLElementDecl::AllCharData)
01434             {
01435                 if (fGrammarType != Grammar::SchemaGrammarType)
01436                 {
01437                     if (fDocHandler)
01438                         fDocHandler->docCharacters(rawBuf, len, false);
01439                 }
01440                 else
01441                 {
01442                     XMLSize_t xsLen;
01443                     const XMLCh* xsNormalized;
01444                     SchemaValidator *schemaValidator = (SchemaValidator*)fValidator;
01445                     DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
01446                     if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
01447                     {
01448                         // normalize the character according to schema whitespace facet
01449                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
01450                         xsNormalized = fWSNormalizeBuf.getRawBuffer();
01451                         xsLen = fWSNormalizeBuf.getLen();
01452                     }
01453                     else {
01454                         xsNormalized = rawBuf;
01455                         xsLen = len;
01456                     }
01457 
01458                     // tell the schema validation about the character data for checkContent later
01459                     schemaValidator->setDatatypeBuffer(xsNormalized);
01460 
01461                     // call all active identity constraints
01462                     if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
01463                         fContent.append(xsNormalized, xsLen);
01464                     }
01465 
01466                     if (fDocHandler) {
01467                         if (fNormalizeData) {
01468                             fDocHandler->docCharacters(xsNormalized, xsLen, false);
01469                         }
01470                         else {
01471                             fDocHandler->docCharacters(rawBuf, len, false);
01472                         }
01473                     }
01474                 }
01475             }
01476             else
01477             {
01478                 fValidator->emitError(XMLValid::NoCharDataInCM);
01479                 if(fGrammarType == Grammar::SchemaGrammarType)
01480                 {
01481                     if (getPSVIHandler())
01482                     {
01483                         // REVISIT:
01484                         // PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID);
01485                     }
01486                 }
01487             }
01488         }
01489     }
01490     else
01491     {
01492         // call all active identity constraints
01493         if (fGrammarType == Grammar::SchemaGrammarType) {
01494 
01495             if (toCheckIdentityConstraint() && fICHandler->getMatcherCount())
01496                 fContent.append(toSend.getRawBuffer(), toSend.getLen());
01497         }
01498 
01499         // Always assume its just char data if not validating
01500         if (fDocHandler)
01501             fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
01502     }
01503 
01504     // Reset buffer
01505     toSend.reset();
01506 }
01507 
01508 
01509 
01510 //  This method is called with a key/value string pair that represents an
01511 //  xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
01512 //  current top of the element stack based on this data. We know that when
01513 //  we get here, that it is one of these forms, so we don't bother confirming
01514 //  it.
01515 //
01516 //  But we have to ensure
01517 //      1. xxx is not xmlns
01518 //      2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
01519 //      3. yyy is not XMLUni::fgXMLNSURIName
01520 //      4. if xxx is not null, then yyy cannot be an empty string.
01521 void IGXMLScanner::updateNSMap(const  XMLCh* const    attrName
01522                             , const XMLCh* const    attrValue)
01523 {
01524     updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon));
01525 }
01526 
01527 void IGXMLScanner::updateNSMap(const  XMLCh* const    attrName
01528                             , const XMLCh* const    attrValue
01529                             , const int colonOfs)
01530 {
01531     // We need a buffer to normalize the attribute value into
01532     XMLBufBid bbNormal(&fBufMgr);
01533     XMLBuffer& normalBuf = bbNormal.getBuffer();
01534 
01535     //  Normalize the value into the passed buffer. In this case, we don't
01536     //  care about the return value. An error was issued for the error, which
01537     //  is all we care about here.
01538     normalizeAttRawValue(attrName, attrValue, normalBuf);
01539     XMLCh* namespaceURI = normalBuf.getRawBuffer();
01540 
01541     //  We either have the default prefix (""), or we point it into the attr
01542     //  name parameter. Note that the xmlns is not the prefix we care about
01543     //  here. To us, the 'prefix' is really the local part of the attrName
01544     //  parameter.
01545     //
01546     //  Check 1. xxx is not xmlns
01547     //        2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
01548     //        3. yyy is not XMLUni::fgXMLNSURIName
01549     //        4. if xxx is not null, then yyy cannot be an empty string.
01550     const XMLCh* prefPtr = XMLUni::fgZeroLenString;
01551     if (colonOfs != -1) {
01552         prefPtr = &attrName[colonOfs + 1];
01553 
01554         if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString))
01555             emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
01556         else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) {
01557             if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
01558                 emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
01559         }
01560 
01561         if (!namespaceURI)
01562             emitError(XMLErrs::NoEmptyStrNamespace, attrName);
01563         else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
01564             emitError(XMLErrs::NoEmptyStrNamespace, attrName);
01565     }
01566 
01567     if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
01568         emitError(XMLErrs::NoUseOfxmlnsURI);
01569     else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) {
01570         if (!XMLString::equals(prefPtr, XMLUni::fgXMLString))
01571             emitError(XMLErrs::XMLURINotMatchXMLPrefix);
01572     }
01573 
01574     //  Ok, we have to get the unique id for the attribute value, which is the
01575     //  URI that this value should be mapped to. The validator has the
01576     //  namespace string pool, so we ask him to find or add this new one. Then
01577     //  we ask the element stack to add this prefix to URI Id mapping.
01578     fElemStack.addPrefix
01579     (
01580         prefPtr
01581         , fURIStringPool->addOrFind(namespaceURI)
01582     );
01583 }
01584 
01585 void IGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount)
01586 {
01587     //  Make an initial pass through the list and find any xmlns attributes or
01588     //  schema attributes.
01589     //  When we find one, send it off to be used to update the element stack's
01590     //  namespace mappings.
01591     for (XMLSize_t index = 0; index < attCount; index++)
01592     {
01593         // each attribute has the prefix:suffix="value"
01594         const KVStringPair* curPair = fRawAttrList->elementAt(index);
01595         const XMLCh* rawPtr = curPair->getKey();
01596 
01597         //  If either the key begins with "xmlns:" or its just plain
01598         //  "xmlns", then use it to update the map.
01599         if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6)
01600         ||  XMLString::equals(rawPtr, XMLUni::fgXMLNSString))
01601         {
01602             const XMLCh* valuePtr = curPair->getValue();
01603 
01604             updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]);
01605 
01606             // if the schema URI is seen in the the valuePtr, set the boolean seeXsi
01607             if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) {
01608                 fSeeXsi = true;
01609             }
01610         }
01611     }
01612 
01613     // walk through the list again to deal with "xsi:...."
01614     if (fDoSchema && fSeeXsi)
01615     {
01616         //  Schema Xsi Type yyyy (e.g. xsi:type="yyyyy")
01617         XMLBufBid bbXsi(&fBufMgr);
01618         XMLBuffer& fXsiType = bbXsi.getBuffer();
01619 
01620         for (XMLSize_t index = 0; index < attCount; index++)
01621         {
01622             // each attribute has the prefix:suffix="value"
01623             const KVStringPair* curPair = fRawAttrList->elementAt(index);
01624             const XMLCh* rawPtr = curPair->getKey();
01625             const XMLCh* prefPtr = XMLUni::fgZeroLenString;
01626             int   colonInd = fRawAttrColonList[index];
01627 
01628             if (colonInd != -1) {
01629 
01630                 fURIBuf.set(rawPtr, colonInd);
01631                 prefPtr = fURIBuf.getRawBuffer();
01632             }
01633 
01634             // if schema URI has been seen, scan for the schema location and uri
01635             // and resolve the schema grammar; or scan for schema type
01636             if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) {
01637 
01638                 const XMLCh* valuePtr = curPair->getValue();
01639                 const XMLCh* suffPtr = &rawPtr[colonInd + 1];
01640 
01641                 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION))
01642                     parseSchemaLocation(valuePtr);
01643                 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION))
01644                     resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString);
01645 
01646                 if ((!fValidator || !fValidator->handlesSchema()) &&
01647                     (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE) ||
01648                      XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)))
01649                 {
01650                   // If we are in the DTD mode, try to switch to the Schema
01651                   // mode. For that we need to find any XML Schema grammar
01652                   // that we can switch to. Such a grammar can only come
01653                   // from the cache (if it came from the schemaLocation
01654                   // attribute, we would be in the Schema mode already).
01655                   //
01656                   XMLGrammarPool* pool = fGrammarResolver->getGrammarPool ();
01657                   RefHashTableOfEnumerator<Grammar> i = pool->getGrammarEnumerator ();
01658 
01659                   while (i.hasMoreElements ())
01660                   {
01661                     Grammar& gr (i.nextElement ());
01662 
01663                     if (gr.getGrammarType () == Grammar::SchemaGrammarType)
01664                     {
01665                       switchGrammar (gr.getTargetNamespace ());
01666                       break;
01667                     }
01668                   }
01669                 }
01670 
01671                 if( fValidator && fValidator->handlesSchema() )
01672                 {
01673                     if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE))
01674                     {
01675                         // normalize the attribute according to schema whitespace facet
01676                         DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME);
01677                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiType, true);
01678                     }
01679                     else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))
01680                     {
01681                         // normalize the attribute according to schema whitespace facet
01682                         XMLBuffer& fXsiNil = fBufMgr.bidOnBuffer();
01683                         DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN);
01684                         ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiNil, true);
01685                         if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE))
01686                             ((SchemaValidator*)fValidator)->setNillable(true);
01687                         else if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE))
01688                             ((SchemaValidator*)fValidator)->setNillable(false);
01689                         else
01690                             emitError(XMLErrs::InvalidAttValue, fXsiNil.getRawBuffer(), valuePtr);
01691                         fBufMgr.releaseBuffer(fXsiNil);
01692                     }
01693                 }
01694             }
01695         }
01696 
01697         if (fValidator && fValidator->handlesSchema()) {
01698             if (!fXsiType.isEmpty()) {
01699                 int colonPos = -1;
01700                 unsigned int uriId = resolveQName (
01701                       fXsiType.getRawBuffer()
01702                     , fPrefixBuf
01703                     , ElemStack::Mode_Element
01704                     , colonPos
01705                 );
01706                 ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId);
01707             }
01708         }
01709     }
01710 }
01711 
01712 void IGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema)
01713 {
01714     XMLCh* locStr = XMLString::replicate(schemaLocationStr, fMemoryManager);
01715     ArrayJanitor<XMLCh> janLoc(locStr, fMemoryManager);
01716 
01717     processSchemaLocation(locStr);
01718     XMLSize_t size = fLocationPairs->size();
01719 
01720     if (size % 2 != 0 ) {
01721         emitError(XMLErrs::BadSchemaLocation);
01722     } else {
01723         // We need a buffer to normalize the attribute value into
01724         XMLBuffer normalBuf(1023, fMemoryManager);
01725         for(XMLSize_t i=0; i<size; i=i+2) {
01726             normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, fLocationPairs->elementAt(i), normalBuf);
01727             resolveSchemaGrammar(fLocationPairs->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema);
01728         }
01729     }
01730 }
01731 
01732 void IGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) {
01733 
01734     Grammar* grammar = 0;
01735 
01736     {
01737         XMLSchemaDescriptionImpl    theSchemaDescription(uri, fMemoryManager);
01738         theSchemaDescription.setLocationHints(loc);
01739         grammar = fGrammarResolver->getGrammar(&theSchemaDescription);
01740     }
01741 
01742     // If multi-import is enabled, make sure the existing grammar came
01743     // from the import directive. Otherwise we may end up reloading
01744     // the same schema that came from the external grammar pool. Ideally,
01745     // we would move fSchemaInfoList to XMLGrammarPool so that it survives
01746     // the destruction of the scanner in which case we could rely on the
01747     // same logic we use to weed out duplicate schemas below.
01748     //
01749     if (!grammar ||
01750         grammar->getGrammarType() == Grammar::DTDGrammarType ||
01751         (getHandleMultipleImports() &&
01752          ((XMLSchemaDescription*)grammar->getGrammarDescription())->
01753          getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
01754     {
01755       if (fLoadSchema || ignoreLoadSchema)
01756       {
01757         XSDDOMParser parser(0, fMemoryManager, 0);
01758 
01759         parser.setValidationScheme(XercesDOMParser::Val_Never);
01760         parser.setDoNamespaces(true);
01761         parser.setUserEntityHandler(fEntityHandler);
01762         parser.setUserErrorReporter(fErrorReporter);
01763 
01764         //Normalize loc
01765         XMLBufBid nnSys(&fBufMgr);
01766         XMLBuffer& normalizedSysId = nnSys.getBuffer();
01767         XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
01768         const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
01769 
01770         // Create a buffer for expanding the system id
01771         XMLBufBid bbSys(&fBufMgr);
01772         XMLBuffer& expSysId = bbSys.getBuffer();
01773 
01774         //  Allow the entity handler to expand the system id if they choose
01775         //  to do so.
01776         InputSource* srcToFill = 0;
01777         if (fEntityHandler)
01778         {
01779             if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
01780                 expSysId.set(normalizedURI);
01781 
01782             ReaderMgr::LastExtEntityInfo lastInfo;
01783             fReaderMgr.getLastExtEntityInfo(lastInfo);
01784             XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
01785                             expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
01786                             &fReaderMgr);
01787             srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
01788         }
01789         else
01790         {
01791             expSysId.set(normalizedURI);
01792         }
01793 
01794         //  If they didn't create a source via the entity handler, then we
01795         //  have to create one on our own.
01796         if (!srcToFill)
01797         {
01798             if (fDisableDefaultEntityResolution)
01799                 return;
01800 
01801             ReaderMgr::LastExtEntityInfo lastInfo;
01802             fReaderMgr.getLastExtEntityInfo(lastInfo);
01803 
01804             XMLURL urlTmp(fMemoryManager);
01805             if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
01806                 (urlTmp.isRelative()))
01807             {
01808                 if (!fStandardUriConformant)
01809                 {
01810                     XMLBufBid  ddSys(&fBufMgr);
01811                     XMLBuffer& resolvedSysId = ddSys.getBuffer();
01812                     XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
01813 
01814                     srcToFill = new (fMemoryManager) LocalFileInputSource
01815                     (
01816                         lastInfo.systemId
01817                         , resolvedSysId.getRawBuffer()
01818                         , fMemoryManager
01819                     );
01820                 }
01821                 else
01822                     ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
01823             }
01824             else
01825             {
01826                 if (fStandardUriConformant && urlTmp.hasInvalidChar())
01827                     ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
01828                 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
01829             }
01830         }
01831 
01832         // Put a janitor on the input source
01833         Janitor<InputSource> janSrc(srcToFill);
01834 
01835         // Check if this exact schema has already been seen.
01836         //
01837         const XMLCh* sysId = srcToFill->getSystemId();
01838         unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
01839         SchemaInfo* importSchemaInfo = 0;
01840 
01841         if (fUseCachedGrammar)
01842           importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);
01843 
01844         if (!importSchemaInfo && !fToCacheGrammar)
01845           importSchemaInfo = fSchemaInfoList->get(sysId, uriId);
01846 
01847         if (importSchemaInfo)
01848         {
01849           // We haven't added any new grammars so it is safe to just
01850           // return.
01851           //
01852           return;
01853         }
01854 
01855         // Should just issue warning if the schema is not found
01856         bool flag = srcToFill->getIssueFatalErrorIfNotFound();
01857         srcToFill->setIssueFatalErrorIfNotFound(false);
01858 
01859         parser.parse(*srcToFill);
01860 
01861         // Reset the InputSource
01862         srcToFill->setIssueFatalErrorIfNotFound(flag);
01863 
01864         if (parser.getSawFatal() && fExitOnFirstFatal)
01865             emitError(XMLErrs::SchemaScanFatalError);
01866 
01867         DOMDocument* document = parser.getDocument(); //Our Grammar
01868 
01869         if (document != 0) {
01870 
01871             DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
01872             if (root != 0)
01873             {
01874                 const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
01875                 bool newGrammar = false;
01876                 if (!XMLString::equals(newUri, uri)) {
01877                     if (fValidate || fValScheme == Val_Auto) {
01878                         fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
01879                     }
01880 
01881                     grammar = fGrammarResolver->getGrammar(newUri);
01882                     newGrammar = true;
01883                 }
01884 
01885                 if (!grammar ||
01886                     grammar->getGrammarType() == Grammar::DTDGrammarType ||
01887                     (getHandleMultipleImports() &&
01888                      ((XMLSchemaDescription*)grammar->getGrammarDescription())->
01889                      getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
01890                 {
01891                     // If we switched namespace URI, recheck the schema info.
01892                     //
01893                     if (newGrammar)
01894                     {
01895                       unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;
01896 
01897                       if (fUseCachedGrammar)
01898                         importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);
01899 
01900                       if (!importSchemaInfo && !fToCacheGrammar)
01901                         importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);
01902 
01903                       if (importSchemaInfo)
01904                         return;
01905                     }
01906 
01907                     //  Since we have seen a grammar, set our validation flag
01908                     //  at this point if the validation scheme is auto
01909                     if (fValScheme == Val_Auto && !fValidate) {
01910                         fValidate = true;
01911                         fElemStack.setValidationFlag(fValidate);
01912                     }
01913 
01914                     // we have seen a schema, so set up the fValidator as fSchemaValidator
01915                     if (!fValidator->handlesSchema())
01916                     {
01917                         if (fValidatorFromUser) {
01918                             // the fValidator is from user
01919                             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
01920                         }
01921                         else {
01922                             fValidator = fSchemaValidator;
01923                         }
01924                     }
01925 
01926                     bool grammarFound = grammar &&
01927                       grammar->getGrammarType() == Grammar::SchemaGrammarType;
01928 
01929                     SchemaGrammar* schemaGrammar;
01930 
01931                     if (grammarFound) {
01932                       schemaGrammar = (SchemaGrammar*) grammar;
01933                     }
01934                     else {
01935                       schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
01936                     }
01937 
01938                     XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
01939                     gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
01940                     gramDesc->setLocationHints(sysId);
01941 
01942                     TraverseSchema traverseSchema
01943                     (
01944                         root
01945                         , fURIStringPool
01946                         , schemaGrammar
01947                         , fGrammarResolver
01948                         , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
01949                         , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
01950                         , this
01951                         , sysId
01952                         , fEntityHandler
01953                         , fErrorReporter
01954                         , fMemoryManager
01955                         , grammarFound
01956                     );
01957 
01958                     // Reset the now invalid schema roots in the collected
01959                     // schema info entries.
01960                     //
01961                     {
01962                       RefHash2KeysTableOfEnumerator<SchemaInfo> i (
01963                         fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);
01964 
01965                       while (i.hasMoreElements ())
01966                         i.nextElement().resetRoot ();
01967                     }
01968 
01969                     if (fGrammarType == Grammar::DTDGrammarType) {
01970                         fGrammar = schemaGrammar;
01971                         fGrammarType = Grammar::SchemaGrammarType;
01972                         fValidator->setGrammar(fGrammar);
01973                     }
01974 
01975                     if (fValidate) {
01976                         //  validate the Schema scan so far
01977                         fValidator->preContentValidation(false);
01978                     }
01979                 }
01980             }
01981         }
01982       }
01983     }
01984     else
01985     {
01986         //  Since we have seen a grammar, set our validation flag
01987         //  at this point if the validation scheme is auto
01988         if (fValScheme == Val_Auto && !fValidate) {
01989             fValidate = true;
01990             fElemStack.setValidationFlag(fValidate);
01991         }
01992 
01993         // we have seen a schema, so set up the fValidator as fSchemaValidator
01994         if (!fValidator->handlesSchema())
01995         {
01996             if (fValidatorFromUser) {
01997                 // the fValidator is from user
01998                 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
01999             }
02000             else {
02001                 fValidator = fSchemaValidator;
02002             }
02003         }
02004 
02005         if (fGrammarType == Grammar::DTDGrammarType) {
02006             fGrammar = grammar;
02007             fGrammarType = Grammar::SchemaGrammarType;
02008             fValidator->setGrammar(fGrammar);
02009         }
02010     }
02011 
02012     // fModel may need updating:
02013     if(getPSVIHandler())
02014         fModel = fGrammarResolver->getXSModel();
02015 }
02016 
02017 InputSource* IGXMLScanner::resolveSystemId(const XMLCh* const sysId
02018                                           ,const XMLCh* const pubId)
02019 {
02020     //Normalize sysId
02021     XMLBufBid nnSys(&fBufMgr);
02022     XMLBuffer& normalizedSysId = nnSys.getBuffer();
02023     XMLString::removeChar(sysId, 0xFFFF, normalizedSysId);
02024     const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();
02025 
02026     // Create a buffer for expanding the system id
02027     XMLBufBid bbSys(&fBufMgr);
02028     XMLBuffer& expSysId = bbSys.getBuffer();
02029 
02030     //  Allow the entity handler to expand the system id if they choose
02031     //  to do so.
02032     InputSource* srcToFill = 0;
02033     if (fEntityHandler)
02034     {
02035         if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
02036             expSysId.set(normalizedURI);
02037 
02038         ReaderMgr::LastExtEntityInfo lastInfo;
02039         fReaderMgr.getLastExtEntityInfo(lastInfo);
02040         XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
02041                               expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId,
02042                               &fReaderMgr);
02043         srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
02044     }
02045     else
02046     {
02047         expSysId.set(normalizedURI);
02048     }
02049 
02050     //  If they didn't create a source via the entity handler, then we
02051     //  have to create one on our own.
02052     if (!srcToFill)
02053     {
02054         if (fDisableDefaultEntityResolution)
02055             return srcToFill;
02056 
02057         ReaderMgr::LastExtEntityInfo lastInfo;
02058         fReaderMgr.getLastExtEntityInfo(lastInfo);
02059 
02060         XMLURL urlTmp(fMemoryManager);
02061         if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
02062             (urlTmp.isRelative()))
02063         {
02064             if (!fStandardUriConformant)
02065             {
02066                 XMLBufBid  ddSys(&fBufMgr);
02067                 XMLBuffer& resolvedSysId = ddSys.getBuffer();
02068                 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);
02069 
02070                 srcToFill = new (fMemoryManager) LocalFileInputSource
02071                 (
02072                     lastInfo.systemId
02073                     , resolvedSysId.getRawBuffer()
02074                     , fMemoryManager
02075                 );
02076             }
02077             else
02078                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
02079         }
02080         else
02081         {
02082             if (fStandardUriConformant && urlTmp.hasInvalidChar())
02083                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
02084             srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
02085         }
02086     }
02087 
02088     return srcToFill;
02089 }
02090 
02091 
02092 // ---------------------------------------------------------------------------
02093 //  IGXMLScanner: Private grammar preparsing methods
02094 // ---------------------------------------------------------------------------
02095 Grammar* IGXMLScanner::loadXMLSchemaGrammar(const InputSource& src,
02096                                             const bool toCache)
02097 {
02098    // Reset the validators
02099     fSchemaValidator->reset();
02100     fSchemaValidator->setErrorReporter(fErrorReporter);
02101     fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
02102     fSchemaValidator->setGrammarResolver(fGrammarResolver);
02103 
02104     if (fValidatorFromUser)
02105         fValidator->reset();
02106 
02107     if (!fValidator->handlesSchema()) {
02108         if (fValidatorFromUser && fValidate)
02109             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
02110         else {
02111             fValidator = fSchemaValidator;
02112         }
02113     }
02114 
02115     XSDDOMParser parser(0, fMemoryManager, 0);
02116 
02117     parser.setValidationScheme(XercesDOMParser::Val_Never);
02118     parser.setDoNamespaces(true);
02119     parser.setUserEntityHandler(fEntityHandler);
02120     parser.setUserErrorReporter(fErrorReporter);
02121 
02122     // Should just issue warning if the schema is not found
02123     bool flag = src.getIssueFatalErrorIfNotFound();
02124     ((InputSource&) src).setIssueFatalErrorIfNotFound(false);
02125 
02126     parser.parse(src);
02127 
02128     // Reset the InputSource
02129     ((InputSource&) src).setIssueFatalErrorIfNotFound(flag);
02130 
02131     if (parser.getSawFatal() && fExitOnFirstFatal)
02132         emitError(XMLErrs::SchemaScanFatalError);
02133 
02134     DOMDocument* document = parser.getDocument(); //Our Grammar
02135 
02136     if (document != 0) {
02137 
02138         DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
02139         if (root != 0)
02140         {
02141             const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
02142             Grammar* grammar = fGrammarResolver->getGrammar(nsUri);
02143 
02144             // Check if this exact schema has already been seen.
02145             //
02146             const XMLCh* sysId = src.getSystemId();
02147             SchemaInfo* importSchemaInfo = 0;
02148 
02149             if (grammar)
02150             {
02151               if (nsUri && *nsUri)
02152                 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri));
02153               else
02154                 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId);
02155             }
02156 
02157             if (!importSchemaInfo)
02158             {
02159               bool grammarFound = grammar &&
02160                 grammar->getGrammarType() == Grammar::SchemaGrammarType &&
02161                 getHandleMultipleImports();
02162 
02163               SchemaGrammar* schemaGrammar;
02164 
02165               if (grammarFound)
02166                 schemaGrammar = (SchemaGrammar*) grammar;
02167               else
02168                 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
02169 
02170               XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
02171               gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
02172               gramDesc->setLocationHints(sysId);
02173 
02174               TraverseSchema traverseSchema
02175                 (
02176                   root
02177                   , fURIStringPool
02178                   , schemaGrammar
02179                   , fGrammarResolver
02180                   , fCachedSchemaInfoList
02181                   , toCache ? fCachedSchemaInfoList : fSchemaInfoList
02182                   , this
02183                   , sysId
02184                   , fEntityHandler
02185                   , fErrorReporter
02186                   , fMemoryManager
02187                   , grammarFound
02188                 );
02189 
02190               grammar = schemaGrammar;
02191 
02192               // Reset the now invalid schema roots in the collected
02193               // schema info entries.
02194               //
02195               {
02196                 RefHash2KeysTableOfEnumerator<SchemaInfo> i (
02197                   toCache ? fCachedSchemaInfoList : fSchemaInfoList);
02198 
02199                 while (i.hasMoreElements ())
02200                   i.nextElement().resetRoot ();
02201               }
02202             }
02203 
02204             if (fValidate) {
02205               //  validate the Schema scan so far
02206               fValidator->setGrammar(grammar);
02207               fValidator->preContentValidation(false);
02208             }
02209 
02210             if (toCache) {
02211               fGrammarResolver->cacheGrammars();
02212             }
02213 
02214             if(getPSVIHandler())
02215               fModel = fGrammarResolver->getXSModel();
02216 
02217             return grammar;
02218         }
02219     }
02220 
02221     return 0;
02222 }
02223 
02224 
02225 
02226 // ---------------------------------------------------------------------------
02227 //  IGXMLScanner: Private parsing methods
02228 // ---------------------------------------------------------------------------
02229 
02230 //  This method is called to do a raw scan of an attribute value. It does not
02231 //  do normalization (since we don't know their types yet.) It just scans the
02232 //  value and does entity expansion.
02233 //
02234 //  Ends of entities must be dealt with here. During DTD scan, they can come
02235 //  from external entities. During content, they can come from any entity.
02236 //  We just eat the end of entity and continue with our scan until we come
02237 //  to the closing quote. If an unterminated value causes us to go through
02238 //  subsequent entities, that will cause errors back in the calling code,
02239 //  but there's little we can do about it here.
      //
      //  attrName  Only used as an argument of the InvalidCharacterInAttrValue
      //            error.
      //  toFill    Reset on entry, then receives the raw value. A character
      //            that scanEntityRef() reports as escaped is preceded by a
      //            0xFFFF marker.
      //
      //  Returns false when the next character is not a quote, or when the
      //  closing quote shows up in a reader whose number is lower than the one
      //  the value started in (PartialMarkupInEntity is emitted first). Returns
      //  true when the closing quote is found in the starting reader.
      //
      //  Throws UnexpectedEOFException when a null character is read and the
      //  current reader does not accept it as an XML character.
02240 bool IGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill)
02241 {
02242     // Reset the target buffer
02243     toFill.reset();
02244 
02245     // Get the next char which must be a single or double quote
02246     XMLCh quoteCh;
02247     if (!fReaderMgr.skipIfQuote(quoteCh))
02248         return false;
02249 
02250     //  We have to get the current reader because we have to ignore closing
02251     //  quotes until we hit the same reader again.
02252     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
02253 
02254     //  Loop until we get the attribute value. Note that we use a double
02255     //  loop here to avoid the setup/teardown overhead of the exception
02256     //  handler on every round.
02257     while (true)
02258     {
02259         try
02260         {
02261             while(true)
02262             {
02263                 XMLCh nextCh = fReaderMgr.getNextChar();
02264 
02265                 if (nextCh != quoteCh)
02266                 {
02267                     if (nextCh != chAmpersand)
02268                     {
                              // 0xD800..0xDFFF is the surrogate range; anything
                              // outside it is checked as a single character.
02269                         if ((nextCh < 0xD800) || (nextCh > 0xDFFF))
02270                         {
02271                             // Its got to at least be a valid XML character
02272                             if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
02273                             {
02274                                 if (nextCh == 0)
02275                                     ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
02276 
                                      // Render the offending code unit as text
                                      // (radix 16) for the error message. The
                                      // error is emitted but the character is
                                      // still appended below.
02277                                 XMLCh tmpBuf[9];
02278                                 XMLString::binToText
02279                                 (
02280                                     nextCh
02281                                     , tmpBuf
02282                                     , 8
02283                                     , 16
02284                                     , fMemoryManager
02285                                 );
02286                                 emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
02287                             }
02288                         } else // its a surrogate
02289                         {
02290                             // Deal with surrogate pairs
02291 
02292                             //  we expect a leading surrogate.
02293                             if (nextCh <= 0xDBFF)
02294                             {
                                      // The leading half is stored right away;
                                      // the character read next replaces nextCh
                                      // and is appended at the bottom of the
                                      // loop whether or not it is a valid
                                      // trailing surrogate.
02295                                 toFill.append(nextCh);
02296 
02297                                 //  process the trailing surrogate
02298                                 nextCh = fReaderMgr.getNextChar();
02299 
02300                                 //  it should be a trailing surrogate.
02301                                 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
02302                                 {
02303                                     emitError(XMLErrs::Expected2ndSurrogateChar);
02304                                 }
02305                             } else
02306                             {
02307                                 //  Its a trailing surrogate, but we are not expecting it
02308                                 emitError(XMLErrs::Unexpected2ndSurrogateChar);
02309                             }
02310                         }
02311                     } else // its a chAmpersand
02312                     {
02313                         //  Check for an entity ref . We ignore the empty flag in
02314                         //  this one.
02315 
02316                         bool    escaped;
02317                         XMLCh   firstCh;
02318                         XMLCh   secondCh
02319                             ;
02320                         // If it was not returned directly, then jump back up
02321                         if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned)
02322                         {
02323                             //  If it was escaped, then put in a 0xFFFF value. This will
02324                             //  be used later during validation and normalization of the
02325                             //  value to know that the following character was via an
02326                             //  escape char.
02327                             if (escaped)
02328                                 toFill.append(0xFFFF);
02329 
02330                             toFill.append(firstCh);
02331                             if (secondCh)
02332                                 toFill.append(secondCh);
02333                         }
                              // Either way skip the append at the bottom of the
                              // loop: nextCh still holds the ampersand.
02334                         continue;
02335                     }
02336                 } else // its a quoteCh
02337                 {
02338                     //  Check for our ending quote. It has to be in the same entity
02339                     //  as where we started. Quotes in nested entities are ignored.
02340 
02341                     if (curReader == fReaderMgr.getCurrentReaderNum())
02342                     {
02343                         return true;
02344                     }
02345 
02346                     // Watch for spillover into a previous entity
02347                     if (curReader > fReaderMgr.getCurrentReaderNum())
02348                     {
02349                         emitError(XMLErrs::PartialMarkupInEntity);
02350                         return false;
02351                     }
                          // Otherwise the quote belongs to a reader with a higher
                          // number; it falls through and is stored like any
                          // other character.
02352                 }
02353 
02354                 // add it to the buffer
02355                 toFill.append(nextCh);
02356 
02357             }
02358         }
02359         catch(const EndOfEntityException&)
02360         {
02361             // Just eat it and continue.
02362         }
02363     }
          // Not reached: neither loop above has a break, they are only left via
          // return or an exception.
02364     return true;
02365 }
02366 
02367 
      //  Scans a quoted attribute value, expanding entity references and
      //  normalizing whitespace on the fly according to the attribute's type.
      //
      //  attDef    Declaration of the attribute, or 0. Without a declaration the
      //            type is taken to be CDATA and the attribute is treated as not
      //            externally declared.
      //  attrName  Only used as an argument of the errors emitted here.
      //  toFill    Reset on entry, then receives the normalized value.
      //
      //  For CDATA attributes each unescaped 0x09, 0x0A or 0x0D is replaced by a
      //  space. For all other types runs of whitespace are collapsed: leading
      //  whitespace is dropped and a single space is only emitted once further
      //  content follows, so trailing whitespace is dropped as well.
      //
      //  Returns false when the next character is not a quote, or when the
      //  closing quote shows up in a reader whose number is lower than the one
      //  the value started in (PartialMarkupInEntity is emitted first). Returns
      //  true when the closing quote is found in the starting reader.
      //
      //  Throws UnexpectedEOFException when a null character is read.
02368 bool IGXMLScanner::scanAttValue(  const   XMLAttDef* const    attDef
02369                                   , const XMLCh* const        attrName
02370                                   ,       XMLBuffer&          toFill)
02371 {
          // States of the whitespace collapsing used for non-CDATA attributes
02372     enum States
02373     {
02374         InWhitespace
02375         , InContent
02376     };
02377 
02378     // Get the attribute type; without a declaration it is treated as CDATA
02379     const XMLAttDef::AttTypes type = (attDef)
02380                 ?attDef->getType()
02381                 :XMLAttDef::CData;
02382 
02383     // Reset the target buffer
02384     toFill.reset();
02385 
02386     // Get the next char which must be a single or double quote
02387     XMLCh quoteCh;
02388     if (!fReaderMgr.skipIfQuote(quoteCh))
02389         return false;
02390 
02391     //  We have to get the current reader because we have to ignore closing
02392     //  quotes until we hit the same reader again.
02393     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
02394 
02395     // Get attribute def - to check to see if it's declared externally or not
02396     bool  isAttExternal = (attDef)
02397                 ?attDef->isExternal()
02398                 :false;
02399 
02400     //  Loop until we get the attribute value. Note that we use a double
02401     //  loop here to avoid the setup/teardown overhead of the exception
02402     //  handler on every round.
          //
          //  All of the scan state below lives outside the try block, so it
          //  survives an EndOfEntityException; the handler at the bottom only
          //  clears gotLeadingSurrogate and escaped.
02403     XMLCh   nextCh;
02404     XMLCh   secondCh = 0;
02405     States  curState = InContent;
02406     bool    firstNonWS = false;
02407     bool    gotLeadingSurrogate = false;
02408     bool    escaped;
02409     while (true)
02410     {
02411         try
02412         {
02413             while(true)
02414             {
02415                 nextCh = fReaderMgr.getNextChar();
02416 
02417                 if (!nextCh)
02418                     ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
02419 
02420                 // Check for our ending quote in the same entity
02421                 if (nextCh == quoteCh)
02422                 {
02423                     if (curReader == fReaderMgr.getCurrentReaderNum())
02424                         return true;
02425 
02426                     // Watch for spillover into a previous entity
02427                     if (curReader > fReaderMgr.getCurrentReaderNum())
02428                     {
02429                         emitError(XMLErrs::PartialMarkupInEntity);
02430                         return false;
02431                     }
                          // Otherwise the quote belongs to a reader with a higher
                          // number and is processed below like any other character.
02432                 }
02433 
02434                 //  Check for an entity ref now, before we let it affect our
02435                 //  whitespace normalization logic below. We ignore the empty flag
02436                 //  in this one.
02437                 escaped = false;
02438                 if (nextCh == chAmpersand)
02439                 {
                          // scanEntityRef() writes its result straight into nextCh,
                          // secondCh and escaped. Any outcome other than
                          // EntityExp_Returned leaves nothing to store on this
                          // round.
02440                     if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
02441                     {
02442                         gotLeadingSurrogate = false;
02443                         continue;
02444                     }
02445                 }
02446                 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
02447                 {
02448                     // Deal with surrogate pairs
02449                     //  Its a leading surrogate. If we already got one, then
02450                     //  issue an error, else set leading flag to make sure that
02451                     //  we look for a trailing next time.
02452                     if (gotLeadingSurrogate)
02453                         emitError(XMLErrs::Expected2ndSurrogateChar);
02454                      else
02455                         gotLeadingSurrogate = true;
02456                 }
02457                 else
02458                 {
02459                     //  If its a trailing surrogate, make sure that we are
02460                     //  prepared for that. Else, its just a regular char so make
02461                     //  sure that we were not expecting a trailing surrogate.
02462                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
02463                     {
02464                         // Its trailing, so make sure we were expecting it
02465                         if (!gotLeadingSurrogate)
02466                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
02467                     }
02468                     else
02469                     {
02470                         //  Its just a char, so make sure we were not expecting a
02471                         //  trailing surrogate.
02472                         if (gotLeadingSurrogate)
02473                             emitError(XMLErrs::Expected2ndSurrogateChar);
02474 
02475                         // Its got to at least be a valid XML character
02476                         if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
02477                         {
                                  // Render the offending code unit as text (radix 16)
                                  // for the error message.
02478                             XMLCh tmpBuf[9];
02479                             XMLString::binToText
02480                             (
02481                                 nextCh
02482                                 , tmpBuf
02483                                 , 8
02484                                 , 16
02485                                 , fMemoryManager
02486                             );
02487                             emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
02488                         }
02489                     }
02490                     gotLeadingSurrogate = false;
02491                 }
02492 
02493                 //  If its not escaped, then make sure its not a < character, which
02494                 //  is not allowed in attribute values.
02495                 if (!escaped && (nextCh == chOpenAngle))
02496                     emitError(XMLErrs::BracketInAttrValue, attrName);
02497 
02498                 //  If the attribute is a CDATA type we do simple replacement of
02499                 //  tabs and new lines with spaces, if the character is not escaped
02500                 //  by way of a char ref.
02501                 //
02502                 //  Otherwise, we do the standard non-CDATA normalization of
02503                 //  compressing whitespace to single spaces and getting rid of leading
02504                 //  and trailing whitespace.
02505                 if (type == XMLAttDef::CData)
02506                 {
02507                     if (!escaped)
02508                     {
02509                         if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
02510                         {
02511                             // Check Validity Constraint for Standalone document declaration
02512                             // XML 1.0, Section 2.9
02513                             if (fStandalone && fValidate && isAttExternal)
02514                             {
02515                                 // Can't have a standalone document declaration of "yes" if  attribute
02516                                 // values are subject to normalisation
02517                                 fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
02518                             }
02519                             nextCh = chSpace;
02520                         }
02521                     }
02522                 }
02523                 else
02524                 {
02525                     if (curState == InWhitespace)
02526                     {
                              // Content resumes on any non-whitespace character, or
                              // on an escaped character other than a space.
02527                         if ((escaped && nextCh != chSpace) || !fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
02528                         {
                                  // The pending whitespace run becomes one space, but
                                  // only if content was already seen; this is what
                                  // drops leading whitespace.
02529                             if (firstNonWS)
02530                                 toFill.append(chSpace);
02531                             curState = InContent;
02532                             firstNonWS = true;
02533                         }
02534                         else
02535                         {
                                  // Still inside the whitespace run, nothing to store
02536                             continue;
02537                         }
02538                     }
02539                     else if (curState == InContent)
02540                     {
                              // A space (escaped or not) or any unescaped whitespace
                              // starts a whitespace run; the character itself is
                              // not stored.
02541                         if ((nextCh == chSpace) ||
02542                             (fReaderMgr.getCurrentReader()->isWhitespace(nextCh) && !escaped))
02543                         {
02544                             curState = InWhitespace;
02545 
02546                             // Check Validity Constraint for Standalone document declaration
02547                             // XML 1.0, Section 2.9
02548                             if (fStandalone && fValidate && isAttExternal)
02549                             {
                                      // Flagged when no content has been seen yet,
                                      // when the character is not a plain space, or
                                      // when lookingAtSpace() reports true
                                      // (presumably another space follows - TODO
                                      // confirm against ReaderMgr).
02550                                 if (!firstNonWS || (nextCh != chSpace) || (fReaderMgr.lookingAtSpace()))
02551                                 {
02552                                      // Can't have a standalone document declaration of "yes" if  attribute
02553                                      // values are subject to normalisation
02554                                      fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName);
02555                                 }
02556                             }
02557                             continue;
02558                         }
02559                         firstNonWS = true;
02560                     }
02561                 }
02562 
02563                 // Else add it to the buffer
02564                 toFill.append(nextCh);
02565 
                      // A second character handed back by scanEntityRef() is stored
                      // right after the first one and then cleared.
02566                 if (secondCh)
02567                 {
02568                     toFill.append(secondCh);
02569                     secondCh=0;
02570                 }
02571             }
02572         }
02573         catch(const EndOfEntityException&)
02574         {
02575             // Just eat it and continue.
02576             gotLeadingSurrogate = false;
02577             escaped = false;
02578         }
02579     }
          // Not reached: neither loop above has a break, they are only left via
          // return or an exception.
02580     return true;
02581 }
02582 
02583 
02584 //  This method scans a CDATA section. It collects the character into one
02585 //  of the temp buffers and calls the document handler, if any, with the
02586 //  characters. It assumes that the <![CDATA string has been scanned before
02587 //  this call.
02588 void IGXMLScanner::scanCDSection()
02589 {
02590     static const XMLCh CDataClose[] =
02591     {
02592             chCloseSquare, chCloseAngle, chNull
02593     };
02594 
02595     //  The next character should be the opening square bracket. If not
02596     //  issue an error, but then try to recover by skipping any whitespace
02597     //  and checking again.
02598     if (!fReaderMgr.skippedChar(chOpenSquare))
02599     {
02600         emitError(XMLErrs::ExpectedOpenSquareBracket);
02601         fReaderMgr.skipPastSpaces();
02602 
02603         // If we still don't find it, then give up, else keep going
02604         if (!fReaderMgr.skippedChar(chOpenSquare))
02605             return;
02606     }
02607 
02608     // Get a buffer for this
02609     XMLBufBid bbCData(&fBufMgr);
02610 
02611     //  We just scan forward until we hit the end of CDATA section sequence.
02612     //  CDATA is effectively a big escape mechanism so we don't treat markup
02613     //  characters specially here.
02614     bool            emittedError = false;
02615     bool    gotLeadingSurrogate = false;
02616     const ElemStack::StackElem* topElem = fElemStack.topElement();
02617 
02618     // Get the character data opts for the current element
02619     XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
02620     if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
02621     {
02622         // And see if the current element is a 'Children' style content model
02623         ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
02624         if(currType)
02625         {
02626             SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
02627             if(modelType == SchemaElementDecl::Children ||
02628                modelType == SchemaElementDecl::ElementOnlyEmpty)
02629                 charOpts = XMLElementDecl::SpacesOk;
02630             else if(modelType == SchemaElementDecl::Empty)
02631                 charOpts = XMLElementDecl::NoCharData;
02632         }
02633     } else // DTD grammar
02634         charOpts = topElem->fThisElement->getCharDataOpts();
02635 
02636     while (true)
02637     {
02638         const XMLCh nextCh = fReaderMgr.getNextChar();
02639 
02640         // Watch for unexpected end of file
02641         if (!nextCh)
02642         {
02643             emitError(XMLErrs::UnterminatedCDATASection);
02644             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
02645         }
02646 
02647         if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)))
02648         {
02649             // This document is standalone; this ignorable CDATA whitespace is forbidden.
02650             // XML 1.0, Section 2.9
02651             // And see if the current element is a 'Children' style content model
02652             if (topElem->fThisElement->isExternal()) {
02653 
02654                 if (charOpts == XMLElementDecl::SpacesOk) // Element Content
02655                 {
02656                     // Error - standalone should have a value of "no" as whitespace detected in an
02657                     // element type with element content whose element declaration was external
02658                     fValidator->emitError(XMLValid::NoWSForStandalone);
02659                     if(fGrammarType == Grammar::SchemaGrammarType)
02660                     {
02661                         if (getPSVIHandler())
02662                         {
02663                             // REVISIT:
02664                             // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
02665                         }
02666                     }
02667                 }
02668             }
02669         }
02670 
02671         //  If this is a close square bracket it could be our closing
02672         //  sequence.
02673         if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
02674         {
02675             //  make sure we were not expecting a trailing surrogate.
02676             if (gotLeadingSurrogate)
02677                 emitError(XMLErrs::Expected2ndSurrogateChar);
02678 
02679             if (fGrammarType == Grammar::SchemaGrammarType) {
02680 
02681                 XMLSize_t xsLen = bbCData.getLen();
02682                 const XMLCh* xsNormalized = bbCData.getRawBuffer();
02683                 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
02684                 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
02685                 {
02686                     // normalize the character according to schema whitespace facet
02687                     ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf);
02688                     xsNormalized = fWSNormalizeBuf.getRawBuffer();
02689                     xsLen = fWSNormalizeBuf.getLen();
02690                     if (fNormalizeData && fValidate) {
02691                         bbCData.set(xsNormalized);
02692                     }
02693                 }
02694 
02695                 if (fValidate) {
02696 
02697                     // tell the schema validation about the character data for checkContent later
02698                     ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized);
02699 
02700                     if (charOpts != XMLElementDecl::AllCharData)
02701                     {
02702                         // They definitely cannot handle any type of char data
02703                         fValidator->emitError(XMLValid::NoCharDataInCM);
02704                         if (getPSVIHandler())
02705                         {
02706                             // REVISIT:
02707                             // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
02708                         }
02709                     }
02710                 }
02711 
02712                 // call all active identity constraints
02713                 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
02714                     fContent.append(xsNormalized, xsLen);
02715                 }
02716             }
02717             else {
02718                 if (fValidate) {
02719 
02720                     if (charOpts != XMLElementDecl::AllCharData)
02721                     {
02722                         // They definitely cannot handle any type of char data
02723                         fValidator->emitError(XMLValid::NoCharDataInCM);
02724                     }
02725                 }
02726             }
02727 
02728             // If we have a doc handler, call it
02729             if (fDocHandler)
02730             {
02731                 fDocHandler->docCharacters(
02732                     bbCData.getRawBuffer(), bbCData.getLen(), true
02733                 );
02734             }
02735 
02736             // And we are done
02737             break;
02738         }
02739 
02740         //  Make sure its a valid character. But if we've emitted an error
02741         //  already, don't bother with the overhead since we've already told
02742         //  them about it.
02743         if (!emittedError)
02744         {
02745             // Deal with surrogate pairs
02746             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
02747             {
02748                 //  Its a leading surrogate. If we already got one, then
02749                 //  issue an error, else set leading flag to make sure that
02750                 //  we look for a trailing next time.
02751                 if (gotLeadingSurrogate)
02752                     emitError(XMLErrs::Expected2ndSurrogateChar);
02753                 else
02754                     gotLeadingSurrogate = true;
02755             }
02756             else
02757             {
02758                 //  If its a trailing surrogate, make sure that we are
02759                 //  prepared for that. Else, its just a regular char so make
02760                 //  sure that we were not expected a trailing surrogate.
02761                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
02762                 {
02763                     // Its trailing, so make sure we were expecting it
02764                     if (!gotLeadingSurrogate)
02765                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
02766                 }
02767                 else
02768                 {
02769                     //  Its just a char, so make sure we were not expecting a
02770                     //  trailing surrogate.
02771                     if (gotLeadingSurrogate)
02772                         emitError(XMLErrs::Expected2ndSurrogateChar);
02773 
02774                     // Its got to at least be a valid XML character
02775                     else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
02776                     {
02777                         XMLCh tmpBuf[9];
02778                         XMLString::binToText
02779                         (
02780                             nextCh
02781                             , tmpBuf
02782                             , 8
02783                             , 16
02784                             , fMemoryManager
02785                         );
02786                         emitError(XMLErrs::InvalidCharacter, tmpBuf);
02787                         emittedError = true;
02788                     }
02789                 }
02790                 gotLeadingSurrogate = false;
02791             }
02792         }
02793 
02794         // Add it to the buffer
02795         bbCData.append(nextCh);
02796     }
02797 }
02798 //  Collects character data into toUse until getNextCharIfNot(chOpenAngle, ...) yields no
02799 //  more chars, handling entity refs, surrogates, invalid chars and ]]> along the way.
02800 void IGXMLScanner::scanCharData(XMLBuffer& toUse)
02801 {
02802     //  The ]]> sequence is illegal in character data, so this little state
02803     //  machine tracks how many consecutive ']' chars have just been seen.
02804     enum States
02805     {
02806         State_Waiting
02807         , State_GotOne
02808         , State_GotTwo
02809     };
02810 
02811     // Reset the buffer before we start
02812     toUse.reset();
02813 
02814     // Turn on the 'throw at end' flag of the reader manager
02815     ThrowEOEJanitor jan(&fReaderMgr, true);
02816 
02817     //  In order to be more efficient we have to use kind of a deeply nested
02818     //  set of blocks here. The outer block puts on a try and catches end of
02819     //  entity exceptions. The inner loop is the per-character loop. If we
02820     //  put the try inside the inner loop, it would work but would require
02821     //  the exception handling setup/teardown code to be invoked for
02822     //  each character.
02823     XMLCh   nextCh;
02824     XMLCh   secondCh = 0;
02825     States  curState = State_Waiting;
02826     bool    escaped = false;
02827     bool    gotLeadingSurrogate = false;
02828     bool    notDone = true;
02829     while (notDone)
02830     {
02831         try
02832         {
02833             while (true)
02834             {
02835                 //  Eat through as many plain content characters as possible without
02836                 //  needing special handling.  Moving most content characters here,
02837                 //  in this one call, rather than running the overall loop once
02838                 //  per content character, is a speed optimization.
02839                 if (curState == State_Waiting  &&  !gotLeadingSurrogate)
02840                 {
02841                      fReaderMgr.movePlainContentChars(toUse);
02842                 }
02843 
02844                 // Try to get another char from the source
02845                 //   The code from here on down covers all contingencies.
02846                 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
02847                 {
02848                     // If we were waiting for a trailing surrogate, it's an error
02849                     if (gotLeadingSurrogate)
02850                         emitError(XMLErrs::Expected2ndSurrogateChar);
02851 
02852                     notDone = false;
02853                     break;
02854                 }
02855 
02856                 //  Watch for a reference. Note that the escapement mechanism
02857                 //  is ignored in this content.
02858                 escaped = false;
02859                 if (nextCh == chAmpersand)
02860                 {
02861                     sendCharData(toUse);
02862 
02863                     // Turn off the throwing at the end of entity during this
02864                     ThrowEOEJanitor jan(&fReaderMgr, false);
02865 
02866                     if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
02867                     {
02868                         gotLeadingSurrogate = false;
02869                         continue;
02870                     }
02871                     else
02872                     {
02873                         if (escaped && !fElemStack.isEmpty())
02874                             fElemStack.setReferenceEscaped();
02875                     }
02876                 }
02877                 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
02878                 {
02879                     // Deal with surrogate pairs
02880                     //  It's a leading surrogate. If we already got one, then
02881                     //  issue an error, else set leading flag to make sure that
02882                     //  we look for a trailing next time.
02883                     if (gotLeadingSurrogate)
02884                         emitError(XMLErrs::Expected2ndSurrogateChar);
02885                     else
02886                         gotLeadingSurrogate = true;
02887                 }
02888                 else
02889                 {
02890                     //  If it's a trailing surrogate, make sure that we are
02891                     //  prepared for that. Else, it's just a regular char so make
02892                     //  sure that we were not expecting a trailing surrogate.
02893                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
02894                     {
02895                         // It's trailing, so make sure we were expecting it
02896                         if (!gotLeadingSurrogate)
02897                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
02898                     }
02899                     else
02900                     {
02901                         //  It's just a char, so make sure we were not expecting a
02902                         //  trailing surrogate.
02903                         if (gotLeadingSurrogate)
02904                             emitError(XMLErrs::Expected2ndSurrogateChar);
02905 
02906                         // Make sure the returned char is a valid XML char
02907                         if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
02908                         {
02909                             XMLCh tmpBuf[9];
02910                             XMLString::binToText
02911                             (
02912                                 nextCh
02913                                 , tmpBuf
02914                                 , 8
02915                                 , 16
02916                                 , fMemoryManager
02917                             );
02918                             emitError(XMLErrs::InvalidCharacter, tmpBuf);
02919                         }
02920                     }
02921                     gotLeadingSurrogate = false;
02922                 }
02923 
02924                 // Keep the ]]> state machine up to date; an escaped char always resets it
02925                 if (!escaped)
02926                 {
02927                     if (nextCh == chCloseSquare)
02928                     {
02929                         if (curState == State_Waiting)
02930                             curState = State_GotOne;
02931                         else if (curState == State_GotOne)
02932                             curState = State_GotTwo;
02933                     }
02934                     else if (nextCh == chCloseAngle)
02935                     {
02936                         if (curState == State_GotTwo)
02937                             emitError(XMLErrs::BadSequenceInCharData);
02938                         curState = State_Waiting;
02939                     }
02940                     else
02941                     {
02942                         curState = State_Waiting;
02943                     }
02944                 }
02945                 else
02946                 {
02947                     curState = State_Waiting;
02948                 }
02949 
02950                 // Add this char to the buffer
02951                 toUse.append(nextCh);
02952                 // scanEntityRef may have handed back a second char; append it and clear it
02953                 if (secondCh)
02954                 {
02955                     toUse.append(secondCh);
02956                     secondCh=0;
02957                 }
02958             }
02959         }
02960         catch(const EndOfEntityException& toCatch)
02961         {
02962             //  Some entity ended, so we have to send any accumulated
02963             //  chars and send an end of entity event.
02964             sendCharData(toUse);
02965             gotLeadingSurrogate = false;
02966 
02967             if (fDocHandler)
02968                 fDocHandler->endEntityReference(toCatch.getEntity());
02969         }
02970     }
02971 
02972     // Check the validity constraints as per XML 1.0 Section 2.9
02973     if (fValidate && fStandalone)
02974     {
02975         // See if the text contains whitespace
02976         // Get the raw data we need for the check
02977         const XMLCh* rawBuf = toUse.getRawBuffer();
02978         const XMLSize_t len = toUse.getLen();
02979         const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len);
02980 
02981         if (isSpaces)
02982         {
02983             // Only elements whose declaration was external are of interest here
02984             const ElemStack::StackElem* topElem = fElemStack.topElement();
02985 
02986             if (topElem->fThisElement->isExternal()) {
02987 
02988                 // Get the character data opts for the current element
02989                 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
02990                 if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
02991                 {
02992                     // And see if the current element is a 'Children' style content model
02993                     ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
02994                     if(currType)
02995                     {
02996                         SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
02997                         if(modelType == SchemaElementDecl::Children ||
02998                            modelType == SchemaElementDecl::ElementOnlyEmpty)
02999                             charOpts = XMLElementDecl::SpacesOk;
03000                         else if(modelType == SchemaElementDecl::Empty)
03001                             charOpts = XMLElementDecl::NoCharData;
03002                     }
03003                 } else // DTD grammar
03004                     charOpts = topElem->fThisElement->getCharDataOpts();
03005 
03006                 if (charOpts == XMLElementDecl::SpacesOk)  // => Element Content
03007                 {
03008                     // Error - standalone should have a value of "no" as whitespace detected in an
03009                     // element type with element content whose element declaration was external
03010                     //
03011                     fValidator->emitError(XMLValid::NoWSForStandalone);
03012                     if(fGrammarType == Grammar::SchemaGrammarType)
03013                     {
03014                         if (getPSVIHandler())
03015                         {
03016                             // REVISIT:
03017                             // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
03018                         }
03019                     }
03020                 }
03021             }
03022         }
03023     }
03024     // Send any char data that we accumulated into the buffer
03025     sendCharData(toUse);
03026 }
03027 
03028 
03029 //  Scans a general entity or character reference (the caller has already
03030 //  consumed the '&'). A char ref or special-char entity is returned through
03031 //  firstCh/secondCh; any other entity is pushed as a new reader.
03032 //
03033 //  Returns EntityExp_Returned when firstCh (and possibly secondCh) hold the
03034 //  value, EntityExp_Pushed when a reader was pushed, else EntityExp_Failed.
03035 //  inAttVal: the ref occurs in an attribute value (external refs are then
03036 //  an error and the start entity reference event is suppressed).
03037 //  escaped: set to true when the value came from a char ref or a special
03038 //  char entity; it only makes a difference if the return value indicates the
03039 //  value was returned directly. It and secondCh are cleared on entry.
03040 IGXMLScanner::EntityExpRes
03041 IGXMLScanner::scanEntityRef(  const   bool    inAttVal
03042                             ,       XMLCh&  firstCh
03043                             ,       XMLCh&  secondCh
03044                             ,       bool&   escaped)
03045 {
03046     // Assume no escape
03047     secondCh = 0;
03048     escaped = false;
03049 
03050     // We have to ensure that it's all in one entity
03051     const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum();
03052 
03053     //  If the next char is a pound, then it's a character reference and we
03054     //  need to expand it always.
03055     if (fReaderMgr.skippedChar(chPound))
03056     {
03057         //  It's a character reference, so scan it and get back the numeric
03058         //  value it represents.
03059         if (!scanCharRef(firstCh, secondCh))
03060             return EntityExp_Failed;
03061 
03062         escaped = true;
03063 
03064         if (curReader != fReaderMgr.getCurrentReaderNum())
03065             emitError(XMLErrs::PartialMarkupInEntity);
03066 
03067         return EntityExp_Returned;
03068     }
03069 
03070     // Expand it since it's a normal entity ref
03071     XMLBufBid bbName(&fBufMgr);
03072     int  colonPosition;
03073     bool validName = fDoNamespaces ? fReaderMgr.getQName(bbName.getBuffer(), &colonPosition) :
03074                                      fReaderMgr.getName(bbName.getBuffer());
03075     if (!validName)
03076     {
03077         if (bbName.isEmpty())
03078             emitError(XMLErrs::ExpectedEntityRefName);
03079         else
03080             emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer());
03081         return EntityExp_Failed;
03082     }
03083 
03084     //  Next char must be a semi-colon. But if it's not, just emit
03085     //  an error and try to continue.
03086     if (!fReaderMgr.skippedChar(chSemiColon))
03087         emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
03088 
03089     // Make sure we ended up on the same entity reader as the & char
03090     if (curReader != fReaderMgr.getCurrentReaderNum())
03091         emitError(XMLErrs::PartialMarkupInEntity);
03092 
03093     // Look up the name in the general entity pool
03094     XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer());
03095 
03096     // If it does not exist, then obviously an error
03097     if (!decl)
03098     {
03099         // XML 1.0 Section 4.1
03100         // Well-formedness Constraint for entity not found:
03101         //   In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references,
03102         //      or a document with "standalone='yes'", for an entity reference that does not occur within the external subset
03103         //      or a parameter entity
03104         //
03105         // Else it's a Validity Constraint, reported only when validating
03106         if (fStandalone || fHasNoDTD)
03107             emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
03108         else {
03109             if (fValidate)
03110                 fValidator->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
03111         }
03112 
03113         return EntityExp_Failed;
03114     }
03115 
03116     // XML 1.0 Section 4.1
03117     //  If we are a standalone document, then it has to have been declared
03118     //  in the internal subset.
03119     if (fStandalone && !decl->getDeclaredInIntSubset())
03120         emitError(XMLErrs::IllegalRefInStandalone, bbName.getRawBuffer());
03121 
03122     if (decl->isExternal())
03123     {
03124         // If it's unparsed, then it's not valid here
03125         if (decl->isUnparsed())
03126         {
03127             emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer());
03128             return EntityExp_Failed;
03129         }
03130 
03131         // If we are in an attribute value, then not valid but keep going
03132         if (inAttVal)
03133             emitError(XMLErrs::NoExtRefsInAttValue);
03134 
03135         // And now create a reader to read this entity
03136         InputSource* srcUsed;
03137         XMLReader* reader = fReaderMgr.createReader
03138         (
03139             decl->getBaseURI()
03140             , decl->getSystemId()
03141             , decl->getPublicId()
03142             , false
03143             , XMLReader::RefFrom_NonLiteral
03144             , XMLReader::Type_General
03145             , XMLReader::Source_External
03146             , srcUsed
03147             , fCalculateSrcOfs
03148             , fLowWaterMark
03149             , fDisableDefaultEntityResolution
03150         );
03151 
03152         // Put a janitor on the source so it gets cleaned up on exit
03153         Janitor<InputSource> janSrc(srcUsed);
03154 
03155         //  If no reader could be created, throw a RuntimeException naming
03156         //  the system id of the source used (or of the declaration).
03157         if (!reader)
03158             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : decl->getSystemId(), fMemoryManager);
03159 
03160         //  Push the reader. If it's a recursive expansion, then emit an error
03161         //  and return a failure.
03162         if (!fReaderMgr.pushReader(reader, decl))
03163         {
03164             emitError(XMLErrs::RecursiveEntity, decl->getName());
03165             return EntityExp_Failed;
03166         }
03167 
03168         // here's where we need to check if there's a SecurityManager,
03169         // how many entity references we've had
03170         if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
03171             XMLCh expLimStr[32];
03172             XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
03173             emitError
03174             (
03175                 XMLErrs::EntityExpansionLimitExceeded
03176                 , expLimStr
03177             );
03178             // there seems nothing  better to be done than to reset the entity expansion counter
03179             fEntityExpansionCount = 0;
03180         }
03181 
03182         //  Do a start entity reference event.
03183         //
03184         //  <TBD> For now, we suppress them in att values. Later, when
03185         //  the stuff is in place to correctly allow DOM to handle them
03186         //  we'll turn this back on.
03187         if (fDocHandler && !inAttVal)
03188             fDocHandler->startEntityReference(*decl);
03189 
03190         // If it starts with the XML string, then parse a text decl
03191         if (checkXMLDecl(true))
03192             scanXMLDecl(Decl_Text);
03193     }
03194     else
03195     {
03196         //  If it's one of the special char references, then we can return
03197         //  it as a character, and it's considered escaped.
03198         if (decl->getIsSpecialChar())
03199         {
03200             firstCh = decl->getValue()[0];
03201             escaped = true;
03202             return EntityExp_Returned;
03203         }
03204 
03205         //  Create a reader over a memory stream over the entity value
03206         //  We force it to assume UTF-16 by passing in an encoding
03207         //  string. This way it won't bother trying to predecode the
03208         //  first line, looking for an XML/TextDecl.
03209         XMLReader* valueReader = fReaderMgr.createIntEntReader
03210         (
03211             decl->getName()
03212             , XMLReader::RefFrom_NonLiteral
03213             , XMLReader::Type_General
03214             , decl->getValue()
03215             , decl->getValueLen()
03216             , false
03217         );
03218 
03219         //  Try to push the entity reader onto the reader manager stack,
03220         //  where it will become the subsequent input. If it fails, that
03221         //  means the entity is recursive, so issue an error and keep going.
03222         //  NOTE(review): the external branch returns EntityExp_Failed here - confirm.
03223         if (!fReaderMgr.pushReader(valueReader, decl))
03224             emitError(XMLErrs::RecursiveEntity, decl->getName());
03225 
03226         // Same SecurityManager expansion limit check as for external entities.
03227         // NOTE(review): the counter is not reset here, unlike the external branch - confirm.
03228         if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) {
03229             XMLCh expLimStr[32];
03230             XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager);
03231             emitError
03232             (
03233                 XMLErrs::EntityExpansionLimitExceeded
03234                 , expLimStr
03235             );
03236         }
03237 
03238         //  Do a start entity reference event.
03239         //
03240         //  <TBD> For now, we suppress them in att values. Later, when
03241         //  the stuff is in place to correctly allow DOM to handle them
03242         //  we'll turn this back on.
03243         if (fDocHandler && !inAttVal)
03244             fDocHandler->startEntityReference(*decl);
03245 
03246         // If it starts with the XML string, then it's an error
03247         if (checkXMLDecl(true)) {
03248             emitError(XMLErrs::TextDeclNotLegalHere);
03249             fReaderMgr.skipPastChar(chCloseAngle);
03250         }
03251     }
03252     return EntityExp_Pushed;
03253 }
03254 //  Makes the grammar found for newGrammarNameSpace the current grammar and picks
03255 //  a validator that handles it. Returns false when no grammar gets activated.
03256 bool IGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace)
03257 {
03258     //  Ask the resolver first. With no hit, and unless DTD validation is being
03259     //  skipped, fall back to the DTD grammar (namespaces on with a DTD grammar).
03260     Grammar* chosen = fGrammarResolver->getGrammar(newGrammarNameSpace);
03261     if (!chosen && !fSkipDTDValidation)
03262         chosen = fDTDGrammar;
03263     if (!chosen)
03264         return false;
03265 
03266     const Grammar::GrammarType chosenType = chosen->getGrammarType();
03267     if (chosenType == Grammar::DTDGrammarType)
03268     {
03269         // A DTD grammar is never activated while DTD validation is skipped
03270         if (fSkipDTDValidation)
03271             return false;
03272         //  The validator has to handle DTDs: a user supplied validator that
03273         //  does not is a runtime error, otherwise switch to the DTD validator.
03274         if (!fValidator->handlesDTD())
03275         {
03276             if (fValidatorFromUser)
03277                 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager);
03278             else
03279                 fValidator = fDTDValidator;
03280         }
03281     }
03282     else if (chosenType == Grammar::SchemaGrammarType && !fValidator->handlesSchema())
03283     {
03284         // Same rule for a schema grammar and the schema validator
03285         if (fValidatorFromUser)
03286             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
03287         else
03288             fValidator = fSchemaValidator;
03289     }
03290 
03291     // Record the switch and hand the grammar to the active validator
03292     fGrammarType = chosenType;
03293     fGrammar = chosen;
03294     fValidator->setGrammar(fGrammar);
03295     return true;
03296 }
03297 
03298 //  Advances the content model state kept for parentElemDepth by 'element' and
03299 //  returns true if the matching wildcard is lax (validate only if the element is
03300 //  found). A skip wildcard (no validation) turns fValidate off instead.
03301 bool IGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
03302                                         const XMLContentModel* const cm,
03303                                         const XMLSize_t parentElemDepth)
03304 {
03305     bool skipThisOne = false;
03306     bool laxThisOne = false;
03307     unsigned int elementURI = element->getURI();
03308     unsigned int currState = fElemState[parentElemDepth];
03309     unsigned int currLoop = fElemLoopState[parentElemDepth];
03310     // This depth is already in the invalid state: nothing left to match
03311     if (currState == XMLContentModel::gInvalidTrans) {
03312         return laxThisOne;
03313     }
03314 
03315     SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool);
03316 
03317     if (cv) {
03318         XMLSize_t i = 0;
03319         XMLSize_t leafCount = cv->getLeafCount();
03320         unsigned int nextState = 0;
03321         // Stop at the first leaf that accepts the element with a valid transition
03322         for (; i < leafCount; i++) {
03323 
03324             QName* fElemMap = cv->getLeafNameAt(i);
03325             unsigned int uri = fElemMap->getURI();
03326             ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
03327 
03328             if (type == ContentSpecNode::Leaf) {
03329                 if (((uri == elementURI)
03330                       && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart()))
03331                     || comparator.isEquivalentTo(element, fElemMap)) {
03332 
03333                     nextState = cm->getNextState(currState, i);
03334 
03335                     if (nextState != XMLContentModel::gInvalidTrans)
03336                         break;
03337                 }
03338             } else if ((type & 0x0f) == ContentSpecNode::Any) {
03339                 nextState = cm->getNextState(currState, i);
03340                 if (nextState != XMLContentModel::gInvalidTrans)
03341                     break;
03342             }
03343             else if ((type & 0x0f) == ContentSpecNode::Any_Other) {
03344                 if (uri != elementURI && elementURI != fEmptyNamespaceId) {
03345                     nextState = cm->getNextState(currState, i);
03346                     if (nextState != XMLContentModel::gInvalidTrans)
03347                         break;
03348                 }
03349             }
03350             else if ((type & 0x0f) == ContentSpecNode::Any_NS) {
03351                 if (uri == elementURI) {
03352                     nextState = cm->getNextState(currState, i);
03353                     if (nextState != XMLContentModel::gInvalidTrans)
03354                         break;
03355                 }
03356             }
03357 
03358         } // for
03359         // The loop ran to the end without a break: mark this depth as invalid
03360         if (i == leafCount) { // no match
03361             fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
03362             fElemLoopState[parentElemDepth] = 0;
03363             return laxThisOne;
03364         }
03365         // handleRepetitions can still reject the transition; it also supplies nextLoop
03366         unsigned int nextLoop = 0;
03367         if(!cm->handleRepetitions(element, currState, currLoop, nextState, nextLoop, i, &comparator)) {
03368             fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans;
03369             fElemLoopState[parentElemDepth] = 0;
03370             return laxThisOne;
03371         }
03372         // For a wildcard leaf, the full type value tells the skip and lax variants apart
03373         ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i);
03374         if ((type & 0x0f) == ContentSpecNode::Any ||
03375             (type & 0x0f) == ContentSpecNode::Any_Other ||
03376             (type & 0x0f) == ContentSpecNode::Any_NS)
03377         {
03378             if (type == ContentSpecNode::Any_Skip ||
03379                 type == ContentSpecNode::Any_NS_Skip ||
03380                 type == ContentSpecNode::Any_Other_Skip) {
03381                 skipThisOne = true;
03382             }
03383             else if (type == ContentSpecNode::Any_Lax ||
03384                      type == ContentSpecNode::Any_NS_Lax ||
03385                      type == ContentSpecNode::Any_Other_Lax) {
03386                 laxThisOne = true;
03387             }
03388         }
03389         fElemState[parentElemDepth] = nextState;
03390         fElemLoopState[parentElemDepth] = nextLoop;
03391     } // if
03392     // Skip: switch validation off and record that on the element stack
03393     if (skipThisOne) {
03394         fValidate = false;
03395         fElemStack.setValidationFlag(fValidate);
03396     }
03397 
03398     return laxThisOne;
03399 }
03400 
03401 
03402 //  Tests the namespace id uriId against the attribute wildcard attWildCard.
03403 //  Returns true when the wildcard admits it; skipThisOne (no validation) or
03404 //  laxThisOne (validate only if the attribute is found) is then set as needed.
03405 bool IGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne)
03406 {
03407     const XMLAttDef::AttTypes wildCardKind = attWildCard->getType();
03408     skipThisOne = false;
03409     laxThisOne = false;
03410 
03411     // Work out whether the wildcard admits this namespace URI
03412     bool uriAccepted = false;
03413     switch (wildCardKind)
03414     {
03415     case XMLAttDef::Any_Any:
03416         uriAccepted = true;
03417         break;
03418     case XMLAttDef::Any_Other:
03419         // Neither the wildcard's own URI nor the empty namespace qualifies
03420         uriAccepted = (attWildCard->getAttName()->getURI() != uriId) && (uriId != fEmptyNamespaceId);
03421         break;
03422     case XMLAttDef::Any_List:
03423         {
03424             // The URI has to appear in the wildcard's namespace list
03425             ValueVectorOf<unsigned int>* uriList = attWildCard->getNamespaceList();
03426             const XMLSize_t uriCount = uriList ? uriList->size() : 0;
03427             for (XMLSize_t idx = 0; idx < uriCount; ++idx)
03428                 if (uriList->elementAt(idx) == uriId)
03429                     uriAccepted = true;
03430         }
03431         break;
03432     default:
03433         break;
03434     }
03435     if (!uriAccepted)
03436         return false;
03437     // On a match the wildcard's default type selects skip or lax handling
03438     const XMLAttDef::DefAttTypes contentsMode = attWildCard->getDefaultType();
03439     skipThisOne = (contentsMode == XMLAttDef::ProcessContents_Skip);
03440     laxThisOne = (contentsMode == XMLAttDef::ProcessContents_Lax);
03441     return true;
03442 }
03443 //  Returns the attribute definition list to use for an element.
03444 inline XMLAttDefList& getAttDefList(bool              isSchemaGrammar
03445                                   , ComplexTypeInfo*  currType
03446                                   , XMLElementDecl*   elemDecl)
03447 {
03448     //  Without a schema grammar, or without type info, use the element
03449     //  declaration's own list; otherwise the list comes from the type info.
03450     if (!isSchemaGrammar || !currType)
03451         return elemDecl->getAttDefList();
03452     return currType->getAttDefList(); }
03453 
03454 XERCES_CPP_NAMESPACE_END