GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: IGXMLScanner2.cpp 925236 2010-03-19 14:29:47Z borisk $ 00020 */ 00021 00022 // --------------------------------------------------------------------------- 00023 // This file holds some of the grunt work methods of IGXMLScanner.cpp to keep 00024 // it a little more readable. 
00025 // --------------------------------------------------------------------------- 00026 00027 // --------------------------------------------------------------------------- 00028 // Includes 00029 // --------------------------------------------------------------------------- 00030 #include <xercesc/internal/IGXMLScanner.hpp> 00031 #include <xercesc/internal/EndOfEntityException.hpp> 00032 #include <xercesc/util/UnexpectedEOFException.hpp> 00033 #include <xercesc/util/XMLUri.hpp> 00034 #include <xercesc/framework/LocalFileInputSource.hpp> 00035 #include <xercesc/framework/URLInputSource.hpp> 00036 #include <xercesc/framework/XMLDocumentHandler.hpp> 00037 #include <xercesc/framework/XMLEntityHandler.hpp> 00038 #include <xercesc/framework/XMLPScanToken.hpp> 00039 #include <xercesc/framework/XMLRefInfo.hpp> 00040 #include <xercesc/framework/XMLGrammarPool.hpp> 00041 #include <xercesc/framework/psvi/PSVIAttributeList.hpp> 00042 #include <xercesc/framework/psvi/PSVIElement.hpp> 00043 #include <xercesc/framework/psvi/XSAnnotation.hpp> 00044 #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp> 00045 #include <xercesc/validators/DTD/DTDGrammar.hpp> 00046 #include <xercesc/validators/DTD/DTDValidator.hpp> 00047 #include <xercesc/validators/DTD/XMLDTDDescriptionImpl.hpp> 00048 #include <xercesc/validators/datatype/DatatypeValidator.hpp> 00049 #include <xercesc/validators/schema/XMLSchemaDescriptionImpl.hpp> 00050 #include <xercesc/validators/schema/SchemaGrammar.hpp> 00051 #include <xercesc/validators/schema/SchemaValidator.hpp> 00052 #include <xercesc/validators/schema/TraverseSchema.hpp> 00053 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp> 00054 #include <xercesc/validators/schema/XSDDOMParser.hpp> 00055 #include <xercesc/validators/schema/identity/IdentityConstraintHandler.hpp> 00056 #include <xercesc/validators/schema/identity/ValueStore.hpp> 00057 #include <xercesc/util/XMLStringTokenizer.hpp> 00058 00059 XERCES_CPP_NAMESPACE_BEGIN 
00060 00061 inline XMLAttDefList& getAttDefList(bool isSchemaGrammar 00062 , ComplexTypeInfo* currType 00063 , XMLElementDecl* elemDecl); 00064 00065 // --------------------------------------------------------------------------- 00066 // IGXMLScanner: Private helper methods 00067 // --------------------------------------------------------------------------- 00068 00069 // This method is called from scanStartTagNS() to build up the list of 00070 // XMLAttr objects that will be passed out in the start tag callout. We 00071 // get the key/value pairs from the raw scan of explicitly provided attrs, 00072 // which have not been normalized. And we get the element declaration from 00073 // which we will get any defaulted or fixed attribute defs and add those 00074 // in as well. 00075 XMLSize_t 00076 IGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs 00077 , const XMLSize_t attCount 00078 , XMLElementDecl* elemDecl 00079 , RefVectorOf<XMLAttr>& toFill) 00080 { 00081 // If doing DTD's, Ask the element to clear the 'provided' flag on all of the att defs 00082 // that it owns, and to return us a boolean indicating whether it has 00083 // any defs. If schemas are being validated, the complexType 00084 // at the top of the SchemaValidator's stack will 00085 // know what's best. REVISIT: don't modify grammar at all; eliminate 00086 // this step... 00087 ComplexTypeInfo *currType = 0; 00088 DatatypeValidator *currDV = 0; 00089 if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType && fValidate) 00090 { 00091 currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 00092 if (!currType) { 00093 currDV = ((SchemaValidator*)fValidator)->getCurrentDatatypeValidator(); 00094 } 00095 } 00096 00097 const bool hasDefs = (currType && fValidate) 00098 ? 
currType->hasAttDefs() 00099 : elemDecl->hasAttDefs(); 00100 00101 // another set of attributes; increment element counter 00102 fElemCount++; 00103 00104 // If there are no expliclitily provided attributes and there are no 00105 // defined attributes for the element, the we don't have anything to do. 00106 // So just return zero in this case. 00107 if (!hasDefs && !attCount) 00108 return 0; 00109 00110 // Keep up with how many attrs we end up with total 00111 XMLSize_t retCount = 0; 00112 00113 // And get the current size of the output vector. This lets us use 00114 // existing elements until we fill it, then start adding new ones. 00115 const XMLSize_t curAttListSize = toFill.size(); 00116 00117 // We need a buffer into which raw scanned attribute values will be 00118 // normalized. 00119 XMLBufBid bbNormal(&fBufMgr); 00120 XMLBuffer& normBuf = bbNormal.getBuffer(); 00121 00122 // 00123 // Decide if to use hash table to do duplicate checking 00124 // 00125 bool toUseHashTable = false; 00126 if (fGrammarType == Grammar::DTDGrammarType) 00127 { 00128 setAttrDupChkRegistry(attCount, toUseHashTable); 00129 } 00130 00131 XMLBufBid bbPrefix(&fBufMgr); 00132 XMLBuffer& prefixBuf = bbPrefix.getBuffer(); 00133 00134 // Loop through our explicitly provided attributes, which are in the raw 00135 // scanned form, and build up XMLAttr objects. 00136 XMLSize_t index; 00137 const XMLCh* prefPtr, *suffPtr; 00138 for (index = 0; index < attCount; index++) 00139 { 00140 PSVIItem::VALIDITY_STATE attrValid = PSVIItem::VALIDITY_VALID; 00141 PSVIItem::ASSESSMENT_TYPE attrAssessed = PSVIItem::VALIDATION_FULL; 00142 const KVStringPair* curPair = providedAttrs.elementAt(index); 00143 00144 // We have to split the name into its prefix and name parts. Then 00145 // we map the prefix to its URI. 
00146 const XMLCh* const namePtr = curPair->getKey(); 00147 00148 const int colonInd = fRawAttrColonList[index]; 00149 unsigned int uriId; 00150 if (colonInd != -1) 00151 { 00152 prefixBuf.set(namePtr, colonInd); 00153 prefPtr = prefixBuf.getRawBuffer(); 00154 suffPtr = namePtr + colonInd + 1; 00155 // Map the prefix to a URI id 00156 uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute); 00157 } 00158 else 00159 { 00160 // No colon, so we just have a name with no prefix 00161 prefPtr = XMLUni::fgZeroLenString; 00162 suffPtr = namePtr; 00163 // an empty prefix is always the empty namespace, when dealing with attributes 00164 uriId = fEmptyNamespaceId; 00165 } 00166 00167 // If the uri comes back as the xmlns or xml URI or its just a name 00168 // and that name is 'xmlns', then we handle it specially. So set a 00169 // boolean flag that lets us quickly below know which we are dealing 00170 // with. 00171 const bool isNSAttr = (uriId == fEmptyNamespaceId)? 00172 XMLString::equals(suffPtr, XMLUni::fgXMLNSString) : 00173 (uriId == fXMLNSNamespaceId || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)); 00174 00175 00176 // If its not a special case namespace attr of some sort, then we 00177 // do normal checking and processing. 
00178 XMLAttDef::AttTypes attType = XMLAttDef::CData; 00179 DatatypeValidator *attrValidator = 0; 00180 PSVIAttribute *psviAttr = 0; 00181 bool otherXSI = false; 00182 00183 if (isNSAttr && fGrammarType == Grammar::SchemaGrammarType) 00184 { 00185 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId)) 00186 { 00187 emitError 00188 ( 00189 XMLErrs::AttrAlreadyUsedInSTag 00190 , namePtr 00191 , elemDecl->getFullName() 00192 ); 00193 fPSVIElemContext.fErrorOccurred = true; 00194 } 00195 else 00196 { 00197 bool ValueValidate = false; 00198 bool tokenizeBuffer = false; 00199 00200 if (uriId == fXMLNSNamespaceId) 00201 { 00202 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); 00203 } 00204 else if (XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI)) 00205 { 00206 if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)) 00207 { 00208 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN); 00209 00210 ValueValidate = true; 00211 } 00212 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION)) 00213 { 00214 // use anyURI as the validator 00215 // tokenize the data and use the anyURI data for each piece 00216 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); 00217 //We should validate each value in the schema location however 00218 //this lead to a performance degradation of around 4%. Since 00219 //the first value of each pair needs to match what is in the 00220 //schema document and the second value needs to be valid in 00221 //order to open the document we won't validate it. Need to 00222 //do performance analysis of the anyuri datatype. 
00223 //ValueValidate = true; 00224 ValueValidate = false; 00225 tokenizeBuffer = true; 00226 } 00227 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION)) 00228 { 00229 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYURI); 00230 //We should validate this value however 00231 //this lead to a performance degradation of around 4%. Since 00232 //the value needs to be valid in 00233 //order to open the document we won't validate it. Need to 00234 //do performance analysis of the anyuri datatype. 00235 //ValueValidate = true; 00236 ValueValidate = false; 00237 } 00238 else if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) 00239 { 00240 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME); 00241 00242 ValueValidate = true; 00243 } 00244 else { 00245 otherXSI = true; 00246 } 00247 } 00248 00249 if (!otherXSI) { 00250 normalizeAttRawValue 00251 ( 00252 namePtr 00253 , curPair->getValue() 00254 , normBuf 00255 ); 00256 00257 if (fValidate && attrValidator && ValueValidate) 00258 { 00259 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(attrValidator, normBuf.getRawBuffer(), normBuf, true); 00260 00261 ValidationContext* const theContext = 00262 getValidationContext(); 00263 00264 if (theContext) 00265 { 00266 try 00267 { 00268 if (tokenizeBuffer) { 00269 XMLStringTokenizer tokenizer(normBuf.getRawBuffer(), fMemoryManager); 00270 while (tokenizer.hasMoreTokens()) { 00271 attrValidator->validate( 00272 tokenizer.nextToken(), 00273 theContext, 00274 fMemoryManager); 00275 } 00276 } 00277 else { 00278 attrValidator->validate( 00279 normBuf.getRawBuffer(), 00280 theContext, 00281 fMemoryManager); 00282 } 00283 } 00284 catch (const XMLException& idve) 00285 { 00286 fValidator->emitError (XMLValid::DatatypeError, idve.getCode(), idve.getMessage()); 00287 } 00288 } 00289 } 00290 00291 if(getPSVIHandler()) 00292 { 00293 psviAttr = 
fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId)); 00294 XSSimpleTypeDefinition *validatingType = (attrValidator) 00295 ? (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator) 00296 : 0; 00297 // no attribute declarations for these... 00298 psviAttr->reset( 00299 fRootElemName 00300 , PSVIItem::VALIDITY_NOTKNOWN 00301 , PSVIItem::VALIDATION_NONE 00302 , validatingType 00303 , 0 00304 , 0 00305 , false 00306 , 0 00307 , attrValidator 00308 ); 00309 } 00310 } 00311 } 00312 } 00313 00314 if (!isNSAttr || fGrammarType == Grammar::DTDGrammarType || otherXSI) 00315 { 00316 // Some checking for attribute wild card first (for schema) 00317 bool laxThisOne = false; 00318 bool skipThisOne = false; 00319 00320 XMLAttDef* attDefForWildCard = 0; 00321 XMLAttDef* attDef = 0; 00322 00323 if (fGrammarType == Grammar::SchemaGrammarType) { 00324 00325 //retrieve the att def 00326 SchemaAttDef* attWildCard = 0; 00327 if (currType) { 00328 attDef = currType->getAttDef(suffPtr, uriId); 00329 attWildCard = currType->getAttWildCard(); 00330 } 00331 else if (!currDV) { // check explicitly-set wildcard 00332 attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, uriId); 00333 attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard(); 00334 } 00335 00336 // if not found or faulted in - check for a matching wildcard attribute 00337 // if no matching wildcard attribute, check (un)qualifed cases and flag 00338 // appropriate errors 00339 if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) { 00340 00341 if (attWildCard) { 00342 //if schema, see if we should lax or skip the validation of this attribute 00343 if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) { 00344 00345 if(!skipThisOne) 00346 { 00347 SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId)); 00348 if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) { 00349 RefHashTableOf<XMLAttDef>* 
attRegistry = sGrammar->getAttributeDeclRegistry(); 00350 if (attRegistry) { 00351 attDefForWildCard = attRegistry->get(suffPtr); 00352 } 00353 } 00354 } 00355 } 00356 } 00357 else if (currType) { 00358 // not found, see if the attDef should be qualified or not 00359 if (uriId == fEmptyNamespaceId) { 00360 attDef = currType->getAttDef(suffPtr 00361 , fURIStringPool->getId(fGrammar->getTargetNamespace())); 00362 if (fValidate 00363 && attDef 00364 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { 00365 // the attribute should be qualified 00366 fValidator->emitError 00367 ( 00368 XMLValid::AttributeNotQualified 00369 , attDef->getFullName() 00370 ); 00371 if(fGrammarType == Grammar::SchemaGrammarType) { 00372 fPSVIElemContext.fErrorOccurred = true; 00373 if (getPSVIHandler()) 00374 { 00375 attrValid = PSVIItem::VALIDITY_INVALID; 00376 } 00377 } 00378 } 00379 } 00380 else { 00381 attDef = currType->getAttDef(suffPtr 00382 , fEmptyNamespaceId); 00383 if (fValidate 00384 && attDef 00385 && attDef->getCreateReason() != XMLAttDef::JustFaultIn) { 00386 // the attribute should be qualified 00387 fValidator->emitError 00388 ( 00389 XMLValid::AttributeNotUnQualified 00390 , attDef->getFullName() 00391 ); 00392 if(fGrammarType == Grammar::SchemaGrammarType) { 00393 fPSVIElemContext.fErrorOccurred = true; 00394 if (getPSVIHandler()) 00395 { 00396 attrValid = PSVIItem::VALIDITY_INVALID; 00397 } 00398 } 00399 } 00400 } 00401 } 00402 } 00403 } 00404 00405 // Find this attribute within the parent element. We pass both 00406 // the uriID/name and the raw QName buffer, since we don't know 00407 // how the derived validator and its elements store attributes. 
00408 else 00409 { 00410 if(fGrammarType == Grammar::DTDGrammarType) 00411 attDef = ((DTDElementDecl *)elemDecl)->getAttDef ( namePtr); 00412 } 00413 00414 // now need to prepare for duplicate detection 00415 if(attDef) 00416 { 00417 unsigned int *curCountPtr = fAttDefRegistry->get(attDef); 00418 if(!curCountPtr) 00419 { 00420 curCountPtr = getNewUIntPtr(); 00421 *curCountPtr = fElemCount; 00422 fAttDefRegistry->put(attDef, curCountPtr); 00423 } 00424 else if(*curCountPtr < fElemCount) 00425 *curCountPtr = fElemCount; 00426 else 00427 { 00428 emitError 00429 ( 00430 XMLErrs::AttrAlreadyUsedInSTag 00431 , attDef->getFullName() 00432 , elemDecl->getFullName() 00433 ); 00434 fPSVIElemContext.fErrorOccurred = true; 00435 } 00436 } 00437 else 00438 { 00439 if(fGrammarType == Grammar::DTDGrammarType) 00440 { 00441 if(!fUndeclaredAttrRegistry->putIfNotPresent(namePtr, 0)) 00442 { 00443 emitError 00444 ( 00445 XMLErrs::AttrAlreadyUsedInSTag 00446 , namePtr 00447 , elemDecl->getFullName() 00448 ); 00449 } 00450 } 00451 else // schema grammar 00452 { 00453 if(!fUndeclaredAttrRegistry->putIfNotPresent(suffPtr, uriId)) 00454 { 00455 emitError 00456 ( 00457 XMLErrs::AttrAlreadyUsedInSTag 00458 , namePtr 00459 , elemDecl->getFullName() 00460 ); 00461 fPSVIElemContext.fErrorOccurred = true; 00462 } 00463 } 00464 } 00465 00466 if(fGrammarType == Grammar::SchemaGrammarType ) 00467 { 00468 // if we've found either an attDef or an attDefForWildCard, 00469 // then we're doing full validation and it may still be valid. 
00470 if(!attDef && !attDefForWildCard) 00471 { 00472 if(!laxThisOne && !skipThisOne) 00473 { 00474 fPSVIElemContext.fErrorOccurred = true; 00475 } 00476 if(getPSVIHandler()) 00477 { 00478 if(!laxThisOne && !skipThisOne) 00479 { 00480 attrValid = PSVIItem::VALIDITY_INVALID; 00481 } 00482 else if(laxThisOne) 00483 { 00484 attrValid = PSVIItem::VALIDITY_NOTKNOWN; 00485 attrAssessed = PSVIItem::VALIDATION_PARTIAL; 00486 } 00487 else 00488 { 00489 attrValid = PSVIItem::VALIDITY_NOTKNOWN; 00490 attrAssessed = PSVIItem::VALIDATION_NONE; 00491 } 00492 } 00493 } 00494 } 00495 00496 bool errorCondition = fValidate && !attDefForWildCard && !attDef; 00497 if (errorCondition && !skipThisOne && !laxThisOne) 00498 { 00499 // 00500 // Its not valid for this element, so issue an error if we are 00501 // validating. 00502 // 00503 XMLBufBid bbMsg(&fBufMgr); 00504 XMLBuffer& bufMsg = bbMsg.getBuffer(); 00505 if (uriId != fEmptyNamespaceId) { 00506 XMLBufBid bbURI(&fBufMgr); 00507 XMLBuffer& bufURI = bbURI.getBuffer(); 00508 00509 getURIText(uriId, bufURI); 00510 00511 bufMsg.append(chOpenCurly); 00512 bufMsg.append(bufURI.getRawBuffer()); 00513 bufMsg.append(chCloseCurly); 00514 } 00515 bufMsg.append(suffPtr); 00516 fValidator->emitError 00517 ( 00518 XMLValid::AttNotDefinedForElement 00519 , bufMsg.getRawBuffer() 00520 , elemDecl->getFullName() 00521 ); 00522 } 00523 00524 // Now normalize the raw value since we have the attribute type. We 00525 // don't care about the return status here. If it failed, an error 00526 // was issued, which is all we care about. 00527 if (attDefForWildCard) { 00528 normalizeAttValue( 00529 attDefForWildCard, namePtr, curPair->getValue(), normBuf 00530 ); 00531 00532 // If we found an attdef for this one, then lets validate it. 
00533 const XMLCh* xsNormalized = normBuf.getRawBuffer(); 00534 DatatypeValidator* tempDV = ((SchemaAttDef*) attDefForWildCard)->getDatatypeValidator(); 00535 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 00536 { 00537 // normalize the attribute according to schema whitespace facet 00538 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true); 00539 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 00540 00541 if (fNormalizeData && fValidate) { 00542 normBuf.set(xsNormalized); 00543 } 00544 } 00545 00546 if (fValidate ) { 00547 fValidator->validateAttrValue( 00548 attDefForWildCard, xsNormalized, false, elemDecl 00549 ); 00550 attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator(); 00551 if(((SchemaValidator *)fValidator)->getErrorOccurred()) 00552 { 00553 fPSVIElemContext.fErrorOccurred = true; 00554 if(getPSVIHandler()) 00555 attrValid = PSVIItem::VALIDITY_INVALID; 00556 } 00557 } 00558 else { // no decl; default DOMTypeInfo to anySimpleType 00559 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); 00560 } 00561 00562 // Save the type for later use 00563 attType = attDefForWildCard->getType(); 00564 } 00565 else { 00566 normalizeAttValue( 00567 attDef, namePtr, curPair->getValue(), normBuf 00568 ); 00569 00570 // If we found an attdef for this one, then lets validate it. 
00571 if (attDef) 00572 { 00573 const XMLCh* xsNormalized = normBuf.getRawBuffer(); 00574 if (fGrammarType == Grammar::SchemaGrammarType) 00575 { 00576 DatatypeValidator* tempDV = ((SchemaAttDef*) attDef)->getDatatypeValidator(); 00577 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 00578 { 00579 // normalize the attribute according to schema whitespace facet 00580 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf, true); 00581 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 00582 if (fNormalizeData && fValidate && !skipThisOne) { 00583 normBuf.set(xsNormalized); 00584 } 00585 } 00586 } 00587 00588 if (fValidate && !skipThisOne) 00589 { 00590 fValidator->validateAttrValue( 00591 attDef, xsNormalized, false, elemDecl 00592 ); 00593 00594 if(fGrammarType == Grammar::SchemaGrammarType) 00595 { 00596 attrValidator = ((SchemaValidator*)fValidator)->getMostRecentAttrValidator(); 00597 if(((SchemaValidator *)fValidator)->getErrorOccurred()) 00598 { 00599 fPSVIElemContext.fErrorOccurred = true; 00600 if (getPSVIHandler()) 00601 attrValid = PSVIItem::VALIDITY_INVALID; 00602 } 00603 } 00604 } 00605 else if(fGrammarType == Grammar::SchemaGrammarType) { 00606 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); 00607 } 00608 } 00609 else // no attDef at all; default to anySimpleType 00610 { 00611 if(fGrammarType == Grammar::SchemaGrammarType) { 00612 attrValidator = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_ANYSIMPLETYPE); 00613 } 00614 } 00615 00616 // Save the type for later use 00617 if (attDef) 00618 { 00619 attType = attDef->getType(); 00620 } 00621 } 00622 00623 // now fill in the PSVIAttributes entry for this attribute: 00624 if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType) 00625 { 00626 psviAttr = fPSVIAttrList->getPSVIAttributeToFill(suffPtr, fURIStringPool->getValueForId(uriId)); 00627 SchemaAttDef *actualAttDef 
= 0; 00628 if(attDef) 00629 actualAttDef = (SchemaAttDef *)attDef; 00630 else if (attDefForWildCard) 00631 actualAttDef = (SchemaAttDef *)attDefForWildCard; 00632 if(actualAttDef) 00633 { 00634 XSAttributeDeclaration *attrDecl = (XSAttributeDeclaration *)fModel->getXSObject(actualAttDef); 00635 DatatypeValidator * attrDataType = actualAttDef->getDatatypeValidator(); 00636 XSSimpleTypeDefinition *validatingType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrDataType); 00637 if(attrValid != PSVIItem::VALIDITY_VALID) 00638 { 00639 psviAttr->reset 00640 ( 00641 fRootElemName 00642 , attrValid 00643 , attrAssessed 00644 , validatingType 00645 , 0 00646 , actualAttDef->getValue() 00647 , false 00648 , attrDecl 00649 , 0 00650 ); 00651 } 00652 else 00653 { 00654 XSSimpleTypeDefinition *memberType = 0; 00655 if(validatingType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION) 00656 memberType = (XSSimpleTypeDefinition *)fModel->getXSObject(attrValidator); 00657 psviAttr->reset 00658 ( 00659 fRootElemName 00660 , attrValid 00661 , attrAssessed 00662 , validatingType 00663 , memberType 00664 , actualAttDef->getValue() 00665 , false 00666 , attrDecl 00667 , (memberType)?attrValidator:attrDataType 00668 ); 00669 } 00670 } 00671 else 00672 { 00673 psviAttr->reset 00674 ( 00675 fRootElemName 00676 , attrValid 00677 , attrAssessed 00678 , 0 00679 , 0 00680 , 0 00681 , false 00682 , 0 00683 , 0 00684 ); 00685 } 00686 } 00687 } 00688 00689 // Add this attribute to the attribute list that we use to pass them 00690 // to the handler. We reuse its existing elements but expand it as 00691 // required. 00692 XMLAttr* curAttr; 00693 00694 // check for duplicate namespace attributes: 00695 // by checking for qualified names with the same local part and with prefixes 00696 // which have been bound to namespace names that are identical. 
00697 if (fGrammarType == Grammar::DTDGrammarType) { 00698 if (!toUseHashTable) 00699 { 00700 for (XMLSize_t attrIndex=0; attrIndex < retCount; attrIndex++) { 00701 curAttr = toFill.elementAt(attrIndex); 00702 if (uriId == curAttr->getURIId() && 00703 XMLString::equals(suffPtr, curAttr->getName())) { 00704 emitError 00705 ( 00706 00707 XMLErrs::AttrAlreadyUsedInSTag 00708 , curAttr->getName() 00709 , elemDecl->getFullName() 00710 ); 00711 } 00712 } 00713 } 00714 else 00715 { 00716 if (fAttrDupChkRegistry->containsKey((void*)suffPtr, uriId)) 00717 { 00718 emitError 00719 ( 00720 XMLErrs::AttrAlreadyUsedInSTag 00721 , suffPtr 00722 , elemDecl->getFullName() 00723 ); 00724 } 00725 } 00726 } 00727 00728 if (retCount >= curAttListSize) 00729 { 00730 curAttr = new (fMemoryManager) XMLAttr 00731 ( 00732 uriId 00733 , suffPtr 00734 , prefPtr 00735 , normBuf.getRawBuffer() 00736 , attType 00737 , true 00738 , fMemoryManager 00739 ); 00740 toFill.addElement(curAttr); 00741 } 00742 else 00743 { 00744 curAttr = toFill.elementAt(retCount); 00745 curAttr->set 00746 ( 00747 uriId 00748 , suffPtr 00749 , prefPtr 00750 , normBuf.getRawBuffer() 00751 , attType 00752 ); 00753 curAttr->setSpecified(true); 00754 } 00755 00756 if (toUseHashTable) 00757 { 00758 fAttrDupChkRegistry->put((void*)suffPtr, uriId, curAttr); 00759 } 00760 00761 if(psviAttr) 00762 psviAttr->setValue(curAttr->getValue()); 00763 00764 // Bump the count of attrs in the list 00765 retCount++; 00766 } 00767 00768 // Now, if there are any attributes declared by this element, let's 00769 // go through them and make sure that any required ones are provided, 00770 // and fault in any fixed ones and defaulted ones that are not provided 00771 // literally. 
00772 if (hasDefs) 00773 { 00774 // Check after all specified attrs are scanned 00775 // (1) report error for REQUIRED attrs that are missing (V_TAGc) 00776 // (2) add default attrs if missing (FIXED and NOT_FIXED) 00777 00778 00779 XMLAttDefList &attDefList = getAttDefList(fGrammarType == Grammar::SchemaGrammarType, currType, elemDecl); 00780 00781 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++) 00782 { 00783 // Get the current att def, for convenience and its def type 00784 const XMLAttDef *curDef = &attDefList.getAttDef(i); 00785 const XMLAttDef::DefAttTypes defType = curDef->getDefaultType(); 00786 unsigned int *attCountPtr = fAttDefRegistry->get((void *)curDef); 00787 if (!attCountPtr || *attCountPtr < fElemCount) 00788 { // did not occur 00789 // note that since there is no attribute information 00790 // item present, there is no PSVI infoset to augment here *except* 00791 // that the element is invalid 00792 00793 //the attribute is not provided 00794 if (fValidate) 00795 { 00796 // If we are validating and its required, then an error 00797 if ((defType == XMLAttDef::Required) || 00798 (defType == XMLAttDef::Required_And_Fixed) ) 00799 00800 { 00801 fValidator->emitError 00802 ( 00803 XMLValid::RequiredAttrNotProvided 00804 , curDef->getFullName() 00805 ); 00806 if(fGrammarType == Grammar::SchemaGrammarType) 00807 { 00808 fPSVIElemContext.fErrorOccurred = true; 00809 } 00810 } 00811 else if ((defType == XMLAttDef::Default) || 00812 (defType == XMLAttDef::Fixed) ) 00813 { 00814 if (fStandalone && curDef->isExternal()) 00815 { 00816 // XML 1.0 Section 2.9 00817 // Document is standalone, so attributes must not be defaulted. 00818 fValidator->emitError(XMLValid::NoDefAttForStandalone, curDef->getFullName(), elemDecl->getFullName()); 00819 if(fGrammarType == Grammar::SchemaGrammarType) 00820 { 00821 fPSVIElemContext.fErrorOccurred = true; 00822 } 00823 } 00824 } 00825 } 00826 00827 // Fault in the value if needed, and bump the att count. 
00828 if ((defType == XMLAttDef::Default) 00829 || (defType == XMLAttDef::Fixed)) 00830 { 00831 // Let the validator pass judgement on the attribute value 00832 if (fValidate) 00833 { 00834 fValidator->validateAttrValue 00835 ( 00836 curDef 00837 , curDef->getValue() 00838 , false 00839 , elemDecl 00840 ); 00841 } 00842 00843 XMLAttr* curAtt; 00844 if (retCount >= curAttListSize) 00845 { 00846 curAtt = new (fMemoryManager) XMLAttr(fMemoryManager); 00847 fValidator->faultInAttr(*curAtt, *curDef); 00848 fAttrList->addElement(curAtt); 00849 } 00850 else 00851 { 00852 curAtt = fAttrList->elementAt(retCount); 00853 fValidator->faultInAttr(*curAtt, *curDef); 00854 } 00855 00856 if (fGrammarType == Grammar::DTDGrammarType) 00857 { 00858 // Map the new attribute's prefix to a URI id and store 00859 // that in the attribute object. 00860 curAtt->setURIId 00861 ( 00862 resolvePrefix(curAtt->getPrefix(), ElemStack::Mode_Attribute) 00863 ); 00864 } 00865 00866 // Indicate it was not explicitly specified and bump count 00867 curAtt->setSpecified(false); 00868 retCount++; 00869 if(getPSVIHandler() && fGrammarType == Grammar::SchemaGrammarType) 00870 { 00871 QName *attName = ((SchemaAttDef *)curDef)->getAttName(); 00872 PSVIAttribute *defAttrToFill = fPSVIAttrList->getPSVIAttributeToFill 00873 ( 00874 attName->getLocalPart(), fURIStringPool->getValueForId( attName->getURI()) 00875 ); 00876 XSAttributeDeclaration *defAttrDecl = (XSAttributeDeclaration *)fModel->getXSObject((void *)curDef); 00877 DatatypeValidator * attrDataType = ((SchemaAttDef *)curDef)->getDatatypeValidator(); 00878 XSSimpleTypeDefinition *defAttrType = 00879 (XSSimpleTypeDefinition*)fModel->getXSObject(attrDataType); 00880 // would have occurred during validation of default value 00881 if(((SchemaValidator *)fValidator)->getErrorOccurred()) 00882 { 00883 defAttrToFill->reset( 00884 fRootElemName 00885 , PSVIItem::VALIDITY_INVALID 00886 , PSVIItem::VALIDATION_FULL 00887 , defAttrType 00888 , 0 00889 , 
curDef->getValue() 00890 , true 00891 , defAttrDecl 00892 , 0 00893 ); 00894 } 00895 else 00896 { 00897 XSSimpleTypeDefinition *defAttrMemberType = 0; 00898 if(defAttrType->getVariety() == XSSimpleTypeDefinition::VARIETY_UNION) 00899 { 00900 defAttrMemberType = (XSSimpleTypeDefinition *)fModel->getXSObject 00901 ( 00902 ((SchemaValidator*)fValidator)->getMostRecentAttrValidator() 00903 ); 00904 } 00905 defAttrToFill->reset( 00906 fRootElemName 00907 , PSVIItem::VALIDITY_VALID 00908 , PSVIItem::VALIDATION_FULL 00909 , defAttrType 00910 , defAttrMemberType 00911 , curDef->getValue() 00912 , true 00913 , defAttrDecl 00914 , (defAttrMemberType)?((SchemaValidator *)fValidator)->getMostRecentAttrValidator():attrDataType 00915 ); 00916 } 00917 defAttrToFill->setValue(curDef->getValue()); 00918 } 00919 } 00920 } 00921 else if(attCountPtr) 00922 { 00923 //attribute is provided 00924 // (schema) report error for PROHIBITED attrs that are present (V_TAGc) 00925 if (defType == XMLAttDef::Prohibited && fValidate) 00926 { 00927 fValidator->emitError 00928 ( 00929 XMLValid::ProhibitedAttributePresent 00930 , curDef->getFullName() 00931 ); 00932 if(fGrammarType == Grammar::SchemaGrammarType) 00933 { 00934 fPSVIElemContext.fErrorOccurred = true; 00935 if (getPSVIHandler()) 00936 { 00937 QName *attQName = ((SchemaAttDef *)curDef)->getAttName(); 00938 // bad luck... 00939 PSVIAttribute *prohibitedAttr = fPSVIAttrList->getAttributePSVIByName 00940 ( 00941 attQName->getLocalPart(), 00942 fURIStringPool->getValueForId(attQName->getURI()) 00943 ); 00944 prohibitedAttr->updateValidity(PSVIItem::VALIDITY_INVALID); 00945 } 00946 } 00947 } 00948 } 00949 } 00950 } 00951 return retCount; 00952 } 00953 00954 00955 // This method will take a raw attribute value and normalize it according to 00956 // the rules of the attribute type. It will put the resulting value into the 00957 // passed buffer. 
00958 // 00959 // This code assumes that escaped characters in the original value (via char 00960 // refs) are prefixed by a 0xFFFF character. This is because some characters 00961 // are legal if escaped only. And some escape chars are not subject to 00962 // normalization rules. 00963 bool IGXMLScanner::normalizeAttValue( const XMLAttDef* const attDef 00964 , const XMLCh* const attName 00965 , const XMLCh* const value 00966 , XMLBuffer& toFill) 00967 { 00968 // A simple state value for a whitespace processing state machine 00969 enum States 00970 { 00971 InWhitespace 00972 , InContent 00973 }; 00974 00975 // Get the type and name 00976 const XMLAttDef::AttTypes type = (attDef)?attDef->getType():XMLAttDef::CData; 00977 00978 // Assume its going to go fine, and empty the target buffer in preperation 00979 bool retVal = true; 00980 toFill.reset(); 00981 00982 // Loop through the chars of the source value and normalize it according 00983 // to the type. 00984 XMLCh nextCh; 00985 const XMLCh* srcPtr = value; 00986 00987 if (type == XMLAttDef::CData || type > XMLAttDef::Notation) { 00988 // Get the next character from the source. We have to watch for 00989 // escaped characters (which are indicated by a 0xFFFF value followed 00990 // by the char that was escaped.) 00991 while ((nextCh = *srcPtr++)!=0) 00992 { 00993 switch(nextCh) 00994 { 00995 // Do we have an escaped character ? 
00996 case 0xFFFF: 00997 nextCh = *srcPtr++; 00998 break; 00999 case 0x09: 01000 case 0x0A: 01001 case 0x0D: 01002 // Check Validity Constraint for Standalone document declaration 01003 // XML 1.0, Section 2.9 01004 if (fStandalone && fValidate && attDef && attDef->isExternal()) 01005 { 01006 // Can't have a standalone document declaration of "yes" if attribute 01007 // values are subject to normalisation 01008 fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); 01009 } 01010 nextCh = chSpace; 01011 break; 01012 case chOpenAngle: 01013 // If its not escaped, then make sure its not a < character, which is 01014 // not allowed in attribute values. 01015 emitError(XMLErrs::BracketInAttrValue, attName); 01016 retVal = false; 01017 break; 01018 } 01019 01020 // Add this char to the target buffer 01021 toFill.append(nextCh); 01022 } 01023 } 01024 else { 01025 States curState = InContent; 01026 bool firstNonWS = false; 01027 // Get the next character from the source. We have to watch for 01028 // escaped characters (which are indicated by a 0xFFFF value followed 01029 // by the char that was escaped.) 01030 while ((nextCh = *srcPtr)!=0) 01031 { 01032 // Do we have an escaped character ? 01033 if (nextCh == 0xFFFF) 01034 { 01035 nextCh = *++srcPtr; 01036 } 01037 else if (nextCh == chOpenAngle) { 01038 // If its not escaped, then make sure its not a < character, which is 01039 // not allowed in attribute values. 
01040 emitError(XMLErrs::BracketInAttrValue, attName); 01041 retVal = false; 01042 } 01043 01044 if (curState == InWhitespace) 01045 { 01046 if (!fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) 01047 { 01048 if (firstNonWS) 01049 toFill.append(chSpace); 01050 curState = InContent; 01051 firstNonWS = true; 01052 } 01053 else 01054 { 01055 srcPtr++; 01056 continue; 01057 } 01058 } 01059 else if (curState == InContent) 01060 { 01061 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) 01062 { 01063 curState = InWhitespace; 01064 srcPtr++; 01065 01066 // Check Validity Constraint for Standalone document declaration 01067 // XML 1.0, Section 2.9 01068 if (fStandalone && fValidate && attDef && attDef->isExternal()) 01069 { 01070 if (!firstNonWS || (nextCh != chSpace) || (!*srcPtr) || fReaderMgr.getCurrentReader()->isWhitespace(*srcPtr)) 01071 { 01072 // Can't have a standalone document declaration of "yes" if attribute 01073 // values are subject to normalisation 01074 fValidator->emitError(XMLValid::NoAttNormForStandalone, attName); 01075 } 01076 } 01077 continue; 01078 } 01079 firstNonWS = true; 01080 } 01081 01082 // Add this char to the target buffer 01083 toFill.append(nextCh); 01084 01085 // And move up to the next character in the source 01086 srcPtr++; 01087 } 01088 } 01089 01090 return retVal; 01091 } 01092 01093 // This method will just normalize the input value as CDATA without 01094 // any standalone checking. 01095 bool IGXMLScanner::normalizeAttRawValue( const XMLCh* const attrName 01096 , const XMLCh* const value 01097 , XMLBuffer& toFill) 01098 { 01099 // Assume its going to go fine, and empty the target buffer in preperation 01100 bool retVal = true; 01101 toFill.reset(); 01102 01103 // Loop through the chars of the source value and normalize it according 01104 // to the type. 01105 bool escaped; 01106 XMLCh nextCh; 01107 const XMLCh* srcPtr = value; 01108 while (*srcPtr) 01109 { 01110 // Get the next character from the source. 
We have to watch for 01111 // escaped characters (which are indicated by a 0xFFFF value followed 01112 // by the char that was escaped.) 01113 nextCh = *srcPtr; 01114 escaped = (nextCh == 0xFFFF); 01115 if (escaped) 01116 nextCh = *++srcPtr; 01117 01118 // If its not escaped, then make sure its not a < character, which is 01119 // not allowed in attribute values. 01120 if (!escaped && (*srcPtr == chOpenAngle)) 01121 { 01122 emitError(XMLErrs::BracketInAttrValue, attrName); 01123 retVal = false; 01124 } 01125 01126 if (!escaped) 01127 { 01128 // NOTE: Yes this is a little redundant in that a 0x20 is 01129 // replaced with an 0x20. But its faster to do this (I think) 01130 // than checking for 9, A, and D separately. 01131 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) 01132 nextCh = chSpace; 01133 } 01134 01135 // Add this char to the target buffer 01136 toFill.append(nextCh); 01137 01138 // And move up to the next character in the source 01139 srcPtr++; 01140 } 01141 return retVal; 01142 } 01143 01144 // This method will reset the scanner data structures, and related plugged 01145 // in stuff, for a new scan session. We get the input source for the primary 01146 // XML entity, create the reader for it, and push it on the stack so that 01147 // upon successful return from here we are ready to go. 01148 void IGXMLScanner::scanReset(const InputSource& src) 01149 { 01150 // This call implicitly tells us that we are going to reuse the scanner 01151 // if it was previously used. So tell the validator to reset itself. 01152 // 01153 // But, if the fUseCacheGrammar flag is set, then don't reset it. 01154 // 01155 // NOTE: The ReaderMgr is flushed on the way out, because that is 01156 // required to insure that files are closed. 01157 fGrammarResolver->cacheGrammarFromParse(fToCacheGrammar); 01158 fGrammarResolver->useCachedGrammarInParse(fUseCachedGrammar); 01159 01160 // Clear transient schema info list. 
//
    fSchemaInfoList->removeAll ();

    // fModel may need updating, as fGrammarResolver could have cleaned it
    if(fModel && getPSVIHandler())
        fModel = fGrammarResolver->getXSModel();

    // Look for a DTD grammar already known to the resolver. The description
    // object is only needed for the lookup, hence the nested scope.
    {
        XMLDTDDescriptionImpl   theDTDDescription(XMLUni::fgDTDEntityString, fMemoryManager);
        fDTDGrammar = (DTDGrammar*) fGrammarResolver->getGrammar(&theDTDDescription);
    }

    // None found: create a fresh one and register it. Otherwise reuse the
    // existing grammar after resetting it.
    if (!fDTDGrammar) {

        fDTDGrammar = new (fGrammarPoolMemoryManager) DTDGrammar(fGrammarPoolMemoryManager);
        fGrammarResolver->putGrammar(fDTDGrammar);
    }
    else
        fDTDGrammar->reset();

    // Every scan starts out on the DTD grammar
    fGrammar = fDTDGrammar;
    fGrammarType = fGrammar->getGrammarType();
    fRootGrammar = 0;

    // A user-supplied validator is kept and configured according to what it
    // handles; otherwise the scanner's own DTD validator is installed.
    if (fValidatorFromUser) {
        if (fValidator->handlesDTD())
            fValidator->setGrammar(fGrammar);
        else if (fValidator->handlesSchema()) {

            ((SchemaValidator*) fValidator)->setErrorReporter(fErrorReporter);
            ((SchemaValidator*) fValidator)->setGrammarResolver(fGrammarResolver);
            ((SchemaValidator*) fValidator)->setExitOnFirstFatal(fExitOnFirstFatal);
        }
    }
    else {
        // set fValidator as fDTDValidator
        fValidator = fDTDValidator;
        fValidator->setGrammar(fGrammar);
    }

    // Reset validation: on from the start only for Val_Always
    fValidate = (fValScheme == Val_Always) ? true : false;

    // Ignore skipDTDValidation flag if no schema processing is taking place
    fSkipDTDValidation = fSkipDTDValidation && fDoSchema;

    //  And for all installed handlers, send reset events. This gives them
    //  a chance to flush any cached data.
    if (fDocHandler)
        fDocHandler->resetDocument();
    if (fEntityHandler)
        fEntityHandler->resetEntities();
    if (fErrorReporter)
        fErrorReporter->resetErrors();

    // Clear out the id reference list
    resetValidationContext();

    // Reset the Root Element Name
    fMemoryManager->deallocate(fRootElemName);//delete [] fRootElemName;
    fRootElemName = 0;

    // Reset IdentityConstraints
    if (fICHandler)
        fICHandler->reset();

    //  Reset the element stack, and give it the latest ids for the special
    //  URIs it has to know about.
    fElemStack.reset
    (
        fEmptyNamespaceId
        , fUnknownNamespaceId
        , fXMLNamespaceId
        , fXMLNSNamespaceId
    );

    // Look up the id of the XSI namespace URI once; a non-zero id is kept
    if (!fSchemaNamespaceId)
        fSchemaNamespaceId  = fURIStringPool->addOrFind(SchemaSymbols::fgURI_XSI);

    // Reset some status flags
    fInException = false;
    fStandalone = false;
    fErrorCount = 0;
    fHasNoDTD = true;
    fSeeXsi = false;

    // Reset PSVI context
    // note that we always need this around for DOMTypeInfo
    if (!fPSVIElement)
        fPSVIElement = new (fMemoryManager) PSVIElement(fMemoryManager);

    // Create the error stack on first use, otherwise just empty it
    if (!fErrorStack)
    {
        fErrorStack = new (fMemoryManager) ValueStackOf<bool>(8, fMemoryManager);
    }
    else
    {
        fErrorStack->removeAllElements();
    }

    resetPSVIElemContext();

    // Reset the validators. Both built-in validators are reset regardless of
    // which one is current; a user-supplied one is reset as well.
    fDTDValidator->reset();
    fDTDValidator->setErrorReporter(fErrorReporter);
    fSchemaValidator->reset();
    fSchemaValidator->setErrorReporter(fErrorReporter);
    fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal);
    fSchemaValidator->setGrammarResolver(fGrammarResolver);
    if (fValidatorFromUser)
        fValidator->reset();

    //  Handle the creation of the XML reader object for this input source.
    //  This will provide us with transcoding and basic lexing services.
    XMLReader* newReader = fReaderMgr.createReader
    (
        src
        , true
        , XMLReader::RefFrom_NonLiteral
        , XMLReader::Type_General
        , XMLReader::Source_External
        , fCalculateSrcOfs
        , fLowWaterMark
    );

    // No reader means the source could not be opened. Both cases throw a
    // RuntimeException; the source's own flag selects which message code.
    if (!newReader) {
        if (src.getIssueFatalErrorIfNotFound())
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
        else
            ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
    }

    // Push this reader onto the reader manager
    fReaderMgr.pushReader(newReader, 0);

    // and reset security-related things if necessary:
    if(fSecurityManager != 0)
    {
        fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
        fEntityExpansionCount = 0;
    }
    fElemCount = 0;
    // Once the pool has grown to 32 rows or more it is rebuilt from scratch
    // instead of merely being reset.
    if(fUIntPoolRowTotal >= 32)
    { // 8 KB tied up with validating attributes...
        fAttDefRegistry->removeAll();
        recreateUIntPool();
    }
    else
    {
        // note that this will implicitly reset the values of the hashtables,
        // though their buckets will still be tied up
        resetUIntPool();
    }
    fUndeclaredAttrRegistry->removeAll();
    fDTDElemNonDeclPool->removeAll();
}


//  This method is called between markup in content. It scans for character
//  data that is sent to the document handler. It watches for any markup
//  characters that would indicate that the character data has ended. It also
//  handles expansion of general and character entities.
//
//  sendData() is a local static helper for this method which handles some
//  code that must be done in three different places here.
//
//  NOTE(review): no sendData() helper is visible in this part of the file, so
//  the sentence above may be stale -- confirm against the rest of the file.
//  toSend holds the pending character data. An empty buffer is a no-op;
//  otherwise the buffer is reset before returning.
void IGXMLScanner::sendCharData(XMLBuffer& toSend)
{
    // If no data in the buffer, then nothing to do
    if (toSend.isEmpty())
        return;

    //  We do different things according to whether we are validating or
    //  not. If not, its always just characters; else, it depends on the
    //  current element's content model.
    if (fValidate)
    {
        // Get the raw data we need for the callback
        const XMLCh* rawBuf = toSend.getRawBuffer();
        XMLSize_t len = toSend.getLen();

        // Top of the element stack; only consulted on the DTD path below
        const ElemStack::StackElem* topElem = fElemStack.topElement();

        // Get the character data opts for the current element
        XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData;
        if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType)
        {
            // Schema: derive the options from the current complex type's
            // content type. Without a current type, AllCharData stands.
            ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo();
            if(currType)
            {
                SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType();
                if(modelType == SchemaElementDecl::Children ||
                   modelType == SchemaElementDecl::ElementOnlyEmpty)
                    charOpts = XMLElementDecl::SpacesOk;
                else if(modelType == SchemaElementDecl::Empty)
                    charOpts = XMLElementDecl::NoCharData;
            }
        } else // DTD grammar
            charOpts = topElem->fThisElement->getCharDataOpts();

        if (charOpts == XMLElementDecl::NoCharData)
        {
            // They definitely cannot handle any type of char data
            fValidator->emitError(XMLValid::NoCharDataInCM);
            //if(fGrammarType == Grammar::SchemaGrammarType)
            //{
            //   if (getPSVIHandler())
            //   {
            //        REVISIT:
            //        PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID);
            //    }
            //  }
        }
        else if (fReaderMgr.getCurrentReader()->isAllSpaces(rawBuf, len))
        {
            //  Its all spaces. So, if they can take spaces, then send it
            //  as ignorable whitespace. If they can handle any char data
            //  send it as characters.
            if (charOpts == XMLElementDecl::SpacesOk) {
                if (fDocHandler)
                    fDocHandler->ignorableWhitespace(rawBuf, len, false);
            }
            else if (charOpts == XMLElementDecl::AllCharData)
            {
                if (fGrammarType != Grammar::SchemaGrammarType)
                {
                    if (fDocHandler)
                        fDocHandler->docCharacters(rawBuf, len, false);
                }
                else
                {
                    // Schema path. The same sequence appears again in the
                    // non-whitespace branch below.
                    XMLSize_t xsLen;
                    const XMLCh* xsNormalized;
                    SchemaValidator *schemaValidator = (SchemaValidator *)fValidator;
                    DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
                    if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
                    {
                        // normalize the character according to schema whitespace facet
                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
                        xsNormalized = fWSNormalizeBuf.getRawBuffer();
                        xsLen = fWSNormalizeBuf.getLen();
                    }
                    else {
                        xsNormalized = rawBuf;
                        xsLen = len ;
                    }

                    // tell the schema validation about the character data for checkContent later
                    schemaValidator->setDatatypeBuffer(xsNormalized);

                    // call all active identity constraints
                    if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
                        fContent.append(xsNormalized, xsLen);
                    }

                    // The handler receives the normalized text only when
                    // fNormalizeData is set; otherwise the raw text.
                    if (fDocHandler) {
                        if (fNormalizeData) {
                            fDocHandler->docCharacters(xsNormalized, xsLen, false);
                        }
                        else {
                            fDocHandler->docCharacters(rawBuf, len, false);
                        }
                    }
                }
            }
        }
        else
        {
            //  If they can take any char data, then send it. Otherwise, they
            //  can only handle whitespace and can't handle this stuff so
            //  issue an error.
            if (charOpts == XMLElementDecl::AllCharData)
            {
                if (fGrammarType != Grammar::SchemaGrammarType)
                {
                    if (fDocHandler)
                        fDocHandler->docCharacters(rawBuf, len, false);
                }
                else
                {
                    // Schema path, same steps as in the all-spaces branch
                    XMLSize_t xsLen;
                    const XMLCh* xsNormalized;
                    SchemaValidator *schemaValidator = (SchemaValidator*)fValidator;
                    DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator();
                    if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE)
                    {
                        // normalize the character according to schema whitespace facet
                        ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, rawBuf, fWSNormalizeBuf);
                        xsNormalized = fWSNormalizeBuf.getRawBuffer();
                        xsLen = fWSNormalizeBuf.getLen();
                    }
                    else {
                        xsNormalized = rawBuf;
                        xsLen = len;
                    }

                    // tell the schema validation about the character data for checkContent later
                    schemaValidator->setDatatypeBuffer(xsNormalized);

                    // call all active identity constraints
                    if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) {
                        fContent.append(xsNormalized, xsLen);
                    }

                    if (fDocHandler) {
                        if (fNormalizeData) {
                            fDocHandler->docCharacters(xsNormalized, xsLen, false);
                        }
                        else {
                            fDocHandler->docCharacters(rawBuf, len, false);
                        }
                    }
                }
            }
            else
            {
                fValidator->emitError(XMLValid::NoCharDataInCM);
                // The nested block below currently has no effect; it only
                // marks where PSVI validity would be recorded.
                if(fGrammarType == Grammar::SchemaGrammarType)
                {
                    if (getPSVIHandler())
                    {
                        // REVISIT:
                        // PSVIAttribute->setValidity(PSVIItem::VALIDITY_INVALID);
                    }
                }
            }
        }
    }
    else
    {
        // call all active identity constraints
        if (fGrammarType == Grammar::SchemaGrammarType) {

            if (toCheckIdentityConstraint() && fICHandler->getMatcherCount())
                fContent.append(toSend.getRawBuffer(), toSend.getLen());
        }

        // Always assume its just char data if not validating
        if (fDocHandler)
            fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);
    }

    // Reset buffer
    toSend.reset();
}



//  This method is called with a key/value string pair that represents an
//  xmlns="yyy" or xmlns:xxx="yyy" attribute. This method will update the
//  current top of the element stack based on this data. We know that when
//  we get here, that it is one of these forms, so we don't bother confirming
//  it.
//
//  But we have to ensure
//      1. xxx is not xmlns
//      2. if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa
//      3. yyy is not XMLUni::fgXMLNSURIName
//      4. if xxx is not null, then yyy cannot be an empty string.
//
//  This overload finds the colon in attrName itself and forwards to the
//  three-argument overload.
void IGXMLScanner::updateNSMap(const  XMLCh* const    attrName
                              , const XMLCh* const    attrValue)
{
    updateNSMap(attrName, attrValue, XMLString::indexOf(attrName, chColon));
}

//  colonOfs is the offset of the colon within attrName, or -1 when there is
//  none (plain "xmlns").
void IGXMLScanner::updateNSMap(const  XMLCh* const    attrName
                              , const XMLCh* const    attrValue
                              , const int colonOfs)
{
    // We need a buffer to normalize the attribute value into
    XMLBufBid bbNormal(&fBufMgr);
    XMLBuffer& normalBuf = bbNormal.getBuffer();

    //  Normalize the value into the passed buffer. In this case, we don't
    //  care about the return value. An error was issued for the error, which
    //  is all we care about here.
    normalizeAttRawValue(attrName, attrValue, normalBuf);
    XMLCh* namespaceURI = normalBuf.getRawBuffer();

    //  We either have the default prefix (""), or we point it into the attr
    //  name parameter. Note that the xmlns is not the prefix we care about
    //  here. To us, the 'prefix' is really the local part of the attrName
    //  parameter.
    //
    //  Check 1. xxx is not xmlns
    //        2.
if xxx is xml, then yyy must match XMLUni::fgXMLURIName, and vice versa 01548 // 3. yyy is not XMLUni::fgXMLNSURIName 01549 // 4. if xxx is not null, then yyy cannot be an empty string. 01550 const XMLCh* prefPtr = XMLUni::fgZeroLenString; 01551 if (colonOfs != -1) { 01552 prefPtr = &attrName[colonOfs + 1]; 01553 01554 if (XMLString::equals(prefPtr, XMLUni::fgXMLNSString)) 01555 emitError(XMLErrs::NoUseOfxmlnsAsPrefix); 01556 else if (XMLString::equals(prefPtr, XMLUni::fgXMLString)) { 01557 if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) 01558 emitError(XMLErrs::PrefixXMLNotMatchXMLURI); 01559 } 01560 01561 if (!namespaceURI) 01562 emitError(XMLErrs::NoEmptyStrNamespace, attrName); 01563 else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0) 01564 emitError(XMLErrs::NoEmptyStrNamespace, attrName); 01565 } 01566 01567 if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName)) 01568 emitError(XMLErrs::NoUseOfxmlnsURI); 01569 else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName)) { 01570 if (!XMLString::equals(prefPtr, XMLUni::fgXMLString)) 01571 emitError(XMLErrs::XMLURINotMatchXMLPrefix); 01572 } 01573 01574 // Ok, we have to get the unique id for the attribute value, which is the 01575 // URI that this value should be mapped to. The validator has the 01576 // namespace string pool, so we ask him to find or add this new one. Then 01577 // we ask the element stack to add this prefix to URI Id mapping. 01578 fElemStack.addPrefix 01579 ( 01580 prefPtr 01581 , fURIStringPool->addOrFind(namespaceURI) 01582 ); 01583 } 01584 01585 void IGXMLScanner::scanRawAttrListforNameSpaces(XMLSize_t attCount) 01586 { 01587 // Make an initial pass through the list and find any xmlns attributes or 01588 // schema attributes. 01589 // When we find one, send it off to be used to update the element stack's 01590 // namespace mappings. 
01591 for (XMLSize_t index = 0; index < attCount; index++) 01592 { 01593 // each attribute has the prefix:suffix="value" 01594 const KVStringPair* curPair = fRawAttrList->elementAt(index); 01595 const XMLCh* rawPtr = curPair->getKey(); 01596 01597 // If either the key begins with "xmlns:" or its just plain 01598 // "xmlns", then use it to update the map. 01599 if (!XMLString::compareNString(rawPtr, XMLUni::fgXMLNSColonString, 6) 01600 || XMLString::equals(rawPtr, XMLUni::fgXMLNSString)) 01601 { 01602 const XMLCh* valuePtr = curPair->getValue(); 01603 01604 updateNSMap(rawPtr, valuePtr, fRawAttrColonList[index]); 01605 01606 // if the schema URI is seen in the the valuePtr, set the boolean seeXsi 01607 if (XMLString::equals(valuePtr, SchemaSymbols::fgURI_XSI)) { 01608 fSeeXsi = true; 01609 } 01610 } 01611 } 01612 01613 // walk through the list again to deal with "xsi:...." 01614 if (fDoSchema && fSeeXsi) 01615 { 01616 // Schema Xsi Type yyyy (e.g. xsi:type="yyyyy") 01617 XMLBufBid bbXsi(&fBufMgr); 01618 XMLBuffer& fXsiType = bbXsi.getBuffer(); 01619 01620 for (XMLSize_t index = 0; index < attCount; index++) 01621 { 01622 // each attribute has the prefix:suffix="value" 01623 const KVStringPair* curPair = fRawAttrList->elementAt(index); 01624 const XMLCh* rawPtr = curPair->getKey(); 01625 const XMLCh* prefPtr = XMLUni::fgZeroLenString; 01626 int colonInd = fRawAttrColonList[index]; 01627 01628 if (colonInd != -1) { 01629 01630 fURIBuf.set(rawPtr, colonInd); 01631 prefPtr = fURIBuf.getRawBuffer(); 01632 } 01633 01634 // if schema URI has been seen, scan for the schema location and uri 01635 // and resolve the schema grammar; or scan for schema type 01636 if (resolvePrefix(prefPtr, ElemStack::Mode_Attribute) == fSchemaNamespaceId) { 01637 01638 const XMLCh* valuePtr = curPair->getValue(); 01639 const XMLCh* suffPtr = &rawPtr[colonInd + 1]; 01640 01641 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_SCHEMALOCATION)) 01642 parseSchemaLocation(valuePtr); 01643 else if 
(XMLString::equals(suffPtr, SchemaSymbols::fgXSI_NONAMESPACESCHEMALOCATION)) 01644 resolveSchemaGrammar(valuePtr, XMLUni::fgZeroLenString); 01645 01646 if ((!fValidator || !fValidator->handlesSchema()) && 01647 (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE) || 01648 XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL))) 01649 { 01650 // If we are in the DTD mode, try to switch to the Schema 01651 // mode. For that we need to find any XML Schema grammar 01652 // that we can switch to. Such a grammar can only come 01653 // from the cache (if it came from the schemaLocation 01654 // attribute, we would be in the Schema mode already). 01655 // 01656 XMLGrammarPool* pool = fGrammarResolver->getGrammarPool (); 01657 RefHashTableOfEnumerator<Grammar> i = pool->getGrammarEnumerator (); 01658 01659 while (i.hasMoreElements ()) 01660 { 01661 Grammar& gr (i.nextElement ()); 01662 01663 if (gr.getGrammarType () == Grammar::SchemaGrammarType) 01664 { 01665 switchGrammar (gr.getTargetNamespace ()); 01666 break; 01667 } 01668 } 01669 } 01670 01671 if( fValidator && fValidator->handlesSchema() ) 01672 { 01673 if (XMLString::equals(suffPtr, SchemaSymbols::fgXSI_TYPE)) 01674 { 01675 // normalize the attribute according to schema whitespace facet 01676 DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_QNAME); 01677 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiType, true); 01678 } 01679 else if (XMLString::equals(suffPtr, SchemaSymbols::fgATT_NILL)) 01680 { 01681 // normalize the attribute according to schema whitespace facet 01682 XMLBuffer& fXsiNil = fBufMgr.bidOnBuffer(); 01683 DatatypeValidator* tempDV = DatatypeValidatorFactory::getBuiltInRegistry()->get(SchemaSymbols::fgDT_BOOLEAN); 01684 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, valuePtr, fXsiNil, true); 01685 if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_TRUE)) 01686 
((SchemaValidator*)fValidator)->setNillable(true); 01687 else if(XMLString::equals(fXsiNil.getRawBuffer(), SchemaSymbols::fgATTVAL_FALSE)) 01688 ((SchemaValidator*)fValidator)->setNillable(false); 01689 else 01690 emitError(XMLErrs::InvalidAttValue, fXsiNil.getRawBuffer(), valuePtr); 01691 fBufMgr.releaseBuffer(fXsiNil); 01692 } 01693 } 01694 } 01695 } 01696 01697 if (fValidator && fValidator->handlesSchema()) { 01698 if (!fXsiType.isEmpty()) { 01699 int colonPos = -1; 01700 unsigned int uriId = resolveQName ( 01701 fXsiType.getRawBuffer() 01702 , fPrefixBuf 01703 , ElemStack::Mode_Element 01704 , colonPos 01705 ); 01706 ((SchemaValidator*)fValidator)->setXsiType(fPrefixBuf.getRawBuffer(), fXsiType.getRawBuffer() + colonPos + 1, uriId); 01707 } 01708 } 01709 } 01710 } 01711 01712 void IGXMLScanner::parseSchemaLocation(const XMLCh* const schemaLocationStr, bool ignoreLoadSchema) 01713 { 01714 XMLCh* locStr = XMLString::replicate(schemaLocationStr, fMemoryManager); 01715 ArrayJanitor<XMLCh> janLoc(locStr, fMemoryManager); 01716 01717 processSchemaLocation(locStr); 01718 XMLSize_t size = fLocationPairs->size(); 01719 01720 if (size % 2 != 0 ) { 01721 emitError(XMLErrs::BadSchemaLocation); 01722 } else { 01723 // We need a buffer to normalize the attribute value into 01724 XMLBuffer normalBuf(1023, fMemoryManager); 01725 for(XMLSize_t i=0; i<size; i=i+2) { 01726 normalizeAttRawValue(SchemaSymbols::fgXSI_SCHEMALOCATION, fLocationPairs->elementAt(i), normalBuf); 01727 resolveSchemaGrammar(fLocationPairs->elementAt(i+1), normalBuf.getRawBuffer(), ignoreLoadSchema); 01728 } 01729 } 01730 } 01731 01732 void IGXMLScanner::resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri, bool ignoreLoadSchema) { 01733 01734 Grammar* grammar = 0; 01735 01736 { 01737 XMLSchemaDescriptionImpl theSchemaDescription(uri, fMemoryManager); 01738 theSchemaDescription.setLocationHints(loc); 01739 grammar = fGrammarResolver->getGrammar(&theSchemaDescription); 01740 } 01741 01742 // 
//  If multi-import is enabled, make sure the existing grammar came
    //  from the import directive. Otherwise we may end up reloading
    //  the same schema that came from the external grammar pool. Ideally,
    //  we would move fSchemaInfoList to XMLGrammarPool so that it survives
    //  the destruction of the scanner in which case we could rely on the
    //  same logic we use to weed out duplicate schemas below.
    //
    if (!grammar ||
        grammar->getGrammarType() == Grammar::DTDGrammarType ||
        (getHandleMultipleImports() &&
         ((XMLSchemaDescription*)grammar->getGrammarDescription())->
         getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
    {
      // Loading happens only if schema loading is enabled or the caller
      // asked to ignore that setting; otherwise this branch does nothing.
      if (fLoadSchema || ignoreLoadSchema)
      {
        // The schema document is parsed into a DOM, without validation but
        // with namespaces on, reporting through the scanner's own handlers.
        XSDDOMParser parser(0, fMemoryManager, 0);

        parser.setValidationScheme(XercesDOMParser::Val_Never);
        parser.setDoNamespaces(true);
        parser.setUserEntityHandler(fEntityHandler);
        parser.setUserErrorReporter(fErrorReporter);

        // Normalize loc: strip the 0xFFFF markers from it
        XMLBufBid nnSys(&fBufMgr);
        XMLBuffer& normalizedSysId = nnSys.getBuffer();
        XMLString::removeChar(loc, 0xFFFF, normalizedSysId);
        const XMLCh* normalizedURI = normalizedSysId.getRawBuffer();

        // Create a buffer for expanding the system id
        XMLBufBid bbSys(&fBufMgr);
        XMLBuffer& expSysId = bbSys.getBuffer();

        //  Allow the entity handler to expand the system id if they choose
        //  to do so.
        InputSource* srcToFill = 0;
        if (fEntityHandler)
        {
            if (!fEntityHandler->expandSystemId(normalizedURI, expSysId))
                expSysId.set(normalizedURI);

            ReaderMgr::LastExtEntityInfo lastInfo;
            fReaderMgr.getLastExtEntityInfo(lastInfo);
            XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::SchemaGrammar,
                            expSysId.getRawBuffer(), uri, XMLUni::fgZeroLenString, lastInfo.systemId,
                            &fReaderMgr);
            srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier);
        }
        else
        {
            expSysId.set(normalizedURI);
        }

        //  If they didn't create a source via the entity handler, then we
        //  have to create one on our own.
        if (!srcToFill)
        {
            // Default resolution switched off: give up silently
            if (fDisableDefaultEntityResolution)
                return;

            ReaderMgr::LastExtEntityInfo lastInfo;
            fReaderMgr.getLastExtEntityInfo(lastInfo);

            // Try the id as a URL relative to the last external entity. If
            // that fails or the result is still relative, fall back to a
            // local file, unless strict URI conformance is requested, in
            // which case a MalformedURLException is thrown.
            XMLURL urlTmp(fMemoryManager);
            if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) ||
                (urlTmp.isRelative()))
            {
                if (!fStandardUriConformant)
                {
                    XMLBufBid  ddSys(&fBufMgr);
                    XMLBuffer& resolvedSysId = ddSys.getBuffer();
                    XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId);

                    srcToFill = new (fMemoryManager) LocalFileInputSource
                    (
                        lastInfo.systemId
                        , resolvedSysId.getRawBuffer()
                        , fMemoryManager
                    );
                }
                else
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
            }
            else
            {
                if (fStandardUriConformant && urlTmp.hasInvalidChar())
                    ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager);
                srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager);
            }
        }

        // Put a janitor on the input source
        Janitor<InputSource> janSrc(srcToFill);

        // Check if this exact schema has already been seen.
        //
        const XMLCh* sysId = srcToFill->getSystemId();
        unsigned int uriId = (uri && *uri) ? fURIStringPool->addOrFind(uri) : fEmptyNamespaceId;
        SchemaInfo* importSchemaInfo = 0;

        if (fUseCachedGrammar)
          importSchemaInfo = fCachedSchemaInfoList->get(sysId, uriId);

        if (!importSchemaInfo && !fToCacheGrammar)
          importSchemaInfo = fSchemaInfoList->get(sysId, uriId);

        if (importSchemaInfo)
        {
          // We haven't added any new grammars so it is safe to just
          // return.
          //
          return;
        }

        // Should just issue warning if the schema is not found. The source's
        // own flag is saved here and restored after the parse.
        bool flag = srcToFill->getIssueFatalErrorIfNotFound();
        srcToFill->setIssueFatalErrorIfNotFound(false);

        parser.parse(*srcToFill);

        // Reset the InputSource
        srcToFill->setIssueFatalErrorIfNotFound(flag);

        if (parser.getSawFatal() && fExitOnFirstFatal)
            emitError(XMLErrs::SchemaScanFatalError);

        DOMDocument* document = parser.getDocument(); //Our Grammar

        if (document != 0) {

            DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema
            if (root != 0)
            {
                // The schema's declared target namespace may differ from the
                // one requested. That is reported when validating (or in auto
                // mode), and the grammar lookup is repeated with the declared
                // namespace.
                const XMLCh* newUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE);
                bool newGrammar = false;
                if (!XMLString::equals(newUri, uri)) {
                    if (fValidate || fValScheme == Val_Auto) {
                        fValidator->emitError(XMLValid::WrongTargetNamespace, loc, uri);
                    }

                    grammar = fGrammarResolver->getGrammar(newUri);
                    newGrammar = true;
                }

                // Same test as at the top of the function, applied to the
                // possibly re-fetched grammar
                if (!grammar ||
                    grammar->getGrammarType() == Grammar::DTDGrammarType ||
                    (getHandleMultipleImports() &&
                     ((XMLSchemaDescription*)grammar->getGrammarDescription())->
                     getContextType () == XMLSchemaDescription::CONTEXT_IMPORT))
                {
                    // If we switched namespace URI, recheck the schema info.
                    //
                    if (newGrammar)
                    {
                      unsigned int newUriId = (newUri && *newUri) ? fURIStringPool->addOrFind(newUri) : fEmptyNamespaceId;

                      if (fUseCachedGrammar)
                        importSchemaInfo = fCachedSchemaInfoList->get(sysId, newUriId);

                      if (!importSchemaInfo && !fToCacheGrammar)
                        importSchemaInfo = fSchemaInfoList->get(sysId, newUriId);

                      if (importSchemaInfo)
                        return;
                    }

                    //  Since we have seen a grammar, set our validation flag
                    //  at this point if the validation scheme is auto
                    if (fValScheme == Val_Auto && !fValidate) {
                        fValidate = true;
                        fElemStack.setValidationFlag(fValidate);
                    }

                    // we have seen a schema, so set up the fValidator as fSchemaValidator
                    if (!fValidator->handlesSchema())
                    {
                        if (fValidatorFromUser) {
                            // the fValidator is from user
                            ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
                        }
                        else {
                            fValidator = fSchemaValidator;
                        }
                    }

                    // Reuse an existing schema grammar if there is one,
                    // otherwise allocate a new one for the traversal.
                    bool grammarFound = grammar &&
                      grammar->getGrammarType() == Grammar::SchemaGrammarType;

                    SchemaGrammar* schemaGrammar;

                    if (grammarFound) {
                      schemaGrammar = (SchemaGrammar*) grammar;
                    }
                    else {
                      schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager);
                    }

                    XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription();
                    gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE);
                    gramDesc->setLocationHints(sysId);

                    // Constructing the traverser is the whole step; the
                    // object is not used again in this function.
                    // NOTE(review): presumably the constructor performs the
                    // traversal and fills schemaGrammar -- confirm in
                    // TraverseSchema.
                    TraverseSchema traverseSchema
                    (
                        root
                        , fURIStringPool
                        , schemaGrammar
                        , fGrammarResolver
                        , fUseCachedGrammar ? fCachedSchemaInfoList : fSchemaInfoList
                        , fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList
                        , this
                        , sysId
                        , fEntityHandler
                        , fErrorReporter
                        , fMemoryManager
                        , grammarFound
                    );

                    // Reset the now invalid schema roots in the collected
                    // schema info entries.
                    //
                    {
                      RefHash2KeysTableOfEnumerator<SchemaInfo> i (
                        fToCacheGrammar ? fCachedSchemaInfoList : fSchemaInfoList);

                      while (i.hasMoreElements ())
                        i.nextElement().resetRoot ();
                    }

                    // Leave DTD mode, if we were in it, for the new grammar
                    if (fGrammarType == Grammar::DTDGrammarType) {
                        fGrammar = schemaGrammar;
                        fGrammarType = Grammar::SchemaGrammarType;
                        fValidator->setGrammar(fGrammar);
                    }

                    if (fValidate) {
                        //  validate the Schema scan so far
                        fValidator->preContentValidation(false);
                    }
                }
            }
        }
      }
    }
    else
    {
        // A usable schema grammar was already known to the resolver; nothing
        // is loaded, only the scanner state is switched over to it.

        //  Since we have seen a grammar, set our validation flag
        //  at this point if the validation scheme is auto
        if (fValScheme == Val_Auto && !fValidate) {
            fValidate = true;
            fElemStack.setValidationFlag(fValidate);
        }

        // we have seen a schema, so set up the fValidator as fSchemaValidator
        if (!fValidator->handlesSchema())
        {
            if (fValidatorFromUser) {
                // the fValidator is from user
                ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager);
            }
            else {
                fValidator = fSchemaValidator;
            }
        }

        if (fGrammarType == Grammar::DTDGrammarType) {
            fGrammar = grammar;
            fGrammarType = Grammar::SchemaGrammarType;
            fValidator->setGrammar(fGrammar);
        }
    }

    // fModel may need updating:
    if(getPSVIHandler())
        fModel = fGrammarResolver->getXSModel();
}

//  Builds an input source for the external entity identified by sysId and
//  pubId.
InputSource* IGXMLScanner::resolveSystemId(const XMLCh* const sysId
                                          ,const XMLCh* const pubId)
{
    //Normalize sysId
    XMLBufBid nnSys(&fBufMgr);
    XMLBuffer& normalizedSysId =
nnSys.getBuffer(); 02023 XMLString::removeChar(sysId, 0xFFFF, normalizedSysId); 02024 const XMLCh* normalizedURI = normalizedSysId.getRawBuffer(); 02025 02026 // Create a buffer for expanding the system id 02027 XMLBufBid bbSys(&fBufMgr); 02028 XMLBuffer& expSysId = bbSys.getBuffer(); 02029 02030 // Allow the entity handler to expand the system id if they choose 02031 // to do so. 02032 InputSource* srcToFill = 0; 02033 if (fEntityHandler) 02034 { 02035 if (!fEntityHandler->expandSystemId(normalizedURI, expSysId)) 02036 expSysId.set(normalizedURI); 02037 02038 ReaderMgr::LastExtEntityInfo lastInfo; 02039 fReaderMgr.getLastExtEntityInfo(lastInfo); 02040 XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity, 02041 expSysId.getRawBuffer(), 0, pubId, lastInfo.systemId, 02042 &fReaderMgr); 02043 srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier); 02044 } 02045 else 02046 { 02047 expSysId.set(normalizedURI); 02048 } 02049 02050 // If they didn't create a source via the entity handler, then we 02051 // have to create one on our own. 
02052 if (!srcToFill) 02053 { 02054 if (fDisableDefaultEntityResolution) 02055 return srcToFill; 02056 02057 ReaderMgr::LastExtEntityInfo lastInfo; 02058 fReaderMgr.getLastExtEntityInfo(lastInfo); 02059 02060 XMLURL urlTmp(fMemoryManager); 02061 if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) || 02062 (urlTmp.isRelative())) 02063 { 02064 if (!fStandardUriConformant) 02065 { 02066 XMLBufBid ddSys(&fBufMgr); 02067 XMLBuffer& resolvedSysId = ddSys.getBuffer(); 02068 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId); 02069 02070 srcToFill = new (fMemoryManager) LocalFileInputSource 02071 ( 02072 lastInfo.systemId 02073 , resolvedSysId.getRawBuffer() 02074 , fMemoryManager 02075 ); 02076 } 02077 else 02078 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 02079 } 02080 else 02081 { 02082 if (fStandardUriConformant && urlTmp.hasInvalidChar()) 02083 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 02084 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager); 02085 } 02086 } 02087 02088 return srcToFill; 02089 } 02090 02091 02092 // --------------------------------------------------------------------------- 02093 // IGXMLScanner: Private grammar preparsing methods 02094 // --------------------------------------------------------------------------- 02095 Grammar* IGXMLScanner::loadXMLSchemaGrammar(const InputSource& src, 02096 const bool toCache) 02097 { 02098 // Reset the validators 02099 fSchemaValidator->reset(); 02100 fSchemaValidator->setErrorReporter(fErrorReporter); 02101 fSchemaValidator->setExitOnFirstFatal(fExitOnFirstFatal); 02102 fSchemaValidator->setGrammarResolver(fGrammarResolver); 02103 02104 if (fValidatorFromUser) 02105 fValidator->reset(); 02106 02107 if (!fValidator->handlesSchema()) { 02108 if (fValidatorFromUser && fValidate) 02109 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); 
02110 else { 02111 fValidator = fSchemaValidator; 02112 } 02113 } 02114 02115 XSDDOMParser parser(0, fMemoryManager, 0); 02116 02117 parser.setValidationScheme(XercesDOMParser::Val_Never); 02118 parser.setDoNamespaces(true); 02119 parser.setUserEntityHandler(fEntityHandler); 02120 parser.setUserErrorReporter(fErrorReporter); 02121 02122 // Should just issue warning if the schema is not found 02123 bool flag = src.getIssueFatalErrorIfNotFound(); 02124 ((InputSource&) src).setIssueFatalErrorIfNotFound(false); 02125 02126 parser.parse(src); 02127 02128 // Reset the InputSource 02129 ((InputSource&) src).setIssueFatalErrorIfNotFound(flag); 02130 02131 if (parser.getSawFatal() && fExitOnFirstFatal) 02132 emitError(XMLErrs::SchemaScanFatalError); 02133 02134 DOMDocument* document = parser.getDocument(); //Our Grammar 02135 02136 if (document != 0) { 02137 02138 DOMElement* root = document->getDocumentElement();// This is what we pass to TraverserSchema 02139 if (root != 0) 02140 { 02141 const XMLCh* nsUri = root->getAttribute(SchemaSymbols::fgATT_TARGETNAMESPACE); 02142 Grammar* grammar = fGrammarResolver->getGrammar(nsUri); 02143 02144 // Check if this exact schema has already been seen. 
02145 // 02146 const XMLCh* sysId = src.getSystemId(); 02147 SchemaInfo* importSchemaInfo = 0; 02148 02149 if (grammar) 02150 { 02151 if (nsUri && *nsUri) 02152 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fURIStringPool->addOrFind(nsUri)); 02153 else 02154 importSchemaInfo = fCachedSchemaInfoList->get(sysId, fEmptyNamespaceId); 02155 } 02156 02157 if (!importSchemaInfo) 02158 { 02159 bool grammarFound = grammar && 02160 grammar->getGrammarType() == Grammar::SchemaGrammarType && 02161 getHandleMultipleImports(); 02162 02163 SchemaGrammar* schemaGrammar; 02164 02165 if (grammarFound) 02166 schemaGrammar = (SchemaGrammar*) grammar; 02167 else 02168 schemaGrammar = new (fGrammarPoolMemoryManager) SchemaGrammar(fGrammarPoolMemoryManager); 02169 02170 XMLSchemaDescription* gramDesc = (XMLSchemaDescription*) schemaGrammar->getGrammarDescription(); 02171 gramDesc->setContextType(XMLSchemaDescription::CONTEXT_PREPARSE); 02172 gramDesc->setLocationHints(sysId); 02173 02174 TraverseSchema traverseSchema 02175 ( 02176 root 02177 , fURIStringPool 02178 , schemaGrammar 02179 , fGrammarResolver 02180 , fCachedSchemaInfoList 02181 , toCache ? fCachedSchemaInfoList : fSchemaInfoList 02182 , this 02183 , sysId 02184 , fEntityHandler 02185 , fErrorReporter 02186 , fMemoryManager 02187 , grammarFound 02188 ); 02189 02190 grammar = schemaGrammar; 02191 02192 // Reset the now invalid schema roots in the collected 02193 // schema info entries. 02194 // 02195 { 02196 RefHash2KeysTableOfEnumerator<SchemaInfo> i ( 02197 toCache ? 
fCachedSchemaInfoList : fSchemaInfoList); 02198 02199 while (i.hasMoreElements ()) 02200 i.nextElement().resetRoot (); 02201 } 02202 } 02203 02204 if (fValidate) { 02205 // validate the Schema scan so far 02206 fValidator->setGrammar(grammar); 02207 fValidator->preContentValidation(false); 02208 } 02209 02210 if (toCache) { 02211 fGrammarResolver->cacheGrammars(); 02212 } 02213 02214 if(getPSVIHandler()) 02215 fModel = fGrammarResolver->getXSModel(); 02216 02217 return grammar; 02218 } 02219 } 02220 02221 return 0; 02222 } 02223 02224 02225 02226 // --------------------------------------------------------------------------- 02227 // IGXMLScanner: Private parsing methods 02228 // --------------------------------------------------------------------------- 02229 02230 // This method is called to do a raw scan of an attribute value. It does not 02231 // do normalization (since we don't know their types yet.) It just scans the 02232 // value and does entity expansion. 02233 // 02234 // End of entity's must be dealt with here. During DTD scan, they can come 02235 // from external entities. During content, they can come from any entity. 02236 // We just eat the end of entity and continue with our scan until we come 02237 // to the closing quote. If an unterminated value causes us to go through 02238 // subsequent entities, that will cause errors back in the calling code, 02239 // but there's little we can do about it here. 02240 bool IGXMLScanner::basicAttrValueScan(const XMLCh* const attrName, XMLBuffer& toFill) 02241 { 02242 // Reset the target buffer 02243 toFill.reset(); 02244 02245 // Get the next char which must be a single or double quote 02246 XMLCh quoteCh; 02247 if (!fReaderMgr.skipIfQuote(quoteCh)) 02248 return false; 02249 02250 // We have to get the current reader because we have to ignore closing 02251 // quotes until we hit the same reader again. 
02252 const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); 02253 02254 // Loop until we get the attribute value. Note that we use a double 02255 // loop here to avoid the setup/teardown overhead of the exception 02256 // handler on every round. 02257 while (true) 02258 { 02259 try 02260 { 02261 while(true) 02262 { 02263 XMLCh nextCh = fReaderMgr.getNextChar(); 02264 02265 if (nextCh != quoteCh) 02266 { 02267 if (nextCh != chAmpersand) 02268 { 02269 if ((nextCh < 0xD800) || (nextCh > 0xDFFF)) 02270 { 02271 // Its got to at least be a valid XML character 02272 if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) 02273 { 02274 if (nextCh == 0) 02275 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); 02276 02277 XMLCh tmpBuf[9]; 02278 XMLString::binToText 02279 ( 02280 nextCh 02281 , tmpBuf 02282 , 8 02283 , 16 02284 , fMemoryManager 02285 ); 02286 emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); 02287 } 02288 } else // its a surrogate 02289 { 02290 // Deal with surrogate pairs 02291 02292 // we expect a a leading surrogate. 02293 if (nextCh <= 0xDBFF) 02294 { 02295 toFill.append(nextCh); 02296 02297 // process the trailing surrogate 02298 nextCh = fReaderMgr.getNextChar(); 02299 02300 // it should be a trailing surrogate. 02301 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF)) 02302 { 02303 emitError(XMLErrs::Expected2ndSurrogateChar); 02304 } 02305 } else 02306 { 02307 // Its a trailing surrogate, but we are not expecting it 02308 emitError(XMLErrs::Unexpected2ndSurrogateChar); 02309 } 02310 } 02311 } else // its a chAmpersand 02312 { 02313 // Check for an entity ref . We ignore the empty flag in 02314 // this one. 02315 02316 bool escaped; 02317 XMLCh firstCh; 02318 XMLCh secondCh 02319 ; 02320 // If it was not returned directly, then jump back up 02321 if (scanEntityRef(true, firstCh, secondCh, escaped) == EntityExp_Returned) 02322 { 02323 // If it was escaped, then put in a 0xFFFF value. 
This will 02324 // be used later during validation and normalization of the 02325 // value to know that the following character was via an 02326 // escape char. 02327 if (escaped) 02328 toFill.append(0xFFFF); 02329 02330 toFill.append(firstCh); 02331 if (secondCh) 02332 toFill.append(secondCh); 02333 } 02334 continue; 02335 } 02336 } else // its a quoteCh 02337 { 02338 // Check for our ending quote. It has to be in the same entity 02339 // as where we started. Quotes in nested entities are ignored. 02340 02341 if (curReader == fReaderMgr.getCurrentReaderNum()) 02342 { 02343 return true; 02344 } 02345 02346 // Watch for spillover into a previous entity 02347 if (curReader > fReaderMgr.getCurrentReaderNum()) 02348 { 02349 emitError(XMLErrs::PartialMarkupInEntity); 02350 return false; 02351 } 02352 } 02353 02354 // add it to the buffer 02355 toFill.append(nextCh); 02356 02357 } 02358 } 02359 catch(const EndOfEntityException&) 02360 { 02361 // Just eat it and continue. 02362 } 02363 } 02364 return true; 02365 } 02366 02367 02368 bool IGXMLScanner::scanAttValue( const XMLAttDef* const attDef 02369 , const XMLCh* const attrName 02370 , XMLBuffer& toFill) 02371 { 02372 enum States 02373 { 02374 InWhitespace 02375 , InContent 02376 }; 02377 02378 // Get the type and name 02379 const XMLAttDef::AttTypes type = (attDef) 02380 ?attDef->getType() 02381 :XMLAttDef::CData; 02382 02383 // Reset the target buffer 02384 toFill.reset(); 02385 02386 // Get the next char which must be a single or double quote 02387 XMLCh quoteCh; 02388 if (!fReaderMgr.skipIfQuote(quoteCh)) 02389 return false; 02390 02391 // We have to get the current reader because we have to ignore closing 02392 // quotes until we hit the same reader again. 
02393 const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); 02394 02395 // Get attribute def - to check to see if it's declared externally or not 02396 bool isAttExternal = (attDef) 02397 ?attDef->isExternal() 02398 :false; 02399 02400 // Loop until we get the attribute value. Note that we use a double 02401 // loop here to avoid the setup/teardown overhead of the exception 02402 // handler on every round. 02403 XMLCh nextCh; 02404 XMLCh secondCh = 0; 02405 States curState = InContent; 02406 bool firstNonWS = false; 02407 bool gotLeadingSurrogate = false; 02408 bool escaped; 02409 while (true) 02410 { 02411 try 02412 { 02413 while(true) 02414 { 02415 nextCh = fReaderMgr.getNextChar(); 02416 02417 if (!nextCh) 02418 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); 02419 02420 // Check for our ending quote in the same entity 02421 if (nextCh == quoteCh) 02422 { 02423 if (curReader == fReaderMgr.getCurrentReaderNum()) 02424 return true; 02425 02426 // Watch for spillover into a previous entity 02427 if (curReader > fReaderMgr.getCurrentReaderNum()) 02428 { 02429 emitError(XMLErrs::PartialMarkupInEntity); 02430 return false; 02431 } 02432 } 02433 02434 // Check for an entity ref now, before we let it affect our 02435 // whitespace normalization logic below. We ignore the empty flag 02436 // in this one. 02437 escaped = false; 02438 if (nextCh == chAmpersand) 02439 { 02440 if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned) 02441 { 02442 gotLeadingSurrogate = false; 02443 continue; 02444 } 02445 } 02446 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) 02447 { 02448 // Deal with surrogate pairs 02449 // Its a leading surrogate. If we already got one, then 02450 // issue an error, else set leading flag to make sure that 02451 // we look for a trailing next time. 
02452 if (gotLeadingSurrogate) 02453 emitError(XMLErrs::Expected2ndSurrogateChar); 02454 else 02455 gotLeadingSurrogate = true; 02456 } 02457 else 02458 { 02459 // If its a trailing surrogate, make sure that we are 02460 // prepared for that. Else, its just a regular char so make 02461 // sure that we were not expected a trailing surrogate. 02462 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) 02463 { 02464 // Its trailing, so make sure we were expecting it 02465 if (!gotLeadingSurrogate) 02466 emitError(XMLErrs::Unexpected2ndSurrogateChar); 02467 } 02468 else 02469 { 02470 // Its just a char, so make sure we were not expecting a 02471 // trailing surrogate. 02472 if (gotLeadingSurrogate) 02473 emitError(XMLErrs::Expected2ndSurrogateChar); 02474 02475 // Its got to at least be a valid XML character 02476 if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) 02477 { 02478 XMLCh tmpBuf[9]; 02479 XMLString::binToText 02480 ( 02481 nextCh 02482 , tmpBuf 02483 , 8 02484 , 16 02485 , fMemoryManager 02486 ); 02487 emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf); 02488 } 02489 } 02490 gotLeadingSurrogate = false; 02491 } 02492 02493 // If its not escaped, then make sure its not a < character, which 02494 // is not allowed in attribute values. 02495 if (!escaped && (nextCh == chOpenAngle)) 02496 emitError(XMLErrs::BracketInAttrValue, attrName); 02497 02498 // If the attribute is a CDATA type we do simple replacement of 02499 // tabs and new lines with spaces, if the character is not escaped 02500 // by way of a char ref. 02501 // 02502 // Otherwise, we do the standard non-CDATA normalization of 02503 // compressing whitespace to single spaces and getting rid of leading 02504 // and trailing whitespace. 
02505 if (type == XMLAttDef::CData) 02506 { 02507 if (!escaped) 02508 { 02509 if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D)) 02510 { 02511 // Check Validity Constraint for Standalone document declaration 02512 // XML 1.0, Section 2.9 02513 if (fStandalone && fValidate && isAttExternal) 02514 { 02515 // Can't have a standalone document declaration of "yes" if attribute 02516 // values are subject to normalisation 02517 fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); 02518 } 02519 nextCh = chSpace; 02520 } 02521 } 02522 } 02523 else 02524 { 02525 if (curState == InWhitespace) 02526 { 02527 if ((escaped && nextCh != chSpace) || !fReaderMgr.getCurrentReader()->isWhitespace(nextCh)) 02528 { 02529 if (firstNonWS) 02530 toFill.append(chSpace); 02531 curState = InContent; 02532 firstNonWS = true; 02533 } 02534 else 02535 { 02536 continue; 02537 } 02538 } 02539 else if (curState == InContent) 02540 { 02541 if ((nextCh == chSpace) || 02542 (fReaderMgr.getCurrentReader()->isWhitespace(nextCh) && !escaped)) 02543 { 02544 curState = InWhitespace; 02545 02546 // Check Validity Constraint for Standalone document declaration 02547 // XML 1.0, Section 2.9 02548 if (fStandalone && fValidate && isAttExternal) 02549 { 02550 if (!firstNonWS || (nextCh != chSpace) || (fReaderMgr.lookingAtSpace())) 02551 { 02552 // Can't have a standalone document declaration of "yes" if attribute 02553 // values are subject to normalisation 02554 fValidator->emitError(XMLValid::NoAttNormForStandalone, attrName); 02555 } 02556 } 02557 continue; 02558 } 02559 firstNonWS = true; 02560 } 02561 } 02562 02563 // Else add it to the buffer 02564 toFill.append(nextCh); 02565 02566 if (secondCh) 02567 { 02568 toFill.append(secondCh); 02569 secondCh=0; 02570 } 02571 } 02572 } 02573 catch(const EndOfEntityException&) 02574 { 02575 // Just eat it and continue. 
02576 gotLeadingSurrogate = false; 02577 escaped = false; 02578 } 02579 } 02580 return true; 02581 } 02582 02583 02584 // This method scans a CDATA section. It collects the character into one 02585 // of the temp buffers and calls the document handler, if any, with the 02586 // characters. It assumes that the <![CDATA string has been scanned before 02587 // this call. 02588 void IGXMLScanner::scanCDSection() 02589 { 02590 static const XMLCh CDataClose[] = 02591 { 02592 chCloseSquare, chCloseAngle, chNull 02593 }; 02594 02595 // The next character should be the opening square bracket. If not 02596 // issue an error, but then try to recover by skipping any whitespace 02597 // and checking again. 02598 if (!fReaderMgr.skippedChar(chOpenSquare)) 02599 { 02600 emitError(XMLErrs::ExpectedOpenSquareBracket); 02601 fReaderMgr.skipPastSpaces(); 02602 02603 // If we still don't find it, then give up, else keep going 02604 if (!fReaderMgr.skippedChar(chOpenSquare)) 02605 return; 02606 } 02607 02608 // Get a buffer for this 02609 XMLBufBid bbCData(&fBufMgr); 02610 02611 // We just scan forward until we hit the end of CDATA section sequence. 02612 // CDATA is effectively a big escape mechanism so we don't treat markup 02613 // characters specially here. 
02614 bool emittedError = false; 02615 bool gotLeadingSurrogate = false; 02616 const ElemStack::StackElem* topElem = fElemStack.topElement(); 02617 02618 // Get the character data opts for the current element 02619 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; 02620 if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType) 02621 { 02622 // And see if the current element is a 'Children' style content model 02623 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 02624 if(currType) 02625 { 02626 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); 02627 if(modelType == SchemaElementDecl::Children || 02628 modelType == SchemaElementDecl::ElementOnlyEmpty) 02629 charOpts = XMLElementDecl::SpacesOk; 02630 else if(modelType == SchemaElementDecl::Empty) 02631 charOpts = XMLElementDecl::NoCharData; 02632 } 02633 } else // DTD grammar 02634 charOpts = topElem->fThisElement->getCharDataOpts(); 02635 02636 while (true) 02637 { 02638 const XMLCh nextCh = fReaderMgr.getNextChar(); 02639 02640 // Watch for unexpected end of file 02641 if (!nextCh) 02642 { 02643 emitError(XMLErrs::UnterminatedCDATASection); 02644 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager); 02645 } 02646 02647 if (fValidate && fStandalone && (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))) 02648 { 02649 // This document is standalone; this ignorable CDATA whitespace is forbidden. 
02650 // XML 1.0, Section 2.9 02651 // And see if the current element is a 'Children' style content model 02652 if (topElem->fThisElement->isExternal()) { 02653 02654 if (charOpts == XMLElementDecl::SpacesOk) // Element Content 02655 { 02656 // Error - standalone should have a value of "no" as whitespace detected in an 02657 // element type with element content whose element declaration was external 02658 fValidator->emitError(XMLValid::NoWSForStandalone); 02659 if(fGrammarType == Grammar::SchemaGrammarType) 02660 { 02661 if (getPSVIHandler()) 02662 { 02663 // REVISIT: 02664 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 02665 } 02666 } 02667 } 02668 } 02669 } 02670 02671 // If this is a close square bracket it could be our closing 02672 // sequence. 02673 if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose)) 02674 { 02675 // make sure we were not expecting a trailing surrogate. 02676 if (gotLeadingSurrogate) 02677 emitError(XMLErrs::Expected2ndSurrogateChar); 02678 02679 if (fGrammarType == Grammar::SchemaGrammarType) { 02680 02681 XMLSize_t xsLen = bbCData.getLen(); 02682 const XMLCh* xsNormalized = bbCData.getRawBuffer(); 02683 DatatypeValidator* tempDV = ((SchemaValidator*) fValidator)->getCurrentDatatypeValidator(); 02684 if (tempDV && tempDV->getWSFacet() != DatatypeValidator::PRESERVE) 02685 { 02686 // normalize the character according to schema whitespace facet 02687 ((SchemaValidator*) fValidator)->normalizeWhiteSpace(tempDV, xsNormalized, fWSNormalizeBuf); 02688 xsNormalized = fWSNormalizeBuf.getRawBuffer(); 02689 xsLen = fWSNormalizeBuf.getLen(); 02690 if (fNormalizeData && fValidate) { 02691 bbCData.set(xsNormalized); 02692 } 02693 } 02694 02695 if (fValidate) { 02696 02697 // tell the schema validation about the character data for checkContent later 02698 ((SchemaValidator*)fValidator)->setDatatypeBuffer(xsNormalized); 02699 02700 if (charOpts != XMLElementDecl::AllCharData) 02701 { 02702 // They definitely cannot handle any 
type of char data 02703 fValidator->emitError(XMLValid::NoCharDataInCM); 02704 if (getPSVIHandler()) 02705 { 02706 // REVISIT: 02707 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 02708 } 02709 } 02710 } 02711 02712 // call all active identity constraints 02713 if (toCheckIdentityConstraint() && fICHandler->getMatcherCount()) { 02714 fContent.append(xsNormalized, xsLen); 02715 } 02716 } 02717 else { 02718 if (fValidate) { 02719 02720 if (charOpts != XMLElementDecl::AllCharData) 02721 { 02722 // They definitely cannot handle any type of char data 02723 fValidator->emitError(XMLValid::NoCharDataInCM); 02724 } 02725 } 02726 } 02727 02728 // If we have a doc handler, call it 02729 if (fDocHandler) 02730 { 02731 fDocHandler->docCharacters( 02732 bbCData.getRawBuffer(), bbCData.getLen(), true 02733 ); 02734 } 02735 02736 // And we are done 02737 break; 02738 } 02739 02740 // Make sure its a valid character. But if we've emitted an error 02741 // already, don't bother with the overhead since we've already told 02742 // them about it. 02743 if (!emittedError) 02744 { 02745 // Deal with surrogate pairs 02746 if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) 02747 { 02748 // Its a leading surrogate. If we already got one, then 02749 // issue an error, else set leading flag to make sure that 02750 // we look for a trailing next time. 02751 if (gotLeadingSurrogate) 02752 emitError(XMLErrs::Expected2ndSurrogateChar); 02753 else 02754 gotLeadingSurrogate = true; 02755 } 02756 else 02757 { 02758 // If its a trailing surrogate, make sure that we are 02759 // prepared for that. Else, its just a regular char so make 02760 // sure that we were not expected a trailing surrogate. 
02761 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) 02762 { 02763 // Its trailing, so make sure we were expecting it 02764 if (!gotLeadingSurrogate) 02765 emitError(XMLErrs::Unexpected2ndSurrogateChar); 02766 } 02767 else 02768 { 02769 // Its just a char, so make sure we were not expecting a 02770 // trailing surrogate. 02771 if (gotLeadingSurrogate) 02772 emitError(XMLErrs::Expected2ndSurrogateChar); 02773 02774 // Its got to at least be a valid XML character 02775 else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) 02776 { 02777 XMLCh tmpBuf[9]; 02778 XMLString::binToText 02779 ( 02780 nextCh 02781 , tmpBuf 02782 , 8 02783 , 16 02784 , fMemoryManager 02785 ); 02786 emitError(XMLErrs::InvalidCharacter, tmpBuf); 02787 emittedError = true; 02788 } 02789 } 02790 gotLeadingSurrogate = false; 02791 } 02792 } 02793 02794 // Add it to the buffer 02795 bbCData.append(nextCh); 02796 } 02797 } 02798 02799 02800 void IGXMLScanner::scanCharData(XMLBuffer& toUse) 02801 { 02802 // We have to watch for the stupid ]]> sequence, which is illegal in 02803 // character data. So this is a little state machine that handles that. 02804 enum States 02805 { 02806 State_Waiting 02807 , State_GotOne 02808 , State_GotTwo 02809 }; 02810 02811 // Reset the buffer before we start 02812 toUse.reset(); 02813 02814 // Turn on the 'throw at end' flag of the reader manager 02815 ThrowEOEJanitor jan(&fReaderMgr, true); 02816 02817 // In order to be more efficient we have to use kind of a deeply nested 02818 // set of blocks here. The outer block puts on a try and catches end of 02819 // entity exceptions. The inner loop is the per-character loop. If we 02820 // put the try inside the inner loop, it would work but would require 02821 // the exception handling code setup/teardown code to be invoked for 02822 // each character. 
02823 XMLCh nextCh; 02824 XMLCh secondCh = 0; 02825 States curState = State_Waiting; 02826 bool escaped = false; 02827 bool gotLeadingSurrogate = false; 02828 bool notDone = true; 02829 while (notDone) 02830 { 02831 try 02832 { 02833 while (true) 02834 { 02835 // Eat through as many plain content characters as possible without 02836 // needing special handling. Moving most content characters here, 02837 // in this one call, rather than running the overall loop once 02838 // per content character, is a speed optimization. 02839 if (curState == State_Waiting && !gotLeadingSurrogate) 02840 { 02841 fReaderMgr.movePlainContentChars(toUse); 02842 } 02843 02844 // Try to get another char from the source 02845 // The code from here on down covers all contengencies, 02846 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh)) 02847 { 02848 // If we were waiting for a trailing surrogate, its an error 02849 if (gotLeadingSurrogate) 02850 emitError(XMLErrs::Expected2ndSurrogateChar); 02851 02852 notDone = false; 02853 break; 02854 } 02855 02856 // Watch for a reference. Note that the escapement mechanism 02857 // is ignored in this content. 02858 escaped = false; 02859 if (nextCh == chAmpersand) 02860 { 02861 sendCharData(toUse); 02862 02863 // Turn off the throwing at the end of entity during this 02864 ThrowEOEJanitor jan(&fReaderMgr, false); 02865 02866 if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned) 02867 { 02868 gotLeadingSurrogate = false; 02869 continue; 02870 } 02871 else 02872 { 02873 if (escaped && !fElemStack.isEmpty()) 02874 fElemStack.setReferenceEscaped(); 02875 } 02876 } 02877 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) 02878 { 02879 // Deal with surrogate pairs 02880 // Its a leading surrogate. If we already got one, then 02881 // issue an error, else set leading flag to make sure that 02882 // we look for a trailing next time. 
02883 if (gotLeadingSurrogate) 02884 emitError(XMLErrs::Expected2ndSurrogateChar); 02885 else 02886 gotLeadingSurrogate = true; 02887 } 02888 else 02889 { 02890 // If its a trailing surrogate, make sure that we are 02891 // prepared for that. Else, its just a regular char so make 02892 // sure that we were not expected a trailing surrogate. 02893 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) 02894 { 02895 // Its trailing, so make sure we were expecting it 02896 if (!gotLeadingSurrogate) 02897 emitError(XMLErrs::Unexpected2ndSurrogateChar); 02898 } 02899 else 02900 { 02901 // Its just a char, so make sure we were not expecting a 02902 // trailing surrogate. 02903 if (gotLeadingSurrogate) 02904 emitError(XMLErrs::Expected2ndSurrogateChar); 02905 02906 // Make sure the returned char is a valid XML char 02907 if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) 02908 { 02909 XMLCh tmpBuf[9]; 02910 XMLString::binToText 02911 ( 02912 nextCh 02913 , tmpBuf 02914 , 8 02915 , 16 02916 , fMemoryManager 02917 ); 02918 emitError(XMLErrs::InvalidCharacter, tmpBuf); 02919 } 02920 } 02921 gotLeadingSurrogate = false; 02922 } 02923 02924 // Keep the state machine up to date 02925 if (!escaped) 02926 { 02927 if (nextCh == chCloseSquare) 02928 { 02929 if (curState == State_Waiting) 02930 curState = State_GotOne; 02931 else if (curState == State_GotOne) 02932 curState = State_GotTwo; 02933 } 02934 else if (nextCh == chCloseAngle) 02935 { 02936 if (curState == State_GotTwo) 02937 emitError(XMLErrs::BadSequenceInCharData); 02938 curState = State_Waiting; 02939 } 02940 else 02941 { 02942 curState = State_Waiting; 02943 } 02944 } 02945 else 02946 { 02947 curState = State_Waiting; 02948 } 02949 02950 // Add this char to the buffer 02951 toUse.append(nextCh); 02952 02953 if (secondCh) 02954 { 02955 toUse.append(secondCh); 02956 secondCh=0; 02957 } 02958 } 02959 } 02960 catch(const EndOfEntityException& toCatch) 02961 { 02962 // Some entity ended, so we have to send any accumulated 02963 
// chars and send an end of entity event. 02964 sendCharData(toUse); 02965 gotLeadingSurrogate = false; 02966 02967 if (fDocHandler) 02968 fDocHandler->endEntityReference(toCatch.getEntity()); 02969 } 02970 } 02971 02972 // Check the validity constraints as per XML 1.0 Section 2.9 02973 if (fValidate && fStandalone) 02974 { 02975 // See if the text contains whitespace 02976 // Get the raw data we need for the callback 02977 const XMLCh* rawBuf = toUse.getRawBuffer(); 02978 const XMLSize_t len = toUse.getLen(); 02979 const bool isSpaces = fReaderMgr.getCurrentReader()->containsWhiteSpace(rawBuf, len); 02980 02981 if (isSpaces) 02982 { 02983 // And see if the current element is a 'Children' style content model 02984 const ElemStack::StackElem* topElem = fElemStack.topElement(); 02985 02986 if (topElem->fThisElement->isExternal()) { 02987 02988 // Get the character data opts for the current element 02989 XMLElementDecl::CharDataOpts charOpts = XMLElementDecl::AllCharData; 02990 if(fGrammar->getGrammarType() == Grammar::SchemaGrammarType) 02991 { 02992 // And see if the current element is a 'Children' style content model 02993 ComplexTypeInfo *currType = ((SchemaValidator*)fValidator)->getCurrentTypeInfo(); 02994 if(currType) 02995 { 02996 SchemaElementDecl::ModelTypes modelType = (SchemaElementDecl::ModelTypes) currType->getContentType(); 02997 if(modelType == SchemaElementDecl::Children || 02998 modelType == SchemaElementDecl::ElementOnlyEmpty) 02999 charOpts = XMLElementDecl::SpacesOk; 03000 else if(modelType == SchemaElementDecl::Empty) 03001 charOpts = XMLElementDecl::NoCharData; 03002 } 03003 } else // DTD grammar 03004 charOpts = topElem->fThisElement->getCharDataOpts(); 03005 03006 if (charOpts == XMLElementDecl::SpacesOk) // => Element Content 03007 { 03008 // Error - standalone should have a value of "no" as whitespace detected in an 03009 // element type with element content whose element declaration was external 03010 // 03011 
fValidator->emitError(XMLValid::NoWSForStandalone); 03012 if(fGrammarType == Grammar::SchemaGrammarType) 03013 { 03014 if (getPSVIHandler()) 03015 { 03016 // REVISIT: 03017 // PSVIElement->setValidity(PSVIItem::VALIDITY_INVALID); 03018 } 03019 } 03020 } 03021 } 03022 } 03023 } 03024 // Send any char data that we accumulated into the buffer 03025 sendCharData(toUse); 03026 } 03027 03028 03029 // This method will scan a general/character entity ref. It will either 03030 // expand a char ref and return it directly, or push a reader for a general 03031 // entity. 03032 // 03033 // The return value indicates whether the char parameters hold the value 03034 // or whether the value was pushed as a reader, or that it failed. 03035 // 03036 // The escaped flag tells the caller whether the returned parameter resulted 03037 // from a character reference, which escapes the character in some cases. It 03038 // only makes any difference if the return value indicates the value was 03039 // returned directly. 03040 IGXMLScanner::EntityExpRes 03041 IGXMLScanner::scanEntityRef( const bool inAttVal 03042 , XMLCh& firstCh 03043 , XMLCh& secondCh 03044 , bool& escaped) 03045 { 03046 // Assume no escape 03047 secondCh = 0; 03048 escaped = false; 03049 03050 // We have to insure that its all in one entity 03051 const XMLSize_t curReader = fReaderMgr.getCurrentReaderNum(); 03052 03053 // If the next char is a pound, then its a character reference and we 03054 // need to expand it always. 03055 if (fReaderMgr.skippedChar(chPound)) 03056 { 03057 // Its a character reference, so scan it and get back the numeric 03058 // value it represents. 
03059 if (!scanCharRef(firstCh, secondCh)) 03060 return EntityExp_Failed; 03061 03062 escaped = true; 03063 03064 if (curReader != fReaderMgr.getCurrentReaderNum()) 03065 emitError(XMLErrs::PartialMarkupInEntity); 03066 03067 return EntityExp_Returned; 03068 } 03069 03070 // Expand it since its a normal entity ref 03071 XMLBufBid bbName(&fBufMgr); 03072 int colonPosition; 03073 bool validName = fDoNamespaces ? fReaderMgr.getQName(bbName.getBuffer(), &colonPosition) : 03074 fReaderMgr.getName(bbName.getBuffer()); 03075 if (!validName) 03076 { 03077 if (bbName.isEmpty()) 03078 emitError(XMLErrs::ExpectedEntityRefName); 03079 else 03080 emitError(XMLErrs::InvalidEntityRefName, bbName.getRawBuffer()); 03081 return EntityExp_Failed; 03082 } 03083 03084 // Next char must be a semi-colon. But if its not, just emit 03085 // an error and try to continue. 03086 if (!fReaderMgr.skippedChar(chSemiColon)) 03087 emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer()); 03088 03089 // Make sure we ended up on the same entity reader as the & char 03090 if (curReader != fReaderMgr.getCurrentReaderNum()) 03091 emitError(XMLErrs::PartialMarkupInEntity); 03092 03093 // Look up the name in the general entity pool 03094 XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer()); 03095 03096 // If it does not exist, then obviously an error 03097 if (!decl) 03098 { 03099 // XML 1.0 Section 4.1 03100 // Well-formedness Constraint for entity not found: 03101 // In a document without any DTD, a document with only an internal DTD subset which contains no parameter entity references, 03102 // or a document with "standalone='yes'", for an entity reference that does not occur within the external subset 03103 // or a parameter entity 03104 // 03105 // Else it's Validity Constraint 03106 if (fStandalone || fHasNoDTD) 03107 emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer()); 03108 else { 03109 if (fValidate) 03110 fValidator->emitError(XMLValid::VC_EntityNotFound, 
bbName.getRawBuffer()); 03111 } 03112 03113 return EntityExp_Failed; 03114 } 03115 03116 // XML 1.0 Section 4.1 03117 // If we are a standalone document, then it has to have been declared 03118 // in the internal subset. 03119 if (fStandalone && !decl->getDeclaredInIntSubset()) 03120 emitError(XMLErrs::IllegalRefInStandalone, bbName.getRawBuffer()); 03121 03122 if (decl->isExternal()) 03123 { 03124 // If its unparsed, then its not valid here 03125 if (decl->isUnparsed()) 03126 { 03127 emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer()); 03128 return EntityExp_Failed; 03129 } 03130 03131 // If we are in an attribute value, then not valid but keep going 03132 if (inAttVal) 03133 emitError(XMLErrs::NoExtRefsInAttValue); 03134 03135 // And now create a reader to read this entity 03136 InputSource* srcUsed; 03137 XMLReader* reader = fReaderMgr.createReader 03138 ( 03139 decl->getBaseURI() 03140 , decl->getSystemId() 03141 , decl->getPublicId() 03142 , false 03143 , XMLReader::RefFrom_NonLiteral 03144 , XMLReader::Type_General 03145 , XMLReader::Source_External 03146 , srcUsed 03147 , fCalculateSrcOfs 03148 , fLowWaterMark 03149 , fDisableDefaultEntityResolution 03150 ); 03151 03152 // Put a janitor on the source so it gets cleaned up on exit 03153 Janitor<InputSource> janSrc(srcUsed); 03154 03155 // If the creation failed, and its not because the source was empty, 03156 // then emit an error and return. 03157 if (!reader) 03158 ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : decl->getSystemId(), fMemoryManager); 03159 03160 // Push the reader. If its a recursive expansion, then emit an error 03161 // and return an failure. 
03162 if (!fReaderMgr.pushReader(reader, decl)) 03163 { 03164 emitError(XMLErrs::RecursiveEntity, decl->getName()); 03165 return EntityExp_Failed; 03166 } 03167 03168 // here's where we need to check if there's a SecurityManager, 03169 // how many entity references we've had 03170 if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) { 03171 XMLCh expLimStr[32]; 03172 XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager); 03173 emitError 03174 ( 03175 XMLErrs::EntityExpansionLimitExceeded 03176 , expLimStr 03177 ); 03178 // there seems nothing better to be done than to reset the entity expansion counter 03179 fEntityExpansionCount = 0; 03180 } 03181 03182 // Do a start entity reference event. 03183 // 03184 // <TBD> For now, we supress them in att values. Later, when 03185 // the stuff is in place to correctly allow DOM to handle them 03186 // we'll turn this back on. 03187 if (fDocHandler && !inAttVal) 03188 fDocHandler->startEntityReference(*decl); 03189 03190 // If it starts with the XML string, then parse a text decl 03191 if (checkXMLDecl(true)) 03192 scanXMLDecl(Decl_Text); 03193 } 03194 else 03195 { 03196 // If its one of the special char references, then we can return 03197 // it as a character, and its considered escaped. 03198 if (decl->getIsSpecialChar()) 03199 { 03200 firstCh = decl->getValue()[0]; 03201 escaped = true; 03202 return EntityExp_Returned; 03203 } 03204 03205 // Create a reader over a memory stream over the entity value 03206 // We force it to assume UTF-16 by passing in an encoding 03207 // string. This way it won't both trying to predecode the 03208 // first line, looking for an XML/TextDecl. 
03209 XMLReader* valueReader = fReaderMgr.createIntEntReader 03210 ( 03211 decl->getName() 03212 , XMLReader::RefFrom_NonLiteral 03213 , XMLReader::Type_General 03214 , decl->getValue() 03215 , decl->getValueLen() 03216 , false 03217 ); 03218 03219 // Try to push the entity reader onto the reader manager stack, 03220 // where it will become the subsequent input. If it fails, that 03221 // means the entity is recursive, so issue an error. The reader 03222 // will have just been discarded, but we just keep going. 03223 if (!fReaderMgr.pushReader(valueReader, decl)) 03224 emitError(XMLErrs::RecursiveEntity, decl->getName()); 03225 03226 // here's where we need to check if there's a SecurityManager, 03227 // how many entity references we've had 03228 if(fSecurityManager != 0 && ++fEntityExpansionCount > fEntityExpansionLimit) { 03229 XMLCh expLimStr[32]; 03230 XMLString::sizeToText(fEntityExpansionLimit, expLimStr, 31, 10, fMemoryManager); 03231 emitError 03232 ( 03233 XMLErrs::EntityExpansionLimitExceeded 03234 , expLimStr 03235 ); 03236 } 03237 03238 // Do a start entity reference event. 03239 // 03240 // <TBD> For now, we supress them in att values. Later, when 03241 // the stuff is in place to correctly allow DOM to handle them 03242 // we'll turn this back on. 03243 if (fDocHandler && !inAttVal) 03244 fDocHandler->startEntityReference(*decl); 03245 03246 // If it starts with the XML string, then it's an error 03247 if (checkXMLDecl(true)) { 03248 emitError(XMLErrs::TextDeclNotLegalHere); 03249 fReaderMgr.skipPastChar(chCloseAngle); 03250 } 03251 } 03252 return EntityExp_Pushed; 03253 } 03254 03255 03256 bool IGXMLScanner::switchGrammar(const XMLCh* const newGrammarNameSpace) 03257 { 03258 Grammar* tempGrammar = fGrammarResolver->getGrammar(newGrammarNameSpace); 03259 03260 if (!tempGrammar && !fSkipDTDValidation) { 03261 // This is a case where namespaces is on with a DTD grammar. 
03262 tempGrammar = fDTDGrammar; 03263 } 03264 if (!tempGrammar) { 03265 return false; 03266 } 03267 else { 03268 03269 Grammar::GrammarType tempGrammarType = tempGrammar->getGrammarType(); 03270 if (tempGrammarType == Grammar::SchemaGrammarType && !fValidator->handlesSchema()) { 03271 if (fValidatorFromUser) 03272 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoSchemaValidator, fMemoryManager); 03273 else { 03274 fValidator = fSchemaValidator; 03275 } 03276 } 03277 else if (tempGrammarType == Grammar::DTDGrammarType) { 03278 if (fSkipDTDValidation) { 03279 return false; 03280 } 03281 03282 if (!fValidator->handlesDTD()) { 03283 if (fValidatorFromUser) 03284 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Gen_NoDTDValidator, fMemoryManager); 03285 else { 03286 fValidator = fDTDValidator; 03287 } 03288 } 03289 } 03290 03291 fGrammarType = tempGrammarType; 03292 fGrammar = tempGrammar; 03293 fValidator->setGrammar(fGrammar); 03294 return true; 03295 } 03296 } 03297 03298 // check if we should skip or lax the validation of the element 03299 // if skip - no validation 03300 // if lax - validate only if the element if found 03301 bool IGXMLScanner::laxElementValidation(QName* element, ContentLeafNameTypeVector* cv, 03302 const XMLContentModel* const cm, 03303 const XMLSize_t parentElemDepth) 03304 { 03305 bool skipThisOne = false; 03306 bool laxThisOne = false; 03307 unsigned int elementURI = element->getURI(); 03308 unsigned int currState = fElemState[parentElemDepth]; 03309 unsigned int currLoop = fElemLoopState[parentElemDepth]; 03310 03311 if (currState == XMLContentModel::gInvalidTrans) { 03312 return laxThisOne; 03313 } 03314 03315 SubstitutionGroupComparator comparator(fGrammarResolver, fURIStringPool); 03316 03317 if (cv) { 03318 XMLSize_t i = 0; 03319 XMLSize_t leafCount = cv->getLeafCount(); 03320 unsigned int nextState = 0; 03321 03322 for (; i < leafCount; i++) { 03323 03324 QName* fElemMap = cv->getLeafNameAt(i); 03325 unsigned int uri = 
fElemMap->getURI(); 03326 ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i); 03327 03328 if (type == ContentSpecNode::Leaf) { 03329 if (((uri == elementURI) 03330 && XMLString::equals(fElemMap->getLocalPart(), element->getLocalPart())) 03331 || comparator.isEquivalentTo(element, fElemMap)) { 03332 03333 nextState = cm->getNextState(currState, i); 03334 03335 if (nextState != XMLContentModel::gInvalidTrans) 03336 break; 03337 } 03338 } else if ((type & 0x0f) == ContentSpecNode::Any) { 03339 nextState = cm->getNextState(currState, i); 03340 if (nextState != XMLContentModel::gInvalidTrans) 03341 break; 03342 } 03343 else if ((type & 0x0f) == ContentSpecNode::Any_Other) { 03344 if (uri != elementURI && elementURI != fEmptyNamespaceId) { 03345 nextState = cm->getNextState(currState, i); 03346 if (nextState != XMLContentModel::gInvalidTrans) 03347 break; 03348 } 03349 } 03350 else if ((type & 0x0f) == ContentSpecNode::Any_NS) { 03351 if (uri == elementURI) { 03352 nextState = cm->getNextState(currState, i); 03353 if (nextState != XMLContentModel::gInvalidTrans) 03354 break; 03355 } 03356 } 03357 03358 } // for 03359 03360 if (i == leafCount) { // no match 03361 fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans; 03362 fElemLoopState[parentElemDepth] = 0; 03363 return laxThisOne; 03364 } 03365 03366 unsigned int nextLoop = 0; 03367 if(!cm->handleRepetitions(element, currState, currLoop, nextState, nextLoop, i, &comparator)) { 03368 fElemState[parentElemDepth] = XMLContentModel::gInvalidTrans; 03369 fElemLoopState[parentElemDepth] = 0; 03370 return laxThisOne; 03371 } 03372 03373 ContentSpecNode::NodeTypes type = cv->getLeafTypeAt(i); 03374 if ((type & 0x0f) == ContentSpecNode::Any || 03375 (type & 0x0f) == ContentSpecNode::Any_Other || 03376 (type & 0x0f) == ContentSpecNode::Any_NS) 03377 { 03378 if (type == ContentSpecNode::Any_Skip || 03379 type == ContentSpecNode::Any_NS_Skip || 03380 type == ContentSpecNode::Any_Other_Skip) { 03381 skipThisOne = true; 
03382 } 03383 else if (type == ContentSpecNode::Any_Lax || 03384 type == ContentSpecNode::Any_NS_Lax || 03385 type == ContentSpecNode::Any_Other_Lax) { 03386 laxThisOne = true; 03387 } 03388 } 03389 fElemState[parentElemDepth] = nextState; 03390 fElemLoopState[parentElemDepth] = nextLoop; 03391 } // if 03392 03393 if (skipThisOne) { 03394 fValidate = false; 03395 fElemStack.setValidationFlag(fValidate); 03396 } 03397 03398 return laxThisOne; 03399 } 03400 03401 03402 // check if there is an AnyAttribute, and if so, see if we should lax or skip 03403 // if skip - no validation 03404 // if lax - validate only if the attribute if found 03405 bool IGXMLScanner::anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne) 03406 { 03407 XMLAttDef::AttTypes wildCardType = attWildCard->getType(); 03408 bool anyEncountered = false; 03409 skipThisOne = false; 03410 laxThisOne = false; 03411 if (wildCardType == XMLAttDef::Any_Any) 03412 anyEncountered = true; 03413 else if (wildCardType == XMLAttDef::Any_Other) { 03414 if (attWildCard->getAttName()->getURI() != uriId 03415 && uriId != fEmptyNamespaceId) 03416 anyEncountered = true; 03417 } 03418 else if (wildCardType == XMLAttDef::Any_List) { 03419 ValueVectorOf<unsigned int>* nameURIList = attWildCard->getNamespaceList(); 03420 XMLSize_t listSize = (nameURIList) ? 
nameURIList->size() : 0; 03421 03422 if (listSize) { 03423 for (XMLSize_t i=0; i < listSize; i++) { 03424 if (nameURIList->elementAt(i) == uriId) 03425 anyEncountered = true; 03426 } 03427 } 03428 } 03429 03430 if (anyEncountered) { 03431 XMLAttDef::DefAttTypes defType = attWildCard->getDefaultType(); 03432 if (defType == XMLAttDef::ProcessContents_Skip) { 03433 // attribute should just be bypassed, 03434 skipThisOne = true; 03435 } 03436 else if (defType == XMLAttDef::ProcessContents_Lax) { 03437 laxThisOne = true; 03438 } 03439 } 03440 03441 return anyEncountered; 03442 } 03443 03444 inline XMLAttDefList& getAttDefList(bool isSchemaGrammar 03445 , ComplexTypeInfo* currType 03446 , XMLElementDecl* elemDecl) 03447 { 03448 if (isSchemaGrammar && currType) 03449 return currType->getAttDefList(); 03450 else 03451 return elemDecl->getAttDefList(); 03452 } 03453 03454 XERCES_CPP_NAMESPACE_END