GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: DTDValidator.cpp 729944 2008-12-29 17:03:32Z amassari $ 00020 */ 00021 00022 00023 // --------------------------------------------------------------------------- 00024 // Includes 00025 // --------------------------------------------------------------------------- 00026 #include <xercesc/util/Janitor.hpp> 00027 #include <xercesc/util/XMLUniDefs.hpp> 00028 #include <xercesc/util/XMLUni.hpp> 00029 #include <xercesc/internal/ReaderMgr.hpp> 00030 #include <xercesc/internal/XMLScanner.hpp> 00031 #include <xercesc/validators/DTD/DTDValidator.hpp> 00032 00033 XERCES_CPP_NAMESPACE_BEGIN 00034 00035 // --------------------------------------------------------------------------- 00036 // DTDValidator: Constructors and Destructor 00037 // --------------------------------------------------------------------------- 00038 DTDValidator::DTDValidator(XMLErrorReporter* const errReporter) : 00039 00040 XMLValidator(errReporter) 00041 , fDTDGrammar(0) 00042 { 00043 reset(); 00044 } 00045 00046 DTDValidator::~DTDValidator() 00047 { 00048 } 00049 00050 00051 // --------------------------------------------------------------------------- 00052 // DTDValidator: Implementation of the XMLValidator interface 00053 // --------------------------------------------------------------------------- 00054 bool DTDValidator::checkContent(XMLElementDecl* const elemDecl 00055 , QName** const children 00056 , XMLSize_t childCount 00057 , XMLSize_t* indexFailingChild) 00058 { 00059 // 00060 // Look up the element id in our element decl pool. This will get us 00061 // the element decl in our own way of looking at them. 00062 // 00063 if (!elemDecl) 00064 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_InvalidElemId, getScanner()->getMemoryManager()); 00065 00066 // 00067 // Get the content spec type of this element. This will tell us what 00068 // to do to validate it. 00069 // 00070 const DTDElementDecl::ModelTypes modelType = ((DTDElementDecl*) elemDecl)->getModelType(); 00071 00072 if (modelType == DTDElementDecl::Empty) 00073 { 00074 // 00075 // We can do this one here. It cannot have any children. If it does 00076 // we return 0 as the index of the first bad child. 00077 // 00078 if (childCount) 00079 { 00080 *indexFailingChild=0; 00081 return false; 00082 } 00083 } 00084 else if (modelType == DTDElementDecl::Any) 00085 { 00086 // We pass no judgement on this one, anything goes 00087 } 00088 else if ((modelType == DTDElementDecl::Mixed_Simple) 00089 || (modelType == DTDElementDecl::Children)) 00090 { 00091 // Get the element's content model or fault it in 00092 const XMLContentModel* elemCM = elemDecl->getContentModel(); 00093 00094 // Ask it to validate and return its return 00095 return elemCM->validateContent(children, childCount, getScanner()->getEmptyNamespaceId(), indexFailingChild, getScanner()->getMemoryManager()); 00096 } 00097 else 00098 { 00099 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMType, getScanner()->getMemoryManager()); 00100 } 00101 00102 // Went ok, so return success 00103 return true; 00104 } 00105 00106 00107 void DTDValidator::faultInAttr(XMLAttr& toFill, const XMLAttDef& attDef) const 00108 { 00109 toFill.set(0, attDef.getFullName(), attDef.getValue(), attDef.getType()); 00110 } 00111 00112 void DTDValidator::reset() 00113 { 00114 } 00115 00116 00117 bool DTDValidator::requiresNamespaces() const 00118 { 00119 // Namespaces are not supported for DTDs 00120 return false; 00121 } 00122 00123 00124 void 00125 DTDValidator::validateAttrValue(const XMLAttDef* attDef 00126 , const XMLCh* const attrValue 00127 , bool preValidation 00128 , const XMLElementDecl*) 00129 { 00130 // 00131 // Get quick refs to lost of of the stuff in the passed objects in 00132 // order to simplify the code below, which will reference them very 00133 // often. 00134 // 00135 const XMLAttDef::AttTypes type = attDef->getType(); 00136 const XMLAttDef::DefAttTypes defType = attDef->getDefaultType(); 00137 const XMLCh* const valueText = attDef->getValue(); 00138 const XMLCh* const fullName = attDef->getFullName(); 00139 const XMLCh* const enumList = attDef->getEnumeration(); 00140 00141 // 00142 // If the default type is fixed, then make sure the passed value maps 00143 // to the fixed value. 00144 // If during preContentValidation, the value we are validating is the fixed value itself 00145 // so no need to compare. 00146 // Only need to do this for regular attribute value validation 00147 // 00148 if (defType == XMLAttDef::Fixed && !preValidation) 00149 { 00150 if (!XMLString::equals(attrValue, valueText)) 00151 emitError(XMLValid::NotSameAsFixedValue, fullName, attrValue, valueText); 00152 } 00153 00154 // 00155 // If its a CDATA attribute, then we are done with any DTD level 00156 // validation else do the rest. 00157 // 00158 if (type == XMLAttDef::CData) 00159 return; 00160 00161 00162 00163 // An empty string cannot be valid for any of the other types 00164 if (!attrValue[0]) 00165 { 00166 emitError(XMLValid::InvalidEmptyAttValue, fullName); 00167 return; 00168 } 00169 00170 // See whether we are doing multiple values or not 00171 const bool multipleValues = 00172 ( 00173 (type == XMLAttDef::IDRefs) 00174 || (type == XMLAttDef::Entities) 00175 || (type == XMLAttDef::NmTokens) 00176 || (type == XMLAttDef::Notation) 00177 || (type == XMLAttDef::Enumeration) 00178 ); 00179 00180 // And whether we must check for a first name char 00181 const bool firstNameChar = 00182 ( 00183 (type == XMLAttDef::ID) 00184 || (type == XMLAttDef::IDRef) 00185 || (type == XMLAttDef::IDRefs) 00186 || (type == XMLAttDef::Entity) 00187 || (type == XMLAttDef::Entities) 00188 || (type == XMLAttDef::Notation) 00189 ); 00190 00191 // Whether it requires ref checking stuff 00192 const bool isARefType 00193 ( 00194 (type == XMLAttDef::ID) 00195 || (type == XMLAttDef::IDRef) 00196 || (type == XMLAttDef::IDRefs) 00197 ); 00198 00199 // Some trigger flags to avoid issuing redundant errors and whatnot 00200 bool alreadyCapped = false; 00201 00202 // 00203 // Make a copy of the text that we can mangle and get a pointer we can 00204 // move through the value 00205 // 00206 00207 // Use a stack-based buffer, when possible... 00208 XMLCh tempBuffer[100]; 00209 00210 XMLCh* pszTmpVal = 0; 00211 00212 ArrayJanitor<XMLCh> janTmpVal(0); 00213 00214 if (XMLString::stringLen(attrValue) < sizeof(tempBuffer) / sizeof(tempBuffer[0])) 00215 { 00216 XMLString::copyString(tempBuffer, attrValue); 00217 pszTmpVal = tempBuffer; 00218 } 00219 else 00220 { 00221 janTmpVal.reset(XMLString::replicate(attrValue, getScanner()->getMemoryManager()), getScanner()->getMemoryManager()); 00222 pszTmpVal = janTmpVal.get(); 00223 } 00224 00225 XMLCh* valPtr = pszTmpVal; 00226 00227 bool doNamespace = getScanner()->getDoNamespaces(); 00228 00229 while (true) 00230 { 00231 // 00232 // Make sure the first character is a valid first name char, i.e. 00233 // if its a Name value. For NmToken values we don't treat the first 00234 // char any differently. 00235 // 00236 if (firstNameChar) 00237 { 00238 // If its not, emit and error but try to keep going 00239 if (!getReaderMgr()->getCurrentReader()->isFirstNameChar(*valPtr)) 00240 emitError(XMLValid::AttrValNotName, valPtr, fullName); 00241 valPtr++; 00242 } 00243 00244 // Make sure all the remaining chars are valid name chars 00245 while (*valPtr) 00246 { 00247 // 00248 // If we hit a whitespace, its either a break between two 00249 // or more values, or an error if we have a single value. 00250 // 00251 // 00252 // XML1.0-3rd 00253 // 00254 // [6] Names ::= Name (#x20 Name)* 00255 // [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* 00256 // 00257 // only and only ONE #x20 is allowed to be the delimiter 00258 // 00259 if (*valPtr==chSpace) 00260 { 00261 if (!multipleValues) 00262 { 00263 emitError(XMLValid::NoMultipleValues, fullName); 00264 return; 00265 } 00266 00267 break; 00268 } 00269 00270 // Now this attribute can be of type 00271 // ID, IDREF, IDREFS, ENTITY, ENTITIES, NOTATION, NMTOKEN, NMTOKENS, ENUMERATION 00272 // All these must be valid XMLName 00273 // If namespace is enabled, colon is not allowed in the first 6 00274 00275 if (doNamespace && *valPtr == chColon && firstNameChar) 00276 emitError(XMLValid::ColonNotValidWithNS); 00277 00278 if (!getReaderMgr()->getCurrentReader()->isNameChar(*valPtr)) 00279 { 00280 emitError(XMLValid::AttrValNotName, valPtr, fullName); 00281 return; 00282 } 00283 valPtr++; 00284 } 00285 00286 // 00287 // Cap it off at the current non-name char. If already capped, 00288 // then remember this. 00289 // 00290 if (!(*valPtr)) 00291 alreadyCapped = true; 00292 *valPtr = 0; 00293 00294 // 00295 // If this type of attribute requires that we track reference 00296 // stuff, then handle that. 00297 // 00298 if (isARefType) 00299 { 00300 if ((type == XMLAttDef::ID) 00301 || (type == XMLAttDef::IDRef) 00302 || (type == XMLAttDef::IDRefs)) 00303 { 00304 XMLRefInfo* find = getScanner()->getIDRefList()->get(pszTmpVal); 00305 if (find) 00306 { 00307 if (find->getDeclared() && (type == XMLAttDef::ID)) 00308 emitError(XMLValid::ReusedIDValue, pszTmpVal); 00309 } 00310 else 00311 { 00312 find = new (getScanner()->getMemoryManager()) XMLRefInfo 00313 ( 00314 pszTmpVal 00315 , false 00316 , false 00317 , getScanner()->getMemoryManager() 00318 ); 00319 getScanner()->getIDRefList()->put((void*)find->getRefName(), find); 00320 } 00321 00322 // 00323 // Mark it declared or used, which might be redundant in some cases 00324 // but not worth checking 00325 // 00326 if (type == XMLAttDef::ID) 00327 find->setDeclared(true); 00328 else { 00329 if (!preValidation) { 00330 find->setUsed(true); 00331 } 00332 } 00333 } 00334 } 00335 else if (!preValidation && ((type == XMLAttDef::Entity) || (type == XMLAttDef::Entities))) 00336 { 00337 // 00338 // If its refering to a entity, then look up the name in the 00339 // general entity pool. If not there, then its an error. If its 00340 // not an external unparsed entity, then its an error. 00341 // 00342 // In case of pre-validation, the above errors should be ignored. 00343 // 00344 const XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(pszTmpVal); 00345 if (decl) 00346 { 00347 if (!decl->isUnparsed()) 00348 emitError(XMLValid::BadEntityRefAttr, pszTmpVal, fullName); 00349 } 00350 else 00351 { 00352 emitError 00353 ( 00354 XMLValid::UnknownEntityRefAttr 00355 , fullName 00356 , pszTmpVal 00357 ); 00358 } 00359 } 00360 else if ((type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration)) 00361 { 00362 // 00363 // Make sure that this value maps to one of the enumeration or 00364 // notation values in the enumList parameter. We don't have to 00365 // look it up in the notation pool (if a notation) because we 00366 // will look up the enumerated values themselves. If they are in 00367 // the notation pool (after the DTD is parsed), then obviously 00368 // this value will be legal since it matches one of them. 00369 // 00370 if (!XMLString::isInList(pszTmpVal, enumList)) 00371 emitError(XMLValid::DoesNotMatchEnumList, pszTmpVal, fullName); 00372 } 00373 00374 // If not doing multiple values, then we are done 00375 if (!multipleValues) 00376 break; 00377 00378 // 00379 // If we are at the end, then break out now, else move up to the 00380 // next char and update the base pointer. 00381 // 00382 if (alreadyCapped) 00383 break; 00384 00385 valPtr++; 00386 pszTmpVal = valPtr; 00387 } 00388 00389 } 00390 00391 void DTDValidator::preContentValidation(bool 00392 #if defined(XERCES_DEBUG) 00393 reuseGrammar 00394 #endif 00395 ,bool validateDefAttr) 00396 { 00397 // 00398 // Lets enumerate all of the elements in the element decl pool 00399 // and put out an error for any that did not get declared. 00400 // We also check all of the attributes as well. 00401 // 00402 NameIdPoolEnumerator<DTDElementDecl> elemEnum = fDTDGrammar->getElemEnumerator(); 00403 fDTDGrammar->setValidated(true); 00404 while (elemEnum.hasMoreElements()) 00405 { 00406 const DTDElementDecl& curElem = elemEnum.nextElement(); 00407 const DTDElementDecl::CreateReasons reason = curElem.getCreateReason(); 00408 00409 // 00410 // See if this element decl was ever marked as declared. If 00411 // not, then put out an error. In some cases its just 00412 // a warning, such as being referenced in a content model. 00413 // 00414 if (reason != XMLElementDecl::Declared) 00415 { 00416 if (reason == XMLElementDecl::AttList) 00417 { 00418 getScanner()->emitError 00419 ( 00420 XMLErrs::UndeclaredElemInAttList 00421 , curElem.getFullName() 00422 ); 00423 } 00424 else if (reason == XMLElementDecl::AsRootElem) 00425 { 00426 // It's ok that the root element is not declared in the DTD 00427 /* 00428 emitError 00429 ( 00430 XMLValid::UndeclaredElemInDocType 00431 , curElem.getFullName() 00432 );*/ 00433 } 00434 else if (reason == XMLElementDecl::InContentModel) 00435 { 00436 getScanner()->emitError 00437 ( 00438 XMLErrs::UndeclaredElemInCM 00439 , curElem.getFullName() 00440 ); 00441 } 00442 else 00443 { 00444 #if defined(XERCES_DEBUG) 00445 if(reuseGrammar && reason == XMLElementDecl::JustFaultIn){ 00446 } 00447 else 00448 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::DTD_UnknownCreateReason, getScanner()->getMemoryManager()); 00449 #endif 00450 } 00451 } 00452 00453 // 00454 // Check all of the attributes of the current element. 00455 // We check for: 00456 // 00457 // 1) Multiple ID attributes 00458 // 2) That all of the default values of attributes are 00459 // valid for their type. 00460 // 3) That for any notation types, that their lists 00461 // of possible values refer to declared notations. 00462 // 00463 // 4) XML1.0(3rd edition) 00464 // 00465 // Validity constraint: One Notation Per Element Type 00466 // An element type MUST NOT have more than one NOTATION attribute specified. 00467 // 00468 // Validity constraint: No Notation on Empty Element 00469 // For compatibility, an attribute of type NOTATION MUST NOT be declared on an element declared EMPTY. 00470 // 00471 // Validity constraint: No Duplicate Tokens 00472 // The notation names in a single NotationType attribute declaration, as well as 00473 // the NmTokens in a single Enumeration attribute declaration, MUST all be distinct. 00474 // 00475 00476 XMLAttDefList& attDefList = curElem.getAttDefList(); 00477 bool seenId = false; 00478 bool seenNOTATION = false; 00479 bool elemEmpty = (curElem.getModelType() == DTDElementDecl::Empty); 00480 00481 for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++) 00482 { 00483 const XMLAttDef& curAttDef = attDefList.getAttDef(i); 00484 00485 if (curAttDef.getType() == XMLAttDef::ID) 00486 { 00487 if (seenId) 00488 { 00489 emitError 00490 ( 00491 XMLValid::MultipleIdAttrs 00492 , curElem.getFullName() 00493 ); 00494 break; 00495 } 00496 00497 seenId = true; 00498 } 00499 else if (curAttDef.getType() == XMLAttDef::Notation) 00500 { 00501 if (seenNOTATION) 00502 { 00503 emitError 00504 ( 00505 XMLValid::ElemOneNotationAttr 00506 , curElem.getFullName() 00507 ); 00508 00509 break; 00510 } 00511 00512 seenNOTATION = true; 00513 00514 // no notation attribute on empty element 00515 if (elemEmpty) 00516 { 00517 emitError 00518 ( 00519 XMLValid::EmptyElemNotationAttr 00520 , curElem.getFullName() 00521 , curAttDef.getFullName() 00522 ); 00523 00524 break; 00525 } 00526 00527 //go through enumeration list to check 00528 // distinct 00529 // notation declaration 00530 if (curAttDef.getEnumeration()) 00531 { 00532 checkTokenList(curAttDef, true); 00533 } 00534 } 00535 else if (curAttDef.getType() == XMLAttDef::Enumeration ) 00536 { 00537 //go through enumeration list to check 00538 // distinct only 00539 if (curAttDef.getEnumeration()) 00540 { 00541 checkTokenList(curAttDef, false); 00542 } 00543 } 00544 00545 // If it has a default/fixed value, then validate it 00546 if (validateDefAttr && curAttDef.getValue()) 00547 { 00548 validateAttrValue 00549 ( 00550 &curAttDef 00551 , curAttDef.getValue() 00552 , true 00553 , &curElem 00554 ); 00555 } 00556 } 00557 } 00558 00559 // 00560 // And enumerate all of the general entities. If any of them 00561 // reference a notation, then make sure the notation exists. 00562 // 00563 NameIdPoolEnumerator<DTDEntityDecl> entEnum = fDTDGrammar->getEntityEnumerator(); 00564 while (entEnum.hasMoreElements()) 00565 { 00566 const DTDEntityDecl& curEntity = entEnum.nextElement(); 00567 00568 if (!curEntity.getNotationName()) 00569 continue; 00570 00571 // It has a notation name, so look it up 00572 if (!fDTDGrammar->getNotationDecl(curEntity.getNotationName())) 00573 { 00574 emitError 00575 ( 00576 XMLValid::NotationNotDeclared 00577 , curEntity.getNotationName() 00578 ); 00579 } 00580 } 00581 } 00582 00583 void DTDValidator::postParseValidation() 00584 { 00585 // 00586 // At this time, there is nothing to do here. The scanner itself handles 00587 // ID/IDREF validation, since that is the same no matter what kind of 00588 // validator. 00589 // 00590 } 00591 00592 // 00593 // We need to verify that all of its possible values 00594 // (in the enum list) 00595 // is distinct and 00596 // refer to valid notations if toValidateNotation is set on 00597 // 00598 void DTDValidator::checkTokenList(const XMLAttDef& curAttDef 00599 , bool toValidateNotation) 00600 { 00601 00602 XMLCh* list = XMLString::replicate(curAttDef.getEnumeration(), getScanner()->getMemoryManager()); 00603 ArrayJanitor<XMLCh> janList(list, getScanner()->getMemoryManager()); 00604 00605 // 00606 // Search forward for a space or a null. If a null, 00607 // we are done. If a space, cap it and look it up. 00608 // 00609 bool breakFlag = false; 00610 XMLCh* listPtr = list; 00611 XMLCh* lastPtr = listPtr; 00612 while (true) 00613 { 00614 while (*listPtr && (*listPtr != chSpace)) 00615 listPtr++; 00616 00617 // 00618 // If at the end, indicate we need to break after 00619 // this one. Else, cap it off here. 00620 // 00621 if (!*listPtr) 00622 breakFlag = true; 00623 else 00624 *listPtr++ = chNull; 00625 00626 //distinction check 00627 //there should be no same token found in the remaining list 00628 if (XMLString::isInList(lastPtr, listPtr)) 00629 { 00630 emitError 00631 ( 00632 XMLValid::AttrDupToken 00633 , curAttDef.getFullName() 00634 , lastPtr 00635 ); 00636 } 00637 00638 if (toValidateNotation && !fDTDGrammar->getNotationDecl(lastPtr)) 00639 { 00640 emitError 00641 ( 00642 XMLValid::UnknownNotRefAttr 00643 , curAttDef.getFullName() 00644 , lastPtr 00645 ); 00646 } 00647 00648 // Break out if we hit the end last time 00649 if (breakFlag) 00650 break; 00651 00652 // Else move upwards and try again 00653 lastPtr = listPtr; 00654 } 00655 } 00656 00657 XERCES_CPP_NAMESPACE_END