GME  13
DTDValidator.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: DTDValidator.cpp 729944 2008-12-29 17:03:32Z amassari $
00020  */
00021 
00022 
00023 // ---------------------------------------------------------------------------
00024 //  Includes
00025 // ---------------------------------------------------------------------------
00026 #include <xercesc/util/Janitor.hpp>
00027 #include <xercesc/util/XMLUniDefs.hpp>
00028 #include <xercesc/util/XMLUni.hpp>
00029 #include <xercesc/internal/ReaderMgr.hpp>
00030 #include <xercesc/internal/XMLScanner.hpp>
00031 #include <xercesc/validators/DTD/DTDValidator.hpp>
00032 
00033 XERCES_CPP_NAMESPACE_BEGIN
00034 
00035 // ---------------------------------------------------------------------------
00036 //  DTDValidator: Constructors and Destructor
00037 // ---------------------------------------------------------------------------
00038 DTDValidator::DTDValidator(XMLErrorReporter* const errReporter) :
00039 
00040     XMLValidator(errReporter)
00041     , fDTDGrammar(0)
00042 {
00043     reset();
00044 }
00045 
00046 DTDValidator::~DTDValidator()
00047 {
00048 }
00049 
00050 
00051 // ---------------------------------------------------------------------------
00052 //  DTDValidator: Implementation of the XMLValidator interface
00053 // ---------------------------------------------------------------------------
00054 bool DTDValidator::checkContent(XMLElementDecl* const elemDecl
00055                               , QName** const         children
00056                               , XMLSize_t             childCount
00057                               , XMLSize_t*         indexFailingChild)
00058 {
00059     //
00060     //  Look up the element id in our element decl pool. This will get us
00061     //  the element decl in our own way of looking at them.
00062     //
00063     if (!elemDecl)
00064         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_InvalidElemId, getScanner()->getMemoryManager());
00065 
00066     //
00067     //  Get the content spec type of this element. This will tell us what
00068     //  to do to validate it.
00069     //
00070     const DTDElementDecl::ModelTypes modelType = ((DTDElementDecl*) elemDecl)->getModelType();
00071 
00072     if (modelType == DTDElementDecl::Empty)
00073     {
00074         //
00075         //  We can do this one here. It cannot have any children. If it does
00076         //  we return 0 as the index of the first bad child.
00077         //
00078         if (childCount)
00079         {
00080             *indexFailingChild=0;
00081             return false;
00082         }
00083     }
00084      else if (modelType == DTDElementDecl::Any)
00085     {
00086         // We pass no judgement on this one, anything goes
00087     }
00088      else if ((modelType == DTDElementDecl::Mixed_Simple)
00089           ||  (modelType == DTDElementDecl::Children))
00090     {
00091         // Get the element's content model or fault it in
00092         const XMLContentModel* elemCM = elemDecl->getContentModel();
00093 
00094         // Ask it to validate and return its return
00095         return elemCM->validateContent(children, childCount, getScanner()->getEmptyNamespaceId(), indexFailingChild, getScanner()->getMemoryManager());
00096     }
00097      else
00098     {
00099         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMType, getScanner()->getMemoryManager());
00100     }
00101 
00102     // Went ok, so return success
00103     return true;
00104 }
00105 
00106 
00107 void DTDValidator::faultInAttr(XMLAttr& toFill, const XMLAttDef& attDef) const
00108 {
00109     toFill.set(0, attDef.getFullName(), attDef.getValue(), attDef.getType());
00110 }
00111 
00112 void DTDValidator::reset()
00113 {
00114 }
00115 
00116 
00117 bool DTDValidator::requiresNamespaces() const
00118 {
00119     // Namespaces are not supported for DTDs
00120     return false;
00121 }
00122 
00123 
00124 void
00125 DTDValidator::validateAttrValue(const   XMLAttDef*      attDef
00126                                 , const XMLCh* const    attrValue
00127                                 , bool                  preValidation
00128                                 , const XMLElementDecl*)
00129 {
00130     //
00131     //  Get quick refs to lost of of the stuff in the passed objects in
00132     //  order to simplify the code below, which will reference them very
00133     //  often.
00134     //
00135     const XMLAttDef::AttTypes       type = attDef->getType();
00136     const XMLAttDef::DefAttTypes    defType = attDef->getDefaultType();
00137     const XMLCh* const              valueText = attDef->getValue();
00138     const XMLCh* const              fullName = attDef->getFullName();
00139     const XMLCh* const              enumList = attDef->getEnumeration();
00140 
00141     //
00142     //  If the default type is fixed, then make sure the passed value maps
00143     //  to the fixed value.
00144     //  If during preContentValidation, the value we are validating is the fixed value itself
00145     //  so no need to compare.
00146     //  Only need to do this for regular attribute value validation
00147     //
00148     if (defType == XMLAttDef::Fixed && !preValidation)
00149     {
00150         if (!XMLString::equals(attrValue, valueText))
00151             emitError(XMLValid::NotSameAsFixedValue, fullName, attrValue, valueText);
00152     }
00153 
00154     //
00155     //  If its a CDATA attribute, then we are done with any DTD level
00156     //  validation else do the rest.
00157     //
00158     if (type == XMLAttDef::CData)
00159         return;
00160 
00161 
00162 
00163     // An empty string cannot be valid for any of the other types
00164     if (!attrValue[0])
00165     {
00166         emitError(XMLValid::InvalidEmptyAttValue, fullName);
00167         return;
00168     }
00169 
00170     // See whether we are doing multiple values or not
00171     const bool multipleValues =
00172     (
00173         (type == XMLAttDef::IDRefs)
00174         || (type == XMLAttDef::Entities)
00175         || (type == XMLAttDef::NmTokens)
00176         || (type == XMLAttDef::Notation)
00177         || (type == XMLAttDef::Enumeration)
00178     );
00179 
00180     // And whether we must check for a first name char
00181     const bool firstNameChar =
00182     (
00183         (type == XMLAttDef::ID)
00184         || (type == XMLAttDef::IDRef)
00185         || (type == XMLAttDef::IDRefs)
00186         || (type == XMLAttDef::Entity)
00187         || (type == XMLAttDef::Entities)
00188         || (type == XMLAttDef::Notation)
00189     );
00190 
00191     // Whether it requires ref checking stuff
00192     const bool isARefType
00193     (
00194         (type == XMLAttDef::ID)
00195         || (type == XMLAttDef::IDRef)
00196         || (type == XMLAttDef::IDRefs)
00197     );
00198 
00199     // Some trigger flags to avoid issuing redundant errors and whatnot    
00200     bool alreadyCapped = false;
00201 
00202     //
00203     //  Make a copy of the text that we can mangle and get a pointer we can
00204     //  move through the value
00205     //
00206 
00207     // Use a stack-based buffer, when possible...
00208     XMLCh   tempBuffer[100];
00209 
00210     XMLCh* pszTmpVal = 0;
00211 
00212     ArrayJanitor<XMLCh> janTmpVal(0);
00213 
00214     if (XMLString::stringLen(attrValue) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
00215     {
00216         XMLString::copyString(tempBuffer, attrValue);
00217         pszTmpVal = tempBuffer;
00218     }
00219     else
00220     {
00221         janTmpVal.reset(XMLString::replicate(attrValue, getScanner()->getMemoryManager()), getScanner()->getMemoryManager());
00222         pszTmpVal = janTmpVal.get();
00223     }
00224 
00225     XMLCh* valPtr = pszTmpVal;
00226 
00227     bool doNamespace = getScanner()->getDoNamespaces();
00228 
00229     while (true)
00230     {
00231         //
00232         //  Make sure the first character is a valid first name char, i.e.
00233         //  if its a Name value. For NmToken values we don't treat the first
00234         //  char any differently.
00235         //
00236         if (firstNameChar)
00237         {
00238             // If its not, emit and error but try to keep going
00239             if (!getReaderMgr()->getCurrentReader()->isFirstNameChar(*valPtr))
00240                 emitError(XMLValid::AttrValNotName, valPtr, fullName);
00241             valPtr++;
00242         }
00243 
00244         // Make sure all the remaining chars are valid name chars
00245         while (*valPtr)
00246         {
00247             //
00248             //  If we hit a whitespace, its either a break between two
00249             //  or more values, or an error if we have a single value.
00250             //
00251             //
00252             //   XML1.0-3rd
00253             //
00254             //   [6]   Names   ::=   Name (#x20 Name)*
00255             //   [8]   Nmtokens   ::=   Nmtoken (#x20 Nmtoken)*
00256             //
00257             //   only and only ONE #x20 is allowed to be the delimiter
00258             //
00259             if (*valPtr==chSpace)
00260             {
00261                 if (!multipleValues)
00262                 {
00263                     emitError(XMLValid::NoMultipleValues, fullName);
00264                     return;
00265                 }
00266 
00267                 break;
00268             }
00269 
00270             // Now this attribute can be of type
00271             //     ID, IDREF, IDREFS, ENTITY, ENTITIES, NOTATION, NMTOKEN, NMTOKENS, ENUMERATION
00272             //  All these must be valid XMLName
00273             // If namespace is enabled, colon is not allowed in the first 6
00274 
00275             if (doNamespace && *valPtr == chColon && firstNameChar)
00276                 emitError(XMLValid::ColonNotValidWithNS);
00277 
00278             if (!getReaderMgr()->getCurrentReader()->isNameChar(*valPtr))
00279             {
00280                 emitError(XMLValid::AttrValNotName, valPtr, fullName);
00281                 return;
00282             }
00283             valPtr++;
00284         }
00285 
00286         //
00287         //  Cap it off at the current non-name char. If already capped,
00288         //  then remember this.
00289         //
00290         if (!(*valPtr))
00291             alreadyCapped = true;
00292         *valPtr = 0;
00293 
00294         //
00295         //  If this type of attribute requires that we track reference
00296         //  stuff, then handle that.
00297         //
00298         if (isARefType)
00299         {
00300             if ((type == XMLAttDef::ID)
00301             ||  (type == XMLAttDef::IDRef)
00302             ||  (type == XMLAttDef::IDRefs))
00303             {
00304                 XMLRefInfo* find = getScanner()->getIDRefList()->get(pszTmpVal);
00305                 if (find)
00306                 {
00307                     if (find->getDeclared() && (type == XMLAttDef::ID))
00308                         emitError(XMLValid::ReusedIDValue, pszTmpVal);
00309                 }
00310                  else
00311                 {
00312                     find = new (getScanner()->getMemoryManager()) XMLRefInfo
00313                     (
00314                         pszTmpVal
00315                         , false
00316                         , false
00317                         , getScanner()->getMemoryManager()
00318                     );
00319                     getScanner()->getIDRefList()->put((void*)find->getRefName(), find);
00320                 }
00321 
00322                 //
00323                 //  Mark it declared or used, which might be redundant in some cases
00324                 //  but not worth checking
00325                 //
00326                 if (type == XMLAttDef::ID)
00327                     find->setDeclared(true);
00328                 else {
00329                     if (!preValidation) {
00330                         find->setUsed(true);
00331                     }
00332                 }
00333             }
00334         }
00335          else if (!preValidation && ((type == XMLAttDef::Entity) || (type == XMLAttDef::Entities)))
00336         {
00337             //
00338             //  If its refering to a entity, then look up the name in the
00339             //  general entity pool. If not there, then its an error. If its
00340             //  not an external unparsed entity, then its an error.
00341             //
00342             //  In case of pre-validation, the above errors should be ignored.
00343             //
00344             const XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(pszTmpVal);
00345             if (decl)
00346             {
00347                 if (!decl->isUnparsed())
00348                     emitError(XMLValid::BadEntityRefAttr, pszTmpVal, fullName);
00349             }
00350              else
00351             {
00352                 emitError
00353                 (
00354                     XMLValid::UnknownEntityRefAttr
00355                     , fullName
00356                     , pszTmpVal
00357                 );
00358             }
00359         }
00360          else if ((type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration))
00361         {
00362             //
00363             //  Make sure that this value maps to one of the enumeration or
00364             //  notation values in the enumList parameter. We don't have to
00365             //  look it up in the notation pool (if a notation) because we
00366             //  will look up the enumerated values themselves. If they are in
00367             //  the notation pool (after the DTD is parsed), then obviously
00368             //  this value will be legal since it matches one of them.
00369             //
00370             if (!XMLString::isInList(pszTmpVal, enumList))
00371                 emitError(XMLValid::DoesNotMatchEnumList, pszTmpVal, fullName);
00372         }
00373 
00374         // If not doing multiple values, then we are done
00375         if (!multipleValues)
00376             break;
00377 
00378         //
00379         //  If we are at the end, then break out now, else move up to the
00380         //  next char and update the base pointer.
00381         //
00382         if (alreadyCapped)
00383             break;
00384 
00385         valPtr++;
00386         pszTmpVal = valPtr;
00387     }
00388 
00389 }
00390 
00391 void DTDValidator::preContentValidation(bool
00392 #if defined(XERCES_DEBUG)
00393                                                                                 reuseGrammar
00394 #endif
00395                                        ,bool validateDefAttr)
00396 {
00397     //
00398     //  Lets enumerate all of the elements in the element decl pool
00399     //  and put out an error for any that did not get declared.
00400     //  We also check all of the attributes as well.
00401     //
00402     NameIdPoolEnumerator<DTDElementDecl> elemEnum = fDTDGrammar->getElemEnumerator();
00403     fDTDGrammar->setValidated(true);
00404     while (elemEnum.hasMoreElements())
00405     {
00406         const DTDElementDecl& curElem = elemEnum.nextElement();
00407         const DTDElementDecl::CreateReasons reason = curElem.getCreateReason();
00408 
00409         //
00410         //  See if this element decl was ever marked as declared. If
00411         //  not, then put out an error. In some cases its just
00412         //  a warning, such as being referenced in a content model.
00413         //
00414         if (reason != XMLElementDecl::Declared)
00415         {
00416             if (reason == XMLElementDecl::AttList)
00417             {
00418                 getScanner()->emitError
00419                 (
00420                     XMLErrs::UndeclaredElemInAttList
00421                     , curElem.getFullName()
00422                 );
00423             }
00424              else if (reason == XMLElementDecl::AsRootElem)
00425             {
00426                 // It's ok that the root element is not declared in the DTD
00427                 /*
00428                 emitError
00429                 (
00430                     XMLValid::UndeclaredElemInDocType
00431                     , curElem.getFullName()
00432                 );*/
00433             }
00434              else if (reason == XMLElementDecl::InContentModel)
00435             {
00436                 getScanner()->emitError
00437                 (
00438                     XMLErrs::UndeclaredElemInCM
00439                     , curElem.getFullName()
00440                 );
00441             }
00442             else
00443             {
00444                 #if defined(XERCES_DEBUG)
00445                   if(reuseGrammar && reason == XMLElementDecl::JustFaultIn){
00446                   }
00447                   else
00448                       ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::DTD_UnknownCreateReason, getScanner()->getMemoryManager());
00449                 #endif
00450             }
00451         }
00452 
00453         //
00454         //  Check all of the attributes of the current element.
00455         //  We check for:
00456         //
00457         //  1) Multiple ID attributes
00458         //  2) That all of the default values of attributes are
00459         //      valid for their type.
00460         //  3) That for any notation types, that their lists
00461         //      of possible values refer to declared notations.
00462         //
00463         //  4) XML1.0(3rd edition)
00464         //
00465         //     Validity constraint: One Notation Per Element Type
00466         //     An element type MUST NOT have more than one NOTATION attribute specified.
00467         //
00468         //     Validity constraint: No Notation on Empty Element
00469         //     For compatibility, an attribute of type NOTATION MUST NOT be declared on an element declared EMPTY.
00470         //
00471         //     Validity constraint: No Duplicate Tokens
00472         //     The notation names in a single NotationType attribute declaration, as well as 
00473         //     the NmTokens in a single Enumeration attribute declaration, MUST all be distinct.
00474         //
00475 
00476         XMLAttDefList& attDefList = curElem.getAttDefList();
00477         bool seenId = false;
00478         bool seenNOTATION = false;
00479         bool elemEmpty = (curElem.getModelType() == DTDElementDecl::Empty);
00480 
00481         for(XMLSize_t i=0; i<attDefList.getAttDefCount(); i++)
00482         {
00483             const XMLAttDef& curAttDef = attDefList.getAttDef(i);
00484 
00485             if (curAttDef.getType() == XMLAttDef::ID)
00486             {
00487                 if (seenId)
00488                 {
00489                     emitError
00490                     (
00491                         XMLValid::MultipleIdAttrs
00492                         , curElem.getFullName()
00493                     );
00494                     break;
00495                 }
00496 
00497                 seenId = true;
00498             }
00499              else if (curAttDef.getType() == XMLAttDef::Notation)
00500             {
00501                 if (seenNOTATION)
00502                 {
00503                     emitError
00504                     (
00505                         XMLValid::ElemOneNotationAttr
00506                       , curElem.getFullName()
00507                     );
00508 
00509                     break;
00510                 }
00511 
00512                 seenNOTATION = true;
00513 
00514                 // no notation attribute on empty element
00515                 if (elemEmpty)
00516                 {
00517                     emitError
00518                    (
00519                       XMLValid::EmptyElemNotationAttr
00520                     , curElem.getFullName()
00521                     , curAttDef.getFullName()
00522                     );
00523 
00524                     break;
00525                 }
00526 
00527                 //go through enumeration list to check
00528                 // distinct 
00529                 // notation declaration
00530                 if (curAttDef.getEnumeration())
00531                 {
00532                     checkTokenList(curAttDef, true);
00533                 }
00534              }
00535              else if (curAttDef.getType() == XMLAttDef::Enumeration )
00536              {
00537                 //go through enumeration list to check
00538                 // distinct only
00539                 if (curAttDef.getEnumeration())
00540                 {
00541                     checkTokenList(curAttDef, false);
00542                 }
00543              }
00544 
00545             // If it has a default/fixed value, then validate it
00546             if (validateDefAttr && curAttDef.getValue())
00547             {
00548                 validateAttrValue
00549                 (
00550                     &curAttDef
00551                     , curAttDef.getValue()
00552                     , true
00553                     , &curElem
00554                 );
00555             }
00556         }
00557     }
00558 
00559     //
00560     //  And enumerate all of the general entities. If any of them
00561     //  reference a notation, then make sure the notation exists.
00562     //
00563     NameIdPoolEnumerator<DTDEntityDecl> entEnum = fDTDGrammar->getEntityEnumerator();
00564     while (entEnum.hasMoreElements())
00565     {
00566         const DTDEntityDecl& curEntity = entEnum.nextElement();
00567 
00568         if (!curEntity.getNotationName())
00569             continue;
00570 
00571         // It has a notation name, so look it up
00572         if (!fDTDGrammar->getNotationDecl(curEntity.getNotationName()))
00573         {
00574             emitError
00575             (
00576                 XMLValid::NotationNotDeclared
00577                 , curEntity.getNotationName()
00578             );
00579         }
00580     }
00581 }
00582 
00583 void DTDValidator::postParseValidation()
00584 {
00585     //
00586     //  At this time, there is nothing to do here. The scanner itself handles
00587     //  ID/IDREF validation, since that is the same no matter what kind of
00588     //  validator.
00589     //
00590 }
00591 
00592 //
00593 //  We need to verify that all of its possible values
00594 //  (in the enum list) 
00595 //   is distinct and
00596 //   refer to valid notations if toValidateNotation is set on
00597 //
00598 void DTDValidator::checkTokenList(const XMLAttDef&  curAttDef
00599                                 ,       bool        toValidateNotation)
00600 {
00601 
00602     XMLCh* list = XMLString::replicate(curAttDef.getEnumeration(), getScanner()->getMemoryManager());
00603     ArrayJanitor<XMLCh> janList(list, getScanner()->getMemoryManager());
00604 
00605     //
00606     //  Search forward for a space or a null. If a null,
00607     //  we are done. If a space, cap it and look it up.
00608     //
00609     bool    breakFlag = false;
00610     XMLCh*  listPtr = list;
00611     XMLCh*  lastPtr = listPtr;
00612     while (true)
00613     {
00614         while (*listPtr && (*listPtr != chSpace))
00615             listPtr++;
00616 
00617         //
00618         //  If at the end, indicate we need to break after
00619         //  this one. Else, cap it off here.
00620         //
00621         if (!*listPtr)
00622             breakFlag = true;
00623         else
00624             *listPtr++ = chNull;
00625 
00626         //distinction check
00627         //there should be no same token found in the remaining list
00628         if (XMLString::isInList(lastPtr, listPtr))
00629         {
00630             emitError
00631                 (
00632                 XMLValid::AttrDupToken
00633                 , curAttDef.getFullName()
00634                 , lastPtr
00635                 );
00636         }
00637 
00638         if (toValidateNotation && !fDTDGrammar->getNotationDecl(lastPtr))
00639         {
00640             emitError
00641                 (
00642                 XMLValid::UnknownNotRefAttr
00643                 , curAttDef.getFullName()
00644                 , lastPtr
00645                 );
00646         }
00647 
00648         // Break out if we hit the end last time
00649         if (breakFlag)
00650             break;
00651 
00652         // Else move upwards and try again
00653         lastPtr = listPtr;
00654     }
00655 }
00656 
00657 XERCES_CPP_NAMESPACE_END