GME  13
MixedContentModel.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: MixedContentModel.cpp 676911 2008-07-15 13:27:32Z amassari $
00020  */
00021 
00022 
00023 // ---------------------------------------------------------------------------
00024 //  Includes
00025 // ---------------------------------------------------------------------------
00026 #include <string.h>
00027 #include <xercesc/util/RuntimeException.hpp>
00028 #include <xercesc/framework/XMLElementDecl.hpp>
00029 #include <xercesc/validators/common/ContentSpecNode.hpp>
00030 #include <xercesc/validators/common/MixedContentModel.hpp>
00031 #include <xercesc/validators/common/CMStateSet.hpp>
00032 #include <xercesc/validators/common/Grammar.hpp>
00033 #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
00034 
00035 XERCES_CPP_NAMESPACE_BEGIN
00036 
00037 // ---------------------------------------------------------------------------
00038 //  MixedContentModel: Constructors and Destructor
00039 // ---------------------------------------------------------------------------
00040 MixedContentModel::MixedContentModel(const bool             dtd
00041                                    , ContentSpecNode* const parentContentSpec
00042                                    , const bool             ordered
00043                                    , MemoryManager* const   manager) :
00044    fCount(0)
00045  , fChildren(0)
00046  , fChildTypes(0)
00047  , fOrdered(ordered)
00048  , fDTD(dtd)
00049  , fMemoryManager(manager)
00050 {
00051     //
00052     //  Create a vector of unsigned ints that will be filled in with the
00053     //  ids of the child nodes. It will be expanded as needed but we give
00054     //  it an initial capacity of 64 which should be more than enough for
00055     //  99% of the scenarios.
00056     //
00057     ValueVectorOf<QName*> children(64, fMemoryManager);
00058     ValueVectorOf<ContentSpecNode::NodeTypes> childTypes(64, fMemoryManager);
00059 
00060     //
00061     //  Get the parent element's content spec. This is the head of the tree
00062     //  of nodes that describes the content model. We will iterate this
00063     //  tree.
00064     //
00065     ContentSpecNode* curNode = parentContentSpec;
00066     if (!curNode)
00067         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_NoParentCSN, fMemoryManager);
00068 
00069     // And now call the private recursive method that iterates the tree
00070     buildChildList(curNode, children, childTypes);
00071 
00072     //
00073     //  And now we know how many elements we need in our member list. So
00074     //  fill them in.
00075     //
00076     fCount = children.size();
00077     fChildren = (QName**) fMemoryManager->allocate(fCount * sizeof(QName*)); //new QName*[fCount];
00078     fChildTypes = (ContentSpecNode::NodeTypes*) fMemoryManager->allocate
00079     (
00080         fCount * sizeof(ContentSpecNode::NodeTypes)
00081     ); //new ContentSpecNode::NodeTypes[fCount];
00082     for (XMLSize_t index = 0; index < fCount; index++) {
00083         fChildren[index] = new (fMemoryManager) QName(*children.elementAt(index));
00084         fChildTypes[index] = childTypes.elementAt(index);
00085     }
00086 }
00087 
00088 MixedContentModel::~MixedContentModel()
00089 {
00090     for (XMLSize_t index = 0; index < fCount; index++) {
00091         delete fChildren[index];
00092     }
00093     fMemoryManager->deallocate(fChildren); //delete [] fChildren;
00094     fMemoryManager->deallocate(fChildTypes); //delete [] fChildTypes;
00095 }
00096 
00097 
00098 // ---------------------------------------------------------------------------
00099 //  MixedContentModel: Getter methods
00100 // ---------------------------------------------------------------------------
00101 bool MixedContentModel::hasDups() const
00102 {
00103     // Can't have dups if only one child
00104     if (fCount == 1)
00105         return false;
00106 
00107     for (XMLSize_t index = 0; index < fCount; index++)
00108     {
00109         const QName* curVal = fChildren[index];
00110         for (XMLSize_t iIndex = 0; iIndex < fCount; iIndex++)
00111         {
00112             if (iIndex == index)
00113                 continue;
00114 
00115             if (fDTD) {
00116                 if (XMLString::equals(curVal->getRawName(), fChildren[iIndex]->getRawName())) {
00117                     return true;
00118                 }
00119             }
00120             else {
00121                 if ((curVal->getURI() == fChildren[iIndex]->getURI()) &&
00122                     (XMLString::equals(curVal->getLocalPart(), fChildren[iIndex]->getLocalPart()))) {
00123                     return true;
00124                 }
00125             }
00126         }
00127     }
00128     return false;
00129 }
00130 
00131 
00132 // ---------------------------------------------------------------------------
00133 //  MixedContentModel: Implementation of the ContentModel virtual interface
00134 // ---------------------------------------------------------------------------
00135 //
00136 //Under the XML Schema mixed model,
00137 //the order and number of child elements appearing in an instance
00138 //must agree with
00139 //the order and number of child elements specified in the model.
00140 //
00141 bool
00142 MixedContentModel::validateContent( QName** const         children
00143                                   , XMLSize_t             childCount
00144                                   , unsigned int
00145                                   , XMLSize_t*            indexFailingChild
00146                                   , MemoryManager*    const) const
00147 {
00148     // must match order
00149     if (fOrdered) {
00150         unsigned int inIndex = 0;
00151         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
00152 
00153             // Get the current child out of the source index
00154             const QName* curChild = children[outIndex];
00155 
00156             // If its PCDATA, then we just accept that
00157             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
00158                 continue;
00159 
00160             ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
00161             const QName* inChild = fChildren[inIndex];
00162 
00163             if (type == ContentSpecNode::Leaf) {
00164                 if (fDTD) {
00165                     if (!XMLString::equals(inChild->getRawName(), curChild->getRawName())) {
00166                         *indexFailingChild=outIndex;
00167                         return false;
00168                     }
00169                 }
00170                 else {
00171                     if ((inChild->getURI() != curChild->getURI()) ||
00172                         (!XMLString::equals(inChild->getLocalPart(), curChild->getLocalPart()))) {
00173                         *indexFailingChild=outIndex;
00174                         return false;
00175                     }
00176                 }
00177             }
00178             else if (type == ContentSpecNode::Any) {
00179             }
00180             else if (type == ContentSpecNode::Any_NS) {
00181                 if (inChild->getURI() != curChild->getURI())
00182                 {
00183                     *indexFailingChild=outIndex;
00184                     return false;
00185                 }
00186             }
00187             else if (type == ContentSpecNode::Any_Other)
00188             {
00189                 // Here we assume that empty string has id 1.
00190                 //
00191                 unsigned int uriId = curChild->getURI();
00192                 if (uriId == 1 || uriId == inChild->getURI())
00193                 {
00194                     *indexFailingChild=outIndex;
00195                     return false;
00196                 }
00197             }
00198 
00199             // advance index
00200             inIndex++;
00201         }
00202     }
00203 
00204     // can appear in any order
00205     else {
00206         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
00207             // Get the current child out of the source index
00208             const QName* curChild = children[outIndex];
00209 
00210             // If its PCDATA, then we just accept that
00211             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
00212                 continue;
00213 
00214             // And try to find it in our list
00215             unsigned int inIndex = 0;
00216             for (; inIndex < fCount; inIndex++)
00217             {
00218                 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
00219                 const QName* inChild = fChildren[inIndex];
00220 
00221                 if (type == ContentSpecNode::Leaf) {
00222                     if (fDTD) {
00223                         if (XMLString::equals(inChild->getRawName(), curChild->getRawName())) {
00224                             break;
00225                         }
00226                     }
00227                     else {
00228                         if ((inChild->getURI() == curChild->getURI()) &&
00229                             (XMLString::equals(inChild->getLocalPart(), curChild->getLocalPart()))) {
00230                             break;
00231                         }
00232                     }
00233                 }
00234                 else if (type == ContentSpecNode::Any) {
00235                     break;
00236                 }
00237                 else if (type == ContentSpecNode::Any_NS) {
00238                     if (inChild->getURI() == curChild->getURI())
00239                         break;
00240                 }
00241                 else if (type == ContentSpecNode::Any_Other)
00242                 {
00243                     // Here we assume that empty string has id 1.
00244                     //
00245                     unsigned int uriId = curChild->getURI();
00246                     if (uriId != 1 && uriId != inChild->getURI())
00247                         break;
00248                 }
00249 
00250                 // REVISIT: What about checking for multiple ANY matches?
00251                 //          The content model ambiguity *could* be checked
00252                 //          by the caller before constructing the mixed
00253                 //          content model.
00254             }
00255             // We did not find this one, so the validation failed
00256             if (inIndex == fCount)
00257             {
00258                 *indexFailingChild=outIndex;
00259                 return false;
00260             }
00261         }
00262     }
00263 
00264     // Everything seems to be in order, so return success
00265     return true;
00266 }
00267 
00268 
00269 bool MixedContentModel::validateContentSpecial(QName** const          children
00270                                             , XMLSize_t               childCount
00271                                             , unsigned int
00272                                             , GrammarResolver*  const pGrammarResolver
00273                                             , XMLStringPool*    const pStringPool
00274                                             , XMLSize_t*              indexFailingChild
00275                                             , MemoryManager*    const) const
00276 {
00277 
00278     SubstitutionGroupComparator comparator(pGrammarResolver, pStringPool);
00279 
00280     // must match order
00281     if (fOrdered) {
00282         unsigned int inIndex = 0;
00283         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
00284 
00285             // Get the current child out of the source index
00286             QName* curChild = children[outIndex];
00287 
00288             // If its PCDATA, then we just accept that
00289             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
00290                 continue;
00291 
00292             ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
00293             QName* inChild = fChildren[inIndex];
00294 
00295             if (type == ContentSpecNode::Leaf) {
00296                 if ( !comparator.isEquivalentTo(curChild, inChild))
00297                 {
00298                     *indexFailingChild=outIndex;
00299                     return false;
00300                 }
00301             }
00302             else if (type == ContentSpecNode::Any) {
00303             }
00304             else if (type == ContentSpecNode::Any_NS) {
00305                 if (inChild->getURI() != curChild->getURI())
00306                 {
00307                     *indexFailingChild=outIndex;
00308                     return false;
00309                 }
00310             }
00311             else if (type == ContentSpecNode::Any_Other)
00312             {
00313                 // Here we assume that empty string has id 1.
00314                 //
00315                 unsigned int uriId = curChild->getURI();
00316                 if (uriId == 1 || uriId == inChild->getURI())
00317                 {
00318                     *indexFailingChild=outIndex;
00319                     return false;
00320                 }
00321             }
00322 
00323             // advance index
00324             inIndex++;
00325         }
00326     }
00327 
00328     // can appear in any order
00329     else {
00330         for (unsigned int outIndex = 0; outIndex < childCount; outIndex++) {
00331             // Get the current child out of the source index
00332             QName* curChild = children[outIndex];
00333 
00334             // If its PCDATA, then we just accept that
00335             if (curChild->getURI() == XMLElementDecl::fgPCDataElemId)
00336                 continue;
00337 
00338             // And try to find it in our list
00339             unsigned int inIndex = 0;
00340             for (; inIndex < fCount; inIndex++)
00341             {
00342                 ContentSpecNode::NodeTypes type = fChildTypes[inIndex];
00343                 QName* inChild = fChildren[inIndex];
00344 
00345                 if (type == ContentSpecNode::Leaf) {
00346                     if ( comparator.isEquivalentTo(curChild, inChild))
00347                         break;
00348                 }
00349                 else if (type == ContentSpecNode::Any) {
00350                     break;
00351                 }
00352                 else if (type == ContentSpecNode::Any_NS) {
00353                     if (inChild->getURI() == curChild->getURI())
00354                         break;
00355                 }
00356                 else if (type == ContentSpecNode::Any_Other)
00357                 {
00358                   // Here we assume that empty string has id 1.
00359                   //
00360                   unsigned int uriId = curChild->getURI();
00361                   if (uriId != 1 && uriId != inChild->getURI())
00362                     break;
00363                 }
00364 
00365                 // REVISIT: What about checking for multiple ANY matches?
00366                 //          The content model ambiguity *could* be checked
00367                 //          by the caller before constructing the mixed
00368                 //          content model.
00369             }
00370             // We did not find this one, so the validation failed
00371             if (inIndex == fCount)
00372             {
00373                 *indexFailingChild=outIndex;
00374                 return false;
00375             }
00376         }
00377     }
00378 
00379     // Everything seems to be in order, so return success
00380     return true;
00381 }
00382 
00383 // ---------------------------------------------------------------------------
00384 //  MixedContentModel: Private helper methods
00385 // ---------------------------------------------------------------------------
00386 void
00387 MixedContentModel::buildChildList(  ContentSpecNode* const       curNode
00388                                   , ValueVectorOf<QName*>&       toFill
00389                                   , ValueVectorOf<ContentSpecNode::NodeTypes>& toType)
00390 {
00391     // Get the type of spec node our current node is
00392     const ContentSpecNode::NodeTypes curType = curNode->getType();
00393 
00394     // If its a leaf, then store its id in the target list
00395     if ((curType == ContentSpecNode::Leaf)      ||
00396         (curType == ContentSpecNode::Any)       ||
00397         (curType == ContentSpecNode::Any_Other) ||
00398         (curType == ContentSpecNode::Any_NS)   )
00399     {
00400         toFill.addElement(curNode->getElement());
00401         toType.addElement(curType);
00402         return;
00403     }
00404 
00405     // Get both the child node pointers
00406     ContentSpecNode* leftNode = curNode->getFirst();
00407     ContentSpecNode* rightNode = curNode->getSecond();
00408 
00409     // And recurse according to the type of node
00410     if (((curType & 0x0f) == ContentSpecNode::Choice)
00411     ||  ((curType & 0x0f) == ContentSpecNode::Sequence))
00412     {
00413         // Recurse on the left and right nodes
00414         buildChildList(leftNode, toFill, toType);
00415 
00416         // The last node of a choice or sequence has a null right
00417         if (rightNode)
00418             buildChildList(rightNode, toFill, toType);
00419     }
00420     else if ((curType == ContentSpecNode::OneOrMore)
00421          ||  (curType == ContentSpecNode::ZeroOrOne)
00422          ||  (curType == ContentSpecNode::ZeroOrMore))
00423     {
00424         // Just do the left node on this one
00425         buildChildList(leftNode, toFill, toType);
00426     }
00427 }
00428 
00429 XERCES_CPP_NAMESPACE_END