GME  13
DOMNormalizer.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 #include <xercesc/dom/DOMAttr.hpp>
00019 #include <xercesc/dom/DOMNode.hpp>
00020 #include <xercesc/dom/DOMErrorHandler.hpp>
00021 #include <xercesc/dom/DOMError.hpp>
00022 #include <xercesc/dom/DOMText.hpp>
00023 #include <xercesc/framework/XMLBuffer.hpp>
00024 
00025 #include <xercesc/util/Mutexes.hpp>
00026 #include <xercesc/util/PlatformUtils.hpp>
00027 #include <xercesc/util/XMLInitializer.hpp>
00028 #include <xercesc/util/XMLMsgLoader.hpp>
00029 #include <xercesc/util/XMLString.hpp>
00030 #include <xercesc/util/XMLUni.hpp>
00031 #include <xercesc/util/XMLUniDefs.hpp>
00032 
00033 #include "DOMConfigurationImpl.hpp"
00034 #include "DOMDocumentImpl.hpp"
00035 #include "DOMElementImpl.hpp"
00036 #include "DOMErrorImpl.hpp"
00037 #include "DOMEntityReferenceImpl.hpp"
00038 #include "DOMNormalizer.hpp"
00039 #include "DOMTextImpl.hpp"
00040 
00041 XERCES_CPP_NAMESPACE_BEGIN
00042 
00043 static XMLMsgLoader*   gMsgLoader = 0;
00044 
00045 void XMLInitializer::initializeDOMNormalizer()
00046 {
00047     gMsgLoader = XMLPlatformUtils::loadMsgSet(XMLUni::fgXMLErrDomain);
00048 
00049     if (!gMsgLoader)
00050       XMLPlatformUtils::panic(PanicHandler::Panic_CantLoadMsgDomain);
00051 }
00052 
00053 void XMLInitializer::terminateDOMNormalizer()
00054 {
00055     delete gMsgLoader;
00056     gMsgLoader = 0;
00057 }
00058 
00059 //
00060 //
00061 DOMNormalizer::DOMNormalizer(MemoryManager* const manager)
00062     : fDocument(0)
00063     , fConfiguration(0)
00064     , fErrorHandler(0)
00065     , fNSScope(0)
00066     , fNewNamespaceCount(1)
00067     , fMemoryManager(manager)
00068 {
00069     fNSScope = new (fMemoryManager) InScopeNamespaces(fMemoryManager);
00070 }
00071 
00072 DOMNormalizer::~DOMNormalizer() {
00073     delete fNSScope;
00074 }
00075 
00076 void DOMNormalizer::normalizeDocument(DOMDocumentImpl *doc) {
00077 
00078     fDocument = doc;
00079     fConfiguration = (DOMConfigurationImpl*)doc->getDOMConfig();
00080     DOMConfigurationImpl *dci = (DOMConfigurationImpl*)fDocument->getDOMConfig();
00081     if(dci)
00082         fErrorHandler = dci->getErrorHandler();
00083     else
00084         fErrorHandler = 0;
00085 
00086     DOMNode *child = 0;
00087     DOMNode *next = 0;
00088     ((DOMNormalizer *)this)->fNewNamespaceCount = 1;
00089 
00090     for(child = doc->getFirstChild();child != 0; child = next) {
00091         next = child->getNextSibling();
00092         child = normalizeNode(child);
00093         if(child != 0) {
00094             next = child;
00095         }
00096     }
00097 }
00098 
00099 DOMNode * DOMNormalizer::normalizeNode(DOMNode *node) const {
00100     switch(node->getNodeType()) {
00101     case DOMNode::ELEMENT_NODE: {
00102         fNSScope->addScope(fMemoryManager);
00103         DOMNamedNodeMap *attrMap = node->getAttributes();
00104 
00105         if(fConfiguration->featureValues & DOMConfigurationImpl::FEATURE_NAMESPACES) {
00106             namespaceFixUp((DOMElementImpl*)node);
00107         }
00108         else {
00109             //this is done in namespace fixup so no need to do it if namespace is on
00110             if(attrMap) {
00111                 for(XMLSize_t i = 0; i < attrMap->getLength(); i++) {
00112                     attrMap->item(i)->normalize();
00113                 }
00114             }
00115         }
00116 
00117         DOMNode *child = node->getFirstChild();
00118         DOMNode *next = 0;
00119         for (; child != 0; child = next) {
00120             next = child->getNextSibling();
00121             child = normalizeNode(child);
00122             if(child != 0) {
00123                 next = child;
00124             }
00125         }
00126         fNSScope->removeScope();
00127         break;
00128     }
00129     case DOMNode::COMMENT_NODE: {
00130         if (!(fConfiguration->featureValues & DOMConfigurationImpl::FEATURE_COMMENTS)) {
00131             DOMNode *prevSibling = node->getPreviousSibling();
00132             DOMNode *parent = node->getParentNode();
00133             // remove the comment node
00134             parent->removeChild(node);
00135             if (prevSibling != 0 && prevSibling->getNodeType() == DOMNode::TEXT_NODE) {
00136                 DOMNode *nextSibling = prevSibling->getNextSibling();
00137                 if (nextSibling != 0 && nextSibling->getNodeType() == DOMNode::TEXT_NODE) {
00138                     ((DOMTextImpl*)nextSibling)->insertData(0, prevSibling->getNodeValue());
00139                     parent->removeChild(prevSibling);
00140                     return nextSibling;
00141                 }
00142             }
00143         }
00144         break;
00145     }
00146     case DOMNode::CDATA_SECTION_NODE: {
00147         if (!(fConfiguration->featureValues & DOMConfigurationImpl::FEATURE_CDATA_SECTIONS)) {
00148             // convert CDATA to TEXT nodes
00149             DOMText *text = fDocument->createTextNode(node->getNodeValue());
00150             DOMNode *parent = node->getParentNode();
00151             DOMNode *prevSibling = node->getPreviousSibling();
00152             node = parent->replaceChild(text, node);
00153             if (prevSibling != 0 && prevSibling->getNodeType() == DOMNode::TEXT_NODE) {
00154                 text->insertData(0, prevSibling->getNodeValue());
00155                 parent->removeChild(prevSibling);
00156             }
00157             return text; // Don't advance;
00158         }
00159         break;
00160     }
00161     case DOMNode::TEXT_NODE: {
00162         DOMNode *next = node->getNextSibling();
00163 
00164         if(next != 0 && next->getNodeType() == DOMNode::TEXT_NODE) {
00165             ((DOMText*)node)->appendData(next->getNodeValue());
00166             node->getParentNode()->removeChild(next);
00167             return node;
00168         } else {
00169             const XMLCh* nv = node->getNodeValue();
00170             if (nv == 0 || *nv == 0) {
00171                 node->getParentNode()->removeChild(node);
00172             }
00173         }
00174     }
00175     default:
00176         break;
00177     }
00178 
00179     return 0;
00180 }
00181 
00182 
00183 void DOMNormalizer::namespaceFixUp(DOMElementImpl *ele) const {
00184     DOMAttrMapImpl *attrMap = ele->fAttributes;
00185 
00186     XMLSize_t len = attrMap->getLength();
00187     //get the ns info from the attrs
00188     for(XMLSize_t i = 0; i < len; i++) {
00189         DOMAttr *at = (DOMAttr*)attrMap->item(i);
00190 
00191         //normalize the attr whatever happens
00192         at->normalize();
00193 
00194         const XMLCh *uri = at->getNamespaceURI();
00195         const XMLCh *value = at->getNodeValue();
00196 
00197         if(XMLString::equals(XMLUni::fgXMLNSURIName, uri)) {
00198             if(XMLString::equals(XMLUni::fgXMLNSURIName, value)) {
00199                 error(XMLErrs::NSDeclInvalid, ele);
00200             }
00201             else {
00202                 const XMLCh *prefix = at->getPrefix();
00203 
00204                 if(XMLString::equals(prefix, XMLUni::fgXMLNSString)) {
00205                     fNSScope->addOrChangeBinding(at->getLocalName(), value, fMemoryManager);
00206                 }
00207                 else {
00208                     fNSScope->addOrChangeBinding(XMLUni::fgZeroLenString, value, fMemoryManager);
00209                 }
00210             }
00211         }
00212     }
00213 
00214     const XMLCh* prefix = ele->getPrefix();
00215     prefix ? prefix : prefix = XMLUni::fgZeroLenString;
00216     const XMLCh* uri = ele->getNamespaceURI();
00217     uri ? uri : uri = XMLUni::fgZeroLenString;
00218 
00219     if(!XMLString::equals(uri, XMLUni::fgZeroLenString)) {
00220         if(!fNSScope->isValidBinding(prefix, uri)) {
00221             addOrChangeNamespaceDecl(prefix, uri, ele);
00222             fNSScope->addOrChangeBinding(prefix, uri, fMemoryManager);
00223         }
00224     }
00225     else {
00226         if(ele->getLocalName() == 0) {
00227             error(XMLErrs::DOMLevel1Node, ele);
00228         }
00229         else if(!fNSScope->isValidBinding(XMLUni::fgZeroLenString, XMLUni::fgZeroLenString)) {
00230             addOrChangeNamespaceDecl(XMLUni::fgZeroLenString, XMLUni::fgZeroLenString, ele);
00231             fNSScope->addOrChangeBinding(XMLUni::fgZeroLenString, XMLUni::fgZeroLenString, fMemoryManager);
00232         }
00233     }
00234 
00235     //fix up non ns attrs
00236     len = attrMap->getLength();
00237 
00238     // hp aCC complains this i is a redefinition of the i on line 283
00239     for(XMLSize_t j = 0; j < len; j++) {
00240         DOMAttr *at = (DOMAttr*)attrMap->item(j);
00241         const XMLCh *uri = at->getNamespaceURI();
00242         const XMLCh* prefix = at->getPrefix();
00243 
00244         if(!XMLString::equals(XMLUni::fgXMLNSURIName, uri)) {
00245             if(uri != 0) {
00246                 if(prefix == 0 || !fNSScope->isValidBinding(prefix, uri)) {
00247 
00248                     const XMLCh* newPrefix =  fNSScope->getPrefix(uri);
00249 
00250                     if(newPrefix != 0) {
00251                         at->setPrefix(newPrefix);
00252                     }
00253                     else {
00254                         if(prefix != 0 && !fNSScope->getUri(prefix)) {
00255                             fNSScope->addOrChangeBinding(prefix, uri, fMemoryManager);
00256                             addOrChangeNamespaceDecl(prefix, uri, ele);
00257                         }
00258                         else {
00259                             newPrefix = addCustomNamespaceDecl(uri, ele);
00260                             fNSScope->addOrChangeBinding(newPrefix, uri, fMemoryManager);
00261                             at->setPrefix(newPrefix);
00262                         }
00263                     }
00264                 }
00265             }
00266             else if(at->getLocalName() == 0) {
00267                 error(XMLErrs::DOMLevel1Node, at);
00268             }
00269         }
00270     }
00271 }
00272 
00273 
00274 
00275 const XMLCh * DOMNormalizer::integerToXMLCh(unsigned int i) const {
00276     XMLCh *buf = (XMLCh*) fMemoryManager->allocate(15 * sizeof(XMLCh));//new XMLCh[15];
00277         XMLCh *pos = buf + sizeof(buf) - sizeof(XMLCh);
00278         *pos = chNull;
00279 
00280         do {
00281         switch(i % 10) {
00282         case 0 : *--pos = chDigit_0;break;
00283         case 1 : *--pos = chDigit_1;break;
00284         case 2 : *--pos = chDigit_2;break;
00285         case 3 : *--pos = chDigit_3;break;
00286         case 4 : *--pos = chDigit_4;break;
00287         case 5 : *--pos = chDigit_5;break;
00288         case 6 : *--pos = chDigit_6;break;
00289         case 7 : *--pos = chDigit_7;break;
00290         case 8 : *--pos = chDigit_8;break;
00291         case 9 : *--pos = chDigit_9;break;
00292         default:;
00293         }
00294                 i /= 10;
00295         } while (i);
00296 
00297     const XMLCh *copy = fDocument->getPooledString(pos);
00298     fMemoryManager->deallocate(buf);//delete[] buf;
00299         return copy;
00300 }
00301 
00302 
00303 
00304 
00305 
00306 void DOMNormalizer::addOrChangeNamespaceDecl(const XMLCh* prefix, const XMLCh* uri, DOMElementImpl* element) const {
00307 
00308     if (XMLString::equals(prefix, XMLUni::fgZeroLenString)) {
00309         element->setAttributeNS(XMLUni::fgXMLNSURIName, XMLUni::fgXMLNSString, uri);
00310     } else {
00311         XMLBuffer buf(1023, fMemoryManager);
00312         buf.set(XMLUni::fgXMLNSString);
00313         buf.append(chColon);
00314         buf.append(prefix);
00315         element->setAttributeNS(XMLUni::fgXMLNSURIName, buf.getRawBuffer(), uri);
00316     }
00317 }
00318 
00319 const XMLCh* DOMNormalizer::addCustomNamespaceDecl(const XMLCh* uri, DOMElementImpl *element) const {
00320     XMLBuffer preBuf(1023, fMemoryManager);
00321     preBuf.append(chLatin_N);
00322     preBuf.append(chLatin_S);
00323     preBuf.append(integerToXMLCh(fNewNamespaceCount));
00324     ((DOMNormalizer *)this)->fNewNamespaceCount++;
00325 
00326     while(fNSScope->getUri(preBuf.getRawBuffer())) {
00327         preBuf.reset();
00328         preBuf.append(chLatin_N);
00329         preBuf.append(chLatin_S);
00330         preBuf.append(integerToXMLCh(fNewNamespaceCount));
00331         ((DOMNormalizer *)this)->fNewNamespaceCount++;
00332     }
00333 
00334     XMLBuffer buf(1023, fMemoryManager);
00335     buf.set(XMLUni::fgXMLNSString);
00336     buf.append(chColon);
00337     buf.append(preBuf.getRawBuffer());
00338     element->setAttributeNS(XMLUni::fgXMLNSURIName, buf.getRawBuffer(), uri);
00339 
00340     return element->getAttributeNodeNS(XMLUni::fgXMLNSURIName, preBuf.getRawBuffer())->getLocalName();
00341 }
00342 
00343 XMLSize_t DOMNormalizer::InScopeNamespaces::size() {
00344     return fScopes->size();
00345 }
00346 
00347 DOMNormalizer::InScopeNamespaces::InScopeNamespaces(MemoryManager* const manager)
00348 : lastScopeWithBindings(0)
00349 {
00350     fScopes = new (manager) RefVectorOf<Scope>(10, true, manager);
00351 }
00352 
00353 DOMNormalizer::InScopeNamespaces::~InScopeNamespaces() {
00354     delete fScopes;
00355 }
00356 
00357 void DOMNormalizer::InScopeNamespaces::addOrChangeBinding(const XMLCh *prefix, const XMLCh *uri,
00358                                                           MemoryManager* const manager) {
00359     XMLSize_t s = fScopes->size();
00360 
00361     if(!s)
00362         addScope(manager);
00363 
00364     Scope *curScope = fScopes->elementAt(s - 1);
00365     curScope->addOrChangeBinding(prefix, uri, manager);
00366 
00367     lastScopeWithBindings = curScope;
00368 }
00369 
00370 void DOMNormalizer::InScopeNamespaces::addScope(MemoryManager* const manager) {
00371     Scope *s = new (manager) Scope(lastScopeWithBindings);
00372     fScopes->addElement(s);
00373 }
00374 
00375 void DOMNormalizer::InScopeNamespaces::removeScope() {
00376     lastScopeWithBindings = fScopes->elementAt(fScopes->size() - 1)->fBaseScopeWithBindings;
00377     Scope *s = fScopes->orphanElementAt(fScopes->size() - 1);
00378     delete s;
00379 }
00380 
00381 bool DOMNormalizer::InScopeNamespaces::isValidBinding(const XMLCh* prefix, const XMLCh* uri) const {
00382     const XMLCh* actual = fScopes->elementAt(fScopes->size() - 1)->getUri(prefix);
00383     if(actual == 0 || !XMLString::equals(actual, uri))
00384         return false;
00385     return true;
00386 }
00387 
00388 const XMLCh* DOMNormalizer::InScopeNamespaces::getPrefix(const XMLCh* uri) const {
00389     return fScopes->elementAt(fScopes->size() - 1)->getPrefix(uri);
00390 }
00391 
00392 const XMLCh* DOMNormalizer::InScopeNamespaces::getUri(const XMLCh* prefix) const {
00393     return fScopes->elementAt(fScopes->size() - 1)->getUri(prefix);
00394 }
00395 
00396 
00397 
00398 DOMNormalizer::InScopeNamespaces::Scope::Scope(Scope *baseScopeWithBindings) : fBaseScopeWithBindings(baseScopeWithBindings), fPrefixHash(0), fUriHash(0)
00399 {
00400 }
00401 
00402 DOMNormalizer::InScopeNamespaces::Scope::~Scope() {
00403     delete fPrefixHash;
00404     delete fUriHash;
00405 }
00406 
00407 void DOMNormalizer::InScopeNamespaces::Scope::addOrChangeBinding(const XMLCh *prefix, const XMLCh *uri,
00408                                                                  MemoryManager* const manager) {
00409     //initialize and copy forward now we need to
00410     if(!fUriHash) {
00411         fPrefixHash = new (manager) RefHashTableOf<XMLCh>(10, (bool) false, manager);
00412         fUriHash = new (manager) RefHashTableOf<XMLCh>(10, (bool) false, manager);
00413 
00414         if(fBaseScopeWithBindings) {
00415             RefHashTableOfEnumerator<XMLCh> preEnumer(fBaseScopeWithBindings->fPrefixHash, false, manager);
00416             while(preEnumer.hasMoreElements()) {
00417                 const XMLCh* prefix = (XMLCh*) preEnumer.nextElementKey();
00418                 const XMLCh* uri  = fBaseScopeWithBindings->fPrefixHash->get((void*)prefix);
00419 
00420                 //have to cast here because otherwise we have delete problems under windows :(
00421                 fPrefixHash->put((void *)prefix, (XMLCh*)uri);
00422             }
00423 
00424             RefHashTableOfEnumerator<XMLCh> uriEnumer(fBaseScopeWithBindings->fUriHash, false, manager);
00425             while(uriEnumer.hasMoreElements()) {
00426                 const XMLCh* uri = (XMLCh*) uriEnumer.nextElementKey();
00427                 const XMLCh* prefix  = fBaseScopeWithBindings->fUriHash->get((void*)uri);
00428 
00429                 //have to cast here because otherwise we have delete problems under windows :(
00430                 fUriHash->put((void *)uri, (XMLCh*)prefix);
00431             }
00432         }
00433     }
00434 
00435     const XMLCh *oldUri = fPrefixHash->get(prefix);
00436     if(oldUri) {
00437         fUriHash->removeKey(oldUri);
00438     }
00439 
00440     fPrefixHash->put((void *)prefix, (XMLCh*)uri);
00441     fUriHash->put((void *)uri, (XMLCh*)prefix);
00442 }
00443 
00444 const XMLCh* DOMNormalizer::InScopeNamespaces::Scope::getUri(const XMLCh *prefix) const {
00445     const XMLCh* uri = 0;
00446 
00447     if(fPrefixHash) {
00448         uri = fPrefixHash->get(prefix);
00449     }
00450     else if(fBaseScopeWithBindings) {
00451         uri = fBaseScopeWithBindings->getUri(prefix);
00452     }
00453 
00454     return uri ? uri : 0;
00455 }
00456 
00457 const XMLCh* DOMNormalizer::InScopeNamespaces::Scope::getPrefix(const XMLCh* uri) const {
00458     const XMLCh* prefix = 0;
00459 
00460     if(fUriHash) {
00461         prefix = fUriHash->get(uri);
00462     }
00463     else if(fBaseScopeWithBindings) {
00464         prefix = fBaseScopeWithBindings->getPrefix(uri);
00465     }
00466     return prefix ? prefix : 0;
00467 }
00468 
00469 void DOMNormalizer::error(const XMLErrs::Codes code, const DOMNode *node) const
00470 {
00471     if (fErrorHandler) {
00472 
00473         //  Load the message into alocal and replace any tokens found in
00474         //  the text.
00475         const XMLSize_t maxChars = 2047;
00476         XMLCh errText[maxChars + 1];
00477 
00478         if (!gMsgLoader->loadMsg(code, errText, maxChars))
00479         {
00480                 // <TBD> Should probably load a default message here
00481         }
00482 
00483         DOMErrorImpl domError(
00484           XMLErrs::DOMErrorType (code), 0, errText, (void*)node);
00485         bool toContinueProcess = true;
00486         try
00487         {
00488             toContinueProcess = fErrorHandler->handleError(domError);
00489         }
00490         catch(...)
00491         {
00492         }
00493         if (!toContinueProcess)
00494             throw (XMLErrs::Codes) code;
00495     }
00496 }
00497 
00498 
00499 
00500 XERCES_CPP_NAMESPACE_END