GME  13
AnyURIDatatypeValidator.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: AnyURIDatatypeValidator.cpp 676796 2008-07-15 05:04:13Z dbertoni $
00020  */
00021 
00022 // ---------------------------------------------------------------------------
00023 //  Includes
00024 // ---------------------------------------------------------------------------
00025 #include <stdio.h>
00026 #include <xercesc/util/OutOfMemoryException.hpp>
00027 #include <xercesc/util/XMLUTF8Transcoder.hpp>
00028 #include <xercesc/framework/XMLBuffer.hpp>
00029 #include <xercesc/validators/datatype/AnyURIDatatypeValidator.hpp>
00030 #include <xercesc/validators/datatype/InvalidDatatypeFacetException.hpp>
00031 #include <xercesc/validators/datatype/InvalidDatatypeValueException.hpp>
00032 
00033 XERCES_CPP_NAMESPACE_BEGIN
00034 
00035 // ---------------------------------------------------------------------------
00036 //  Constructors and Destructor
00037 // ---------------------------------------------------------------------------
00038 AnyURIDatatypeValidator::AnyURIDatatypeValidator(MemoryManager* const manager)
00039 :AbstractStringValidator(0, 0, 0, DatatypeValidator::AnyURI, manager)
00040 {}
00041 
00042 AnyURIDatatypeValidator::~AnyURIDatatypeValidator()
00043 {  
00044 }
00045 
00046 AnyURIDatatypeValidator::AnyURIDatatypeValidator(
00047                           DatatypeValidator*            const baseValidator
00048                         , RefHashTableOf<KVStringPair>* const facets
00049                         , RefArrayVectorOf<XMLCh>*      const enums
00050                         , const int                           finalSet
00051                         , MemoryManager* const manager)
00052 :AbstractStringValidator(baseValidator, facets, finalSet, DatatypeValidator::AnyURI, manager)
00053 {
00054     init(enums, manager);
00055 }
00056 
00057 DatatypeValidator* AnyURIDatatypeValidator::newInstance(
00058                                       RefHashTableOf<KVStringPair>* const facets
00059                                     , RefArrayVectorOf<XMLCh>*           const enums
00060                                     , const int                           finalSet
00061                                     , MemoryManager* const manager)
00062 {
00063     return (DatatypeValidator*) new (manager) AnyURIDatatypeValidator(this, facets, enums, finalSet, manager);
00064 }
00065 
00066 // ---------------------------------------------------------------------------
00067 //  Utilities
00068 // ---------------------------------------------------------------------------
00069 
00070 void AnyURIDatatypeValidator::checkValueSpace(const XMLCh* const content
00071                                               , MemoryManager* const manager)
00072 {
00073     bool validURI = true;
00074 
00075     // check 3.2.17.c0 must: URI (rfc 2396/2723)
00076     try
00077     {
00078         // Support for relative URLs
00079         // According to Java 1.1: URLs may also be specified with a
00080         // String and the URL object that it is related to.
00081         //
00082         XMLSize_t len = XMLString::stringLen(content);
00083         if (len)
00084         {          
00085             // Encode special characters using XLink 5.4 algorithm
00086                         XMLBuffer encoded((len*3)+1, manager);
00087             encode(content, len, encoded, manager);
00088             validURI = XMLUri::isValidURI(true, encoded.getRawBuffer(), true);            
00089         }
00090     }
00091     catch(const OutOfMemoryException&)
00092     {
00093         throw;
00094     }
00095     catch (...)
00096     {
00097         ThrowXMLwithMemMgr1(InvalidDatatypeValueException
00098                 , XMLExcepts::VALUE_URI_Malformed
00099                 , content
00100                 , manager);
00101     }
00102     
00103     if (!validURI) {
00104         ThrowXMLwithMemMgr1(InvalidDatatypeValueException
00105                     , XMLExcepts::VALUE_URI_Malformed
00106                     , content
00107                     , manager);
00108     }
00109 }
00110 
00111 /***
00112  * To encode special characters in anyURI, by using %HH to represent
00113  * special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
00114  * and non-ASCII characters (whose value >= 128).
00115  ***/
00116 void AnyURIDatatypeValidator::encode(const XMLCh* const content, const XMLSize_t len, XMLBuffer& encoded, MemoryManager* const manager)
00117 {
00118     static const bool needEscapeMap[] = {
00119         true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , /* 0x00 to 0x0F need escape */
00120         true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , /* 0x10 to 0x1F need escape */
00121         true , false, true , false, false, false, false, false, false, false, false, false, false, false, false, false, /* 0x20:' ', 0x22:'"' */
00122         false, false, false, false, false, false, false, false, false, false, false, false, true , false, true , false, /* 0x3C:'<', 0x3E:'>' */
00123         false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
00124         false, false, false, false, false, false, false, false, false, false, false, false, true , false, true , false, /* 0x5C:'\\', 0x5E:'^' */
00125         true , false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, /* 0x60:'`' */
00126         false, false, false, false, false, false, false, false, false, false, false, true , true , true , true , true   /* 0x7B:'{', 0x7C:'|', 0x7D:'}', 0x7E:'~', 0x7F:DEL */
00127     };
00128 
00129     // For each character in content
00130     XMLSize_t i;
00131     for (i = 0; i < len; i++)
00132     {
00133         int ch = (int)content[i];
00134         // If it's not an ASCII character, break here, and use UTF-8 encoding
00135         if (ch >= 128)
00136             break;
00137 
00138         if (needEscapeMap[ch])
00139         {
00140             char tempStr[3] = "\0";
00141             sprintf(tempStr, "%02X", ch);
00142             encoded.append('%');
00143             encoded.append((XMLCh)tempStr[0]);
00144             encoded.append((XMLCh)tempStr[1]);
00145         }
00146         else
00147         {
00148             encoded.append((XMLCh)ch);
00149         }
00150     }
00151 
00152     // we saw some non-ascii character
00153     if (i < len) {
00154         // get UTF-8 bytes for the remaining sub-string
00155         const XMLCh* remContent = (XMLCh*)&content[i];
00156         const XMLSize_t remContentLen = len - i;
00157         XMLByte* UTF8Byte = (XMLByte*)manager->allocate((remContentLen*4+1) * sizeof(XMLByte));
00158         XMLSize_t charsEaten;
00159 
00160         XMLUTF8Transcoder transcoder(XMLUni::fgUTF8EncodingString, remContentLen*4+1, manager);
00161         XMLSize_t utf8Len = transcoder.transcodeTo(remContent, remContentLen, UTF8Byte, remContentLen*4, charsEaten, XMLTranscoder::UnRep_RepChar);
00162         assert(charsEaten == remContentLen);
00163 
00164         XMLSize_t j;
00165         for (j = 0; j < utf8Len; j++) {
00166             XMLByte b = UTF8Byte[j];
00167             if (b >= 128 || needEscapeMap[b])
00168             {
00169                 char tempStr[3] = "\0";
00170                 sprintf(tempStr, "%02X", b);
00171                 encoded.append('%');
00172                 encoded.append((XMLCh)tempStr[0]);
00173                 encoded.append((XMLCh)tempStr[1]);
00174             }
00175             else
00176             {
00177                 encoded.append((XMLCh)b);
00178             }
00179         }
00180         manager->deallocate(UTF8Byte);
00181     }
00182 }
00183 
00184 /***
00185  * Support for Serialization/De-serialization
00186  ***/
00187 
00188 IMPL_XSERIALIZABLE_TOCREATE(AnyURIDatatypeValidator)
00189 
00190 void AnyURIDatatypeValidator::serialize(XSerializeEngine& serEng)
00191 {
00192     AbstractStringValidator::serialize(serEng);
00193 }
00194 
00195 XERCES_CPP_NAMESPACE_END
00196