GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 00019 // --------------------------------------------------------------------------- 00020 // Includes 00021 // --------------------------------------------------------------------------- 00022 #include <xercesc/util/BitOps.hpp> 00023 #include <xercesc/util/XMLUCS4Transcoder.hpp> 00024 #include <xercesc/util/TranscodingException.hpp> 00025 #include <string.h> 00026 00027 XERCES_CPP_NAMESPACE_BEGIN 00028 00029 // --------------------------------------------------------------------------- 00030 // XMLUCS4Transcoder: Constructors and Destructor 00031 // --------------------------------------------------------------------------- 00032 XMLUCS4Transcoder::XMLUCS4Transcoder(const XMLCh* const encodingName 00033 , const XMLSize_t blockSize 00034 , const bool swapped 00035 , MemoryManager* const manager) : 00036 00037 XMLTranscoder(encodingName, blockSize, manager) 00038 , fSwapped(swapped) 00039 { 00040 } 00041 00042 00043 XMLUCS4Transcoder::~XMLUCS4Transcoder() 00044 { 00045 } 00046 00047 00048 // --------------------------------------------------------------------------- 00049 // XMLUCS4Transcoder: Implementation of the transcoder API 00050 // --------------------------------------------------------------------------- 00051 XMLSize_t 00052 XMLUCS4Transcoder::transcodeFrom(const XMLByte* const srcData 00053 , const XMLSize_t srcCount 00054 , XMLCh* const toFill 00055 , const XMLSize_t maxChars 00056 , XMLSize_t& bytesEaten 00057 , unsigned char* const charSizes) 00058 { 00059 // 00060 // Get pointers to the start and end of the source buffer in terms of 00061 // UCS-4 characters. 00062 // 00063 const UCS4Ch* srcPtr = (const UCS4Ch*)srcData; 00064 const UCS4Ch* srcEnd = srcPtr + (srcCount / sizeof(UCS4Ch)); 00065 00066 // 00067 // Get pointers to the start and end of the target buffer, which is 00068 // in terms of the XMLCh chars we output. 00069 // 00070 XMLCh* outPtr = toFill; 00071 XMLCh* outEnd = toFill + maxChars; 00072 00073 // 00074 // And get a pointer into the char sizes buffer. We will run this 00075 // up as we put chars into the output buffer. 00076 // 00077 unsigned char* sizePtr = charSizes; 00078 00079 // 00080 // Now process chars until we either use up all our source or all of 00081 // our output space. 00082 // 00083 while ((outPtr < outEnd) && (srcPtr < srcEnd)) 00084 { 00085 // 00086 // Get the next UCS char out of the buffer. Don't bump the ptr 00087 // yet since we might not have enough storage for it in the target 00088 // (if its causes a surrogate pair to be created. 00089 // 00090 UCS4Ch nextVal = *srcPtr; 00091 00092 // If it needs to be swapped, then do it 00093 if (fSwapped) 00094 nextVal = BitOps::swapBytes(nextVal); 00095 00096 // Handle a surrogate pair if needed 00097 if (nextVal & 0xFFFF0000) 00098 { 00099 // 00100 // If we don't have room for both of the chars, then we 00101 // bail out now. 00102 // 00103 if (outPtr + 1 == outEnd) 00104 break; 00105 00106 const XMLInt32 LEAD_OFFSET = 0xD800 - (0x10000 >> 10); 00107 const XMLCh ch1 = XMLCh(LEAD_OFFSET + (nextVal >> 10)); 00108 const XMLCh ch2 = XMLCh(0xDC00 + (nextVal & 0x3FF)); 00109 00110 // 00111 // We have room so store them both. But note that the 00112 // second one took up no source bytes! 00113 // 00114 *sizePtr++ = sizeof(UCS4Ch); 00115 *outPtr++ = ch1; 00116 *sizePtr++ = 0; 00117 *outPtr++ = ch2; 00118 } 00119 else 00120 { 00121 // 00122 // No surrogate, so just store it and bump the count of chars 00123 // read. Update the char sizes buffer for this char's entry. 00124 // 00125 *sizePtr++ = sizeof(UCS4Ch); 00126 *outPtr++ = XMLCh(nextVal); 00127 } 00128 00129 // Indicate that we ate another UCS char's worth of bytes 00130 srcPtr++; 00131 } 00132 00133 // Set the bytes eaten parameter 00134 bytesEaten = ((const XMLByte*)srcPtr) - srcData; 00135 00136 // And return the chars written into the output buffer 00137 return outPtr - toFill; 00138 } 00139 00140 00141 XMLSize_t 00142 XMLUCS4Transcoder::transcodeTo( const XMLCh* const srcData 00143 , const XMLSize_t srcCount 00144 , XMLByte* const toFill 00145 , const XMLSize_t maxBytes 00146 , XMLSize_t& charsEaten 00147 , const UnRepOpts) 00148 { 00149 // 00150 // Get pointers to the start and end of the source buffer, which 00151 // is in terms of XMLCh chars. 00152 // 00153 const XMLCh* srcPtr = srcData; 00154 const XMLCh* srcEnd = srcData + srcCount; 00155 00156 // 00157 // Get pointers to the start and end of the target buffer, in terms 00158 // of UCS-4 chars. 00159 // 00160 UCS4Ch* outPtr = (UCS4Ch*)toFill; 00161 UCS4Ch* outEnd = outPtr + (maxBytes / sizeof(UCS4Ch)); 00162 00163 // 00164 // Now loop until we either run out of source characters or we 00165 // fill up our output buffer. 00166 // 00167 XMLCh trailCh; 00168 while ((outPtr < outEnd) && (srcPtr < srcEnd)) 00169 { 00170 // 00171 // Get out an XMLCh char from the source. Don't bump up the 00172 // pointer yet, since it might be a leading for which we don't 00173 // have the trailing. 00174 // 00175 const XMLCh curCh = *srcPtr; 00176 00177 // 00178 // If its a leading char of a surrogate pair handle it one way, 00179 // else just cast it over into the target. 00180 // 00181 if ((curCh >= 0xD800) && (curCh <= 0xDBFF)) 00182 { 00183 // 00184 // Ok, we have to have another source char available or we 00185 // just give up without eating the leading char. 00186 // 00187 if (srcPtr + 1 == srcEnd) 00188 break; 00189 00190 // 00191 // We have the trailing char, so eat the first char and the 00192 // trailing char from the source. 00193 // 00194 srcPtr++; 00195 trailCh = *srcPtr++; 00196 00197 // 00198 // Then make sure its a legal trailing char. If not, throw 00199 // an exception. 00200 // 00201 if ( !( (trailCh >= 0xDC00) && (trailCh <= 0xDFFF) ) ) 00202 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadTrailingSurrogate, getMemoryManager()); 00203 00204 // And now combine the two into a single output char 00205 const XMLInt32 SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00; 00206 *outPtr++ = (curCh << 10) + trailCh + SURROGATE_OFFSET; 00207 } 00208 else 00209 { 00210 // 00211 // Its just a char, so we can take it as is. If we need to 00212 // swap it, then swap it. Because of flakey compilers, use 00213 // a temp first. 00214 // 00215 const UCS4Ch tmpCh = UCS4Ch(curCh); 00216 if (fSwapped) 00217 *outPtr++ = BitOps::swapBytes(tmpCh); 00218 else 00219 *outPtr++ = tmpCh; 00220 00221 // Bump the source pointer 00222 srcPtr++; 00223 } 00224 } 00225 00226 // Set the chars we ate from the source 00227 charsEaten = srcPtr - srcData; 00228 00229 // Return the bytes we wrote to the output 00230 return ((XMLByte*)outPtr) - toFill; 00231 } 00232 00233 00234 bool XMLUCS4Transcoder::canTranscodeTo(const unsigned int) 00235 { 00236 // We can handle anything 00237 return true; 00238 } 00239 00240 XERCES_CPP_NAMESPACE_END