GME  13
XMLUTF16Transcoder.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 
00019 // ---------------------------------------------------------------------------
00020 //  Includes
00021 // ---------------------------------------------------------------------------
00022 #include <xercesc/util/BitOps.hpp>
00023 #include <xercesc/util/XMLUTF16Transcoder.hpp>
00024 #include <xercesc/util/TranscodingException.hpp>
00025 #include <string.h>
00026 
00027 XERCES_CPP_NAMESPACE_BEGIN
00028 
00029 
00030 
00031 // ---------------------------------------------------------------------------
00032 //  XMLUTF16Transcoder: Constructors and Destructor
00033 // ---------------------------------------------------------------------------
00034 XMLUTF16Transcoder::XMLUTF16Transcoder( const   XMLCh* const    encodingName
00035                                         , const XMLSize_t       blockSize
00036                                         , const bool            swapped
00037                                         , MemoryManager* const manager) :
00038 
00039     XMLTranscoder(encodingName, blockSize, manager)
00040     , fSwapped(swapped)
00041 {
00042 }
00043 
00044 
00045 XMLUTF16Transcoder::~XMLUTF16Transcoder()
00046 {
00047 }
00048 
00049 
00050 // ---------------------------------------------------------------------------
00051 //  XMLUTF16Transcoder: Implementation of the transcoder API
00052 // ---------------------------------------------------------------------------
00053 XMLSize_t
00054 XMLUTF16Transcoder::transcodeFrom(  const   XMLByte* const       srcData
00055                                     , const XMLSize_t            srcCount
00056                                     ,       XMLCh* const         toFill
00057                                     , const XMLSize_t            maxChars
00058                                     ,       XMLSize_t&           bytesEaten
00059                                     ,       unsigned char* const charSizes)
00060 {
00061     //
00062     //  Calculate the max chars we can do here. Its the lesser of the
00063     //  max output chars and the number of chars in the source.
00064     //
00065     const XMLSize_t srcChars = srcCount / sizeof(UTF16Ch);
00066     const XMLSize_t countToDo = srcChars < maxChars ? srcChars : maxChars;
00067 
00068     // Look at the source data as UTF16 chars
00069     const UTF16Ch* asUTF16 = (const UTF16Ch*)srcData;
00070 
00071     // And get a mutable pointer to the output
00072     XMLCh* outPtr = toFill;
00073 
00074     //
00075     //  If its swapped, we have to do a char by char swap and cast. Else
00076     //  we have to check whether our XMLCh and UTF16Ch types are the same
00077     //  size or not. If so, we can optimize by just doing a buffer copy.
00078     //
00079     if (fSwapped)
00080     {
00081         //
00082         //  And then do the swapping loop for the count we precalculated. Note
00083         //  that this also handles size conversion as well if XMLCh is not the
00084         //  same size as UTF16Ch.
00085         //
00086         for (XMLSize_t index = 0; index < countToDo; index++)
00087             *outPtr++ = BitOps::swapBytes(*asUTF16++);
00088     }
00089      else
00090     {
00091         //
00092         //  If the XMLCh type is the same size as a UTF16 value on this
00093         //  platform, then we can do just a buffer copy straight to the target
00094         //  buffer since our source chars are UTF-16 chars. If its not, then
00095         //  we still have to do a loop and assign each one, in order to
00096         //  implicitly convert.
00097         //
00098         if (sizeof(XMLCh) == sizeof(UTF16Ch))
00099         {
00100             //  Notice we convert char count to byte count here!!!
00101             memcpy(toFill, srcData, countToDo * sizeof(UTF16Ch));
00102         }
00103          else
00104         {
00105             for (XMLSize_t index = 0; index < countToDo; index++)
00106                 *outPtr++ = XMLCh(*asUTF16++);
00107         }
00108     }
00109 
00110     // Set the bytes eaten
00111     bytesEaten = countToDo * sizeof(UTF16Ch);
00112 
00113     // Set the character sizes to the fixed size
00114     memset(charSizes, sizeof(UTF16Ch), countToDo);
00115 
00116     // Return the chars we transcoded
00117     return countToDo;
00118 }
00119 
00120 
00121 XMLSize_t
00122 XMLUTF16Transcoder::transcodeTo(const   XMLCh* const    srcData
00123                                 , const XMLSize_t       srcCount
00124                                 ,       XMLByte* const  toFill
00125                                 , const XMLSize_t       maxBytes
00126                                 ,       XMLSize_t&      charsEaten
00127                                 , const UnRepOpts)
00128 {
00129     //
00130     //  Calculate the max chars we can do here. Its the lesser of the
00131     //  chars that we can fit into the output buffer, and the source
00132     //  chars available.
00133     //
00134     const XMLSize_t maxOutChars = maxBytes / sizeof(UTF16Ch);
00135     const XMLSize_t countToDo = srcCount < maxOutChars ? srcCount : maxOutChars;
00136 
00137     //
00138     //  Get a pointer tot he output buffer in the UTF-16 character format
00139     //  that we need to work with. And get a mutable pointer to the source
00140     //  character buffer.
00141     //
00142     UTF16Ch*        outPtr = (UTF16Ch*)toFill;
00143     const XMLCh*    srcPtr = srcData;
00144 
00145     //
00146     //  If the target format is swapped from our native format, then handle
00147     //  it one way, else handle it another.
00148     //
00149     if (fSwapped)
00150     {
00151         //
00152         //  And then do the swapping loop for the count we precalculated. Note
00153         //  that this also handles size conversion as well if XMLCh is not the
00154         //  same size as UTF16Ch.
00155         //
00156         for (XMLSize_t index = 0; index < countToDo; index++)
00157         {
00158             // To avoid flakey compilers, use a temp
00159             const UTF16Ch tmpCh = UTF16Ch(*srcPtr++);
00160             *outPtr++ = BitOps::swapBytes(tmpCh);
00161         }
00162     }
00163      else
00164     {
00165         //
00166         //  If XMLCh and UTF16Ch are the same size, we can just do a fast
00167         //  memory copy. Otherwise, we have to do a loop and downcast each
00168         //  character into its new 16 bit storage.
00169         //
00170         if (sizeof(XMLCh) == sizeof(UTF16Ch))
00171         {
00172             //  Notice we convert char count to byte count here!!!
00173             memcpy(toFill, srcData, countToDo * sizeof(UTF16Ch));
00174         }
00175          else
00176         {
00177             for (XMLSize_t index = 0; index < countToDo; index++)
00178                 *outPtr++ = UTF16Ch(*srcPtr++);
00179         }
00180     }
00181 
00182     // Set the chars eaten to the calculated number we ate
00183     charsEaten = countToDo;
00184 
00185     //Return the bytes we ate. Note we convert to a byte count here!
00186     return countToDo * sizeof(UTF16Ch);
00187 }
00188 
00189 
00190 bool XMLUTF16Transcoder::canTranscodeTo(const unsigned int)
00191 {
00192     // We can handle anything
00193     return true;
00194 }
00195 
00196 XERCES_CPP_NAMESPACE_END
00197