GME  13
IconvTransService.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: IconvTransService.cpp 695885 2008-09-16 14:00:19Z borisk $
00020  */
00021 
00022 
00023 // ---------------------------------------------------------------------------
00024 //  Includes
00025 // ---------------------------------------------------------------------------
00026 
00027 #if HAVE_CONFIG_H
00028 #       include <config.h>
00029 #endif
00030 
00031 #if HAVE_WCHAR_H
00032 #       include <wchar.h>
00033 #endif
00034 #if HAVE_WCTYPE_H
00035 #       include <wctype.h>
00036 #endif
00037 
00038 // Fill in for broken or missing wctype functions on some platforms
00039 #if !HAVE_TOWUPPER
00040 #       include <towupper.h>
00041 #endif
00042 #if !HAVE_TOWLOWER
00043 #       include <towlower.h>
00044 #endif
00045 
00046 #include <string.h>
00047 #include <stdlib.h>
00048 #include <stdio.h>
00049 
00050 #include "IconvTransService.hpp"
00051 #include <xercesc/util/XMLUniDefs.hpp>
00052 #include <xercesc/util/XMLUni.hpp>
00053 #include <xercesc/framework/MemoryManager.hpp>
00054 
00055 
00056 XERCES_CPP_NAMESPACE_BEGIN
00057 
00058 // ---------------------------------------------------------------------------
00059 //  Local, const data
00060 // ---------------------------------------------------------------------------
00061 static const int    gTempBuffArraySize = 1024;
00062 static const XMLCh  gMyServiceId[] =
00063 {
00064     chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull
00065 };
00066 
00067 // ---------------------------------------------------------------------------
00068 // the following is defined by 'man mbrtowc':
00069 // ---------------------------------------------------------------------------
// Failure value of the C multibyte/wide conversion routines, i.e. (size_t)-1.
static const size_t TRANSCODING_ERROR = static_cast<size_t>(-1);
00071 
00072 // ---------------------------------------------------------------------------
00073 //  Local methods
00074 // ---------------------------------------------------------------------------
00075 static unsigned int getWideCharLength(const XMLCh* const src)
00076 {
00077     if (!src)
00078         return 0;
00079 
00080     unsigned int len = 0;
00081     const XMLCh* pTmp = src;
00082     while (*pTmp++)
00083         len++;
00084     return len;
00085 }
00086 
00087 
00088 
00089 // ---------------------------------------------------------------------------
00090 //  IconvTransService: Constructors and Destructor
00091 // ---------------------------------------------------------------------------
00092 IconvTransService::IconvTransService(MemoryManager* /* manager */)
00093 {
00094 }
00095 
00096 IconvTransService::~IconvTransService()
00097 {
00098 }
00099 
00100 
00101 // ---------------------------------------------------------------------------
00102 //  IconvTransService: The virtual transcoding service API
00103 // ---------------------------------------------------------------------------
00104 int IconvTransService::compareIString(  const   XMLCh* const    comp1
00105                                         , const XMLCh* const    comp2)
00106 {
00107     const XMLCh* cptr1 = comp1;
00108     const XMLCh* cptr2 = comp2;
00109 
00110     while ( (*cptr1 != 0) && (*cptr2 != 0) )
00111     {
00112         wint_t wch1 = towupper(*cptr1);
00113         wint_t wch2 = towupper(*cptr2);
00114         if (wch1 != wch2)
00115             break;
00116 
00117         cptr1++;
00118         cptr2++;
00119     }
00120     return (int) ( towupper(*cptr1) - towupper(*cptr2) );
00121 }
00122 
00123 
00124 int IconvTransService::compareNIString( const   XMLCh* const    comp1
00125                                         , const XMLCh* const    comp2
00126                                         , const XMLSize_t       maxChars)
00127 {
00128     unsigned int  n = 0;
00129     const XMLCh* cptr1 = comp1;
00130     const XMLCh* cptr2 = comp2;
00131 
00132     while (true && maxChars)
00133     {
00134         wint_t wch1 = towupper(*cptr1);
00135         wint_t wch2 = towupper(*cptr2);
00136 
00137         if (wch1 != wch2)
00138             return (int) (wch1 - wch2);
00139 
00140         // If either ended, then both ended, so equal
00141         if (!*cptr1 || !*cptr2)
00142             break;
00143 
00144         cptr1++;
00145         cptr2++;
00146 
00147         //  Bump the count of chars done. If it equals the count then we
00148         //  are equal for the requested count, so break out and return
00149         //  equal.
00150         n++;
00151         if (n == maxChars)
00152             break;
00153     }
00154 
00155     return 0;
00156 }
00157 
00158 
00159 const XMLCh* IconvTransService::getId() const
00160 {
00161     return gMyServiceId;
00162 }
00163 
00164 XMLLCPTranscoder* IconvTransService::makeNewLCPTranscoder(MemoryManager* manager)
00165 {
00166     // Just allocate a new transcoder of our type
00167     return new (manager) IconvLCPTranscoder;
00168 }
00169 
00170 bool IconvTransService::supportsSrcOfs() const
00171 {
00172     return true;
00173 }
00174 
00175 
00176 // ---------------------------------------------------------------------------
00177 //  IconvTransService: The protected virtual transcoding service API
00178 // ---------------------------------------------------------------------------
00179 XMLTranscoder*
00180 IconvTransService::makeNewXMLTranscoder(const   XMLCh* const
00181                                         ,       XMLTransService::Codes& resValue
00182                                         , const XMLSize_t
00183                                         ,       MemoryManager* const)
00184 {
00185     //
00186     //  NOTE: We don't use the block size here
00187     //
00188     //  This is a minimalist transcoding service, that only supports a local
00189     //  default transcoder. All named encodings return zero as a failure,
00190     //  which means that only the intrinsic encodings supported by the parser
00191     //  itself will work for XML data.
00192     //
00193     resValue = XMLTransService::UnsupportedEncoding;
00194     return 0;
00195 }
00196 
00197 
00198 void IconvTransService::upperCase(XMLCh* const toUpperCase)
00199 {
00200     XMLCh* outPtr = toUpperCase;
00201     while (*outPtr)
00202     {
00203         *outPtr = towupper(*outPtr);
00204         outPtr++;
00205     }
00206 }
00207 
00208 
00209 void IconvTransService::lowerCase(XMLCh* const toLowerCase)
00210 {
00211     XMLCh* outPtr = toLowerCase;
00212     while (*outPtr)
00213     {
00214         *outPtr = towlower(*outPtr);
00215         outPtr++;
00216     }
00217 }
00218 
00219 
00220 // ---------------------------------------------------------------------------
00221 //  IconvLCPTranscoder: The virtual transcoder API
00222 // ---------------------------------------------------------------------------
00223 XMLSize_t IconvLCPTranscoder::calcRequiredSize(const char* const srcText
00224                                                   , MemoryManager* const)
00225 {
00226     if (!srcText)
00227         return 0;
00228 
00229     XMLSize_t len = 0;
00230     const char *src = srcText;
00231 #if HAVE_MBRLEN
00232     mbstate_t st;
00233     memset(&st, 0, sizeof(st));
00234 #endif
00235     for ( ; *src; ++len)
00236     {
00237 #if HAVE_MBRLEN
00238         int l=::mbrlen( src, MB_CUR_MAX, &st );
00239 #else
00240         int l=::mblen( src, MB_CUR_MAX );
00241 #endif
00242         if( l == TRANSCODING_ERROR )
00243             return 0;
00244         src += l;
00245     }
00246     return len;
00247 }
00248 
00249 
00250 XMLSize_t IconvLCPTranscoder::calcRequiredSize(const XMLCh* const srcText
00251                                                   , MemoryManager* const manager)
00252 {
00253     if (!srcText)
00254         return 0;
00255 
00256     XMLSize_t     wLent = getWideCharLength(srcText);
00257     wchar_t       tmpWideCharArr[gTempBuffArraySize];
00258     wchar_t*      allocatedArray = 0;
00259     wchar_t*      wideCharBuf = 0;
00260 
00261     if (wLent >= gTempBuffArraySize)
00262         wideCharBuf = allocatedArray = (wchar_t*)
00263             manager->allocate
00264             (
00265                 (wLent + 1) * sizeof(wchar_t)
00266             );//new wchar_t[wLent + 1];
00267     else
00268         wideCharBuf = tmpWideCharArr;
00269 
00270     for (XMLSize_t i = 0; i < wLent; i++)
00271     {
00272         wideCharBuf[i] = srcText[i];
00273     }
00274     wideCharBuf[wLent] = 0x00;
00275 
00276     const XMLSize_t retVal = ::wcstombs(NULL, wideCharBuf, 0);
00277 
00278     if (allocatedArray)
00279       manager->deallocate(allocatedArray);
00280 
00281     if (retVal == ~0)
00282         return 0;
00283     return retVal;
00284 }
00285 
00286 
00287 bool IconvLCPTranscoder::transcode( const   XMLCh* const    toTranscode
00288                                     ,       char* const     toFill
00289                                     , const XMLSize_t       maxBytes
00290                                     , MemoryManager* const  manager)
00291 {
00292     // Watch for a couple of pyscho corner cases
00293     if (!toTranscode || !maxBytes)
00294     {
00295         toFill[0] = 0;
00296         return true;
00297     }
00298 
00299     if (!*toTranscode)
00300     {
00301         toFill[0] = 0;
00302         return true;
00303     }
00304 
00305     unsigned int  wLent = getWideCharLength(toTranscode);
00306     wchar_t       tmpWideCharArr[gTempBuffArraySize];
00307     wchar_t*      allocatedArray = 0;
00308     wchar_t*      wideCharBuf = 0;
00309 
00310     if (wLent > maxBytes) {
00311         wLent = maxBytes;
00312     }
00313 
00314     if (maxBytes >= gTempBuffArraySize) {
00315         wideCharBuf = allocatedArray = (wchar_t*)
00316             manager->allocate
00317             (
00318                 (maxBytes + 1) * sizeof(wchar_t)
00319             );//new wchar_t[maxBytes + 1];
00320     }
00321     else
00322         wideCharBuf = tmpWideCharArr;
00323 
00324     for (unsigned int i = 0; i < wLent; i++)
00325     {
00326         wideCharBuf[i] = toTranscode[i];
00327     }
00328     wideCharBuf[wLent] = 0x00;
00329 
00330     // Ok, go ahead and try the transcoding. If it fails, then ...
00331     size_t mblen = ::wcstombs(toFill, wideCharBuf, maxBytes);
00332     if (mblen == (size_t)-1)
00333     {
00334         if (allocatedArray)
00335           manager->deallocate(allocatedArray);
00336         return false;
00337     }
00338 
00339     // Cap it off just in case
00340     toFill[mblen] = 0;
00341 
00342     if (allocatedArray)
00343       manager->deallocate(allocatedArray);
00344 
00345     return true;
00346 }
00347 
00348 
00349 bool IconvLCPTranscoder::transcode( const   char* const     toTranscode
00350                                     ,       XMLCh* const    toFill
00351                                     , const XMLSize_t       maxChars
00352                                     , MemoryManager* const  manager)
00353 {
00354     // Check for a couple of psycho corner cases
00355     if (!toTranscode || !maxChars)
00356     {
00357         toFill[0] = 0;
00358         return true;
00359     }
00360 
00361     if (!*toTranscode)
00362     {
00363         toFill[0] = 0;
00364         return true;
00365     }
00366 
00367     XMLSize_t     len = calcRequiredSize(toTranscode);
00368     wchar_t       tmpWideCharArr[gTempBuffArraySize];
00369     wchar_t*      allocatedArray = 0;
00370     wchar_t*      wideCharBuf = 0;
00371 
00372     if (len > maxChars) {
00373         len = maxChars;
00374     }
00375 
00376     if (maxChars >= gTempBuffArraySize)
00377         wideCharBuf = allocatedArray = (wchar_t*) manager->allocate
00378         (
00379             (maxChars + 1) * sizeof(wchar_t)
00380         );//new wchar_t[maxChars + 1];
00381     else
00382         wideCharBuf = tmpWideCharArr;
00383 
00384     if (::mbstowcs(wideCharBuf, toTranscode, maxChars) == (size_t)-1)
00385     {
00386         if (allocatedArray)
00387           manager->deallocate(allocatedArray);
00388         return false;
00389     }
00390 
00391     for (XMLSize_t i = 0; i < len; i++)
00392     {
00393         toFill[i] = (XMLCh) wideCharBuf[i];
00394     }
00395     toFill[len] = 0x00;
00396 
00397     if (allocatedArray)
00398       manager->deallocate(allocatedArray);
00399 
00400     return true;
00401 }
00402 
00403 
00404 template <typename T>
00405 void reallocString(T *&ref, size_t &size, MemoryManager* const manager, bool releaseOld)
00406 {
00407     T *tmp = (T*)manager->allocate(2 * size * sizeof(T));
00408     memcpy(tmp, ref, size * sizeof(T));
00409     if (releaseOld) manager->deallocate(ref);
00410     ref = tmp;
00411     size *= 2;
00412 }
00413 
00414 
00415 char* IconvLCPTranscoder::transcode(const XMLCh* const toTranscode,
00416                                     MemoryManager* const manager)
00417 {
00418     if (!toTranscode)
00419         return 0;
00420     size_t srcCursor = 0, dstCursor = 0;
00421     size_t resultSize = gTempBuffArraySize;
00422     char localBuffer[gTempBuffArraySize];
00423     char* resultString = localBuffer;
00424 
00425 #if HAVE_WCSRTOMBS
00426     mbstate_t st;
00427     memset(&st, 0, sizeof(st));
00428     wchar_t srcBuffer[gTempBuffArraySize];
00429     srcBuffer[gTempBuffArraySize - 1] = 0;
00430     const wchar_t *src = 0;
00431 
00432     while (toTranscode[srcCursor] || src)
00433     {
00434         if (src == 0) // copy a piece of the source string into a local
00435                       // buffer, converted to wchar_t and NULL-terminated.
00436                       // after that, src points to the beginning of the
00437                       // local buffer and is used for the call to ::wcsrtombs
00438         {
00439             size_t i;
00440             for (i=0; i<gTempBuffArraySize-1; ++i)
00441             {
00442                 srcBuffer[i] = toTranscode[srcCursor];
00443                 if (srcBuffer[i] == '\0')
00444                     break;
00445                 ++srcCursor;
00446             }
00447             src = srcBuffer;
00448         }
00449 
00450         size_t len = ::wcsrtombs(resultString + dstCursor, &src, resultSize - dstCursor, &st);
00451         if (len == TRANSCODING_ERROR)
00452         {
00453             dstCursor = 0;
00454             break;
00455         }
00456         dstCursor += len;
00457         if (src != 0) // conversion not finished. This *always* means there
00458                       // was not enough room in the destination buffer.
00459         {
00460             reallocString<char>(resultString, resultSize, manager, resultString != localBuffer);
00461         }
00462     }
00463 #else
00464     while (toTranscode[srcCursor])
00465     {
00466         char mbBuf[16]; // MB_CUR_MAX is not defined as a constant on some platforms
00467         int len = wctomb(mbBuf, toTranscode[srcCursor++]);
00468         if (len < 0)
00469         {
00470             dstCursor = 0;
00471             break;
00472         }
00473         if (dstCursor + len >= resultSize - 1)
00474             reallocString<char>(resultString, resultSize, manager, resultString != localBuffer);
00475         for (int j=0; j<len; ++j)
00476             resultString[dstCursor++] = mbBuf[j];
00477     }
00478 #endif
00479 
00480     if (resultString == localBuffer)
00481     {
00482         resultString = (char*)manager->allocate((dstCursor + 1) * sizeof(char));
00483         memcpy(resultString, localBuffer, dstCursor * sizeof(char));
00484     }
00485 
00486     resultString[dstCursor] = '\0';
00487     return resultString;
00488 }
00489 
00490 XMLCh* IconvLCPTranscoder::transcode(const char* const toTranscode,
00491                                      MemoryManager* const manager)
00492 {
00493     if (!toTranscode)
00494         return 0;
00495     size_t resultSize = gTempBuffArraySize;
00496     size_t srcCursor = 0, dstCursor = 0;
00497 
00498 #if HAVE_MBSRTOWCS
00499     wchar_t localBuffer[gTempBuffArraySize];
00500     wchar_t *tmpString = localBuffer;
00501 
00502     mbstate_t st;
00503     memset(&st, 0, sizeof(st));
00504     const char *src = toTranscode;
00505 
00506     while(true)
00507     {
00508         size_t len = ::mbsrtowcs(tmpString + dstCursor, &src, resultSize - dstCursor, &st);
00509         if (len == TRANSCODING_ERROR)
00510         {
00511             dstCursor = 0;
00512             break;
00513         }
00514         dstCursor += len;
00515         if (src == 0) // conversion finished
00516             break;
00517         if (dstCursor >= resultSize - 1)
00518             reallocString<wchar_t>(tmpString, resultSize, manager, tmpString != localBuffer);
00519     }
00520     // make a final copy, converting from wchar_t to XMLCh:
00521     XMLCh* resultString = (XMLCh*)manager->allocate((dstCursor + 1) * sizeof(XMLCh));
00522     size_t i;
00523     for (i=0; i<dstCursor; ++i)
00524         resultString[i] = tmpString[i];
00525     if (tmpString != localBuffer) // did we allocate something?
00526         manager->deallocate(tmpString);
00527 #else
00528     XMLCh localBuffer[gTempBuffArraySize];
00529     XMLCh* resultString = localBuffer;
00530     size_t srcLen = strlen(toTranscode);
00531 
00532     while(srcLen > srcCursor)
00533     {
00534         wchar_t wcBuf[1];
00535         int len = mbtowc(wcBuf, toTranscode + srcCursor, srcLen - srcCursor);
00536         if (len <= 0)
00537         {
00538             if (len < 0)
00539                 dstCursor = 0;
00540             break;
00541         }
00542         srcCursor += len;
00543         if (dstCursor + 1 >= resultSize - 1)
00544             reallocString<XMLCh>(resultString, resultSize, manager, resultString != localBuffer);
00545         resultString[dstCursor++] = wcBuf[0];
00546     }
00547 
00548     if (resultString == localBuffer)
00549     {
00550         resultString = (XMLCh*)manager->allocate((dstCursor + 1) * sizeof(XMLCh));
00551         memcpy(resultString, localBuffer, dstCursor * sizeof(XMLCh));
00552     }
00553 #endif
00554 
00555     resultString[dstCursor] = L'\0';
00556     return resultString;
00557 }
00558 
00559 
00560 // ---------------------------------------------------------------------------
00561 //  IconvLCPTranscoder: Constructors and Destructor
00562 // ---------------------------------------------------------------------------
00563 IconvLCPTranscoder::IconvLCPTranscoder()
00564 {
00565 }
00566 
00567 IconvLCPTranscoder::~IconvLCPTranscoder()
00568 {
00569 }
00570 
00571 XERCES_CPP_NAMESPACE_END