GME  13
IconvGNUTransService.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: IconvGNUTransService.cpp 901107 2010-01-20 08:45:02Z borisk $
00020  */
00021 
00022 // ---------------------------------------------------------------------------
00023 //  Includes
00024 // ---------------------------------------------------------------------------
00025 #if HAVE_CONFIG_H
00026   #include <config.h>
00027 #endif
00028 
00029 #include <ctype.h>
00030 
00031 #include <locale.h>
00032 #include <errno.h>
00033 
00034 #if HAVE_ENDIAN_H
00035   #include <endian.h>
00036 #elif HAVE_MACHINE_ENDIAN_H
00037   #include <machine/endian.h>
00038 #elif HAVE_ARPA_NAMESER_COMPAT_H
00039   #include <arpa/nameser_compat.h>
00040 #endif
00041 
00042 #define MAX_UCHSIZE 4
00043 
00044 //--------------------------------------------------
00045 // Macro-definitions to translate "native unicode"
00046 // characters <-> XMLCh with different host byte order
00047 // and encoding schemas.
00048 
// Reads the byte at address p as an unsigned value. The buffers handed to
// these macros are plain `char` arrays; where `char` is signed, a byte >= 0x80
// would otherwise be sign-extended and overwrite the higher bits of the
// assembled character.
#define ICONVGNU_UBYTE(p)    ((unsigned int)(unsigned char)(*(p)))

// Store the character unit *(x) into the byte buffer w, most significant
// byte first (BE) or least significant byte first (LE). The statements are
// wrapped in do/while(0) so each macro expands to exactly one statement.
#define ICONVGNU_PUT16_BE(x,w)                          \
    do {                                                \
        *(w)     = (char)(((*(x)) >> 8) & 0xFF);        \
        *((w)+1) = (char)((*(x)) & 0xFF);               \
    } while (0)
#define ICONVGNU_PUT16_LE(x,w)                          \
    do {                                                \
        *(w)     = (char)((*(x)) & 0xFF);               \
        *((w)+1) = (char)(((*(x)) >> 8) & 0xFF);        \
    } while (0)
#define ICONVGNU_PUT32_BE(x,w)                          \
    do {                                                \
        *(w)     = (char)(((*(x)) >> 24) & 0xFF);       \
        *((w)+1) = (char)(((*(x)) >> 16) & 0xFF);       \
        *((w)+2) = (char)(((*(x)) >> 8) & 0xFF);        \
        *((w)+3) = (char)((*(x)) & 0xFF);               \
    } while (0)
#define ICONVGNU_PUT32_LE(x,w)                          \
    do {                                                \
        *((w)+3) = (char)(((*(x)) >> 24) & 0xFF);       \
        *((w)+2) = (char)(((*(x)) >> 16) & 0xFF);       \
        *((w)+1) = (char)(((*(x)) >> 8) & 0xFF);        \
        *(w)     = (char)((*(x)) & 0xFF);               \
    } while (0)

// Assemble the character unit *(x) from the byte buffer w, which holds it
// most significant byte first (BE) or least significant byte first (LE).
#define ICONVGNU_GET16_BE(w,x)                                          \
    (*(x) = (ICONVGNU_UBYTE(w) << 8) | ICONVGNU_UBYTE((w)+1))
#define ICONVGNU_GET16_LE(w,x)                                          \
    (*(x) = (ICONVGNU_UBYTE((w)+1) << 8) | ICONVGNU_UBYTE(w))
#define ICONVGNU_GET32_BE(w,x)                                          \
    (*(x) = (ICONVGNU_UBYTE(w) << 24) | (ICONVGNU_UBYTE((w)+1) << 16) | \
            (ICONVGNU_UBYTE((w)+2) << 8) | ICONVGNU_UBYTE((w)+3))
#define ICONVGNU_GET32_LE(w,x)                                          \
    (*(x) = (ICONVGNU_UBYTE((w)+3) << 24) | (ICONVGNU_UBYTE((w)+2) << 16) | \
            (ICONVGNU_UBYTE((w)+1) << 8) | ICONVGNU_UBYTE(w))

// XMLCh2WCnn / WCnn2XMLCh   : buffer uses the host byte order.
// IXMLCh2WCnn / IWCnn2XMLCh : buffer uses the opposite ("inverted") order.
# if BYTE_ORDER == LITTLE_ENDIAN

#  define XMLCh2WC16(x,w)     ICONVGNU_PUT16_LE(x,w)
#  define WC162XMLCh(w,x)     ICONVGNU_GET16_LE(w,x)
#  define IXMLCh2WC16(x,w)    ICONVGNU_PUT16_BE(x,w)
#  define IWC162XMLCh(w,x)    ICONVGNU_GET16_BE(w,x)

#  define XMLCh2WC32(x,w)     ICONVGNU_PUT32_LE(x,w)
#  define WC322XMLCh(w,x)     ICONVGNU_GET32_LE(w,x)
#  define IXMLCh2WC32(x,w)    ICONVGNU_PUT32_BE(x,w)
#  define IWC322XMLCh(w,x)    ICONVGNU_GET32_BE(w,x)

# else /* BYTE_ORDER != LITTLE_ENDIAN */

#  define XMLCh2WC16(x,w)     ICONVGNU_PUT16_BE(x,w)
#  define WC162XMLCh(w,x)     ICONVGNU_GET16_BE(w,x)
#  define IXMLCh2WC16(x,w)    ICONVGNU_PUT16_LE(x,w)
#  define IWC162XMLCh(w,x)    ICONVGNU_GET16_LE(w,x)

#  define XMLCh2WC32(x,w)     ICONVGNU_PUT32_BE(x,w)
#  define WC322XMLCh(w,x)     ICONVGNU_GET32_BE(w,x)
#  define IXMLCh2WC32(x,w)    ICONVGNU_PUT32_LE(x,w)
#  define IWC322XMLCh(w,x)    ICONVGNU_GET32_LE(w,x)

# endif /* BYTE_ORDER == LITTLE_ENDIAN */
00104 
00105 #include <wchar.h>
00106 #include <string.h>
00107 #include <stdlib.h>
00108 #include <stdio.h>
00109 
00110 #include <xercesc/util/XMLString.hpp>
00111 #include <xercesc/util/XMLUniDefs.hpp>
00112 #include <xercesc/util/XMLUni.hpp>
00113 #include <xercesc/util/PlatformUtils.hpp>
00114 #include <xercesc/util/TranscodingException.hpp>
00115 #include <xercesc/util/Janitor.hpp>
00116 #include "IconvGNUTransService.hpp"
00117 
00118 
00119 XERCES_CPP_NAMESPACE_BEGIN
00120 
00121 // ---------------------------------------------------------------------------
00122 // Description of encoding schemas, supported by iconv()
00123 // ---------------------------------------------------------------------------
// One candidate "native unicode" encoding that may be requested from iconv.
typedef struct __IconvGNUEncoding {
    // Encoding name as handed to iconv_open()
    const char*     fSchema;
    // Number of bytes occupied by one character unit
    size_t          fUChSize;
    // Byte order of a character unit; compared against BYTE_ORDER
    unsigned int    fUBO;
} IconvGNUEncoding;
00129 
00130 static const IconvGNUEncoding    gIconvGNUEncodings[] = {
00131     { "UTF-16LE",        2,    LITTLE_ENDIAN },
00132     { "UTF-16BE",        2,    BIG_ENDIAN },
00133     { "UCS-2LE",         2,    LITTLE_ENDIAN },
00134     { "UCS-2BE",         2,    BIG_ENDIAN },
00135     { "UCS-2-INTERNAL",  2,    BYTE_ORDER },
00136     { NULL,              0,    0 }
00137 };
00138 
00139 // ---------------------------------------------------------------------------
00140 //  Local, const data
00141 // ---------------------------------------------------------------------------
00142 static const unsigned int    gTempBuffArraySize = 4096;
00143 static const XMLCh        gMyServiceId[] =
00144 {
00145     chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull
00146 };
00147 
00148 
00149 // ---------------------------------------------------------------------------
00150 //  Local methods
00151 // ---------------------------------------------------------------------------
00152 static XMLSize_t getWideCharLength(const XMLCh* const src)
00153 {
00154     if (!src)
00155         return 0;
00156 
00157     XMLSize_t len = 0;
00158     const XMLCh* pTmp = src;
00159     while (*pTmp++)
00160         len++;
00161     return len;
00162 }
00163 
00164 
00165 //----------------------------------------------------------------------------
00166 // There is implementation of the libiconv for FreeBSD (available through the
00167 // ports collection). The following is a wrapper around the iconv().
00168 //----------------------------------------------------------------------------
00169 
00170 IconvGNUWrapper::IconvGNUWrapper (MemoryManager* manager)
00171     : fUChSize(0), fUBO(LITTLE_ENDIAN),
00172       fCDTo((iconv_t)-1), fCDFrom((iconv_t)-1), fMutex(manager)
00173 {
00174 }
00175 
00176 IconvGNUWrapper::IconvGNUWrapper ( iconv_t    cd_from,
00177                iconv_t    cd_to,
00178                size_t    uchsize,
00179                unsigned int    ubo,
00180                MemoryManager* manager)
00181     : fUChSize(uchsize), fUBO(ubo),
00182       fCDTo(cd_to), fCDFrom(cd_from), fMutex(manager)
00183 {
00184     if (fCDFrom == (iconv_t) -1 || fCDTo == (iconv_t) -1) {
00185         XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService);
00186     }
00187 }
00188 
00189 IconvGNUWrapper::~IconvGNUWrapper()
00190 {
00191 }
00192 
00193 // Convert "native unicode" character into XMLCh
00194 void    IconvGNUWrapper::mbcToXMLCh (const char *mbc, XMLCh *toRet) const
00195 {
00196     if (fUBO == BYTE_ORDER) {
00197         if (fUChSize == sizeof(XMLCh))
00198             *toRet = *((XMLCh*) mbc);
00199         else if (fUChSize == 2) {
00200             WC162XMLCh( mbc, toRet );
00201         } else {
00202             WC322XMLCh( mbc, toRet );
00203         }
00204     } else {
00205         if (fUChSize == 2) {
00206             IWC162XMLCh( mbc, toRet );
00207         } else {
00208             IWC322XMLCh( mbc, toRet );
00209         }
00210     }
00211 }
00212 
00213 // Convert XMLCh into "native unicode" character
00214 void    IconvGNUWrapper::xmlChToMbc (XMLCh xch, char *mbc) const
00215 {
00216     if (fUBO == BYTE_ORDER) {
00217         if (fUChSize == sizeof(XMLCh)) {
00218             memcpy (mbc, &xch, fUChSize);
00219             return;
00220         }
00221         if (fUChSize == 2) {
00222             XMLCh2WC16( &xch, mbc );
00223         } else {
00224             XMLCh2WC32( &xch, mbc );
00225         }
00226     } else {
00227         if (fUChSize == 2) {
00228             IXMLCh2WC16( &xch, mbc );
00229         } else {
00230             IXMLCh2WC32( &xch, mbc );
00231         }
00232     }
00233 }
00234 
00235 // Return uppercase equivalent for XMLCh
00236 XMLCh IconvGNUWrapper::toUpper (const XMLCh ch)
00237 {
00238     if (ch <= 0x7F)
00239         return toupper(ch);
00240 
00241     char    wcbuf[MAX_UCHSIZE * 2];
00242     xmlChToMbc (ch, wcbuf);
00243 
00244     char    tmpArr[4];
00245 #if ICONV_USES_CONST_POINTER
00246     const char* ptr = wcbuf;
00247 #else
00248     char* ptr = wcbuf;
00249 #endif
00250     size_t    len = fUChSize;
00251     char    *pTmpArr = tmpArr;
00252     size_t    bLen = 2;
00253 
00254     if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
00255         return 0;
00256     tmpArr[1] = toupper (*((unsigned char *)tmpArr));
00257     *tmpArr = tmpArr[1];
00258     len = 1;
00259     pTmpArr = wcbuf;
00260     bLen = fUChSize;
00261     ptr = tmpArr;
00262     if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
00263         return 0;
00264     mbcToXMLCh (wcbuf, (XMLCh*) &ch);
00265     return ch;
00266 }
00267 
00268 // Return lowercase equivalent for XMLCh
00269 XMLCh IconvGNUWrapper::toLower (const XMLCh ch)
00270 {
00271     if (ch <= 0x7F)
00272         return tolower(ch);
00273 
00274     char    wcbuf[MAX_UCHSIZE * 2];
00275     xmlChToMbc (ch, wcbuf);
00276 
00277     char    tmpArr[4];
00278 #if ICONV_USES_CONST_POINTER
00279     const char* ptr = wcbuf;
00280 #else
00281     char* ptr = wcbuf;
00282 #endif
00283     size_t    len = fUChSize;
00284     char    *pTmpArr = tmpArr;
00285     size_t    bLen = 2;
00286 
00287     if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
00288         return 0;
00289     tmpArr[1] = tolower (*((unsigned char*)tmpArr));
00290     *tmpArr = tmpArr[1];
00291     len = 1;
00292     pTmpArr = wcbuf;
00293     bLen = fUChSize;
00294     ptr = tmpArr;
00295     if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1)
00296         return 0;
00297     mbcToXMLCh (wcbuf, (XMLCh*) &ch);
00298     return ch;
00299 }
00300 
00301 // Fill array of XMLCh characters with data, supplyed in the array
00302 // of "native unicode" characters.
00303 XMLCh*    IconvGNUWrapper::mbsToXML
00304 (
00305     const char*      mbs_str
00306     ,      XMLCh*    xml_str
00307     ,      size_t    cnt
00308 ) const
00309 {
00310     if (mbs_str == NULL || xml_str == NULL || cnt == 0)
00311         return NULL;
00312     if (fUBO == BYTE_ORDER) {
00313         if (fUChSize == sizeof(XMLCh)) {
00314             // null-transformation
00315             memcpy (xml_str, mbs_str, fUChSize * cnt);
00316             return xml_str;
00317         }
00318         if (fUChSize == 2)
00319             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
00320                 WC162XMLCh( mbs_str, xml_str + i);
00321             }
00322         else
00323             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
00324                 WC322XMLCh( mbs_str, xml_str + i );
00325             }
00326     } else {
00327         if (fUChSize == 2)
00328             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
00329                 IWC162XMLCh( mbs_str, xml_str + i );
00330             }
00331         else
00332             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) {
00333                 IWC322XMLCh( mbs_str, xml_str + i );
00334             }
00335     }
00336     return xml_str;
00337 }
00338 
00339 // Fill array of "native unicode" characters with data, supplyed
00340 // in the array of XMLCh characters.
00341 char*    IconvGNUWrapper::xmlToMbs
00342 (
00343     const XMLCh*     xml_str
00344     ,      char*     mbs_str
00345     ,      size_t    cnt
00346 ) const
00347 {
00348     if (mbs_str == NULL || xml_str == NULL || cnt == 0)
00349         return NULL;
00350     char    *toReturn = mbs_str;
00351     if (fUBO == BYTE_ORDER) {
00352         if (fUChSize == sizeof(XMLCh)) {
00353             // null-transformation
00354             memcpy (mbs_str, xml_str, fUChSize * cnt);
00355             return toReturn;
00356         }
00357         if (fUChSize == 2)
00358             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
00359                 XMLCh2WC16( xml_str, mbs_str );
00360             }
00361         else
00362             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
00363                 XMLCh2WC32( xml_str, mbs_str );
00364             }
00365     } else {
00366         if (fUChSize == 2)
00367             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
00368                 IXMLCh2WC16( xml_str, mbs_str );
00369             }
00370         else
00371             for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) {
00372                 IXMLCh2WC32( xml_str, mbs_str );
00373             }
00374     }
00375     return toReturn;
00376 }
00377 
00378 size_t    IconvGNUWrapper::iconvFrom ( const char    *fromPtr,
00379                  size_t        *fromLen,
00380                  char        **toPtr,
00381                  size_t        toLen )
00382 {
00383 #if ICONV_USES_CONST_POINTER
00384     const char ** tmpPtr = &fromPtr;
00385 #else
00386     char ** tmpPtr = (char**)&fromPtr;
00387 #endif
00388     return ::iconv (fCDFrom, tmpPtr, fromLen, toPtr, &toLen);
00389 }
00390 
00391 size_t    IconvGNUWrapper::iconvTo ( const char    *fromPtr,
00392                    size_t        *fromLen,
00393                    char        **toPtr,
00394                    size_t        toLen )
00395 {
00396 #if ICONV_USES_CONST_POINTER
00397     const char ** tmpPtr = &fromPtr;
00398 #else
00399     char ** tmpPtr = (char**)&fromPtr;
00400 #endif
00401     return ::iconv (fCDTo, tmpPtr, fromLen, toPtr, &toLen);
00402 }
00403 
00404 
00405 // ---------------------------------------------------------------------------
00406 //  IconvGNUTransService: Constructors and Destructor
00407 // ---------------------------------------------------------------------------
00408 
00409 IconvGNUTransService::IconvGNUTransService(MemoryManager* manager)
00410     : IconvGNUWrapper(manager), fUnicodeCP(0)
00411 {
00412     // Try to obtain local (host) characterset from the setlocale
00413     // and through the environment. Do not call setlocale(LC_*, "")!
00414     // Using an empty string instead of NULL, will modify the libc
00415     // behavior.
00416     //
00417     const char* fLocalCP = setlocale (LC_CTYPE, NULL);
00418     if (fLocalCP == NULL || *fLocalCP == 0 ||
00419         strcmp (fLocalCP, "C") == 0 ||
00420         strcmp (fLocalCP, "POSIX") == 0) {
00421       fLocalCP = getenv ("LC_ALL");
00422       if (fLocalCP == NULL) {
00423         fLocalCP = getenv ("LC_CTYPE");
00424         if (fLocalCP == NULL)
00425           fLocalCP = getenv ("LANG");
00426       }
00427     }
00428 
00429     if (fLocalCP == NULL || *fLocalCP == 0 ||
00430         strcmp (fLocalCP, "C") == 0 ||
00431         strcmp (fLocalCP, "POSIX") == 0)
00432         fLocalCP = "iso-8859-1";    // fallback locale
00433     else {
00434         const char *ptr = strchr (fLocalCP, '.');
00435         if (ptr == NULL)
00436             fLocalCP = "iso-8859-1";    // fallback locale
00437         else
00438             fLocalCP = ptr + 1;
00439     }
00440 
00441     // Select the native unicode characters encoding schema
00442     const IconvGNUEncoding    *eptr;
00443     // first - try to use the schema with character size equal to XMLCh, and same endianness
00444     for (eptr = gIconvGNUEncodings; eptr->fSchema; eptr++)
00445     {
00446         if (eptr->fUChSize != sizeof(XMLCh) || eptr->fUBO != BYTE_ORDER)
00447             continue;
00448 
00449         // try to create conversion descriptor
00450         iconv_t    cd_to = iconv_open(fLocalCP, eptr->fSchema);
00451         if (cd_to == (iconv_t)-1)
00452             continue;
00453         iconv_t    cd_from = iconv_open(eptr->fSchema, fLocalCP);
00454         if (cd_from == (iconv_t)-1) {
00455             iconv_close (cd_to);
00456             continue;
00457         }
00458 
00459         // got it
00460         setUChSize(eptr->fUChSize);
00461         setUBO(eptr->fUBO);
00462         setCDTo(cd_to);
00463         setCDFrom(cd_from);
00464         fUnicodeCP = eptr->fSchema;
00465         break;
00466     }
00467     if (fUnicodeCP == NULL)
00468         // try to use any known schema
00469         for (eptr = gIconvGNUEncodings; eptr->fSchema; eptr++)
00470         {
00471             // try to create conversion descriptor
00472             iconv_t    cd_to = iconv_open(fLocalCP, eptr->fSchema);
00473             if (cd_to == (iconv_t)-1)
00474                 continue;
00475             iconv_t    cd_from = iconv_open(eptr->fSchema, fLocalCP);
00476             if (cd_from == (iconv_t)-1) {
00477                 iconv_close (cd_to);
00478                 continue;
00479             }
00480 
00481             // got it
00482             setUChSize(eptr->fUChSize);
00483             setUBO(eptr->fUBO);
00484             setCDTo(cd_to);
00485             setCDFrom(cd_from);
00486             fUnicodeCP = eptr->fSchema;
00487             break;
00488         }
00489 
00490     if (fUnicodeCP == NULL || cdTo() == (iconv_t)-1 || cdFrom() == (iconv_t)-1)
00491         XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService);
00492 }
00493 
00494 IconvGNUTransService::~IconvGNUTransService()
00495 {
00496     if (cdTo() != (iconv_t) -1) {
00497         iconv_close (cdTo());
00498         setCDTo ((iconv_t)-1);
00499     }
00500     if (cdFrom() != (iconv_t) -1) {
00501         iconv_close (cdFrom());
00502         setCDFrom ((iconv_t)-1);
00503     }
00504 }
00505 
00506 // ---------------------------------------------------------------------------
00507 //  IconvGNUTransService: The virtual transcoding service API
00508 // ---------------------------------------------------------------------------
00509 int IconvGNUTransService::compareIString(const XMLCh* const    comp1
00510                                         , const XMLCh* const    comp2)
00511 {
00512     const XMLCh* cptr1 = comp1;
00513     const XMLCh* cptr2 = comp2;
00514 
00515     XMLMutexLock lockConverter(&fMutex);
00516 
00517     XMLCh    c1 = toUpper(*cptr1);
00518     XMLCh    c2 = toUpper(*cptr2);
00519     while ( (*cptr1 != 0) && (*cptr2 != 0) ) {
00520         if (c1 != c2)
00521             break;
00522         c1 = toUpper(*(++cptr1));
00523         c2 = toUpper(*(++cptr2));
00524 
00525     }
00526     return (int) ( c1 - c2 );
00527 }
00528 
00529 
00530 int IconvGNUTransService::compareNIString(const XMLCh* const     comp1
00531                                          , const XMLCh* const    comp2
00532                                          , const XMLSize_t       maxChars)
00533 {
00534     unsigned int  n = 0;
00535     const XMLCh* cptr1 = comp1;
00536     const XMLCh* cptr2 = comp2;
00537 
00538     XMLMutexLock lockConverter(&fMutex);
00539 
00540     while (true && maxChars)
00541     {
00542         XMLCh    c1 = toUpper(*cptr1);
00543         XMLCh    c2 = toUpper(*cptr2);
00544 
00545         if (c1 != c2)
00546             return (int) (c1 - c2);
00547 
00548         // If either ended, then both ended, so equal
00549         if (!*cptr1 || !*cptr2)
00550             break;
00551 
00552         cptr1++;
00553         cptr2++;
00554 
00555         //  Bump the count of chars done. If it equals the count then we
00556         //  are equal for the requested count, so break out and return
00557         //  equal.
00558         n++;
00559         if (n == maxChars)
00560             break;
00561     }
00562 
00563     return 0;
00564 }
00565 
00566 
00567 const XMLCh* IconvGNUTransService::getId() const
00568 {
00569     return gMyServiceId;
00570 }
00571 
00572 XMLLCPTranscoder* IconvGNUTransService::makeNewLCPTranscoder(MemoryManager* manager)
00573 {
00574     return new (manager) IconvGNULCPTranscoder (cdFrom(), cdTo(), uChSize(), UBO(), manager);
00575 }
00576 
00577 bool IconvGNUTransService::supportsSrcOfs() const
00578 {
00579     return true;
00580 }
00581 
00582 // ---------------------------------------------------------------------------
00583 //  IconvGNUTransService: The protected virtual transcoding service API
00584 // ---------------------------------------------------------------------------
00585 XMLTranscoder*
00586 IconvGNUTransService::makeNewXMLTranscoder
00587 (
00588     const    XMLCh* const    encodingName
00589     ,    XMLTransService::Codes&    resValue
00590     , const    XMLSize_t    blockSize
00591     ,        MemoryManager* const    manager
00592 )
00593 {
00594     resValue = XMLTransService::UnsupportedEncoding;
00595     IconvGNUTranscoder    *newTranscoder = NULL;
00596 
00597     char    *encLocal = XMLString::transcode(encodingName, manager);
00598     ArrayJanitor<char> janBuf(encLocal, manager);
00599     iconv_t    cd_from, cd_to;
00600 
00601     cd_from = iconv_open (fUnicodeCP, encLocal);
00602     if (cd_from == (iconv_t)-1) {
00603         resValue = XMLTransService::SupportFilesNotFound;
00604         return NULL;
00605     }
00606     cd_to = iconv_open (encLocal, fUnicodeCP);
00607     if (cd_to == (iconv_t)-1) {
00608         resValue = XMLTransService::SupportFilesNotFound;
00609         iconv_close (cd_from);
00610         return NULL;
00611     }
00612     newTranscoder = new (manager) IconvGNUTranscoder (encodingName,
00613                          blockSize,
00614                          cd_from, cd_to,
00615                          uChSize(), UBO(), manager);
00616     if (newTranscoder)
00617         resValue = XMLTransService::Ok;
00618     return newTranscoder;
00619 }
00620 
00621 void IconvGNUTransService::upperCase(XMLCh* const toUpperCase)
00622 {
00623     XMLCh* outPtr = toUpperCase;
00624 
00625     XMLMutexLock lockConverter(&fMutex);
00626 
00627     while (*outPtr)
00628     {
00629         *outPtr = toUpper(*outPtr);
00630         outPtr++;
00631     }
00632 }
00633 
00634 void IconvGNUTransService::lowerCase(XMLCh* const toLowerCase)
00635 {
00636     XMLCh* outPtr = toLowerCase;
00637 
00638     XMLMutexLock lockConverter(&fMutex);
00639 
00640     while (*outPtr)
00641     {
00642         *outPtr = toLower(*outPtr);
00643         outPtr++;
00644     }
00645 }
00646 
00647 // ---------------------------------------------------------------------------
00648 //  IconvGNULCPTranscoder: The virtual transcoder API
00649 // ---------------------------------------------------------------------------
00650 XMLSize_t IconvGNULCPTranscoder::calcRequiredSize (const char* const srcText
00651                                          , MemoryManager* const manager)
00652 {
00653     if (!srcText)
00654         return 0;
00655 
00656     size_t len, srcLen;
00657     len = srcLen = strlen(srcText);
00658     if (len == 0)
00659         return 0;
00660 
00661     char tmpWideArr[gTempBuffArraySize];
00662     size_t totalLen = 0;
00663 
00664     XMLMutexLock lockConverter(&fMutex);
00665 
00666     for (;;) {
00667         char        *pTmpArr = tmpWideArr;
00668         const char    *ptr = srcText + srcLen - len;
00669         size_t    rc = iconvFrom(ptr, &len, &pTmpArr, gTempBuffArraySize);
00670         if (rc == (size_t) -1 && errno != E2BIG) {
00671             ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, manager);
00672             /* return 0; */
00673         }
00674         rc = pTmpArr - (char *) tmpWideArr;
00675         totalLen += rc;
00676         if (rc == 0 || len == 0)
00677             break;
00678     }
00679     return totalLen / uChSize();
00680 }
00681 
00682 
00683 XMLSize_t IconvGNULCPTranscoder::calcRequiredSize(const XMLCh* const srcText
00684                                         , MemoryManager* const manager)
00685 {
00686     if (!srcText)
00687         return 0;
00688     XMLSize_t  wLent = getWideCharLength(srcText);
00689     if (wLent == 0)
00690         return 0;
00691 
00692     char    tmpWBuff[gTempBuffArraySize];
00693     char    *wBuf = 0;
00694     char    *wBufPtr = 0;
00695     ArrayJanitor<char>  janBuf(wBufPtr, manager);
00696     size_t      len = wLent * uChSize();
00697     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
00698         if (len > gTempBuffArraySize) {
00699             wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
00700             janBuf.reset(wBufPtr, manager);
00701             wBuf = wBufPtr;
00702         } else
00703             wBuf = tmpWBuff;
00704         xmlToMbs (srcText, wBuf, wLent);
00705     } else
00706         wBuf = (char *) srcText;
00707 
00708     char    tmpBuff[gTempBuffArraySize];
00709     size_t    totalLen = 0;
00710     char    *srcEnd = wBuf + wLent * uChSize();
00711 
00712     XMLMutexLock lockConverter(&fMutex);
00713 
00714     for (;;) {
00715         char        *pTmpArr = tmpBuff;
00716         const char    *ptr = srcEnd - len;
00717         size_t    rc = iconvTo(ptr, &len, &pTmpArr, gTempBuffArraySize);
00718         if (rc == (size_t) -1 && errno != E2BIG) {
00719             ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, manager);
00720             /* return 0; */
00721         }
00722         rc = pTmpArr - tmpBuff;
00723         totalLen += rc;
00724         if (rc == 0 || len == 0)
00725             break;
00726     }
00727     return totalLen;
00728 }
00729 
00730 
00731 char* IconvGNULCPTranscoder::transcode(const XMLCh* const toTranscode,
00732                                        MemoryManager* const manager)
00733 {
00734     if (!toTranscode)
00735         return 0;
00736 
00737     char* retVal = 0;
00738     if (!*toTranscode) {
00739         retVal = (char*) manager->allocate(sizeof(char));//new char[1];
00740         retVal[0] = 0;
00741         return retVal;
00742     }
00743 
00744     XMLSize_t wLent = getWideCharLength(toTranscode);
00745 
00746     // Calc needed size.
00747     XMLSize_t neededLen = calcRequiredSize (toTranscode, manager);
00748     if (neededLen == 0)
00749         return 0;
00750     // allocate output buffer
00751     retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char));//new char[neededLen + 1];
00752     // prepare the original
00753     char    tmpWBuff[gTempBuffArraySize];
00754     char    *wideCharBuf = 0;
00755     char    *wBufPtr = 0;
00756     ArrayJanitor<char>  janBuf(wBufPtr, manager);
00757     size_t  len = wLent * uChSize();
00758 
00759     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
00760         if (len > gTempBuffArraySize) {
00761             wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
00762             janBuf.reset(wBufPtr, manager);
00763             wideCharBuf = wBufPtr;
00764         } else
00765             wideCharBuf = tmpWBuff;
00766         xmlToMbs (toTranscode, wideCharBuf, wLent);
00767     } else
00768         wideCharBuf = (char *) toTranscode;
00769 
00770     // perform conversion
00771     char* ptr = retVal;
00772     size_t rc;
00773 
00774     {
00775       XMLMutexLock lockConverter(&fMutex);
00776       rc = iconvTo(wideCharBuf, &len, &ptr, neededLen);
00777     }
00778 
00779     if (rc == (size_t)-1) {
00780         return 0;
00781     }
00782     retVal[neededLen] = 0;
00783 
00784     return retVal;
00785 }
00786 
00787 
00788 bool IconvGNULCPTranscoder::transcode( const   XMLCh* const    toTranscode
00789                     , char* const        toFill
00790                     , const XMLSize_t       maxBytes
00791                     , MemoryManager* const  manager)
00792 {
00793     // Watch for a couple of pyscho corner cases
00794     if (!toTranscode || !maxBytes) {
00795         toFill[0] = 0;
00796         return true;
00797     }
00798     if (!*toTranscode) {
00799         toFill[0] = 0;
00800         return true;
00801     }
00802 
00803     XMLSize_t wLent = getWideCharLength(toTranscode);
00804     if (wLent > maxBytes)
00805         wLent = maxBytes;
00806 
00807     // Fill the "unicode" string
00808     char    tmpWBuff[gTempBuffArraySize];
00809     char    *wideCharBuf = 0;
00810     char    *wBufPtr = 0;
00811     ArrayJanitor<char>  janBuf(wBufPtr, manager);
00812     size_t  len = wLent * uChSize();
00813 
00814     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
00815         if (len > gTempBuffArraySize) {
00816             wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
00817             janBuf.reset(wBufPtr, manager);
00818             wideCharBuf = wBufPtr;
00819         } else
00820             wideCharBuf = tmpWBuff;
00821         xmlToMbs (toTranscode, wideCharBuf, wLent);
00822     } else
00823         wideCharBuf = (char *) toTranscode;
00824 
00825     // Ok, go ahead and try the transcoding. If it fails, then ...
00826     char    *ptr = toFill;
00827     size_t rc;
00828 
00829     {
00830       XMLMutexLock lockConverter(&fMutex);
00831       rc = iconvTo(wideCharBuf, &len, &ptr, maxBytes);
00832     }
00833 
00834     if (rc == (size_t)-1) {
00835         return false;
00836     }
00837 
00838     // Cap it off
00839     *ptr = 0;
00840     return true;
00841 }
00842 
00843 
00844 XMLCh* IconvGNULCPTranscoder::transcode(const char* const toTranscode,
00845                                         MemoryManager* const manager)
00846 {
00847     if (!toTranscode)
00848         return 0;
00849 
00850     XMLCh* retVal = 0;
00851     if (!*toTranscode) {
00852         retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
00853         retVal[0] = 0;
00854         return retVal;
00855     }
00856 
00857     XMLSize_t wLent = calcRequiredSize(toTranscode, manager);
00858     if (wLent == 0) {
00859         retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
00860         retVal[0] = 0;
00861         return retVal;
00862     }
00863 
00864     char    tmpWBuff[gTempBuffArraySize];
00865     char    *wideCharBuf = 0;
00866     char    *wBufPtr = 0;
00867     ArrayJanitor<char>  janBuf(wBufPtr, manager);
00868     size_t  len = wLent * uChSize();
00869 
00870     retVal = (XMLCh*) manager->allocate((wLent + 1) * sizeof(XMLCh));//new XMLCh[wLent + 1];
00871     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
00872         if (len > gTempBuffArraySize) {
00873             wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
00874             janBuf.reset(wBufPtr, manager);
00875             wideCharBuf = wBufPtr;
00876         } else
00877             wideCharBuf = tmpWBuff;
00878     } else
00879         wideCharBuf = (char *) retVal;
00880 
00881     size_t    flen = strlen(toTranscode);
00882     char    *ptr = wideCharBuf;
00883     size_t rc;
00884 
00885     {
00886       XMLMutexLock lockConverter(&fMutex);
00887       rc = iconvFrom(toTranscode, &flen, &ptr, len);
00888     }
00889 
00890     if (rc == (size_t) -1) {
00891         return NULL;
00892     }
00893     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER)
00894         mbsToXML (wideCharBuf, retVal, wLent);
00895     retVal[wLent] = 0x00;
00896 
00897     return retVal;
00898 }
00899 
00900 
00901 bool IconvGNULCPTranscoder::transcode(const   char* const    toTranscode
00902                        ,       XMLCh* const    toFill
00903                        , const XMLSize_t       maxChars
00904                        , MemoryManager* const  manager)
00905 {
00906     // Check for a couple of psycho corner cases
00907     if (!toTranscode || !maxChars)
00908     {
00909         toFill[0] = 0;
00910         return true;
00911     }
00912 
00913     if (!*toTranscode)
00914     {
00915         toFill[0] = 0;
00916         return true;
00917     }
00918 
00919     XMLSize_t wLent = calcRequiredSize(toTranscode);
00920     if (wLent > maxChars)
00921         wLent = maxChars;
00922 
00923     char    tmpWBuff[gTempBuffArraySize];
00924     char    *wideCharBuf = 0;
00925     char    *wBufPtr = 0;
00926     ArrayJanitor<char>  janBuf(wBufPtr, manager);
00927     size_t    len = wLent * uChSize();
00928 
00929     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
00930         if (len > gTempBuffArraySize) {
00931             wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len];
00932             janBuf.reset(wBufPtr, manager);
00933             wideCharBuf = wBufPtr;
00934         } else
00935             wideCharBuf = tmpWBuff;
00936     } else
00937         wideCharBuf = (char *) toFill;
00938 
00939     size_t    flen = strlen(toTranscode); // wLent;
00940     char    *ptr = wideCharBuf;
00941     size_t rc;
00942 
00943     {
00944       XMLMutexLock lockConverter(&fMutex);
00945       rc = iconvFrom(toTranscode, &flen, &ptr, len);
00946     }
00947 
00948     if (rc == (size_t)-1) {
00949         return false;
00950     }
00951 
00952     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER)
00953         mbsToXML (wideCharBuf, toFill, wLent);
00954 
00955     toFill[wLent] = 0x00;
00956     return true;
00957 }
00958 
00959 
00960 // ---------------------------------------------------------------------------
00961 //  IconvGNULCPTranscoder: Constructors and Destructor
00962 // ---------------------------------------------------------------------------
00963 
00964 
00965 IconvGNULCPTranscoder::IconvGNULCPTranscoder (iconv_t        cd_from,
00966                         iconv_t        cd_to,
00967                         size_t        uchsize,
00968                         unsigned int    ubo,
00969                         MemoryManager* manager)
00970     : IconvGNUWrapper (cd_from, cd_to, uchsize, ubo, manager)
00971 {
00972 }
00973 
00974 
00975 IconvGNULCPTranscoder::~IconvGNULCPTranscoder()
00976 {
00977 }
00978 
00979 
00980 // ---------------------------------------------------------------------------
00981 //  IconvGNUTranscoder: Constructors and Destructor
00982 // ---------------------------------------------------------------------------
00983 IconvGNUTranscoder::IconvGNUTranscoder (const    XMLCh* const    encodingName
00984                       , const XMLSize_t    blockSize
00985                       ,    iconv_t        cd_from
00986                       ,    iconv_t        cd_to
00987                       ,    size_t        uchsize
00988                       ,    unsigned int    ubo
00989                       , MemoryManager* const manager
00990     )
00991     : XMLTranscoder(encodingName, blockSize, manager)
00992     , IconvGNUWrapper (cd_from, cd_to, uchsize, ubo, manager)
00993 {
00994 }
00995 
00996 IconvGNUTranscoder::~IconvGNUTranscoder()
00997 {
00998     if (cdTo() != (iconv_t)-1) {
00999         iconv_close (cdTo());
01000         setCDTo ((iconv_t)-1);
01001     }
01002     if (cdFrom() != (iconv_t)-1) {
01003         iconv_close (cdFrom());
01004         setCDFrom ((iconv_t)-1);
01005     }
01006 }
01007 
01008 // ---------------------------------------------------------------------------
01009 //  IconvGNUTranscoder: Implementation of the virtual transcoder API
01010 // ---------------------------------------------------------------------------
01011 XMLSize_t    IconvGNUTranscoder::transcodeFrom
01012 (
01013     const   XMLByte* const          srcData
01014     , const XMLSize_t               srcCount
01015     ,       XMLCh* const            toFill
01016     , const XMLSize_t               maxChars
01017     ,       XMLSize_t&              bytesEaten
01018     ,       unsigned char* const    charSizes )
01019 {
01020     // Transcode TO XMLCh
01021     const char*  startSrc = (const char*) srcData;
01022     const char*  endSrc = (const char*) srcData + srcCount;
01023 
01024     char    tmpWBuff[gTempBuffArraySize];
01025     char    *startTarget = 0;
01026     char    *wBufPtr = 0;
01027     ArrayJanitor<char>  janBuf(wBufPtr, getMemoryManager());
01028     size_t    len = maxChars * uChSize();
01029 
01030     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
01031         if (len > gTempBuffArraySize) {
01032             wBufPtr = (char*) getMemoryManager()->allocate(len * sizeof(char));//new char[len];
01033             janBuf.reset(wBufPtr, getMemoryManager());
01034             startTarget = wBufPtr;
01035         } else
01036             startTarget = tmpWBuff;
01037     } else
01038         startTarget = (char *) toFill;
01039 
01040     // Do character-by-character transcoding
01041     char    *orgTarget = startTarget;
01042     size_t    srcLen = srcCount;
01043     size_t    prevSrcLen = srcLen;
01044     unsigned int toReturn = 0;
01045     bytesEaten = 0;
01046 
01047     XMLMutexLock lockConverter(&fMutex);
01048 
01049     for (size_t cnt = 0; cnt < maxChars && srcLen; cnt++) {
01050         size_t    rc = iconvFrom(startSrc, &srcLen, &orgTarget, uChSize());
01051         if (rc == (size_t)-1) {
01052             if (errno != E2BIG || prevSrcLen == srcLen) {
01053                 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
01054             }
01055         }
01056         charSizes[cnt] = prevSrcLen - srcLen;
01057         prevSrcLen = srcLen;
01058         bytesEaten += charSizes[cnt];
01059         startSrc = endSrc - srcLen;
01060         toReturn++;
01061     }
01062     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER)
01063         mbsToXML (startTarget, toFill, toReturn);
01064     return toReturn;
01065 }
01066 
01067 XMLSize_t    IconvGNUTranscoder::transcodeTo
01068 (
01069     const   XMLCh* const     srcData
01070     , const XMLSize_t        srcCount
01071     ,       XMLByte* const   toFill
01072     , const XMLSize_t        maxBytes
01073     ,       XMLSize_t&       charsEaten
01074     , const UnRepOpts        /*options*/ )
01075 {
01076     // Transcode FROM XMLCh
01077     char    tmpWBuff[gTempBuffArraySize];
01078     char    *startSrc = tmpWBuff;
01079     char    *wBufPtr = 0;
01080     ArrayJanitor<char>  janBuf(wBufPtr, getMemoryManager());
01081     size_t    len = srcCount * uChSize();
01082 
01083     if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) {
01084         if (len > gTempBuffArraySize) {
01085             wBufPtr = (char*) getMemoryManager()->allocate(len * sizeof(char));//new char[len];
01086             janBuf.reset(wBufPtr, getMemoryManager());
01087             startSrc = wBufPtr;
01088         } else
01089             startSrc = tmpWBuff;
01090         xmlToMbs (srcData, startSrc, srcCount);
01091     } else
01092         startSrc = (char *) srcData;
01093 
01094     char* startTarget = (char *) toFill;
01095     size_t srcLen = len;
01096 
01097     size_t rc;
01098 
01099     {
01100       XMLMutexLock lockConverter(&fMutex);
01101       rc = iconvTo (startSrc, &srcLen, &startTarget, maxBytes);
01102     }
01103 
01104     if (rc == (size_t)-1 && errno != E2BIG) {
01105         ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
01106     }
01107     charsEaten = srcCount - srcLen / uChSize();
01108     return startTarget - (char *)toFill;
01109 }
01110 
01111 bool IconvGNUTranscoder::canTranscodeTo
01112 (
01113     const unsigned int toCheck
01114 )
01115 {
01116     //
01117     //  If the passed value is really a surrogate embedded together, then
01118     //  we need to break it out into its two chars. Else just one.
01119     //
01120     char        srcBuf[MAX_UCHSIZE * 2];
01121     unsigned int    srcCount = 1;
01122     if (toCheck & 0xFFFF0000) {
01123         XMLCh    ch1 = (toCheck >> 10) + 0xD800;
01124         XMLCh    ch2 = (toCheck & 0x3FF) + 0xDC00;
01125         xmlToMbs(&ch1, srcBuf, 1);
01126         xmlToMbs(&ch2, srcBuf + uChSize(), 1);
01127         srcCount++;
01128     } else
01129         xmlToMbs((const XMLCh*) &toCheck, srcBuf, 1);
01130     size_t    len = srcCount * uChSize();
01131     char    tmpBuf[64];
01132     char*    pTmpBuf = tmpBuf;
01133 
01134     XMLMutexLock lockConverter(&fMutex);
01135     size_t rc = iconvTo( srcBuf, &len, &pTmpBuf, 64);
01136 
01137     return (rc != (size_t)-1) && (len == 0);
01138 }
01139 
01140 XERCES_CPP_NAMESPACE_END