GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: IconvGNUTransService.cpp 901107 2010-01-20 08:45:02Z borisk $ 00020 */ 00021 00022 // --------------------------------------------------------------------------- 00023 // Includes 00024 // --------------------------------------------------------------------------- 00025 #if HAVE_CONFIG_H 00026 #include <config.h> 00027 #endif 00028 00029 #include <ctype.h> 00030 00031 #include <locale.h> 00032 #include <errno.h> 00033 00034 #if HAVE_ENDIAN_H 00035 #include <endian.h> 00036 #elif HAVE_MACHINE_ENDIAN_H 00037 #include <machine/endian.h> 00038 #elif HAVE_ARPA_NAMESER_COMPAT_H 00039 #include <arpa/nameser_compat.h> 00040 #endif 00041 00042 #define MAX_UCHSIZE 4 00043 00044 //-------------------------------------------------- 00045 // Macro-definitions to translate "native unicode" 00046 // characters <-> XMLCh with different host byte order 00047 // and encoding schemas. 00048 00049 # if BYTE_ORDER == LITTLE_ENDIAN 00050 # define IXMLCh2WC16(x,w) \ 00051 *(w) = ((*(x)) >> 8) & 0xFF; \ 00052 *((w)+1) = (*(x)) & 0xFF 00053 # define IWC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1)) 00054 # define XMLCh2WC16(x,w) \ 00055 *(w) = (*(x)) & 0xFF; \ 00056 *((w)+1) = ((*(x)) >> 8) & 0xFF 00057 # define WC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w)) 00058 00059 # define IXMLCh2WC32(x,w) \ 00060 *(w) = ((*(x)) >> 24) & 0xFF; \ 00061 *((w)+1) = ((*(x)) >> 16) & 0xFF; \ 00062 *((w)+2) = ((*(x)) >> 8) & 0xFF; \ 00063 *((w)+3) = (*(x)) & 0xFF 00064 # define IWC322XMLCh(w,x) \ 00065 *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \ 00066 ((*((w)+2)) << 8) | (*((w)+3)) 00067 # define XMLCh2WC32(x,w) \ 00068 *((w)+3) = ((*(x)) >> 24) & 0xFF; \ 00069 *((w)+2) = ((*(x)) >> 16) & 0xFF; \ 00070 *((w)+1) = ((*(x)) >> 8) & 0xFF; \ 00071 *(w) = (*(x)) & 0xFF 00072 # define WC322XMLCh(w,x) \ 00073 *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \ 00074 ((*((w)+1)) << 8) | (*(w)) 00075 00076 # else /* BYTE_ORDER != LITTLE_ENDIAN */ 00077 00078 # define XMLCh2WC16(x,w) \ 00079 *(w) = ((*(x)) >> 8) & 0xFF; \ 00080 *((w)+1) = (*(x)) & 0xFF 00081 # define WC162XMLCh(w,x) *(x) = ((*(w)) << 8) | (*((w)+1)) 00082 # define IXMLCh2WC16(x,w) \ 00083 *(w) = (*(x)) & 0xFF; \ 00084 *((w)+1) = ((*(x)) >> 8) & 0xFF 00085 # define IWC162XMLCh(w,x) *(x) = ((*((w)+1)) << 8) | (*(w)) 00086 00087 # define XMLCh2WC32(x,w) \ 00088 *(w) = ((*(x)) >> 24) & 0xFF; \ 00089 *((w)+1) = ((*(x)) >> 16) & 0xFF; \ 00090 *((w)+2) = ((*(x)) >> 8) & 0xFF; \ 00091 *((w)+3) = (*(x)) & 0xFF 00092 # define WC322XMLCh(w,x) \ 00093 *(x) = ((*(w)) << 24) | ((*((w)+1)) << 16) | \ 00094 ((*((w)+2)) << 8) | (*((w)+3)) 00095 # define IXMLCh2WC32(x,w) \ 00096 *((w)+3) = ((*(x)) >> 24) & 0xFF; \ 00097 *((w)+2) = ((*(x)) >> 16) & 0xFF; \ 00098 *((w)+1) = ((*(x)) >> 8) & 0xFF; \ 00099 *(w) = (*(x)) & 0xFF 00100 # define IWC322XMLCh(w,x) \ 00101 *(x) = ((*((w)+3)) << 24) | ((*((w)+2)) << 16) | \ 00102 ((*((w)+1)) << 8) | (*(w)) 00103 # endif /* BYTE_ORDER == LITTLE_ENDIAN */ 00104 00105 #include <wchar.h> 00106 #include <string.h> 00107 #include <stdlib.h> 00108 #include <stdio.h> 00109 00110 #include <xercesc/util/XMLString.hpp> 00111 #include <xercesc/util/XMLUniDefs.hpp> 00112 #include <xercesc/util/XMLUni.hpp> 00113 #include <xercesc/util/PlatformUtils.hpp> 00114 #include <xercesc/util/TranscodingException.hpp> 00115 #include <xercesc/util/Janitor.hpp> 00116 #include "IconvGNUTransService.hpp" 00117 00118 00119 XERCES_CPP_NAMESPACE_BEGIN 00120 00121 // --------------------------------------------------------------------------- 00122 // Description of encoding schemas, supported by iconv() 00123 // --------------------------------------------------------------------------- 00124 typedef struct __IconvGNUEncoding { 00125 const char* fSchema; // schema name 00126 size_t fUChSize; // size of the character 00127 unsigned int fUBO; // byte order, relative to the host 00128 } IconvGNUEncoding; 00129 00130 static const IconvGNUEncoding gIconvGNUEncodings[] = { 00131 { "UTF-16LE", 2, LITTLE_ENDIAN }, 00132 { "UTF-16BE", 2, BIG_ENDIAN }, 00133 { "UCS-2LE", 2, LITTLE_ENDIAN }, 00134 { "UCS-2BE", 2, BIG_ENDIAN }, 00135 { "UCS-2-INTERNAL", 2, BYTE_ORDER }, 00136 { NULL, 0, 0 } 00137 }; 00138 00139 // --------------------------------------------------------------------------- 00140 // Local, const data 00141 // --------------------------------------------------------------------------- 00142 static const unsigned int gTempBuffArraySize = 4096; 00143 static const XMLCh gMyServiceId[] = 00144 { 00145 chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull 00146 }; 00147 00148 00149 // --------------------------------------------------------------------------- 00150 // Local methods 00151 // --------------------------------------------------------------------------- 00152 static XMLSize_t getWideCharLength(const XMLCh* const src) 00153 { 00154 if (!src) 00155 return 0; 00156 00157 XMLSize_t len = 0; 00158 const XMLCh* pTmp = src; 00159 while (*pTmp++) 00160 len++; 00161 return len; 00162 } 00163 00164 00165 //---------------------------------------------------------------------------- 00166 // There is implementation of the libiconv for FreeBSD (available through the 00167 // ports collection). The following is a wrapper around the iconv(). 00168 //---------------------------------------------------------------------------- 00169 00170 IconvGNUWrapper::IconvGNUWrapper (MemoryManager* manager) 00171 : fUChSize(0), fUBO(LITTLE_ENDIAN), 00172 fCDTo((iconv_t)-1), fCDFrom((iconv_t)-1), fMutex(manager) 00173 { 00174 } 00175 00176 IconvGNUWrapper::IconvGNUWrapper ( iconv_t cd_from, 00177 iconv_t cd_to, 00178 size_t uchsize, 00179 unsigned int ubo, 00180 MemoryManager* manager) 00181 : fUChSize(uchsize), fUBO(ubo), 00182 fCDTo(cd_to), fCDFrom(cd_from), fMutex(manager) 00183 { 00184 if (fCDFrom == (iconv_t) -1 || fCDTo == (iconv_t) -1) { 00185 XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService); 00186 } 00187 } 00188 00189 IconvGNUWrapper::~IconvGNUWrapper() 00190 { 00191 } 00192 00193 // Convert "native unicode" character into XMLCh 00194 void IconvGNUWrapper::mbcToXMLCh (const char *mbc, XMLCh *toRet) const 00195 { 00196 if (fUBO == BYTE_ORDER) { 00197 if (fUChSize == sizeof(XMLCh)) 00198 *toRet = *((XMLCh*) mbc); 00199 else if (fUChSize == 2) { 00200 WC162XMLCh( mbc, toRet ); 00201 } else { 00202 WC322XMLCh( mbc, toRet ); 00203 } 00204 } else { 00205 if (fUChSize == 2) { 00206 IWC162XMLCh( mbc, toRet ); 00207 } else { 00208 IWC322XMLCh( mbc, toRet ); 00209 } 00210 } 00211 } 00212 00213 // Convert XMLCh into "native unicode" character 00214 void IconvGNUWrapper::xmlChToMbc (XMLCh xch, char *mbc) const 00215 { 00216 if (fUBO == BYTE_ORDER) { 00217 if (fUChSize == sizeof(XMLCh)) { 00218 memcpy (mbc, &xch, fUChSize); 00219 return; 00220 } 00221 if (fUChSize == 2) { 00222 XMLCh2WC16( &xch, mbc ); 00223 } else { 00224 XMLCh2WC32( &xch, mbc ); 00225 } 00226 } else { 00227 if (fUChSize == 2) { 00228 IXMLCh2WC16( &xch, mbc ); 00229 } else { 00230 IXMLCh2WC32( &xch, mbc ); 00231 } 00232 } 00233 } 00234 00235 // Return uppercase equivalent for XMLCh 00236 XMLCh IconvGNUWrapper::toUpper (const XMLCh ch) 00237 { 00238 if (ch <= 0x7F) 00239 return toupper(ch); 00240 00241 char wcbuf[MAX_UCHSIZE * 2]; 00242 xmlChToMbc (ch, wcbuf); 00243 00244 char tmpArr[4]; 00245 #if ICONV_USES_CONST_POINTER 00246 const char* ptr = wcbuf; 00247 #else 00248 char* ptr = wcbuf; 00249 #endif 00250 size_t len = fUChSize; 00251 char *pTmpArr = tmpArr; 00252 size_t bLen = 2; 00253 00254 if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) 00255 return 0; 00256 tmpArr[1] = toupper (*((unsigned char *)tmpArr)); 00257 *tmpArr = tmpArr[1]; 00258 len = 1; 00259 pTmpArr = wcbuf; 00260 bLen = fUChSize; 00261 ptr = tmpArr; 00262 if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) 00263 return 0; 00264 mbcToXMLCh (wcbuf, (XMLCh*) &ch); 00265 return ch; 00266 } 00267 00268 // Return lowercase equivalent for XMLCh 00269 XMLCh IconvGNUWrapper::toLower (const XMLCh ch) 00270 { 00271 if (ch <= 0x7F) 00272 return tolower(ch); 00273 00274 char wcbuf[MAX_UCHSIZE * 2]; 00275 xmlChToMbc (ch, wcbuf); 00276 00277 char tmpArr[4]; 00278 #if ICONV_USES_CONST_POINTER 00279 const char* ptr = wcbuf; 00280 #else 00281 char* ptr = wcbuf; 00282 #endif 00283 size_t len = fUChSize; 00284 char *pTmpArr = tmpArr; 00285 size_t bLen = 2; 00286 00287 if (::iconv (fCDTo, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) 00288 return 0; 00289 tmpArr[1] = tolower (*((unsigned char*)tmpArr)); 00290 *tmpArr = tmpArr[1]; 00291 len = 1; 00292 pTmpArr = wcbuf; 00293 bLen = fUChSize; 00294 ptr = tmpArr; 00295 if (::iconv (fCDFrom, &ptr, &len, &pTmpArr, &bLen) == (size_t) -1) 00296 return 0; 00297 mbcToXMLCh (wcbuf, (XMLCh*) &ch); 00298 return ch; 00299 } 00300 00301 // Fill array of XMLCh characters with data, supplyed in the array 00302 // of "native unicode" characters. 00303 XMLCh* IconvGNUWrapper::mbsToXML 00304 ( 00305 const char* mbs_str 00306 , XMLCh* xml_str 00307 , size_t cnt 00308 ) const 00309 { 00310 if (mbs_str == NULL || xml_str == NULL || cnt == 0) 00311 return NULL; 00312 if (fUBO == BYTE_ORDER) { 00313 if (fUChSize == sizeof(XMLCh)) { 00314 // null-transformation 00315 memcpy (xml_str, mbs_str, fUChSize * cnt); 00316 return xml_str; 00317 } 00318 if (fUChSize == 2) 00319 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { 00320 WC162XMLCh( mbs_str, xml_str + i); 00321 } 00322 else 00323 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { 00324 WC322XMLCh( mbs_str, xml_str + i ); 00325 } 00326 } else { 00327 if (fUChSize == 2) 00328 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { 00329 IWC162XMLCh( mbs_str, xml_str + i ); 00330 } 00331 else 00332 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize) { 00333 IWC322XMLCh( mbs_str, xml_str + i ); 00334 } 00335 } 00336 return xml_str; 00337 } 00338 00339 // Fill array of "native unicode" characters with data, supplyed 00340 // in the array of XMLCh characters. 00341 char* IconvGNUWrapper::xmlToMbs 00342 ( 00343 const XMLCh* xml_str 00344 , char* mbs_str 00345 , size_t cnt 00346 ) const 00347 { 00348 if (mbs_str == NULL || xml_str == NULL || cnt == 0) 00349 return NULL; 00350 char *toReturn = mbs_str; 00351 if (fUBO == BYTE_ORDER) { 00352 if (fUChSize == sizeof(XMLCh)) { 00353 // null-transformation 00354 memcpy (mbs_str, xml_str, fUChSize * cnt); 00355 return toReturn; 00356 } 00357 if (fUChSize == 2) 00358 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { 00359 XMLCh2WC16( xml_str, mbs_str ); 00360 } 00361 else 00362 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { 00363 XMLCh2WC32( xml_str, mbs_str ); 00364 } 00365 } else { 00366 if (fUChSize == 2) 00367 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { 00368 IXMLCh2WC16( xml_str, mbs_str ); 00369 } 00370 else 00371 for (size_t i = 0; i < cnt; i++, mbs_str += fUChSize, xml_str++) { 00372 IXMLCh2WC32( xml_str, mbs_str ); 00373 } 00374 } 00375 return toReturn; 00376 } 00377 00378 size_t IconvGNUWrapper::iconvFrom ( const char *fromPtr, 00379 size_t *fromLen, 00380 char **toPtr, 00381 size_t toLen ) 00382 { 00383 #if ICONV_USES_CONST_POINTER 00384 const char ** tmpPtr = &fromPtr; 00385 #else 00386 char ** tmpPtr = (char**)&fromPtr; 00387 #endif 00388 return ::iconv (fCDFrom, tmpPtr, fromLen, toPtr, &toLen); 00389 } 00390 00391 size_t IconvGNUWrapper::iconvTo ( const char *fromPtr, 00392 size_t *fromLen, 00393 char **toPtr, 00394 size_t toLen ) 00395 { 00396 #if ICONV_USES_CONST_POINTER 00397 const char ** tmpPtr = &fromPtr; 00398 #else 00399 char ** tmpPtr = (char**)&fromPtr; 00400 #endif 00401 return ::iconv (fCDTo, tmpPtr, fromLen, toPtr, &toLen); 00402 } 00403 00404 00405 // --------------------------------------------------------------------------- 00406 // IconvGNUTransService: Constructors and Destructor 00407 // --------------------------------------------------------------------------- 00408 00409 IconvGNUTransService::IconvGNUTransService(MemoryManager* manager) 00410 : IconvGNUWrapper(manager), fUnicodeCP(0) 00411 { 00412 // Try to obtain local (host) characterset from the setlocale 00413 // and through the environment. Do not call setlocale(LC_*, "")! 00414 // Using an empty string instead of NULL, will modify the libc 00415 // behavior. 00416 // 00417 const char* fLocalCP = setlocale (LC_CTYPE, NULL); 00418 if (fLocalCP == NULL || *fLocalCP == 0 || 00419 strcmp (fLocalCP, "C") == 0 || 00420 strcmp (fLocalCP, "POSIX") == 0) { 00421 fLocalCP = getenv ("LC_ALL"); 00422 if (fLocalCP == NULL) { 00423 fLocalCP = getenv ("LC_CTYPE"); 00424 if (fLocalCP == NULL) 00425 fLocalCP = getenv ("LANG"); 00426 } 00427 } 00428 00429 if (fLocalCP == NULL || *fLocalCP == 0 || 00430 strcmp (fLocalCP, "C") == 0 || 00431 strcmp (fLocalCP, "POSIX") == 0) 00432 fLocalCP = "iso-8859-1"; // fallback locale 00433 else { 00434 const char *ptr = strchr (fLocalCP, '.'); 00435 if (ptr == NULL) 00436 fLocalCP = "iso-8859-1"; // fallback locale 00437 else 00438 fLocalCP = ptr + 1; 00439 } 00440 00441 // Select the native unicode characters encoding schema 00442 const IconvGNUEncoding *eptr; 00443 // first - try to use the schema with character size equal to XMLCh, and same endianness 00444 for (eptr = gIconvGNUEncodings; eptr->fSchema; eptr++) 00445 { 00446 if (eptr->fUChSize != sizeof(XMLCh) || eptr->fUBO != BYTE_ORDER) 00447 continue; 00448 00449 // try to create conversion descriptor 00450 iconv_t cd_to = iconv_open(fLocalCP, eptr->fSchema); 00451 if (cd_to == (iconv_t)-1) 00452 continue; 00453 iconv_t cd_from = iconv_open(eptr->fSchema, fLocalCP); 00454 if (cd_from == (iconv_t)-1) { 00455 iconv_close (cd_to); 00456 continue; 00457 } 00458 00459 // got it 00460 setUChSize(eptr->fUChSize); 00461 setUBO(eptr->fUBO); 00462 setCDTo(cd_to); 00463 setCDFrom(cd_from); 00464 fUnicodeCP = eptr->fSchema; 00465 break; 00466 } 00467 if (fUnicodeCP == NULL) 00468 // try to use any known schema 00469 for (eptr = gIconvGNUEncodings; eptr->fSchema; eptr++) 00470 { 00471 // try to create conversion descriptor 00472 iconv_t cd_to = iconv_open(fLocalCP, eptr->fSchema); 00473 if (cd_to == (iconv_t)-1) 00474 continue; 00475 iconv_t cd_from = iconv_open(eptr->fSchema, fLocalCP); 00476 if (cd_from == (iconv_t)-1) { 00477 iconv_close (cd_to); 00478 continue; 00479 } 00480 00481 // got it 00482 setUChSize(eptr->fUChSize); 00483 setUBO(eptr->fUBO); 00484 setCDTo(cd_to); 00485 setCDFrom(cd_from); 00486 fUnicodeCP = eptr->fSchema; 00487 break; 00488 } 00489 00490 if (fUnicodeCP == NULL || cdTo() == (iconv_t)-1 || cdFrom() == (iconv_t)-1) 00491 XMLPlatformUtils::panic (PanicHandler::Panic_NoTransService); 00492 } 00493 00494 IconvGNUTransService::~IconvGNUTransService() 00495 { 00496 if (cdTo() != (iconv_t) -1) { 00497 iconv_close (cdTo()); 00498 setCDTo ((iconv_t)-1); 00499 } 00500 if (cdFrom() != (iconv_t) -1) { 00501 iconv_close (cdFrom()); 00502 setCDFrom ((iconv_t)-1); 00503 } 00504 } 00505 00506 // --------------------------------------------------------------------------- 00507 // IconvGNUTransService: The virtual transcoding service API 00508 // --------------------------------------------------------------------------- 00509 int IconvGNUTransService::compareIString(const XMLCh* const comp1 00510 , const XMLCh* const comp2) 00511 { 00512 const XMLCh* cptr1 = comp1; 00513 const XMLCh* cptr2 = comp2; 00514 00515 XMLMutexLock lockConverter(&fMutex); 00516 00517 XMLCh c1 = toUpper(*cptr1); 00518 XMLCh c2 = toUpper(*cptr2); 00519 while ( (*cptr1 != 0) && (*cptr2 != 0) ) { 00520 if (c1 != c2) 00521 break; 00522 c1 = toUpper(*(++cptr1)); 00523 c2 = toUpper(*(++cptr2)); 00524 00525 } 00526 return (int) ( c1 - c2 ); 00527 } 00528 00529 00530 int IconvGNUTransService::compareNIString(const XMLCh* const comp1 00531 , const XMLCh* const comp2 00532 , const XMLSize_t maxChars) 00533 { 00534 unsigned int n = 0; 00535 const XMLCh* cptr1 = comp1; 00536 const XMLCh* cptr2 = comp2; 00537 00538 XMLMutexLock lockConverter(&fMutex); 00539 00540 while (true && maxChars) 00541 { 00542 XMLCh c1 = toUpper(*cptr1); 00543 XMLCh c2 = toUpper(*cptr2); 00544 00545 if (c1 != c2) 00546 return (int) (c1 - c2); 00547 00548 // If either ended, then both ended, so equal 00549 if (!*cptr1 || !*cptr2) 00550 break; 00551 00552 cptr1++; 00553 cptr2++; 00554 00555 // Bump the count of chars done. If it equals the count then we 00556 // are equal for the requested count, so break out and return 00557 // equal. 00558 n++; 00559 if (n == maxChars) 00560 break; 00561 } 00562 00563 return 0; 00564 } 00565 00566 00567 const XMLCh* IconvGNUTransService::getId() const 00568 { 00569 return gMyServiceId; 00570 } 00571 00572 XMLLCPTranscoder* IconvGNUTransService::makeNewLCPTranscoder(MemoryManager* manager) 00573 { 00574 return new (manager) IconvGNULCPTranscoder (cdFrom(), cdTo(), uChSize(), UBO(), manager); 00575 } 00576 00577 bool IconvGNUTransService::supportsSrcOfs() const 00578 { 00579 return true; 00580 } 00581 00582 // --------------------------------------------------------------------------- 00583 // IconvGNUTransService: The protected virtual transcoding service API 00584 // --------------------------------------------------------------------------- 00585 XMLTranscoder* 00586 IconvGNUTransService::makeNewXMLTranscoder 00587 ( 00588 const XMLCh* const encodingName 00589 , XMLTransService::Codes& resValue 00590 , const XMLSize_t blockSize 00591 , MemoryManager* const manager 00592 ) 00593 { 00594 resValue = XMLTransService::UnsupportedEncoding; 00595 IconvGNUTranscoder *newTranscoder = NULL; 00596 00597 char *encLocal = XMLString::transcode(encodingName, manager); 00598 ArrayJanitor<char> janBuf(encLocal, manager); 00599 iconv_t cd_from, cd_to; 00600 00601 cd_from = iconv_open (fUnicodeCP, encLocal); 00602 if (cd_from == (iconv_t)-1) { 00603 resValue = XMLTransService::SupportFilesNotFound; 00604 return NULL; 00605 } 00606 cd_to = iconv_open (encLocal, fUnicodeCP); 00607 if (cd_to == (iconv_t)-1) { 00608 resValue = XMLTransService::SupportFilesNotFound; 00609 iconv_close (cd_from); 00610 return NULL; 00611 } 00612 newTranscoder = new (manager) IconvGNUTranscoder (encodingName, 00613 blockSize, 00614 cd_from, cd_to, 00615 uChSize(), UBO(), manager); 00616 if (newTranscoder) 00617 resValue = XMLTransService::Ok; 00618 return newTranscoder; 00619 } 00620 00621 void IconvGNUTransService::upperCase(XMLCh* const toUpperCase) 00622 { 00623 XMLCh* outPtr = toUpperCase; 00624 00625 XMLMutexLock lockConverter(&fMutex); 00626 00627 while (*outPtr) 00628 { 00629 *outPtr = toUpper(*outPtr); 00630 outPtr++; 00631 } 00632 } 00633 00634 void IconvGNUTransService::lowerCase(XMLCh* const toLowerCase) 00635 { 00636 XMLCh* outPtr = toLowerCase; 00637 00638 XMLMutexLock lockConverter(&fMutex); 00639 00640 while (*outPtr) 00641 { 00642 *outPtr = toLower(*outPtr); 00643 outPtr++; 00644 } 00645 } 00646 00647 // --------------------------------------------------------------------------- 00648 // IconvGNULCPTranscoder: The virtual transcoder API 00649 // --------------------------------------------------------------------------- 00650 XMLSize_t IconvGNULCPTranscoder::calcRequiredSize (const char* const srcText 00651 , MemoryManager* const manager) 00652 { 00653 if (!srcText) 00654 return 0; 00655 00656 size_t len, srcLen; 00657 len = srcLen = strlen(srcText); 00658 if (len == 0) 00659 return 0; 00660 00661 char tmpWideArr[gTempBuffArraySize]; 00662 size_t totalLen = 0; 00663 00664 XMLMutexLock lockConverter(&fMutex); 00665 00666 for (;;) { 00667 char *pTmpArr = tmpWideArr; 00668 const char *ptr = srcText + srcLen - len; 00669 size_t rc = iconvFrom(ptr, &len, &pTmpArr, gTempBuffArraySize); 00670 if (rc == (size_t) -1 && errno != E2BIG) { 00671 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, manager); 00672 /* return 0; */ 00673 } 00674 rc = pTmpArr - (char *) tmpWideArr; 00675 totalLen += rc; 00676 if (rc == 0 || len == 0) 00677 break; 00678 } 00679 return totalLen / uChSize(); 00680 } 00681 00682 00683 XMLSize_t IconvGNULCPTranscoder::calcRequiredSize(const XMLCh* const srcText 00684 , MemoryManager* const manager) 00685 { 00686 if (!srcText) 00687 return 0; 00688 XMLSize_t wLent = getWideCharLength(srcText); 00689 if (wLent == 0) 00690 return 0; 00691 00692 char tmpWBuff[gTempBuffArraySize]; 00693 char *wBuf = 0; 00694 char *wBufPtr = 0; 00695 ArrayJanitor<char> janBuf(wBufPtr, manager); 00696 size_t len = wLent * uChSize(); 00697 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) { 00698 if (len > gTempBuffArraySize) { 00699 wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len]; 00700 janBuf.reset(wBufPtr, manager); 00701 wBuf = wBufPtr; 00702 } else 00703 wBuf = tmpWBuff; 00704 xmlToMbs (srcText, wBuf, wLent); 00705 } else 00706 wBuf = (char *) srcText; 00707 00708 char tmpBuff[gTempBuffArraySize]; 00709 size_t totalLen = 0; 00710 char *srcEnd = wBuf + wLent * uChSize(); 00711 00712 XMLMutexLock lockConverter(&fMutex); 00713 00714 for (;;) { 00715 char *pTmpArr = tmpBuff; 00716 const char *ptr = srcEnd - len; 00717 size_t rc = iconvTo(ptr, &len, &pTmpArr, gTempBuffArraySize); 00718 if (rc == (size_t) -1 && errno != E2BIG) { 00719 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, manager); 00720 /* return 0; */ 00721 } 00722 rc = pTmpArr - tmpBuff; 00723 totalLen += rc; 00724 if (rc == 0 || len == 0) 00725 break; 00726 } 00727 return totalLen; 00728 } 00729 00730 00731 char* IconvGNULCPTranscoder::transcode(const XMLCh* const toTranscode, 00732 MemoryManager* const manager) 00733 { 00734 if (!toTranscode) 00735 return 0; 00736 00737 char* retVal = 0; 00738 if (!*toTranscode) { 00739 retVal = (char*) manager->allocate(sizeof(char));//new char[1]; 00740 retVal[0] = 0; 00741 return retVal; 00742 } 00743 00744 XMLSize_t wLent = getWideCharLength(toTranscode); 00745 00746 // Calc needed size. 00747 XMLSize_t neededLen = calcRequiredSize (toTranscode, manager); 00748 if (neededLen == 0) 00749 return 0; 00750 // allocate output buffer 00751 retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char));//new char[neededLen + 1]; 00752 // prepare the original 00753 char tmpWBuff[gTempBuffArraySize]; 00754 char *wideCharBuf = 0; 00755 char *wBufPtr = 0; 00756 ArrayJanitor<char> janBuf(wBufPtr, manager); 00757 size_t len = wLent * uChSize(); 00758 00759 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) { 00760 if (len > gTempBuffArraySize) { 00761 wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len]; 00762 janBuf.reset(wBufPtr, manager); 00763 wideCharBuf = wBufPtr; 00764 } else 00765 wideCharBuf = tmpWBuff; 00766 xmlToMbs (toTranscode, wideCharBuf, wLent); 00767 } else 00768 wideCharBuf = (char *) toTranscode; 00769 00770 // perform conversion 00771 char* ptr = retVal; 00772 size_t rc; 00773 00774 { 00775 XMLMutexLock lockConverter(&fMutex); 00776 rc = iconvTo(wideCharBuf, &len, &ptr, neededLen); 00777 } 00778 00779 if (rc == (size_t)-1) { 00780 return 0; 00781 } 00782 retVal[neededLen] = 0; 00783 00784 return retVal; 00785 } 00786 00787 00788 bool IconvGNULCPTranscoder::transcode( const XMLCh* const toTranscode 00789 , char* const toFill 00790 , const XMLSize_t maxBytes 00791 , MemoryManager* const manager) 00792 { 00793 // Watch for a couple of pyscho corner cases 00794 if (!toTranscode || !maxBytes) { 00795 toFill[0] = 0; 00796 return true; 00797 } 00798 if (!*toTranscode) { 00799 toFill[0] = 0; 00800 return true; 00801 } 00802 00803 XMLSize_t wLent = getWideCharLength(toTranscode); 00804 if (wLent > maxBytes) 00805 wLent = maxBytes; 00806 00807 // Fill the "unicode" string 00808 char tmpWBuff[gTempBuffArraySize]; 00809 char *wideCharBuf = 0; 00810 char *wBufPtr = 0; 00811 ArrayJanitor<char> janBuf(wBufPtr, manager); 00812 size_t len = wLent * uChSize(); 00813 00814 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) { 00815 if (len > gTempBuffArraySize) { 00816 wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len]; 00817 janBuf.reset(wBufPtr, manager); 00818 wideCharBuf = wBufPtr; 00819 } else 00820 wideCharBuf = tmpWBuff; 00821 xmlToMbs (toTranscode, wideCharBuf, wLent); 00822 } else 00823 wideCharBuf = (char *) toTranscode; 00824 00825 // Ok, go ahead and try the transcoding. If it fails, then ... 00826 char *ptr = toFill; 00827 size_t rc; 00828 00829 { 00830 XMLMutexLock lockConverter(&fMutex); 00831 rc = iconvTo(wideCharBuf, &len, &ptr, maxBytes); 00832 } 00833 00834 if (rc == (size_t)-1) { 00835 return false; 00836 } 00837 00838 // Cap it off 00839 *ptr = 0; 00840 return true; 00841 } 00842 00843 00844 XMLCh* IconvGNULCPTranscoder::transcode(const char* const toTranscode, 00845 MemoryManager* const manager) 00846 { 00847 if (!toTranscode) 00848 return 0; 00849 00850 XMLCh* retVal = 0; 00851 if (!*toTranscode) { 00852 retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1]; 00853 retVal[0] = 0; 00854 return retVal; 00855 } 00856 00857 XMLSize_t wLent = calcRequiredSize(toTranscode, manager); 00858 if (wLent == 0) { 00859 retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1]; 00860 retVal[0] = 0; 00861 return retVal; 00862 } 00863 00864 char tmpWBuff[gTempBuffArraySize]; 00865 char *wideCharBuf = 0; 00866 char *wBufPtr = 0; 00867 ArrayJanitor<char> janBuf(wBufPtr, manager); 00868 size_t len = wLent * uChSize(); 00869 00870 retVal = (XMLCh*) manager->allocate((wLent + 1) * sizeof(XMLCh));//new XMLCh[wLent + 1]; 00871 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) { 00872 if (len > gTempBuffArraySize) { 00873 wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len]; 00874 janBuf.reset(wBufPtr, manager); 00875 wideCharBuf = wBufPtr; 00876 } else 00877 wideCharBuf = tmpWBuff; 00878 } else 00879 wideCharBuf = (char *) retVal; 00880 00881 size_t flen = strlen(toTranscode); 00882 char *ptr = wideCharBuf; 00883 size_t rc; 00884 00885 { 00886 XMLMutexLock lockConverter(&fMutex); 00887 rc = iconvFrom(toTranscode, &flen, &ptr, len); 00888 } 00889 00890 if (rc == (size_t) -1) { 00891 return NULL; 00892 } 00893 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) 00894 mbsToXML (wideCharBuf, retVal, wLent); 00895 retVal[wLent] = 0x00; 00896 00897 return retVal; 00898 } 00899 00900 00901 bool IconvGNULCPTranscoder::transcode(const char* const toTranscode 00902 , XMLCh* const toFill 00903 , const XMLSize_t maxChars 00904 , MemoryManager* const manager) 00905 { 00906 // Check for a couple of psycho corner cases 00907 if (!toTranscode || !maxChars) 00908 { 00909 toFill[0] = 0; 00910 return true; 00911 } 00912 00913 if (!*toTranscode) 00914 { 00915 toFill[0] = 0; 00916 return true; 00917 } 00918 00919 XMLSize_t wLent = calcRequiredSize(toTranscode); 00920 if (wLent > maxChars) 00921 wLent = maxChars; 00922 00923 char tmpWBuff[gTempBuffArraySize]; 00924 char *wideCharBuf = 0; 00925 char *wBufPtr = 0; 00926 ArrayJanitor<char> janBuf(wBufPtr, manager); 00927 size_t len = wLent * uChSize(); 00928 00929 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) { 00930 if (len > gTempBuffArraySize) { 00931 wBufPtr = (char*) manager->allocate(len * sizeof(char));//new char[len]; 00932 janBuf.reset(wBufPtr, manager); 00933 wideCharBuf = wBufPtr; 00934 } else 00935 wideCharBuf = tmpWBuff; 00936 } else 00937 wideCharBuf = (char *) toFill; 00938 00939 size_t flen = strlen(toTranscode); // wLent; 00940 char *ptr = wideCharBuf; 00941 size_t rc; 00942 00943 { 00944 XMLMutexLock lockConverter(&fMutex); 00945 rc = iconvFrom(toTranscode, &flen, &ptr, len); 00946 } 00947 00948 if (rc == (size_t)-1) { 00949 return false; 00950 } 00951 00952 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) 00953 mbsToXML (wideCharBuf, toFill, wLent); 00954 00955 toFill[wLent] = 0x00; 00956 return true; 00957 } 00958 00959 00960 // --------------------------------------------------------------------------- 00961 // IconvGNULCPTranscoder: Constructors and Destructor 00962 // --------------------------------------------------------------------------- 00963 00964 00965 IconvGNULCPTranscoder::IconvGNULCPTranscoder (iconv_t cd_from, 00966 iconv_t cd_to, 00967 size_t uchsize, 00968 unsigned int ubo, 00969 MemoryManager* manager) 00970 : IconvGNUWrapper (cd_from, cd_to, uchsize, ubo, manager) 00971 { 00972 } 00973 00974 00975 IconvGNULCPTranscoder::~IconvGNULCPTranscoder() 00976 { 00977 } 00978 00979 00980 // --------------------------------------------------------------------------- 00981 // IconvGNUTranscoder: Constructors and Destructor 00982 // --------------------------------------------------------------------------- 00983 IconvGNUTranscoder::IconvGNUTranscoder (const XMLCh* const encodingName 00984 , const XMLSize_t blockSize 00985 , iconv_t cd_from 00986 , iconv_t cd_to 00987 , size_t uchsize 00988 , unsigned int ubo 00989 , MemoryManager* const manager 00990 ) 00991 : XMLTranscoder(encodingName, blockSize, manager) 00992 , IconvGNUWrapper (cd_from, cd_to, uchsize, ubo, manager) 00993 { 00994 } 00995 00996 IconvGNUTranscoder::~IconvGNUTranscoder() 00997 { 00998 if (cdTo() != (iconv_t)-1) { 00999 iconv_close (cdTo()); 01000 setCDTo ((iconv_t)-1); 01001 } 01002 if (cdFrom() != (iconv_t)-1) { 01003 iconv_close (cdFrom()); 01004 setCDFrom ((iconv_t)-1); 01005 } 01006 } 01007 01008 // --------------------------------------------------------------------------- 01009 // IconvGNUTranscoder: Implementation of the virtual transcoder API 01010 // --------------------------------------------------------------------------- 01011 XMLSize_t IconvGNUTranscoder::transcodeFrom 01012 ( 01013 const XMLByte* const srcData 01014 , const XMLSize_t srcCount 01015 , XMLCh* const toFill 01016 , const XMLSize_t maxChars 01017 , XMLSize_t& bytesEaten 01018 , unsigned char* const charSizes ) 01019 { 01020 // Transcode TO XMLCh 01021 const char* startSrc = (const char*) srcData; 01022 const char* endSrc = (const char*) srcData + srcCount; 01023 01024 char tmpWBuff[gTempBuffArraySize]; 01025 char *startTarget = 0; 01026 char *wBufPtr = 0; 01027 ArrayJanitor<char> janBuf(wBufPtr, getMemoryManager()); 01028 size_t len = maxChars * uChSize(); 01029 01030 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) { 01031 if (len > gTempBuffArraySize) { 01032 wBufPtr = (char*) getMemoryManager()->allocate(len * sizeof(char));//new char[len]; 01033 janBuf.reset(wBufPtr, getMemoryManager()); 01034 startTarget = wBufPtr; 01035 } else 01036 startTarget = tmpWBuff; 01037 } else 01038 startTarget = (char *) toFill; 01039 01040 // Do character-by-character transcoding 01041 char *orgTarget = startTarget; 01042 size_t srcLen = srcCount; 01043 size_t prevSrcLen = srcLen; 01044 unsigned int toReturn = 0; 01045 bytesEaten = 0; 01046 01047 XMLMutexLock lockConverter(&fMutex); 01048 01049 for (size_t cnt = 0; cnt < maxChars && srcLen; cnt++) { 01050 size_t rc = iconvFrom(startSrc, &srcLen, &orgTarget, uChSize()); 01051 if (rc == (size_t)-1) { 01052 if (errno != E2BIG || prevSrcLen == srcLen) { 01053 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); 01054 } 01055 } 01056 charSizes[cnt] = prevSrcLen - srcLen; 01057 prevSrcLen = srcLen; 01058 bytesEaten += charSizes[cnt]; 01059 startSrc = endSrc - srcLen; 01060 toReturn++; 01061 } 01062 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) 01063 mbsToXML (startTarget, toFill, toReturn); 01064 return toReturn; 01065 } 01066 01067 XMLSize_t IconvGNUTranscoder::transcodeTo 01068 ( 01069 const XMLCh* const srcData 01070 , const XMLSize_t srcCount 01071 , XMLByte* const toFill 01072 , const XMLSize_t maxBytes 01073 , XMLSize_t& charsEaten 01074 , const UnRepOpts /*options*/ ) 01075 { 01076 // Transcode FROM XMLCh 01077 char tmpWBuff[gTempBuffArraySize]; 01078 char *startSrc = tmpWBuff; 01079 char *wBufPtr = 0; 01080 ArrayJanitor<char> janBuf(wBufPtr, getMemoryManager()); 01081 size_t len = srcCount * uChSize(); 01082 01083 if (uChSize() != sizeof(XMLCh) || UBO() != BYTE_ORDER) { 01084 if (len > gTempBuffArraySize) { 01085 wBufPtr = (char*) getMemoryManager()->allocate(len * sizeof(char));//new char[len]; 01086 janBuf.reset(wBufPtr, getMemoryManager()); 01087 startSrc = wBufPtr; 01088 } else 01089 startSrc = tmpWBuff; 01090 xmlToMbs (srcData, startSrc, srcCount); 01091 } else 01092 startSrc = (char *) srcData; 01093 01094 char* startTarget = (char *) toFill; 01095 size_t srcLen = len; 01096 01097 size_t rc; 01098 01099 { 01100 XMLMutexLock lockConverter(&fMutex); 01101 rc = iconvTo (startSrc, &srcLen, &startTarget, maxBytes); 01102 } 01103 01104 if (rc == (size_t)-1 && errno != E2BIG) { 01105 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); 01106 } 01107 charsEaten = srcCount - srcLen / uChSize(); 01108 return startTarget - (char *)toFill; 01109 } 01110 01111 bool IconvGNUTranscoder::canTranscodeTo 01112 ( 01113 const unsigned int toCheck 01114 ) 01115 { 01116 // 01117 // If the passed value is really a surrogate embedded together, then 01118 // we need to break it out into its two chars. Else just one. 01119 // 01120 char srcBuf[MAX_UCHSIZE * 2]; 01121 unsigned int srcCount = 1; 01122 if (toCheck & 0xFFFF0000) { 01123 XMLCh ch1 = (toCheck >> 10) + 0xD800; 01124 XMLCh ch2 = (toCheck & 0x3FF) + 0xDC00; 01125 xmlToMbs(&ch1, srcBuf, 1); 01126 xmlToMbs(&ch2, srcBuf + uChSize(), 1); 01127 srcCount++; 01128 } else 01129 xmlToMbs((const XMLCh*) &toCheck, srcBuf, 1); 01130 size_t len = srcCount * uChSize(); 01131 char tmpBuf[64]; 01132 char* pTmpBuf = tmpBuf; 01133 01134 XMLMutexLock lockConverter(&fMutex); 01135 size_t rc = iconvTo( srcBuf, &len, &pTmpBuf, 64); 01136 01137 return (rc != (size_t)-1) && (len == 0); 01138 } 01139 01140 XERCES_CPP_NAMESPACE_END