GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: IconvTransService.cpp 695885 2008-09-16 14:00:19Z borisk $ 00020 */ 00021 00022 00023 // --------------------------------------------------------------------------- 00024 // Includes 00025 // --------------------------------------------------------------------------- 00026 00027 #if HAVE_CONFIG_H 00028 # include <config.h> 00029 #endif 00030 00031 #if HAVE_WCHAR_H 00032 # include <wchar.h> 00033 #endif 00034 #if HAVE_WCTYPE_H 00035 # include <wctype.h> 00036 #endif 00037 00038 // Fill in for broken or missing wctype functions on some platforms 00039 #if !HAVE_TOWUPPER 00040 # include <towupper.h> 00041 #endif 00042 #if !HAVE_TOWLOWER 00043 # include <towlower.h> 00044 #endif 00045 00046 #include <string.h> 00047 #include <stdlib.h> 00048 #include <stdio.h> 00049 00050 #include "IconvTransService.hpp" 00051 #include <xercesc/util/XMLUniDefs.hpp> 00052 #include <xercesc/util/XMLUni.hpp> 00053 #include <xercesc/framework/MemoryManager.hpp> 00054 00055 00056 XERCES_CPP_NAMESPACE_BEGIN 00057 00058 // --------------------------------------------------------------------------- 00059 // Local, const data 00060 // --------------------------------------------------------------------------- 00061 static const int gTempBuffArraySize = 1024; 00062 static const XMLCh gMyServiceId[] = 00063 { 00064 chLatin_I, chLatin_C, chLatin_o, chLatin_n, chLatin_v, chNull 00065 }; 00066 00067 // --------------------------------------------------------------------------- 00068 // the following is defined by 'man mbrtowc': 00069 // --------------------------------------------------------------------------- 00070 static const size_t TRANSCODING_ERROR = (size_t)(-1); 00071 00072 // --------------------------------------------------------------------------- 00073 // Local methods 00074 // --------------------------------------------------------------------------- 00075 static unsigned int getWideCharLength(const XMLCh* const src) 00076 { 00077 if (!src) 00078 return 0; 00079 00080 unsigned int len = 0; 00081 const XMLCh* pTmp = src; 00082 while (*pTmp++) 00083 len++; 00084 return len; 00085 } 00086 00087 00088 00089 // --------------------------------------------------------------------------- 00090 // IconvTransService: Constructors and Destructor 00091 // --------------------------------------------------------------------------- 00092 IconvTransService::IconvTransService(MemoryManager* /* manager */) 00093 { 00094 } 00095 00096 IconvTransService::~IconvTransService() 00097 { 00098 } 00099 00100 00101 // --------------------------------------------------------------------------- 00102 // IconvTransService: The virtual transcoding service API 00103 // --------------------------------------------------------------------------- 00104 int IconvTransService::compareIString( const XMLCh* const comp1 00105 , const XMLCh* const comp2) 00106 { 00107 const XMLCh* cptr1 = comp1; 00108 const XMLCh* cptr2 = comp2; 00109 00110 while ( (*cptr1 != 0) && (*cptr2 != 0) ) 00111 { 00112 wint_t wch1 = towupper(*cptr1); 00113 wint_t wch2 = towupper(*cptr2); 00114 if (wch1 != wch2) 00115 break; 00116 00117 cptr1++; 00118 cptr2++; 00119 } 00120 return (int) ( towupper(*cptr1) - towupper(*cptr2) ); 00121 } 00122 00123 00124 int IconvTransService::compareNIString( const XMLCh* const comp1 00125 , const XMLCh* const comp2 00126 , const XMLSize_t maxChars) 00127 { 00128 unsigned int n = 0; 00129 const XMLCh* cptr1 = comp1; 00130 const XMLCh* cptr2 = comp2; 00131 00132 while (true && maxChars) 00133 { 00134 wint_t wch1 = towupper(*cptr1); 00135 wint_t wch2 = towupper(*cptr2); 00136 00137 if (wch1 != wch2) 00138 return (int) (wch1 - wch2); 00139 00140 // If either ended, then both ended, so equal 00141 if (!*cptr1 || !*cptr2) 00142 break; 00143 00144 cptr1++; 00145 cptr2++; 00146 00147 // Bump the count of chars done. If it equals the count then we 00148 // are equal for the requested count, so break out and return 00149 // equal. 00150 n++; 00151 if (n == maxChars) 00152 break; 00153 } 00154 00155 return 0; 00156 } 00157 00158 00159 const XMLCh* IconvTransService::getId() const 00160 { 00161 return gMyServiceId; 00162 } 00163 00164 XMLLCPTranscoder* IconvTransService::makeNewLCPTranscoder(MemoryManager* manager) 00165 { 00166 // Just allocate a new transcoder of our type 00167 return new (manager) IconvLCPTranscoder; 00168 } 00169 00170 bool IconvTransService::supportsSrcOfs() const 00171 { 00172 return true; 00173 } 00174 00175 00176 // --------------------------------------------------------------------------- 00177 // IconvTransService: The protected virtual transcoding service API 00178 // --------------------------------------------------------------------------- 00179 XMLTranscoder* 00180 IconvTransService::makeNewXMLTranscoder(const XMLCh* const 00181 , XMLTransService::Codes& resValue 00182 , const XMLSize_t 00183 , MemoryManager* const) 00184 { 00185 // 00186 // NOTE: We don't use the block size here 00187 // 00188 // This is a minimalist transcoding service, that only supports a local 00189 // default transcoder. All named encodings return zero as a failure, 00190 // which means that only the intrinsic encodings supported by the parser 00191 // itself will work for XML data. 00192 // 00193 resValue = XMLTransService::UnsupportedEncoding; 00194 return 0; 00195 } 00196 00197 00198 void IconvTransService::upperCase(XMLCh* const toUpperCase) 00199 { 00200 XMLCh* outPtr = toUpperCase; 00201 while (*outPtr) 00202 { 00203 *outPtr = towupper(*outPtr); 00204 outPtr++; 00205 } 00206 } 00207 00208 00209 void IconvTransService::lowerCase(XMLCh* const toLowerCase) 00210 { 00211 XMLCh* outPtr = toLowerCase; 00212 while (*outPtr) 00213 { 00214 *outPtr = towlower(*outPtr); 00215 outPtr++; 00216 } 00217 } 00218 00219 00220 // --------------------------------------------------------------------------- 00221 // IconvLCPTranscoder: The virtual transcoder API 00222 // --------------------------------------------------------------------------- 00223 XMLSize_t IconvLCPTranscoder::calcRequiredSize(const char* const srcText 00224 , MemoryManager* const) 00225 { 00226 if (!srcText) 00227 return 0; 00228 00229 XMLSize_t len = 0; 00230 const char *src = srcText; 00231 #if HAVE_MBRLEN 00232 mbstate_t st; 00233 memset(&st, 0, sizeof(st)); 00234 #endif 00235 for ( ; *src; ++len) 00236 { 00237 #if HAVE_MBRLEN 00238 int l=::mbrlen( src, MB_CUR_MAX, &st ); 00239 #else 00240 int l=::mblen( src, MB_CUR_MAX ); 00241 #endif 00242 if( l == TRANSCODING_ERROR ) 00243 return 0; 00244 src += l; 00245 } 00246 return len; 00247 } 00248 00249 00250 XMLSize_t IconvLCPTranscoder::calcRequiredSize(const XMLCh* const srcText 00251 , MemoryManager* const manager) 00252 { 00253 if (!srcText) 00254 return 0; 00255 00256 XMLSize_t wLent = getWideCharLength(srcText); 00257 wchar_t tmpWideCharArr[gTempBuffArraySize]; 00258 wchar_t* allocatedArray = 0; 00259 wchar_t* wideCharBuf = 0; 00260 00261 if (wLent >= gTempBuffArraySize) 00262 wideCharBuf = allocatedArray = (wchar_t*) 00263 manager->allocate 00264 ( 00265 (wLent + 1) * sizeof(wchar_t) 00266 );//new wchar_t[wLent + 1]; 00267 else 00268 wideCharBuf = tmpWideCharArr; 00269 00270 for (XMLSize_t i = 0; i < wLent; i++) 00271 { 00272 wideCharBuf[i] = srcText[i]; 00273 } 00274 wideCharBuf[wLent] = 0x00; 00275 00276 const XMLSize_t retVal = ::wcstombs(NULL, wideCharBuf, 0); 00277 00278 if (allocatedArray) 00279 manager->deallocate(allocatedArray); 00280 00281 if (retVal == ~0) 00282 return 0; 00283 return retVal; 00284 } 00285 00286 00287 bool IconvLCPTranscoder::transcode( const XMLCh* const toTranscode 00288 , char* const toFill 00289 , const XMLSize_t maxBytes 00290 , MemoryManager* const manager) 00291 { 00292 // Watch for a couple of pyscho corner cases 00293 if (!toTranscode || !maxBytes) 00294 { 00295 toFill[0] = 0; 00296 return true; 00297 } 00298 00299 if (!*toTranscode) 00300 { 00301 toFill[0] = 0; 00302 return true; 00303 } 00304 00305 unsigned int wLent = getWideCharLength(toTranscode); 00306 wchar_t tmpWideCharArr[gTempBuffArraySize]; 00307 wchar_t* allocatedArray = 0; 00308 wchar_t* wideCharBuf = 0; 00309 00310 if (wLent > maxBytes) { 00311 wLent = maxBytes; 00312 } 00313 00314 if (maxBytes >= gTempBuffArraySize) { 00315 wideCharBuf = allocatedArray = (wchar_t*) 00316 manager->allocate 00317 ( 00318 (maxBytes + 1) * sizeof(wchar_t) 00319 );//new wchar_t[maxBytes + 1]; 00320 } 00321 else 00322 wideCharBuf = tmpWideCharArr; 00323 00324 for (unsigned int i = 0; i < wLent; i++) 00325 { 00326 wideCharBuf[i] = toTranscode[i]; 00327 } 00328 wideCharBuf[wLent] = 0x00; 00329 00330 // Ok, go ahead and try the transcoding. If it fails, then ... 00331 size_t mblen = ::wcstombs(toFill, wideCharBuf, maxBytes); 00332 if (mblen == (size_t)-1) 00333 { 00334 if (allocatedArray) 00335 manager->deallocate(allocatedArray); 00336 return false; 00337 } 00338 00339 // Cap it off just in case 00340 toFill[mblen] = 0; 00341 00342 if (allocatedArray) 00343 manager->deallocate(allocatedArray); 00344 00345 return true; 00346 } 00347 00348 00349 bool IconvLCPTranscoder::transcode( const char* const toTranscode 00350 , XMLCh* const toFill 00351 , const XMLSize_t maxChars 00352 , MemoryManager* const manager) 00353 { 00354 // Check for a couple of psycho corner cases 00355 if (!toTranscode || !maxChars) 00356 { 00357 toFill[0] = 0; 00358 return true; 00359 } 00360 00361 if (!*toTranscode) 00362 { 00363 toFill[0] = 0; 00364 return true; 00365 } 00366 00367 XMLSize_t len = calcRequiredSize(toTranscode); 00368 wchar_t tmpWideCharArr[gTempBuffArraySize]; 00369 wchar_t* allocatedArray = 0; 00370 wchar_t* wideCharBuf = 0; 00371 00372 if (len > maxChars) { 00373 len = maxChars; 00374 } 00375 00376 if (maxChars >= gTempBuffArraySize) 00377 wideCharBuf = allocatedArray = (wchar_t*) manager->allocate 00378 ( 00379 (maxChars + 1) * sizeof(wchar_t) 00380 );//new wchar_t[maxChars + 1]; 00381 else 00382 wideCharBuf = tmpWideCharArr; 00383 00384 if (::mbstowcs(wideCharBuf, toTranscode, maxChars) == (size_t)-1) 00385 { 00386 if (allocatedArray) 00387 manager->deallocate(allocatedArray); 00388 return false; 00389 } 00390 00391 for (XMLSize_t i = 0; i < len; i++) 00392 { 00393 toFill[i] = (XMLCh) wideCharBuf[i]; 00394 } 00395 toFill[len] = 0x00; 00396 00397 if (allocatedArray) 00398 manager->deallocate(allocatedArray); 00399 00400 return true; 00401 } 00402 00403 00404 template <typename T> 00405 void reallocString(T *&ref, size_t &size, MemoryManager* const manager, bool releaseOld) 00406 { 00407 T *tmp = (T*)manager->allocate(2 * size * sizeof(T)); 00408 memcpy(tmp, ref, size * sizeof(T)); 00409 if (releaseOld) manager->deallocate(ref); 00410 ref = tmp; 00411 size *= 2; 00412 } 00413 00414 00415 char* IconvLCPTranscoder::transcode(const XMLCh* const toTranscode, 00416 MemoryManager* const manager) 00417 { 00418 if (!toTranscode) 00419 return 0; 00420 size_t srcCursor = 0, dstCursor = 0; 00421 size_t resultSize = gTempBuffArraySize; 00422 char localBuffer[gTempBuffArraySize]; 00423 char* resultString = localBuffer; 00424 00425 #if HAVE_WCSRTOMBS 00426 mbstate_t st; 00427 memset(&st, 0, sizeof(st)); 00428 wchar_t srcBuffer[gTempBuffArraySize]; 00429 srcBuffer[gTempBuffArraySize - 1] = 0; 00430 const wchar_t *src = 0; 00431 00432 while (toTranscode[srcCursor] || src) 00433 { 00434 if (src == 0) // copy a piece of the source string into a local 00435 // buffer, converted to wchar_t and NULL-terminated. 00436 // after that, src points to the beginning of the 00437 // local buffer and is used for the call to ::wcsrtombs 00438 { 00439 size_t i; 00440 for (i=0; i<gTempBuffArraySize-1; ++i) 00441 { 00442 srcBuffer[i] = toTranscode[srcCursor]; 00443 if (srcBuffer[i] == '\0') 00444 break; 00445 ++srcCursor; 00446 } 00447 src = srcBuffer; 00448 } 00449 00450 size_t len = ::wcsrtombs(resultString + dstCursor, &src, resultSize - dstCursor, &st); 00451 if (len == TRANSCODING_ERROR) 00452 { 00453 dstCursor = 0; 00454 break; 00455 } 00456 dstCursor += len; 00457 if (src != 0) // conversion not finished. This *always* means there 00458 // was not enough room in the destination buffer. 00459 { 00460 reallocString<char>(resultString, resultSize, manager, resultString != localBuffer); 00461 } 00462 } 00463 #else 00464 while (toTranscode[srcCursor]) 00465 { 00466 char mbBuf[16]; // MB_CUR_MAX is not defined as a constant on some platforms 00467 int len = wctomb(mbBuf, toTranscode[srcCursor++]); 00468 if (len < 0) 00469 { 00470 dstCursor = 0; 00471 break; 00472 } 00473 if (dstCursor + len >= resultSize - 1) 00474 reallocString<char>(resultString, resultSize, manager, resultString != localBuffer); 00475 for (int j=0; j<len; ++j) 00476 resultString[dstCursor++] = mbBuf[j]; 00477 } 00478 #endif 00479 00480 if (resultString == localBuffer) 00481 { 00482 resultString = (char*)manager->allocate((dstCursor + 1) * sizeof(char)); 00483 memcpy(resultString, localBuffer, dstCursor * sizeof(char)); 00484 } 00485 00486 resultString[dstCursor] = '\0'; 00487 return resultString; 00488 } 00489 00490 XMLCh* IconvLCPTranscoder::transcode(const char* const toTranscode, 00491 MemoryManager* const manager) 00492 { 00493 if (!toTranscode) 00494 return 0; 00495 size_t resultSize = gTempBuffArraySize; 00496 size_t srcCursor = 0, dstCursor = 0; 00497 00498 #if HAVE_MBSRTOWCS 00499 wchar_t localBuffer[gTempBuffArraySize]; 00500 wchar_t *tmpString = localBuffer; 00501 00502 mbstate_t st; 00503 memset(&st, 0, sizeof(st)); 00504 const char *src = toTranscode; 00505 00506 while(true) 00507 { 00508 size_t len = ::mbsrtowcs(tmpString + dstCursor, &src, resultSize - dstCursor, &st); 00509 if (len == TRANSCODING_ERROR) 00510 { 00511 dstCursor = 0; 00512 break; 00513 } 00514 dstCursor += len; 00515 if (src == 0) // conversion finished 00516 break; 00517 if (dstCursor >= resultSize - 1) 00518 reallocString<wchar_t>(tmpString, resultSize, manager, tmpString != localBuffer); 00519 } 00520 // make a final copy, converting from wchar_t to XMLCh: 00521 XMLCh* resultString = (XMLCh*)manager->allocate((dstCursor + 1) * sizeof(XMLCh)); 00522 size_t i; 00523 for (i=0; i<dstCursor; ++i) 00524 resultString[i] = tmpString[i]; 00525 if (tmpString != localBuffer) // did we allocate something? 00526 manager->deallocate(tmpString); 00527 #else 00528 XMLCh localBuffer[gTempBuffArraySize]; 00529 XMLCh* resultString = localBuffer; 00530 size_t srcLen = strlen(toTranscode); 00531 00532 while(srcLen > srcCursor) 00533 { 00534 wchar_t wcBuf[1]; 00535 int len = mbtowc(wcBuf, toTranscode + srcCursor, srcLen - srcCursor); 00536 if (len <= 0) 00537 { 00538 if (len < 0) 00539 dstCursor = 0; 00540 break; 00541 } 00542 srcCursor += len; 00543 if (dstCursor + 1 >= resultSize - 1) 00544 reallocString<XMLCh>(resultString, resultSize, manager, resultString != localBuffer); 00545 resultString[dstCursor++] = wcBuf[0]; 00546 } 00547 00548 if (resultString == localBuffer) 00549 { 00550 resultString = (XMLCh*)manager->allocate((dstCursor + 1) * sizeof(XMLCh)); 00551 memcpy(resultString, localBuffer, dstCursor * sizeof(XMLCh)); 00552 } 00553 #endif 00554 00555 resultString[dstCursor] = L'\0'; 00556 return resultString; 00557 } 00558 00559 00560 // --------------------------------------------------------------------------- 00561 // IconvLCPTranscoder: Constructors and Destructor 00562 // --------------------------------------------------------------------------- 00563 IconvLCPTranscoder::IconvLCPTranscoder() 00564 { 00565 } 00566 00567 IconvLCPTranscoder::~IconvLCPTranscoder() 00568 { 00569 } 00570 00571 XERCES_CPP_NAMESPACE_END