GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: MacOSUnicodeConverter.cpp 695759 2008-09-16 08:04:55Z borisk $ 00020 */ 00021 00022 00023 // --------------------------------------------------------------------------- 00024 // Includes 00025 // --------------------------------------------------------------------------- 00026 #include <xercesc/util/XercesDefs.hpp> 00027 00028 #include <algorithm> 00029 #include <cstddef> 00030 #include <cstring> 00031 00032 #if defined(__APPLE__) 00033 // Framework includes 00034 #include <CoreServices/CoreServices.h> 00035 #else 00036 // Classic includes otherwise 00037 #include <MacErrors.h> 00038 #include <Script.h> 00039 #include <TextUtils.h> 00040 #include <TextEncodingConverter.h> 00041 #include <TextCommon.h> 00042 #include <CodeFragments.h> 00043 #include <UnicodeConverter.h> 00044 #include <UnicodeUtilities.h> 00045 #include <CFCharacterSet.h> 00046 #include <CFString.h> 00047 #endif 00048 00049 #include <xercesc/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.hpp> 00050 #include <xercesc/util/XMLUniDefs.hpp> 00051 #include <xercesc/util/XMLUni.hpp> 00052 #include <xercesc/util/XMLString.hpp> 00053 #include <xercesc/util/TranscodingException.hpp> 00054 #include <xercesc/util/PlatformUtils.hpp> 00055 #include <xercesc/util/Janitor.hpp> 00056 00057 XERCES_CPP_NAMESPACE_BEGIN 00058 00059 // --------------------------------------------------------------------------- 00060 // Typedefs 00061 // --------------------------------------------------------------------------- 00062 00063 // TempBufs are used for cases where we need a temporary buffer while processing. 00064 const std::size_t kTempBufCount = 512; 00065 typedef char TempCharBuf[kTempBufCount]; 00066 typedef UniChar TempUniBuf[kTempBufCount]; 00067 typedef XMLCh TempXMLBuf[kTempBufCount]; 00068 00069 00070 // --------------------------------------------------------------------------- 00071 // Local, const data 00072 // --------------------------------------------------------------------------- 00073 const XMLCh MacOSUnicodeConverter::fgMyServiceId[] = 00074 { 00075 chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chNull 00076 }; 00077 00078 00079 const XMLCh MacOSUnicodeConverter::fgMacLCPEncodingName[] = 00080 { 00081 chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chLatin_L 00082 , chLatin_C, chLatin_P, chLatin_E, chLatin_n, chLatin_c, chLatin_o 00083 , chLatin_d, chLatin_i, chLatin_n, chLatin_g, chNull 00084 }; 00085 00086 00087 00088 // --------------------------------------------------------------------------- 00089 // MacOSUnicodeConverter: Constructors and Destructor 00090 // --------------------------------------------------------------------------- 00091 MacOSUnicodeConverter::MacOSUnicodeConverter(MemoryManager* manager) 00092 : fCollator(NULL) 00093 { 00094 // Test for presense of unicode collation functions 00095 fHasUnicodeCollation = (UCCompareText != NULL); 00096 00097 // Create a unicode collator for doing string comparisons 00098 if (fHasUnicodeCollation) 00099 { 00100 // Configure collation options 00101 UCCollateOptions collateOptions = 00102 kUCCollateComposeInsensitiveMask 00103 | kUCCollateWidthInsensitiveMask 00104 | kUCCollateCaseInsensitiveMask 00105 | kUCCollatePunctuationSignificantMask 00106 ; 00107 00108 OSStatus status = UCCreateCollator(NULL, 0, collateOptions, &fCollator); 00109 } 00110 } 00111 00112 00113 MacOSUnicodeConverter::~MacOSUnicodeConverter() 00114 { 00115 // Dispose our collator 00116 if (fCollator != NULL) 00117 UCDisposeCollator(&fCollator); 00118 } 00119 00120 00121 // --------------------------------------------------------------------------- 00122 // MacOSUnicodeConverter: The virtual transcoding service API 00123 // --------------------------------------------------------------------------- 00124 int MacOSUnicodeConverter::compareIString( const XMLCh* const comp1 00125 , const XMLCh* const comp2) 00126 { 00127 // If unicode collation routines are available, use them. 00128 // This should be the case on Mac OS 8.6 and later, 00129 // with Carbon 1.0.2 or later, and under Mac OS X. 00130 // 00131 // Otherwise, but only for Metrowerks, since only Metrowerks 00132 // has a c library with a valid set of wchar routines, 00133 // fall back to the standard library. 00134 00135 if (fHasUnicodeCollation && fCollator != NULL) 00136 { 00137 std::size_t cnt1 = XMLString::stringLen(comp1); 00138 std::size_t cnt2 = XMLString::stringLen(comp2); 00139 00140 Boolean equivalent = false; 00141 SInt32 order = 0; 00142 OSStatus status = UCCompareText( 00143 fCollator, 00144 reinterpret_cast<const UniChar*>(comp1), 00145 cnt1, 00146 reinterpret_cast<const UniChar*>(comp2), 00147 cnt2, 00148 &equivalent, 00149 &order 00150 ); 00151 00152 return ((status != noErr) || equivalent) ? 0 : order; 00153 } 00154 else 00155 { 00156 // For some reason there is no platform utils available 00157 // where we expect it. Bail. 00158 XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService); 00159 return 0; 00160 } 00161 } 00162 00163 00164 int MacOSUnicodeConverter::compareNIString( const XMLCh* const comp1 00165 , const XMLCh* const comp2 00166 , const XMLSize_t maxChars) 00167 { 00168 // If unicode collation routines are available, use them. 00169 // This should be the case on Mac OS 8.6 and later, 00170 // with Carbon 1.0.2 or later, and under Mac OS X. 00171 // 00172 // Otherwise, but only for Metrowerks, since only Metrowerks 00173 // has a c library with a valid set of wchar routines, 00174 // fall back to the standard library. 00175 00176 if (fHasUnicodeCollation && fCollator != NULL) 00177 { 00178 std::size_t cnt1 = XMLString::stringLen(comp1); 00179 std::size_t cnt2 = XMLString::stringLen(comp2); 00180 00181 // Restrict view of source characters to first {maxChars} 00182 if (cnt1 > maxChars) 00183 cnt1 = maxChars; 00184 00185 if (cnt2 > maxChars) 00186 cnt2 = maxChars; 00187 00188 Boolean equivalent = false; 00189 SInt32 order = 0; 00190 OSStatus status = UCCompareText( 00191 fCollator, 00192 reinterpret_cast<const UniChar*>(comp1), 00193 cnt1, 00194 reinterpret_cast<const UniChar*>(comp2), 00195 cnt2, 00196 &equivalent, 00197 &order 00198 ); 00199 00200 return ((status != noErr) || equivalent) ? 0 : order; 00201 } 00202 else 00203 { 00204 // For some reason there is no platform utils available 00205 // where we expect it. Bail. 00206 XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService); 00207 return 0; 00208 } 00209 } 00210 00211 00212 const XMLCh* MacOSUnicodeConverter::getId() const 00213 { 00214 return fgMyServiceId; 00215 } 00216 00217 TextEncoding 00218 MacOSUnicodeConverter::discoverLCPEncoding() 00219 { 00220 TextEncoding encoding = 0; 00221 00222 // Ask the OS for the best text encoding for this application 00223 // We would call GetApplicationTextEncoding(), but it's available only in 00224 // Carbon (not CarbonCore), and we try to link with frameworks only in CoreServices. 00225 // encoding = GetApplicationTextEncoding(); 00226 00227 // Get TextEncoding for the current Mac System Script, falling back to Mac Roman 00228 if (noErr != UpgradeScriptInfoToTextEncoding( 00229 smSystemScript, kTextLanguageDontCare, kTextRegionDontCare, 00230 NULL, &encoding)) 00231 encoding = CreateTextEncoding(kTextEncodingMacRoman, 00232 kTextEncodingDefaultVariant, 00233 kTextEncodingDefaultFormat); 00234 00235 // Traditionally, the Mac transcoder has used the current system script 00236 // as the LCP text encoding. 00237 // 00238 // As of Xerces 2.6, this continues to be the case if XML_MACOS_LCP_TRADITIONAL 00239 // is defined. 00240 // 00241 // Otherwise, but only for Mac OS X, utf-8 will be used instead. 00242 // Since posix paths are utf-8 encoding on OS X, and the OS X 00243 // terminal uses utf-8 by default, this seems to make the most sense. 00244 #if !defined(XML_MACOS_LCP_TRADITIONAL) 00245 if (true /*gMacOSXOrBetter*/) 00246 { 00247 // Manufacture a text encoding for UTF8 00248 encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 00249 kTextEncodingDefaultVariant, 00250 kUnicodeUTF8Format); 00251 } 00252 #endif 00253 00254 return encoding; 00255 } 00256 00257 00258 XMLLCPTranscoder* MacOSUnicodeConverter::makeNewLCPTranscoder(MemoryManager* manager) 00259 { 00260 XMLLCPTranscoder* result = NULL; 00261 OSStatus status = noErr; 00262 00263 // Discover the text encoding to use for the LCP 00264 TextEncoding lcpTextEncoding = discoverLCPEncoding(); 00265 00266 // We implement the LCP transcoder in terms of the XMLTranscoder. 00267 // Create an XMLTranscoder for this encoding 00268 XMLTransService::Codes resValue; 00269 XMLTranscoder* xmlTrans = makeNewXMLTranscoder(fgMacLCPEncodingName, 00270 resValue, kTempBufCount, 00271 lcpTextEncoding, manager); 00272 00273 if (xmlTrans) 00274 { 00275 // Pass the XMLTranscoder over to the LPC transcoder 00276 if (resValue == XMLTransService::Ok) 00277 result = new (manager) MacOSLCPTranscoder(xmlTrans, manager); 00278 else 00279 delete xmlTrans; 00280 } 00281 00282 return result; 00283 } 00284 00285 00286 bool MacOSUnicodeConverter::supportsSrcOfs() const 00287 { 00288 // For now, we don't support source offsets 00289 return false; 00290 } 00291 00292 00293 void MacOSUnicodeConverter::upperCase(XMLCh* const toUpperCase) 00294 { 00295 #if TARGET_API_MAC_CARBON 00296 00297 // If we're targeting carbon, use the CFString conversion to uppercase 00298 int len = XMLString::stringLen(toUpperCase); 00299 CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy( 00300 kCFAllocatorDefault, 00301 (UniChar*)toUpperCase, 00302 len, // length 00303 len, // capacity 00304 kCFAllocatorNull); 00305 CFStringUppercase(cfString, NULL); 00306 CFRelease(cfString); 00307 00308 #elif (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T) 00309 00310 // Use this if there's a reasonable c library available. 00311 // Metrowerks does this reasonably 00312 wchar_t c; 00313 for (XMLCh* p = (XMLCh*)toUpperCase; ((c = *p) != 0); ) 00314 *p++ = std::towupper(c); 00315 00316 #else 00317 #error Sorry, no support for upperCase 00318 #endif 00319 } 00320 00321 00322 void MacOSUnicodeConverter::lowerCase(XMLCh* const toLowerCase) 00323 { 00324 #if TARGET_API_MAC_CARBON 00325 00326 // If we're targeting carbon, use the CFString conversion to uppercase 00327 int len = XMLString::stringLen(toLowerCase); 00328 CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy( 00329 kCFAllocatorDefault, 00330 (UniChar*)toLowerCase, 00331 len, // length 00332 len, // capacity 00333 kCFAllocatorNull); 00334 CFStringLowercase(cfString, NULL); 00335 CFRelease(cfString); 00336 00337 #elif (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T) 00338 00339 // Use this if there's a reasonable c library available. 00340 // Metrowerks does this reasonably 00341 wchar_t c; 00342 for (XMLCh* p = (XMLCh*)toLowerCase; ((c = *p) != 0); ) 00343 *p++ = std::towlower(c); 00344 00345 #else 00346 #error Sorry, no support for lowerCase 00347 #endif 00348 } 00349 00350 00351 void 00352 MacOSUnicodeConverter::ConvertWideToNarrow(const XMLCh* wide, char* narrow, std::size_t maxChars) 00353 { 00354 while (maxChars-- > 0) 00355 if ((*narrow++ = *wide++) == 0) 00356 break; 00357 } 00358 00359 00360 void 00361 MacOSUnicodeConverter::CopyCStringToPascal(const char* c, Str255 pas) 00362 { 00363 int len = strlen(c); 00364 if (len > sizeof(pas)-1) 00365 len = sizeof(pas)-1; 00366 memmove(&pas[1], c, len); 00367 pas[0] = len; 00368 } 00369 00370 00371 // --------------------------------------------------------------------------- 00372 // MacOSTransService: The protected virtual transcoding service API 00373 // --------------------------------------------------------------------------- 00374 XMLTranscoder* 00375 MacOSUnicodeConverter::makeNewXMLTranscoder(const XMLCh* const encodingName 00376 , XMLTransService::Codes& resValue 00377 , const XMLSize_t blockSize 00378 , MemoryManager* const manager) 00379 { 00380 XMLTranscoder* result = NULL; 00381 resValue = XMLTransService::Ok; 00382 00383 TextToUnicodeInfo textToUnicodeInfo = NULL; 00384 UnicodeToTextInfo unicodeToTextInfo = NULL; 00385 00386 // Map the encoding to a Mac OS Encoding value 00387 Str255 pasEncodingName; 00388 char cEncodingName[256]; 00389 ConvertWideToNarrow(encodingName, cEncodingName, sizeof(cEncodingName)); 00390 CopyCStringToPascal(cEncodingName, pasEncodingName); 00391 00392 TextEncoding textEncoding = 0; 00393 OSStatus status = TECGetTextEncodingFromInternetName ( 00394 &textEncoding, 00395 pasEncodingName); 00396 00397 // Make a transcoder for that encoding 00398 if (status == noErr) 00399 result = makeNewXMLTranscoder(encodingName, resValue, blockSize, textEncoding, manager); 00400 else 00401 resValue = XMLTransService::UnsupportedEncoding; 00402 00403 return result; 00404 } 00405 00406 00407 XMLTranscoder* 00408 MacOSUnicodeConverter::makeNewXMLTranscoder(const XMLCh* const encodingName 00409 , XMLTransService::Codes& resValue 00410 , const XMLSize_t blockSize 00411 , TextEncoding textEncoding 00412 , MemoryManager* const manager) 00413 { 00414 XMLTranscoder* result = NULL; 00415 resValue = XMLTransService::Ok; 00416 OSStatus status = noErr; 00417 00418 TECObjectRef textToUnicode = NULL; 00419 TECObjectRef unicodeToText = NULL; 00420 00421 // We convert to and from utf16 00422 TextEncoding utf16Encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 00423 kTextEncodingDefaultVariant, 00424 kUnicode16BitFormat); 00425 00426 // Create a TEC from our encoding to utf16 00427 if (status == noErr) 00428 status = TECCreateConverter(&textToUnicode, textEncoding, utf16Encoding); 00429 00430 // Create a TEC from utf16 to our encoding 00431 if (status == noErr) 00432 status = TECCreateConverter(&unicodeToText, utf16Encoding, textEncoding); 00433 00434 if (status != noErr) 00435 { 00436 // Clean up on error 00437 if (textToUnicode != NULL) 00438 TECDisposeConverter(textToUnicode); 00439 00440 if (unicodeToText != NULL) 00441 TECDisposeConverter(unicodeToText); 00442 00443 resValue = XMLTransService::UnsupportedEncoding; 00444 } 00445 else 00446 { 00447 // Create our transcoder, passing in the converters 00448 result = new (manager) MacOSTranscoder(encodingName, textToUnicode, unicodeToText, blockSize, manager); 00449 } 00450 00451 return result; 00452 } 00453 00454 00455 // --------------------------------------------------------------------------- 00456 // IsMacOSUnicodeConverterSupported 00457 // --------------------------------------------------------------------------- 00458 bool 00459 MacOSUnicodeConverter::IsMacOSUnicodeConverterSupported(void) 00460 { 00461 return UpgradeScriptInfoToTextEncoding != (void*)NULL 00462 && CreateTextToUnicodeInfoByEncoding != (void*)NULL 00463 ; 00464 } 00465 00466 00467 // --------------------------------------------------------------------------- 00468 // MacOSTranscoder: Constructors and Destructor 00469 // --------------------------------------------------------------------------- 00470 MacOSTranscoder::MacOSTranscoder(const XMLCh* const encodingName 00471 , TECObjectRef textToUnicode 00472 , TECObjectRef unicodeToText 00473 , const XMLSize_t blockSize 00474 , MemoryManager* const manager) : 00475 XMLTranscoder(encodingName, blockSize, manager), 00476 mTextToUnicode(textToUnicode), 00477 mUnicodeToText(unicodeToText) 00478 { 00479 } 00480 00481 00482 MacOSTranscoder::~MacOSTranscoder() 00483 { 00484 // Dispose our text encoding converters 00485 TECDisposeConverter(mTextToUnicode); 00486 TECDisposeConverter(mUnicodeToText); 00487 } 00488 00489 00490 // --------------------------------------------------------------------------- 00491 // MacOSTranscoder: The virtual transcoder API 00492 // --------------------------------------------------------------------------- 00493 00494 XMLSize_t 00495 MacOSTranscoder::transcodeFrom( const XMLByte* const srcData 00496 , const XMLSize_t srcCount 00497 , XMLCh* const toFill 00498 , const XMLSize_t maxChars 00499 , XMLSize_t& bytesEaten 00500 , unsigned char* const charSizes) 00501 { 00502 // Reset the tec state (since we don't know that we're part of a 00503 // larger run of text). 00504 TECClearConverterContextInfo(mTextToUnicode); 00505 00506 // Do the conversion 00507 ByteCount bytesConsumed = 0; 00508 ByteCount bytesProduced = 0; 00509 OSStatus status = TECConvertText(mTextToUnicode, 00510 (ConstTextPtr) srcData, 00511 srcCount, // inputBufferLength 00512 &bytesConsumed, // actualInputLength 00513 (TextPtr) toFill, // outputBuffer 00514 maxChars * sizeof(XMLCh), // outputBufferLength 00515 &bytesProduced); // actualOutputLength 00516 00517 // Ignorable error codes 00518 if( status == kTECUsedFallbacksStatus 00519 || status == kTECOutputBufferFullStatus 00520 || status == kTECPartialCharErr 00521 ) 00522 status = noErr; 00523 00524 if (status != noErr) 00525 ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq); 00526 00527 std::size_t charsProduced = bytesProduced / sizeof(XMLCh); 00528 00529 bytesEaten = bytesConsumed; 00530 return charsProduced; 00531 } 00532 00533 00534 XMLSize_t 00535 MacOSTranscoder::transcodeTo(const XMLCh* const srcData 00536 , const XMLSize_t srcCount 00537 , XMLByte* const toFill 00538 , const XMLSize_t maxBytes 00539 , XMLSize_t& charsEaten 00540 , const UnRepOpts options) 00541 { 00542 // Reset the tec state (since we don't know that we're part of a 00543 // larger run of text). 00544 TECClearConverterContextInfo(mUnicodeToText); 00545 00546 // Do the conversion 00547 ByteCount bytesConsumed = 0; 00548 ByteCount bytesProduced = 0; 00549 OSStatus status = TECConvertText(mUnicodeToText, 00550 (ConstTextPtr) srcData, 00551 srcCount * sizeof(XMLCh), // inputBufferLength 00552 &bytesConsumed, // actualInputLength 00553 (TextPtr) toFill, // outputBuffer 00554 maxBytes, // outputBufferLength 00555 &bytesProduced); // actualOutputLength 00556 00557 // Ignorable error codes 00558 if( status == kTECUsedFallbacksStatus 00559 || status == kTECOutputBufferFullStatus 00560 || status == kTECPartialCharErr 00561 ) 00562 status = noErr; 00563 00564 std::size_t charsConsumed = bytesConsumed / sizeof(XMLCh); 00565 00566 // Deal with errors 00567 if (status != noErr) 00568 { 00569 if (status == kTECUnmappableElementErr && options == UnRep_Throw) 00570 { 00571 XMLCh tmpBuf[17]; 00572 XMLString::binToText(srcData[charsConsumed], tmpBuf, 16, 16); 00573 ThrowXML2 00574 ( 00575 TranscodingException 00576 , XMLExcepts::Trans_Unrepresentable 00577 , tmpBuf 00578 , getEncodingName() 00579 ); 00580 } 00581 } 00582 00583 charsEaten = charsConsumed; 00584 return bytesProduced; 00585 } 00586 00587 00588 bool 00589 MacOSTranscoder::canTranscodeTo(const unsigned int toCheck) 00590 { 00591 // 00592 // If the passed value is really a surrogate embedded together, then 00593 // we need to break it out into its two chars. Else just one. 00594 // 00595 unsigned int srcCnt = 0; 00596 UniChar srcBuf[2]; 00597 00598 if (toCheck & 0xFFFF0000) 00599 { 00600 srcBuf[srcCnt++] = XMLCh(toCheck >> 10) + 0xD800; 00601 srcBuf[srcCnt++] = XMLCh(toCheck & 0x3FF) + 0xDC00; 00602 } 00603 else 00604 { 00605 srcBuf[srcCnt++] = XMLCh(toCheck); 00606 } 00607 00608 // Clear the converter state: we're in a new run of text 00609 TECClearConverterContextInfo(mUnicodeToText); 00610 00611 // 00612 // Use a local temp buffer that would hold any sane multi-byte char 00613 // sequence and try to transcode this guy into it. 00614 // 00615 char tmpBuf[64]; 00616 00617 ByteCount bytesConsumed = 0; 00618 ByteCount bytesProduced = 0; 00619 OSStatus status = TECConvertText(mUnicodeToText, 00620 (ConstTextPtr) srcBuf, 00621 srcCnt * sizeof(XMLCh), // inputBufferLength 00622 &bytesConsumed, // actualInputLength 00623 (TextPtr) tmpBuf, // outputBuffer 00624 sizeof(tmpBuf), // outputBufferLength 00625 &bytesProduced); // actualOutputLength 00626 00627 std::size_t charsConsumed = bytesConsumed / sizeof(XMLCh); 00628 00629 // Return true if we transcoded the character(s) 00630 // successfully 00631 return status == noErr && charsConsumed == srcCnt; 00632 } 00633 00634 00635 // --------------------------------------------------------------------------- 00636 // MacOSLCPTranscoder: Constructors and Destructor 00637 // --------------------------------------------------------------------------- 00638 MacOSLCPTranscoder::MacOSLCPTranscoder(XMLTranscoder* const transcoder, MemoryManager* const manager) 00639 : mTranscoder(transcoder), 00640 mManager(manager), 00641 mMutex (manager) 00642 { 00643 } 00644 00645 00646 MacOSLCPTranscoder::~MacOSLCPTranscoder() 00647 { 00648 // Dispose the XMLTranscoder we're using 00649 delete mTranscoder; 00650 } 00651 00652 00653 // --------------------------------------------------------------------------- 00654 // MacOSLCPTranscoder: Implementation of the virtual transcoder interface 00655 // --------------------------------------------------------------------------- 00656 00657 // --------------------------------------------------------------------------- 00658 // In order to implement calcRequiredSize we have to go ahead and do the 00659 // conversion, which seems quite painful. The Mac Unicode converter has 00660 // no way of saying "don't actually do the conversion." So we end up 00661 // converting twice. It would be nice if the calling code could do some 00662 // extra buffering to avoid this result. 00663 // --------------------------------------------------------------------------- 00664 XMLSize_t MacOSLCPTranscoder::calcRequiredSize(const char* const srcText 00665 , MemoryManager* const manager) 00666 { 00667 if (!srcText) 00668 return 0; 00669 00670 // Lock our mutex to gain exclusive access to the transcoder 00671 // since the lcp transcoders are used globally. 00672 XMLMutexLock lock(&mMutex); 00673 00674 std::size_t totalCharsProduced = 0; 00675 00676 const char* src = srcText; 00677 XMLSize_t srcCnt = std::strlen(src); 00678 00679 // Iterate over the characters, converting into a temporary buffer which we'll discard. 00680 // All this to get the size required. 00681 while (srcCnt > 0) 00682 { 00683 TempXMLBuf tmpBuf; 00684 XMLSize_t bytesConsumed = 0; 00685 XMLSize_t charsProduced = mTranscoder->transcodeFrom((XMLByte*)src, srcCnt, 00686 tmpBuf, kTempBufCount, 00687 bytesConsumed, 00688 NULL); 00689 00690 src += bytesConsumed; 00691 srcCnt -= bytesConsumed; 00692 00693 totalCharsProduced += charsProduced; 00694 00695 // Bail out if nothing more was produced 00696 if (charsProduced == 0) 00697 break; 00698 } 00699 00700 // Return number of XMLCh characters required (not counting terminating NULL!) 00701 return totalCharsProduced; 00702 } 00703 00704 00705 // --------------------------------------------------------------------------- 00706 // In order to implement calcRequiredSize we have to go ahead and do the 00707 // conversion, which seems quite painful. The Mac Unicode converter has 00708 // no way of saying "don't actually do the conversion." So we end up 00709 // converting twice. It would be nice if the calling code could do some 00710 // extra buffering to avoid this result. 00711 // --------------------------------------------------------------------------- 00712 XMLSize_t MacOSLCPTranscoder::calcRequiredSize(const XMLCh* const srcText 00713 , MemoryManager* const manager) 00714 { 00715 if (!srcText) 00716 return 0; 00717 00718 // Lock our mutex to gain exclusive access to the transcoder 00719 // since the lcp transcoders are used globally. 00720 XMLMutexLock lock(&mMutex); 00721 std::size_t totalBytesProduced = 0; 00722 00723 const XMLCh* src = srcText; 00724 XMLSize_t srcCnt = XMLString::stringLen(src); 00725 00726 // Iterate over the characters, converting into a temporary buffer which we'll discard. 00727 // All this to get the size required. 00728 while (srcCnt > 0) 00729 { 00730 TempCharBuf tmpBuf; 00731 XMLSize_t charsConsumed = 0; 00732 XMLSize_t bytesProduced = mTranscoder->transcodeTo(src, srcCnt, 00733 (XMLByte*)tmpBuf, kTempBufCount, 00734 charsConsumed, 00735 XMLTranscoder::UnRep_RepChar); 00736 00737 src += charsConsumed; 00738 srcCnt -= charsConsumed; 00739 00740 totalBytesProduced += bytesProduced; 00741 00742 // Bail out if nothing more was produced 00743 if (bytesProduced == 0) 00744 break; 00745 } 00746 00747 // Return number of characters required (not counting terminating NULL!) 00748 return totalBytesProduced; 00749 } 00750 00751 00752 char* 00753 MacOSLCPTranscoder::transcode(const XMLCh* const srcText, 00754 MemoryManager* const manager) 00755 { 00756 if (!srcText) 00757 return NULL; 00758 00759 // Lock our mutex to gain exclusive access to the transcoder 00760 // since the lcp transcoders are used globally. 00761 XMLMutexLock lock(&mMutex); 00762 00763 ArrayJanitor<char> result(0); 00764 const XMLCh* src = srcText; 00765 XMLSize_t srcCnt = XMLString::stringLen(src); 00766 std::size_t resultCnt = 0; 00767 00768 // Iterate over the characters, buffering into a local temporary 00769 // buffer, which we dump into an allocated (and reallocated, as necessary) 00770 // string for return. 00771 while (srcCnt > 0) 00772 { 00773 // Transcode some characters 00774 TempCharBuf tmpBuf; 00775 XMLSize_t charsConsumed = 0; 00776 XMLSize_t bytesProduced = mTranscoder->transcodeTo(src, srcCnt, 00777 (XMLByte*)tmpBuf, kTempBufCount, 00778 charsConsumed, 00779 XMLTranscoder::UnRep_RepChar); 00780 src += charsConsumed; 00781 srcCnt -= charsConsumed; 00782 00783 // Move the data to result buffer, reallocating as needed 00784 if (bytesProduced > 0) 00785 { 00786 // Allocate space for result 00787 std::size_t newCnt = resultCnt + bytesProduced; 00788 ArrayJanitor<char> newResult 00789 ( 00790 (char*) manager->allocate((newCnt + 1) * sizeof(char)) //new char[newCnt + 1] 00791 , manager 00792 ); 00793 if (newResult.get() != NULL) 00794 { 00795 // Incorporate previous result 00796 if (result.get() != NULL) 00797 std::memcpy(newResult.get(), result.get(), resultCnt); 00798 result.reset(newResult.release()); 00799 00800 // Copy in new data 00801 std::memcpy(result.get() + resultCnt, tmpBuf, bytesProduced); 00802 resultCnt = newCnt; 00803 00804 // Terminate the result 00805 result[resultCnt] = '\0'; 00806 } 00807 } 00808 else 00809 break; 00810 } 00811 00812 if (!result.get()) 00813 { 00814 // No error, and no result: we probably processed a zero length 00815 // input, in which case we want a valid zero length output. 00816 result.reset 00817 ( 00818 (char*) manager->allocate(sizeof(char))//new char[1] 00819 , manager 00820 ); 00821 result[0] = '\0'; 00822 } 00823 00824 return result.release(); 00825 } 00826 00827 00828 XMLCh* 00829 MacOSLCPTranscoder::transcode(const char* const srcText, 00830 MemoryManager* const manager) 00831 { 00832 if (!srcText) 00833 return NULL; 00834 00835 // Lock our mutex to gain exclusive access to the transcoder 00836 // since the lcp transcoders are used globally. 00837 XMLMutexLock lock(&mMutex); 00838 00839 ArrayJanitor<XMLCh> result(0); 00840 const char* src = srcText; 00841 std::size_t srcCnt = std::strlen(src); 00842 std::size_t resultCnt = 0; 00843 00844 // Iterate over the characters, buffering into a local temporary 00845 // buffer, which we dump into an allocated (and reallocated, as necessary) 00846 // string for return. 00847 while (srcCnt > 0) 00848 { 00849 // Transcode some characters 00850 TempXMLBuf tmpBuf; 00851 XMLSize_t bytesConsumed = 0; 00852 XMLSize_t charsProduced = mTranscoder->transcodeFrom((XMLByte*)src, srcCnt, 00853 tmpBuf, kTempBufCount, 00854 bytesConsumed, 00855 NULL); 00856 src += bytesConsumed; 00857 srcCnt -= bytesConsumed; 00858 00859 // Move the data to result buffer, reallocating as needed 00860 if (charsProduced > 0) 00861 { 00862 // Allocate space for result 00863 std::size_t newCnt = resultCnt + charsProduced; 00864 ArrayJanitor<XMLCh> newResult 00865 ( 00866 (XMLCh*) manager->allocate((newCnt + 1) * sizeof(XMLCh)) //new XMLCh[newCnt + 1] 00867 , manager 00868 ); 00869 if (newResult.get() != NULL) 00870 { 00871 // Incorporate previous result 00872 if (result.get() != NULL) 00873 std::memcpy(newResult.get(), result.get(), resultCnt * sizeof(XMLCh)); 00874 result.reset(newResult.release()); 00875 00876 // Copy in new data 00877 std::memcpy(result.get() + resultCnt, tmpBuf, charsProduced * sizeof(XMLCh)); 00878 resultCnt = newCnt; 00879 00880 result[resultCnt] = 0; 00881 } 00882 } 00883 else 00884 break; 00885 } 00886 00887 if (!result.get()) 00888 { 00889 // No error, and no result: we probably processed a zero length 00890 // input, in which case we want a valid zero length output. 00891 result.reset 00892 ( 00893 (XMLCh*) manager->allocate(sizeof(XMLCh))//new XMLCh[1] 00894 , manager 00895 ); 00896 result[0] = '\0'; 00897 } 00898 00899 return result.release(); 00900 } 00901 00902 00903 bool 00904 MacOSLCPTranscoder::transcode( const char* const toTranscode 00905 , XMLCh* const toFill 00906 , const XMLSize_t maxChars 00907 , MemoryManager* const manager) 00908 { 00909 // toFill must contain space for maxChars XMLCh characters + 1 (for terminating NULL). 00910 00911 // Check for a couple of psycho corner cases 00912 if (!toTranscode || !maxChars || !*toTranscode) 00913 { 00914 toFill[0] = 0; 00915 return true; 00916 } 00917 00918 // Lock our mutex to gain exclusive access to the transcoder 00919 // since the lcp transcoders are used globally. 00920 XMLMutexLock lock(&mMutex); 00921 00922 // Call the transcoder to do the work 00923 XMLSize_t srcLen = std::strlen(toTranscode); 00924 XMLSize_t bytesConsumed = 0; 00925 XMLSize_t charsProduced = mTranscoder->transcodeFrom((XMLByte*)toTranscode, srcLen, 00926 toFill, maxChars, 00927 bytesConsumed, 00928 NULL); 00929 00930 // Zero terminate the output string 00931 toFill[charsProduced] = L'\0'; 00932 00933 // Return true if we consumed all of the characters 00934 return (bytesConsumed == srcLen); 00935 } 00936 00937 00938 bool 00939 MacOSLCPTranscoder::transcode( const XMLCh* const toTranscode 00940 , char* const toFill 00941 , const XMLSize_t maxChars 00942 , MemoryManager* const manager) 00943 { 00944 // toFill must contain space for maxChars bytes + 1 (for terminating NULL). 00945 00946 // Check for a couple of psycho corner cases 00947 if (!toTranscode || !maxChars || !*toTranscode) 00948 { 00949 toFill[0] = 0; 00950 return true; 00951 } 00952 00953 // Lock our mutex to gain exclusive access to the transcoder 00954 // since the lcp transcoders are used globally. 00955 XMLMutexLock lock(&mMutex); 00956 00957 // Call the transcoder to do the work 00958 XMLSize_t srcLen = XMLString::stringLen(toTranscode); 00959 XMLSize_t charsConsumed = 0; 00960 XMLSize_t bytesProduced = mTranscoder->transcodeTo(toTranscode, srcLen, 00961 (XMLByte*)toFill, maxChars, 00962 charsConsumed, 00963 XMLTranscoder::UnRep_RepChar); 00964 00965 // Zero terminate the output string 00966 toFill[bytesProduced] = '\0'; 00967 00968 // Return true if we consumed all of the characters 00969 return (charsConsumed == srcLen); 00970 } 00971 00972 00973 XERCES_CPP_NAMESPACE_END