GME  13
MacOSUnicodeConverter.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  *      $Id: MacOSUnicodeConverter.cpp 695759 2008-09-16 08:04:55Z borisk $
00020  */
00021  
00022  
00023 // ---------------------------------------------------------------------------
00024 //  Includes
00025 // ---------------------------------------------------------------------------
00026 #include <xercesc/util/XercesDefs.hpp>
00027 
00028 #include <algorithm>
00029 #include <cstddef>
00030 #include <cstring>
00031 
00032 #if defined(__APPLE__)
00033     //  Framework includes
00034     #include <CoreServices/CoreServices.h>
00035 #else
00036     //  Classic includes otherwise
00037     #include <MacErrors.h>
00038     #include <Script.h>
00039     #include <TextUtils.h>
00040     #include <TextEncodingConverter.h>
00041     #include <TextCommon.h>
00042     #include <CodeFragments.h>
00043     #include <UnicodeConverter.h>
00044     #include <UnicodeUtilities.h>
00045     #include <CFCharacterSet.h>
00046     #include <CFString.h>
00047 #endif
00048 
00049 #include <xercesc/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.hpp>
00050 #include <xercesc/util/XMLUniDefs.hpp>
00051 #include <xercesc/util/XMLUni.hpp>
00052 #include <xercesc/util/XMLString.hpp>
00053 #include <xercesc/util/TranscodingException.hpp>
00054 #include <xercesc/util/PlatformUtils.hpp>
00055 #include <xercesc/util/Janitor.hpp>
00056 
00057 XERCES_CPP_NAMESPACE_BEGIN
00058 
00059 // ---------------------------------------------------------------------------
00060 //      Typedefs
00061 // ---------------------------------------------------------------------------
00062 
00063 //      TempBufs are used for cases where we need a temporary buffer while processing.
00064 const std::size_t kTempBufCount = 512;
00065 typedef char    TempCharBuf[kTempBufCount];
00066 typedef UniChar TempUniBuf[kTempBufCount];
00067 typedef XMLCh   TempXMLBuf[kTempBufCount];
00068 
00069 
00070 // ---------------------------------------------------------------------------
00071 //  Local, const data
00072 // ---------------------------------------------------------------------------
00073 const XMLCh MacOSUnicodeConverter::fgMyServiceId[] =
00074 {
00075     chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chNull
00076 };
00077 
00078 
00079 const XMLCh MacOSUnicodeConverter::fgMacLCPEncodingName[] =
00080 {
00081         chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chLatin_L
00082     ,   chLatin_C, chLatin_P, chLatin_E, chLatin_n, chLatin_c, chLatin_o
00083     ,   chLatin_d, chLatin_i, chLatin_n, chLatin_g, chNull
00084 };
00085 
00086 
00087 
00088 // ---------------------------------------------------------------------------
00089 //  MacOSUnicodeConverter: Constructors and Destructor
00090 // ---------------------------------------------------------------------------
00091 MacOSUnicodeConverter::MacOSUnicodeConverter(MemoryManager* manager)
00092   : fCollator(NULL)
00093 {
00094         //      Test for presense of unicode collation functions
00095         fHasUnicodeCollation = (UCCompareText != NULL);
00096     
00097     //  Create a unicode collator for doing string comparisons
00098     if (fHasUnicodeCollation)
00099     {
00100                 //  Configure collation options
00101         UCCollateOptions collateOptions =
00102                                                                 kUCCollateComposeInsensitiveMask
00103                                                                 | kUCCollateWidthInsensitiveMask
00104                                                                 | kUCCollateCaseInsensitiveMask
00105                                                                 | kUCCollatePunctuationSignificantMask
00106                                                                 ;
00107                                                 
00108         OSStatus status = UCCreateCollator(NULL, 0, collateOptions, &fCollator);
00109     }
00110 }
00111 
00112 
00113 MacOSUnicodeConverter::~MacOSUnicodeConverter()
00114 {
00115     //  Dispose our collator
00116     if (fCollator != NULL)
00117         UCDisposeCollator(&fCollator);
00118 }
00119 
00120 
00121 // ---------------------------------------------------------------------------
00122 //  MacOSUnicodeConverter: The virtual transcoding service API
00123 // ---------------------------------------------------------------------------
00124 int MacOSUnicodeConverter::compareIString(  const XMLCh* const    comp1
00125                                           , const XMLCh* const    comp2)
00126 {
00127         //      If unicode collation routines are available, use them.
00128         //      This should be the case on Mac OS 8.6 and later,
00129         //      with Carbon 1.0.2 or later, and under Mac OS X.
00130         //
00131         //      Otherwise, but only for Metrowerks, since only Metrowerks
00132         //      has a c library with a valid set of wchar routines,
00133         //      fall back to the standard library.
00134 
00135         if (fHasUnicodeCollation && fCollator != NULL)
00136         {
00137                 std::size_t cnt1 = XMLString::stringLen(comp1);
00138                 std::size_t cnt2 = XMLString::stringLen(comp2);
00139                 
00140         Boolean equivalent = false;
00141         SInt32 order = 0;
00142         OSStatus status = UCCompareText(
00143                                 fCollator,
00144                                 reinterpret_cast<const UniChar*>(comp1),
00145                                 cnt1,
00146                                 reinterpret_cast<const UniChar*>(comp2),
00147                                 cnt2,
00148                                 &equivalent,
00149                                 &order
00150                                 );
00151                                                                         
00152         return ((status != noErr) || equivalent) ? 0 : order;
00153         }
00154         else
00155         {
00156                 //      For some reason there is no platform utils available
00157                 //      where we expect it. Bail.
00158                 XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService);
00159                 return 0;
00160         }
00161 }
00162 
00163 
00164 int MacOSUnicodeConverter::compareNIString( const XMLCh* const  comp1
00165                                         , const XMLCh* const    comp2
00166                                         , const XMLSize_t       maxChars)
00167 {
00168         //      If unicode collation routines are available, use them.
00169         //      This should be the case on Mac OS 8.6 and later,
00170         //      with Carbon 1.0.2 or later, and under Mac OS X.
00171         //
00172         //      Otherwise, but only for Metrowerks, since only Metrowerks
00173         //      has a c library with a valid set of wchar routines,
00174         //      fall back to the standard library.
00175 
00176         if (fHasUnicodeCollation && fCollator != NULL)
00177         {
00178                 std::size_t cnt1 = XMLString::stringLen(comp1);
00179                 std::size_t cnt2 = XMLString::stringLen(comp2);
00180                 
00181                 //      Restrict view of source characters to first {maxChars}
00182                 if (cnt1 > maxChars)
00183                         cnt1 = maxChars;
00184                         
00185                 if (cnt2 > maxChars)
00186                         cnt2 = maxChars;
00187                 
00188         Boolean equivalent = false;
00189         SInt32 order = 0;
00190         OSStatus status = UCCompareText(
00191                                 fCollator,      
00192                                 reinterpret_cast<const UniChar*>(comp1),
00193                                 cnt1,
00194                                 reinterpret_cast<const UniChar*>(comp2),
00195                                 cnt2,
00196                                 &equivalent,
00197                                 &order
00198                                 );
00199                                 
00200         return ((status != noErr) || equivalent) ? 0 : order;
00201         }
00202         else
00203         {
00204                 //      For some reason there is no platform utils available
00205                 //      where we expect it. Bail.
00206                 XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService);
00207                 return 0;
00208         }
00209 }
00210 
00211 
00212 const XMLCh* MacOSUnicodeConverter::getId() const
00213 {
00214     return fgMyServiceId;
00215 }
00216 
00217 TextEncoding
00218 MacOSUnicodeConverter::discoverLCPEncoding()
00219 {
00220         TextEncoding encoding = 0;
00221         
00222     //  Ask the OS for the best text encoding for this application
00223     //  We would call GetApplicationTextEncoding(), but it's available only in
00224     //  Carbon (not CarbonCore), and we try to link with frameworks only in CoreServices.
00225     //      encoding = GetApplicationTextEncoding();
00226     
00227         //      Get TextEncoding for the current Mac System Script, falling back to Mac Roman
00228         if (noErr != UpgradeScriptInfoToTextEncoding(
00229                                         smSystemScript, kTextLanguageDontCare, kTextRegionDontCare,
00230                                         NULL, &encoding))
00231                 encoding = CreateTextEncoding(kTextEncodingMacRoman,
00232                                                                         kTextEncodingDefaultVariant,
00233                                                                         kTextEncodingDefaultFormat);
00234 
00235         //  Traditionally, the Mac transcoder has used the current system script
00236         //  as the LCP text encoding.
00237         //
00238         //  As of Xerces 2.6, this continues to be the case if XML_MACOS_LCP_TRADITIONAL
00239         //  is defined.
00240         //
00241         //  Otherwise, but only for Mac OS X,  utf-8 will be used instead.
00242         //  Since posix paths are utf-8 encoding on OS X, and the OS X
00243         //  terminal uses utf-8 by default, this seems to make the most sense.
00244         #if !defined(XML_MACOS_LCP_TRADITIONAL)
00245         if (true /*gMacOSXOrBetter*/)
00246         {
00247                 //  Manufacture a text encoding for UTF8
00248                 encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
00249                                                                         kTextEncodingDefaultVariant,
00250                                                                         kUnicodeUTF8Format);
00251         }
00252         #endif
00253         
00254         return encoding;
00255 }
00256 
00257 
00258 XMLLCPTranscoder* MacOSUnicodeConverter::makeNewLCPTranscoder(MemoryManager* manager)
00259 {
00260         XMLLCPTranscoder* result = NULL;
00261         OSStatus status = noErr;
00262         
00263         //  Discover the text encoding to use for the LCP
00264         TextEncoding lcpTextEncoding = discoverLCPEncoding();
00265 
00266     //  We implement the LCP transcoder in terms of the XMLTranscoder.
00267         //  Create an XMLTranscoder for this encoding
00268         XMLTransService::Codes resValue;
00269     XMLTranscoder* xmlTrans = makeNewXMLTranscoder(fgMacLCPEncodingName,
00270                                 resValue, kTempBufCount,
00271                                                                 lcpTextEncoding, manager);
00272     
00273     if (xmlTrans)
00274     {
00275         //  Pass the XMLTranscoder over to the LPC transcoder
00276         if (resValue == XMLTransService::Ok)
00277             result = new (manager) MacOSLCPTranscoder(xmlTrans, manager);
00278         else
00279             delete xmlTrans;
00280     }
00281         
00282     return result;
00283 }
00284 
00285 
00286 bool MacOSUnicodeConverter::supportsSrcOfs() const
00287 {
00288         // For now, we don't support source offsets
00289     return false;
00290 }
00291 
00292 
00293 void MacOSUnicodeConverter::upperCase(XMLCh* const toUpperCase)
00294 {
00295 #if TARGET_API_MAC_CARBON
00296 
00297    // If we're targeting carbon, use the CFString conversion to uppercase
00298    int len = XMLString::stringLen(toUpperCase);
00299    CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy(
00300         kCFAllocatorDefault,
00301         (UniChar*)toUpperCase,
00302         len,            // length
00303         len,            // capacity
00304         kCFAllocatorNull);
00305    CFStringUppercase(cfString, NULL);
00306    CFRelease(cfString);
00307 
00308 #elif (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
00309 
00310         // Use this if there's a reasonable c library available.
00311         // Metrowerks does this reasonably
00312         wchar_t c;
00313         for (XMLCh* p = (XMLCh*)toUpperCase; ((c = *p) != 0); )
00314                 *p++ = std::towupper(c);
00315 
00316 #else
00317         #error Sorry, no support for upperCase
00318 #endif
00319 }
00320 
00321 
00322 void MacOSUnicodeConverter::lowerCase(XMLCh* const toLowerCase)
00323 {
00324 #if TARGET_API_MAC_CARBON
00325 
00326    // If we're targeting carbon, use the CFString conversion to uppercase
00327    int len = XMLString::stringLen(toLowerCase);
00328    CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy(
00329         kCFAllocatorDefault,
00330         (UniChar*)toLowerCase,
00331         len,            // length
00332         len,            // capacity
00333         kCFAllocatorNull);
00334    CFStringLowercase(cfString, NULL);
00335    CFRelease(cfString);
00336 
00337 #elif (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
00338 
00339         // Use this if there's a reasonable c library available.
00340         // Metrowerks does this reasonably
00341         wchar_t c;
00342         for (XMLCh* p = (XMLCh*)toLowerCase; ((c = *p) != 0); )
00343                 *p++ = std::towlower(c);
00344 
00345 #else
00346         #error Sorry, no support for lowerCase
00347 #endif
00348 }
00349 
00350 
00351 void
00352 MacOSUnicodeConverter::ConvertWideToNarrow(const XMLCh* wide, char* narrow, std::size_t maxChars)
00353 {
00354         while (maxChars-- > 0)
00355                 if ((*narrow++ = *wide++) == 0)
00356                         break;
00357 }
00358 
00359 
00360 void
00361 MacOSUnicodeConverter::CopyCStringToPascal(const char* c, Str255 pas)
00362 {
00363         int len = strlen(c);
00364         if (len > sizeof(pas)-1)
00365                 len = sizeof(pas)-1;
00366         memmove(&pas[1], c, len);
00367         pas[0] = len;
00368 }
00369 
00370 
00371 // ---------------------------------------------------------------------------
00372 //  MacOSTransService: The protected virtual transcoding service API
00373 // ---------------------------------------------------------------------------
00374 XMLTranscoder*
00375 MacOSUnicodeConverter::makeNewXMLTranscoder(const   XMLCh* const                encodingName
00376                                         ,       XMLTransService::Codes& resValue
00377                                         , const XMLSize_t               blockSize
00378                                         ,       MemoryManager* const    manager)
00379 {
00380         XMLTranscoder* result = NULL;
00381         resValue = XMLTransService::Ok;
00382         
00383         TextToUnicodeInfo textToUnicodeInfo = NULL;
00384         UnicodeToTextInfo unicodeToTextInfo = NULL;
00385 
00386         //      Map the encoding to a Mac OS Encoding value
00387         Str255 pasEncodingName;
00388         char cEncodingName[256];
00389         ConvertWideToNarrow(encodingName, cEncodingName, sizeof(cEncodingName));
00390         CopyCStringToPascal(cEncodingName, pasEncodingName);
00391         
00392         TextEncoding textEncoding = 0;
00393         OSStatus status = TECGetTextEncodingFromInternetName (
00394                                                         &textEncoding,
00395                                                         pasEncodingName);
00396                             
00397     //  Make a transcoder for that encoding
00398         if (status == noErr)
00399                 result = makeNewXMLTranscoder(encodingName, resValue, blockSize, textEncoding, manager);
00400         else
00401                 resValue = XMLTransService::UnsupportedEncoding;
00402         
00403         return result;
00404 }
00405 
00406 
00407 XMLTranscoder*
00408 MacOSUnicodeConverter::makeNewXMLTranscoder(const   XMLCh* const                encodingName
00409                                         ,       XMLTransService::Codes& resValue
00410                                         , const XMLSize_t               blockSize
00411                                                                                 ,               TextEncoding            textEncoding
00412                                         ,       MemoryManager* const    manager)
00413 {
00414     XMLTranscoder* result = NULL;
00415         resValue = XMLTransService::Ok;
00416     OSStatus status = noErr;
00417     
00418     TECObjectRef textToUnicode = NULL;
00419     TECObjectRef unicodeToText = NULL;
00420     
00421     //  We convert to and from utf16
00422     TextEncoding utf16Encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
00423                                         kTextEncodingDefaultVariant,
00424                                         kUnicode16BitFormat);
00425 
00426     //  Create a TEC from our encoding to utf16
00427     if (status == noErr)
00428         status = TECCreateConverter(&textToUnicode, textEncoding, utf16Encoding);
00429 
00430     //  Create a TEC from utf16 to our encoding
00431     if (status == noErr)
00432         status = TECCreateConverter(&unicodeToText, utf16Encoding, textEncoding);
00433 
00434         if (status != noErr)
00435         {
00436         //  Clean up on error
00437                 if (textToUnicode != NULL)
00438             TECDisposeConverter(textToUnicode);
00439                         
00440                 if (unicodeToText != NULL)
00441             TECDisposeConverter(unicodeToText);
00442 
00443                 resValue = XMLTransService::UnsupportedEncoding;
00444         }
00445         else
00446     {
00447         //  Create our transcoder, passing in the converters
00448                 result = new (manager) MacOSTranscoder(encodingName, textToUnicode, unicodeToText, blockSize, manager);
00449     }
00450         
00451     return result;
00452 }
00453 
00454 
00455 // ---------------------------------------------------------------------------
00456 //  IsMacOSUnicodeConverterSupported
00457 // ---------------------------------------------------------------------------
00458 bool
00459 MacOSUnicodeConverter::IsMacOSUnicodeConverterSupported(void)
00460 {
00461     return UpgradeScriptInfoToTextEncoding != (void*)NULL
00462         && CreateTextToUnicodeInfoByEncoding != (void*)NULL
00463         ;
00464 }
00465 
00466 
00467 // ---------------------------------------------------------------------------
00468 //  MacOSTranscoder: Constructors and Destructor
00469 // ---------------------------------------------------------------------------
00470 MacOSTranscoder::MacOSTranscoder(const  XMLCh* const    encodingName
00471                                                                 , TECObjectRef          textToUnicode
00472                                                                 , TECObjectRef          unicodeToText
00473                                 , const XMLSize_t       blockSize
00474                                 , MemoryManager* const  manager) :
00475     XMLTranscoder(encodingName, blockSize, manager),
00476     mTextToUnicode(textToUnicode),
00477     mUnicodeToText(unicodeToText)
00478 {
00479 }
00480 
00481 
00482 MacOSTranscoder::~MacOSTranscoder()
00483 {
00484         //      Dispose our text encoding converters
00485         TECDisposeConverter(mTextToUnicode);
00486         TECDisposeConverter(mUnicodeToText);
00487 }
00488 
00489 
00490 // ---------------------------------------------------------------------------
00491 //  MacOSTranscoder: The virtual transcoder API
00492 // ---------------------------------------------------------------------------
00493 
00494 XMLSize_t
00495 MacOSTranscoder::transcodeFrom(  const  XMLByte* const          srcData
00496                                 , const XMLSize_t               srcCount
00497                                 ,       XMLCh* const            toFill
00498                                 , const XMLSize_t               maxChars
00499                                 ,       XMLSize_t&              bytesEaten
00500                                 ,       unsigned char* const    charSizes)
00501 {
00502         //  Reset the tec state (since we don't know that we're part of a
00503         //  larger run of text).
00504         TECClearConverterContextInfo(mTextToUnicode);
00505         
00506     //  Do the conversion
00507     ByteCount bytesConsumed = 0;
00508     ByteCount bytesProduced = 0;
00509     OSStatus status = TECConvertText(mTextToUnicode,
00510                 (ConstTextPtr) srcData,
00511                 srcCount,                   // inputBufferLength
00512                 &bytesConsumed,                         // actualInputLength
00513                 (TextPtr) toFill,           // outputBuffer
00514                 maxChars * sizeof(XMLCh),       // outputBufferLength
00515                 &bytesProduced);                        // actualOutputLength
00516 
00517     //  Ignorable error codes
00518     if(    status == kTECUsedFallbacksStatus
00519         || status == kTECOutputBufferFullStatus
00520         || status == kTECPartialCharErr
00521                 )
00522         status = noErr;
00523         
00524     if (status != noErr)
00525         ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
00526         
00527         std::size_t charsProduced = bytesProduced / sizeof(XMLCh);
00528         
00529     bytesEaten = bytesConsumed;
00530     return charsProduced;
00531 }
00532 
00533 
00534 XMLSize_t
00535 MacOSTranscoder::transcodeTo(const  XMLCh* const    srcData
00536                             , const XMLSize_t       srcCount
00537                             ,       XMLByte* const  toFill
00538                             , const XMLSize_t       maxBytes
00539                             ,       XMLSize_t&      charsEaten
00540                             , const UnRepOpts       options)
00541 {
00542         //  Reset the tec state (since we don't know that we're part of a
00543         //  larger run of text).
00544         TECClearConverterContextInfo(mUnicodeToText);
00545         
00546     //  Do the conversion
00547     ByteCount bytesConsumed = 0;
00548     ByteCount bytesProduced = 0;
00549     OSStatus status = TECConvertText(mUnicodeToText,
00550                 (ConstTextPtr) srcData,
00551                 srcCount * sizeof(XMLCh),   // inputBufferLength
00552                 &bytesConsumed,                         // actualInputLength
00553                 (TextPtr) toFill,           // outputBuffer
00554                 maxBytes,                   // outputBufferLength
00555                 &bytesProduced);                        // actualOutputLength
00556 
00557     //  Ignorable error codes
00558     if(    status == kTECUsedFallbacksStatus
00559         || status == kTECOutputBufferFullStatus
00560         || status == kTECPartialCharErr
00561                 )
00562         status = noErr;
00563         
00564     std::size_t charsConsumed = bytesConsumed / sizeof(XMLCh);
00565     
00566     //  Deal with errors
00567     if (status != noErr)
00568     {
00569         if (status == kTECUnmappableElementErr && options == UnRep_Throw)
00570         {
00571                 XMLCh tmpBuf[17];
00572             XMLString::binToText(srcData[charsConsumed], tmpBuf, 16, 16);
00573             ThrowXML2
00574             (
00575                 TranscodingException
00576                 , XMLExcepts::Trans_Unrepresentable
00577                 , tmpBuf
00578                 , getEncodingName()
00579             );
00580         }
00581     }
00582         
00583     charsEaten = charsConsumed;
00584     return bytesProduced;
00585 }
00586 
00587 
00588 bool
00589 MacOSTranscoder::canTranscodeTo(const unsigned int toCheck)
00590 {
00591         //
00592     //  If the passed value is really a surrogate embedded together, then
00593     //  we need to break it out into its two chars. Else just one.
00594     //
00595     unsigned int    srcCnt = 0;
00596     UniChar         srcBuf[2];
00597 
00598     if (toCheck & 0xFFFF0000)
00599     {
00600         srcBuf[srcCnt++] = XMLCh(toCheck >> 10)   + 0xD800;
00601         srcBuf[srcCnt++] = XMLCh(toCheck & 0x3FF) + 0xDC00;
00602     }
00603     else
00604     {
00605         srcBuf[srcCnt++] = XMLCh(toCheck);
00606     }
00607 
00608         //  Clear the converter state: we're in a new run of text
00609         TECClearConverterContextInfo(mUnicodeToText);
00610 
00611     //
00612     //  Use a local temp buffer that would hold any sane multi-byte char
00613     //  sequence and try to transcode this guy into it.
00614     //
00615     char tmpBuf[64];
00616 
00617     ByteCount bytesConsumed = 0;
00618     ByteCount bytesProduced = 0;
00619     OSStatus status = TECConvertText(mUnicodeToText,
00620                 (ConstTextPtr) srcBuf,
00621                 srcCnt * sizeof(XMLCh),     // inputBufferLength
00622                 &bytesConsumed,                         // actualInputLength
00623                 (TextPtr) tmpBuf,           // outputBuffer
00624                 sizeof(tmpBuf),             // outputBufferLength
00625                 &bytesProduced);                        // actualOutputLength
00626 
00627     std::size_t charsConsumed = bytesConsumed / sizeof(XMLCh);
00628         
00629         //      Return true if we transcoded the character(s)
00630         //      successfully
00631         return status == noErr && charsConsumed == srcCnt;
00632 }
00633 
00634 
00635 // ---------------------------------------------------------------------------
00636 //  MacOSLCPTranscoder: Constructors and Destructor
00637 // ---------------------------------------------------------------------------
00638 MacOSLCPTranscoder::MacOSLCPTranscoder(XMLTranscoder* const transcoder, MemoryManager* const manager)
00639  : mTranscoder(transcoder),
00640    mManager(manager),
00641    mMutex (manager)
00642 {
00643 }
00644 
00645 
00646 MacOSLCPTranscoder::~MacOSLCPTranscoder()
00647 {
00648         //      Dispose the XMLTranscoder we're using
00649     delete mTranscoder;
00650 }
00651 
00652 
00653 // ---------------------------------------------------------------------------
00654 //  MacOSLCPTranscoder: Implementation of the virtual transcoder interface
00655 // ---------------------------------------------------------------------------
00656 
00657 // ---------------------------------------------------------------------------
00658 //      In order to implement calcRequiredSize we have to go ahead and do the
00659 //      conversion, which seems quite painful. The Mac Unicode converter has
00660 //      no way of saying "don't actually do the conversion." So we end up
00661 //      converting twice. It would be nice if the calling code could do some
00662 //      extra buffering to avoid this result.
00663 // ---------------------------------------------------------------------------
00664 XMLSize_t MacOSLCPTranscoder::calcRequiredSize(const char* const srcText
00665                                      , MemoryManager* const manager)
00666 {
00667         if (!srcText)
00668                 return 0;
00669                 
00670         //  Lock our mutex to gain exclusive access to the transcoder
00671         //  since the lcp transcoders are used globally.
00672         XMLMutexLock lock(&mMutex);
00673 
00674         std::size_t totalCharsProduced = 0;
00675 
00676         const char* src = srcText;
00677         XMLSize_t srcCnt = std::strlen(src);
00678     
00679     //  Iterate over the characters, converting into a temporary buffer which we'll discard.
00680     //  All this to get the size required.
00681         while (srcCnt > 0)
00682     {
00683         TempXMLBuf tmpBuf;
00684         XMLSize_t bytesConsumed = 0;
00685                 XMLSize_t charsProduced = mTranscoder->transcodeFrom((XMLByte*)src, srcCnt,
00686                                                                                                                 tmpBuf, kTempBufCount,
00687                                                                                                                 bytesConsumed,
00688                                                                                                                 NULL);
00689                 
00690         src     += bytesConsumed;
00691         srcCnt  -= bytesConsumed;
00692 
00693         totalCharsProduced += charsProduced;
00694         
00695         //  Bail out if nothing more was produced
00696         if (charsProduced == 0)
00697             break;
00698     }
00699 
00700         //      Return number of XMLCh characters required (not counting terminating NULL!)
00701         return totalCharsProduced;
00702 }
00703 
00704 
00705 // ---------------------------------------------------------------------------
00706 //      In order to implement calcRequiredSize we have to go ahead and do the
00707 //      conversion, which seems quite painful. The Mac Unicode converter has
00708 //      no way of saying "don't actually do the conversion." So we end up
00709 //      converting twice. It would be nice if the calling code could do some
00710 //      extra buffering to avoid this result.
00711 // ---------------------------------------------------------------------------
00712 XMLSize_t MacOSLCPTranscoder::calcRequiredSize(const XMLCh* const srcText
00713                                      , MemoryManager* const manager)
00714 {
00715         if (!srcText)
00716                 return 0;
00717 
00718         //  Lock our mutex to gain exclusive access to the transcoder
00719         //  since the lcp transcoders are used globally.
00720         XMLMutexLock lock(&mMutex);
00721         std::size_t     totalBytesProduced = 0;
00722 
00723         const XMLCh*    src     = srcText;
00724         XMLSize_t    srcCnt  = XMLString::stringLen(src);
00725     
00726     //  Iterate over the characters, converting into a temporary buffer which we'll discard.
00727     //  All this to get the size required.
00728     while (srcCnt > 0)
00729     {
00730         TempCharBuf tmpBuf;
00731         XMLSize_t charsConsumed = 0;
00732                 XMLSize_t bytesProduced = mTranscoder->transcodeTo(src, srcCnt,
00733                                             (XMLByte*)tmpBuf, kTempBufCount,
00734                                             charsConsumed,
00735                                             XMLTranscoder::UnRep_RepChar);
00736         
00737         src     += charsConsumed;
00738         srcCnt  -= charsConsumed;
00739 
00740         totalBytesProduced += bytesProduced;
00741         
00742         //  Bail out if nothing more was produced
00743         if (bytesProduced == 0)
00744             break;
00745     }
00746 
00747         //      Return number of characters required (not counting terminating NULL!)
00748         return totalBytesProduced;
00749 }
00750 
00751 
00752 char*
00753 MacOSLCPTranscoder::transcode(const XMLCh* const srcText,
00754                               MemoryManager* const manager)
00755 {
00756         if (!srcText)
00757                 return NULL;
00758 
00759         //  Lock our mutex to gain exclusive access to the transcoder
00760         //  since the lcp transcoders are used globally.
00761         XMLMutexLock lock(&mMutex);
00762 
00763         ArrayJanitor<char> result(0);
00764         const XMLCh* src                = srcText;
00765         XMLSize_t srcCnt                = XMLString::stringLen(src);
00766         std::size_t resultCnt   = 0;
00767 
00768     //  Iterate over the characters, buffering into a local temporary
00769     //  buffer, which we dump into an allocated (and reallocated, as necessary)
00770     //  string for return.
00771     while (srcCnt > 0)
00772     {
00773                 //  Transcode some characters
00774         TempCharBuf tmpBuf;
00775         XMLSize_t charsConsumed = 0;
00776         XMLSize_t bytesProduced = mTranscoder->transcodeTo(src, srcCnt,
00777                                             (XMLByte*)tmpBuf, kTempBufCount,
00778                                             charsConsumed,
00779                                             XMLTranscoder::UnRep_RepChar);
00780         src     += charsConsumed;
00781         srcCnt  -= charsConsumed;
00782 
00783                 //      Move the data to result buffer, reallocating as needed
00784                 if (bytesProduced > 0)
00785                 {
00786                         //      Allocate space for result
00787                         std::size_t newCnt = resultCnt + bytesProduced;
00788                         ArrayJanitor<char> newResult
00789             (
00790                 (char*) manager->allocate((newCnt + 1) * sizeof(char)) //new char[newCnt + 1]
00791                 , manager
00792             );
00793                         if (newResult.get() != NULL)
00794                         {
00795                                 //      Incorporate previous result
00796                                 if (result.get() != NULL)
00797                                         std::memcpy(newResult.get(), result.get(), resultCnt);
00798                                 result.reset(newResult.release());
00799 
00800                                 //      Copy in new data
00801                                 std::memcpy(result.get() + resultCnt, tmpBuf, bytesProduced);
00802                                 resultCnt = newCnt;
00803                                 
00804                 //  Terminate the result
00805                                 result[resultCnt] = '\0';                                       
00806                         }
00807                 }
00808         else
00809             break;
00810     }
00811 
00812     if (!result.get())
00813         {
00814                 //      No error, and no result: we probably processed a zero length
00815                 //      input, in which case we want a valid zero length output.
00816                 result.reset
00817         (
00818             (char*) manager->allocate(sizeof(char))//new char[1]
00819             , manager
00820         );
00821                 result[0] = '\0';
00822         }
00823 
00824         return result.release();
00825 }
00826 
00827 
00828 XMLCh*
00829 MacOSLCPTranscoder::transcode(const char* const srcText,
00830                               MemoryManager* const manager)
00831 {
00832         if (!srcText)
00833                 return NULL;
00834 
00835         //  Lock our mutex to gain exclusive access to the transcoder
00836         //  since the lcp transcoders are used globally.
00837         XMLMutexLock lock(&mMutex);
00838 
00839         ArrayJanitor<XMLCh> result(0);
00840         const char* src                 = srcText;
00841         std::size_t srcCnt              = std::strlen(src);
00842         std::size_t resultCnt   = 0;
00843 
00844     //  Iterate over the characters, buffering into a local temporary
00845     //  buffer, which we dump into an allocated (and reallocated, as necessary)
00846     //  string for return.
00847     while (srcCnt > 0)
00848     {
00849         //  Transcode some characters
00850                 TempXMLBuf tmpBuf;
00851         XMLSize_t bytesConsumed = 0;
00852                 XMLSize_t charsProduced = mTranscoder->transcodeFrom((XMLByte*)src, srcCnt,
00853                                                                                                 tmpBuf, kTempBufCount,
00854                                                                                                 bytesConsumed,
00855                                                                                                 NULL);
00856         src     += bytesConsumed;
00857         srcCnt  -= bytesConsumed;
00858 
00859                 //      Move the data to result buffer, reallocating as needed
00860                 if (charsProduced > 0)
00861                 {
00862                         //      Allocate space for result
00863                         std::size_t newCnt = resultCnt + charsProduced;
00864                         ArrayJanitor<XMLCh> newResult
00865             (
00866                 (XMLCh*) manager->allocate((newCnt + 1) * sizeof(XMLCh)) //new XMLCh[newCnt + 1]
00867                 , manager
00868             );
00869                         if (newResult.get() != NULL)
00870                         {
00871                                 //      Incorporate previous result
00872                                 if (result.get() != NULL)
00873                                         std::memcpy(newResult.get(), result.get(), resultCnt * sizeof(XMLCh));
00874                                 result.reset(newResult.release());
00875 
00876                                 //      Copy in new data
00877                                 std::memcpy(result.get() + resultCnt, tmpBuf, charsProduced * sizeof(XMLCh));
00878                                 resultCnt = newCnt;
00879                                 
00880                                 result[resultCnt] = 0;                  
00881                         }
00882                 }
00883         else
00884             break;
00885     }
00886 
00887     if (!result.get())
00888         {
00889                 //      No error, and no result: we probably processed a zero length
00890                 //      input, in which case we want a valid zero length output.
00891                 result.reset
00892         (
00893             (XMLCh*) manager->allocate(sizeof(XMLCh))//new XMLCh[1]
00894             , manager
00895         );
00896                 result[0] = '\0';
00897         }
00898         
00899         return result.release();
00900 }
00901 
00902 
00903 bool
00904 MacOSLCPTranscoder::transcode(           const   char* const    toTranscode
00905                                     ,       XMLCh* const    toFill
00906                                     , const XMLSize_t       maxChars
00907                                     , MemoryManager* const  manager)
00908 {
00909     // toFill must contain space for maxChars XMLCh characters + 1 (for terminating NULL).
00910 
00911     // Check for a couple of psycho corner cases
00912     if (!toTranscode || !maxChars || !*toTranscode)
00913     {
00914         toFill[0] = 0;
00915         return true;
00916     }
00917 
00918         //  Lock our mutex to gain exclusive access to the transcoder
00919         //  since the lcp transcoders are used globally.
00920         XMLMutexLock lock(&mMutex);
00921 
00922     //  Call the transcoder to do the work
00923     XMLSize_t srcLen = std::strlen(toTranscode);
00924     XMLSize_t bytesConsumed = 0;
00925     XMLSize_t charsProduced = mTranscoder->transcodeFrom((XMLByte*)toTranscode, srcLen,
00926                                             toFill, maxChars,
00927                                                                                         bytesConsumed,
00928                                                                                         NULL);
00929 
00930     //  Zero terminate the output string
00931     toFill[charsProduced] = L'\0';
00932     
00933     //  Return true if we consumed all of the characters
00934     return (bytesConsumed == srcLen);
00935 }
00936 
00937 
00938 bool
00939 MacOSLCPTranscoder::transcode(          const   XMLCh* const    toTranscode
00940                                     ,       char* const     toFill
00941                                     , const XMLSize_t       maxChars
00942                                     , MemoryManager* const  manager)
00943 {
00944     //  toFill must contain space for maxChars bytes + 1 (for terminating NULL).
00945 
00946     // Check for a couple of psycho corner cases
00947     if (!toTranscode || !maxChars || !*toTranscode)
00948     {
00949         toFill[0] = 0;
00950         return true;
00951     }
00952 
00953         //  Lock our mutex to gain exclusive access to the transcoder
00954         //  since the lcp transcoders are used globally.
00955         XMLMutexLock lock(&mMutex);
00956 
00957     //  Call the transcoder to do the work
00958     XMLSize_t srcLen = XMLString::stringLen(toTranscode);
00959     XMLSize_t charsConsumed = 0;
00960     XMLSize_t bytesProduced = mTranscoder->transcodeTo(toTranscode, srcLen,
00961                                             (XMLByte*)toFill, maxChars,
00962                                             charsConsumed,
00963                                             XMLTranscoder::UnRep_RepChar);
00964 
00965     //  Zero terminate the output string
00966     toFill[bytesProduced] = '\0';
00967     
00968     //  Return true if we consumed all of the characters
00969     return (charsConsumed == srcLen);
00970 }
00971 
00972 
00973 XERCES_CPP_NAMESPACE_END