GME  13
Win32TransService.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  * 
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  * 
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: Win32TransService.cpp 676954 2008-07-15 16:29:19Z dbertoni $
00020  */
00021 
00022 
00023 // ---------------------------------------------------------------------------
00024 //  Includes
00025 // ---------------------------------------------------------------------------
00026 #if HAVE_CONFIG_H
00027 #       include <config.h>
00028 #endif
00029 
00030 #include <xercesc/util/PlatformUtils.hpp>
00031 #include <xercesc/util/TranscodingException.hpp>
00032 #include <xercesc/util/XMLException.hpp>
00033 #include <xercesc/util/XMLString.hpp>
00034 #include <xercesc/util/XMLUniDefs.hpp>
00035 #include <xercesc/util/XMLUni.hpp>
00036 #include <xercesc/util/RefHashTableOf.hpp>
00037 #include "Win32TransService.hpp"
00038 
00039 XERCES_CPP_NAMESPACE_BEGIN
00040 
00041 
00042 // ---------------------------------------------------------------------------
00043 //  Local, const data
00044 // ---------------------------------------------------------------------------
00045 static const XMLCh gMyServiceId[] =
00046 {
00047     chLatin_W, chLatin_i, chLatin_n, chDigit_3, chDigit_2, chNull
00048 };
00049 
00050 
00051 #if !HAVE_WCSUPR
00052 void _wcsupr(LPWSTR str)
00053 {
00054     int nLen=XMLString::stringLen(str);
00055     ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, str, nLen, str, nLen);
00056 }
00057 #endif
00058 
00059 #if !HAVE_WCSLWR
00060 void _wcslwr(LPWSTR str)
00061 {
00062     int nLen=XMLString::stringLen(str);
00063     ::LCMapStringW( GetThreadLocale(), LCMAP_LOWERCASE, str, nLen, str, nLen);
00064 }
00065 #endif
00066 
00067 #if !HAVE_WCSNICMP
00068 int _wcsnicmp(LPCWSTR comp1, LPCWSTR comp2, unsigned int nLen)
00069 {
00070     unsigned int len = XMLString::stringLen( comp1);
00071     unsigned int otherLen = XMLString::stringLen( comp2);
00072     unsigned int countChar = 0;
00073     unsigned int maxChars;
00074     int          theResult = 0;
00075 
00076     // Determine at what string index the comparison stops.
00077     len = ( len > nLen ) ? nLen : len;
00078     otherLen = ( otherLen > nLen ) ? nLen : otherLen;
00079     maxChars = ( len > otherLen ) ? otherLen : len;
00080 
00081     // Handle situation when one argument or the other is NULL
00082     // by returning +/- string length of non-NULL argument (inferred
00083     // from XMLString::CompareNString).
00084 
00085     // Obs. Definition of stringLen(XMLCh*) implies NULL ptr and ptr
00086     // to Empty String are equivalent.  It handles NULL args, BTW.
00087 
00088     if ( !comp1 )
00089     {
00090         // Negative because null ptr (c1) less than string (c2).
00091         return ( 0 - otherLen );
00092     }
00093 
00094     if ( !comp2 )
00095     {
00096         // Positive because string (c1) still greater than null ptr (c2).
00097         return len;
00098     }
00099 
00100     // Copy const parameter strings (plus terminating nul) into locals.
00101     XMLCh* firstBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++len) * sizeof(XMLCh) );//new XMLCh[ ++len];
00102     XMLCh* secondBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++otherLen) * sizeof(XMLCh) );//new XMLCh[ ++otherLen];
00103     memcpy( firstBuf, comp1, len * sizeof(XMLCh));
00104     memcpy( secondBuf, comp2, otherLen * sizeof(XMLCh));
00105 
00106     // Then uppercase both strings, losing their case info.
00107     ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)firstBuf, len, (LPWSTR)firstBuf, len);
00108     ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)secondBuf, otherLen, (LPWSTR)secondBuf, otherLen);
00109 
00110     // Strings are equal until proven otherwise.
00111     while ( ( countChar < maxChars ) && ( !theResult ) )
00112     {
00113         theResult = (int)(firstBuf[countChar]) - (int)(secondBuf[countChar]);
00114         ++countChar;
00115     }
00116 
00117     XMLPlatformUtils::fgMemoryManager->deallocate(firstBuf);//delete [] firstBuf;
00118     XMLPlatformUtils::fgMemoryManager->deallocate(secondBuf);//delete [] secondBuf;
00119 
00120     return theResult;
00121 }
00122 #endif
00123 
00124 #if !HAVE_WCSICMP
00125 int _wcsicmp(LPCWSTR comp1, LPCWSTR comp2)
00126 {
00127     unsigned int len = XMLString::stringLen( comp1);
00128     unsigned int otherLen = XMLString::stringLen( comp2);
00129     // Must compare terminating NUL to return difference if one string is shorter than the other.
00130     unsigned int maxChars = ( len > otherLen ) ? otherLen : len;
00131     return _wcsnicmp(comp1, comp2, maxChars+1);
00132 }
00133 #endif
00134 
00135 // it's a local function (instead of a static function) so that we are not 
00136 // forced to include <windows.h> in the header
00137 bool isAlias(const   HKEY            encodingKey
00138              ,       char* const     aliasBuf = 0
00139              , const unsigned int    nameBufSz = 0)
00140 {
00141     unsigned long theType;
00142     unsigned long theSize = nameBufSz;
00143     return (::RegQueryValueExA
00144     (
00145         encodingKey
00146         , "AliasForCharset"
00147         , 0
00148         , &theType
00149         , (unsigned char*)aliasBuf
00150         , &theSize
00151     ) == ERROR_SUCCESS);
00152 }
00153 
00154 // ---------------------------------------------------------------------------
00155 //  This is the simple CPMapEntry class. It just contains an encoding name
00156 //  and a code page for that encoding.
00157 // ---------------------------------------------------------------------------
00158 class CPMapEntry : public XMemory
00159 {
00160 public :
00161     // -----------------------------------------------------------------------
00162     //  Constructors and Destructor
00163     // -----------------------------------------------------------------------
00164     CPMapEntry
00165     (
00166         const   XMLCh* const    encodingName
00167         , const unsigned int    ieId
00168         , MemoryManager*        manager
00169     );
00170 
00171     CPMapEntry
00172     (
00173         const   char* const     encodingName
00174         , const unsigned int    ieId
00175         , MemoryManager*        manager
00176     );
00177 
00178     ~CPMapEntry();
00179 
00180 
00181     // -----------------------------------------------------------------------
00182     //  Getter methods
00183     // -----------------------------------------------------------------------
00184     const XMLCh* getEncodingName() const;
00185     const XMLCh* getKey() const;
00186     unsigned int getIEEncoding() const;
00187 
00188 
00189 private :
00190     // -----------------------------------------------------------------------
00191     //  Unimplemented constructors and operators
00192     // -----------------------------------------------------------------------
00193     CPMapEntry();
00194     CPMapEntry(const CPMapEntry&);
00195     CPMapEntry& operator=(const CPMapEntry&);
00196 
00197 
00198     // -----------------------------------------------------------------------
00199     //  Private data members
00200     //
00201     //  fEncodingName
00202     //      This is the encoding name for the code page that this instance
00203     //      represents.
00204     //
00205     //  fIEId
00206     //      This is the code page id.
00207     // -----------------------------------------------------------------------
00208     XMLCh*          fEncodingName;
00209     unsigned int    fIEId;
00210     MemoryManager*  fManager;
00211 };
00212 
00213 // ---------------------------------------------------------------------------
00214 //  CPMapEntry: Constructors and Destructor
00215 // ---------------------------------------------------------------------------
00216 CPMapEntry::CPMapEntry( const   char* const     encodingName
00217                         , const unsigned int    ieId
00218                         , MemoryManager*        manager) :
00219     fEncodingName(0)
00220     , fIEId(ieId)
00221     , fManager(manager)
00222 {
00223     // Transcode the name to Unicode and store that copy
00224     int targetLen=::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, NULL, 0);
00225     if(targetLen!=0)
00226     {
00227         fEncodingName = (XMLCh*) fManager->allocate
00228         (
00229             (targetLen + 1) * sizeof(XMLCh)
00230         );//new XMLCh[targetLen + 1];
00231         ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, (LPWSTR)fEncodingName, targetLen);
00232         fEncodingName[targetLen] = 0;
00233 
00234         //
00235         //  Upper case it because we are using a hash table and need to be
00236         //  sure that we find all case combinations.
00237         //
00238         _wcsupr(fEncodingName);
00239   }
00240 }
00241 
00242 CPMapEntry::CPMapEntry( const   XMLCh* const    encodingName
00243                         , const unsigned int    ieId
00244                         , MemoryManager*        manager) :
00245 
00246     fEncodingName(0)
00247     , fIEId(ieId)
00248     , fManager(manager)
00249 {
00250     fEncodingName = XMLString::replicate(encodingName, fManager);
00251 
00252     //
00253     //  Upper case it because we are using a hash table and need to be
00254     //  sure that we find all case combinations.
00255     //
00256     _wcsupr(fEncodingName);
00257 }
00258 
00259 CPMapEntry::~CPMapEntry()
00260 {
00261     fManager->deallocate(fEncodingName);//delete [] fEncodingName;
00262 }
00263 
00264 
00265 // ---------------------------------------------------------------------------
00266 //  CPMapEntry: Getter methods
00267 // ---------------------------------------------------------------------------
00268 const XMLCh* CPMapEntry::getEncodingName() const
00269 {
00270     return fEncodingName;
00271 }
00272 
00273 unsigned int CPMapEntry::getIEEncoding() const
00274 {
00275     return fIEId;
00276 }
00277 
00278 
// Set by the Win32TransService constructor once the OS version has been
// queried, and read by getFlagsValue() when choosing conversion flags for
// UTF-8. Stays false until a service object has been constructed.
static bool onXPOrLater = false;
00280 
00281 
00282 //---------------------------------------------------------------------------
00283 //
00284 //  class Win32TransService Implementation ...
00285 //
00286 //---------------------------------------------------------------------------
00287 
00288 
00289 // ---------------------------------------------------------------------------
00290 //  Win32TransService: Constructors and Destructor
00291 // ---------------------------------------------------------------------------
00292 Win32TransService::Win32TransService(MemoryManager* manager) :
00293     fCPMap(NULL)
00294     , fManager(manager)
00295 {
00296     // Figure out if we are on XP or later and save that flag for later use.
00297     // We need this because of certain code page conversion calls.
00298     OSVERSIONINFO   OSVer;
00299     OSVer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
00300     ::GetVersionEx(&OSVer);
00301 
00302     if ((OSVer.dwPlatformId == VER_PLATFORM_WIN32_NT) &&
00303         ((OSVer.dwMajorVersion == 5) && (OSVer.dwMinorVersion > 0)))
00304     {
00305         onXPOrLater = true;
00306     }
00307 
00308     fCPMap = new RefHashTableOf<CPMapEntry>(109);
00309 
00310     //
00311     //  Open up the registry key that contains the info we want. Note that,
00312     //  if this key does not exist, then we just return. It will just mean
00313     //  that we don't have any support except for intrinsic encodings supported
00314     //  by the parser itself (and the LCP support of course.
00315     //
00316     HKEY charsetKey;
00317     if (::RegOpenKeyExA
00318     (
00319         HKEY_CLASSES_ROOT
00320         , "MIME\\Database\\Charset"
00321         , 0
00322         , KEY_READ
00323         , &charsetKey))
00324     {
00325         return;
00326     }
00327 
00328     //
00329     //  Read in the registry keys that hold the code page ids. Skip for now
00330     //  those entries which indicate that they are aliases for some other
00331     //  encodings. We'll come back and do a second round for those and look
00332     //  up the original name and get the code page id.
00333     //
00334     //  Note that we have to use A versions here so that this will run on
00335     //  98, and transcode the strings to Unicode.
00336     //
00337     const unsigned int nameBufSz = 1024;
00338     char nameBuf[nameBufSz + 1];
00339     unsigned int subIndex;
00340     unsigned long theSize;
00341     for (subIndex = 0;;++subIndex)
00342     {
00343         // Get the name of the next key
00344         theSize = nameBufSz;
00345         if (::RegEnumKeyExA
00346         (
00347             charsetKey
00348             , subIndex
00349             , nameBuf
00350             , &theSize
00351             , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS)
00352         {
00353             break;
00354         }
00355 
00356         // Open this subkey
00357         HKEY encodingKey;
00358         if (::RegOpenKeyExA
00359         (
00360             charsetKey
00361             , nameBuf
00362             , 0
00363             , KEY_READ
00364             , &encodingKey))
00365         {
00366             continue;
00367         }
00368 
00369         //
00370         //  Lts see if its an alias. If so, then ignore it in this first
00371         //  loop. Else, we'll add a new entry for this one.
00372         //
00373         if (!isAlias(encodingKey))
00374         {
00375             //
00376             //  Lets get the two values out of this key that we are
00377             //  interested in. There should be a code page entry and an
00378             //  IE entry.
00379             //
00380             //  The Codepage entry is the default code page for a computer using that charset
00381             //  while the InternetEncoding holds the code page that represents that charset
00382             //
00383             unsigned long theType;
00384             unsigned int CPId;
00385             unsigned int IEId;
00386 
00387             theSize = sizeof(unsigned int);
00388             if (::RegQueryValueExA
00389             (
00390                 encodingKey
00391                 , "Codepage"
00392                 , 0
00393                 , &theType
00394                 , (unsigned char*)&CPId
00395                 , &theSize) != ERROR_SUCCESS)
00396             {
00397                 ::RegCloseKey(encodingKey);
00398                 continue;
00399             }
00400 
00401             //
00402             //  If this is not a valid Id, and it might not be because its
00403             //  not loaded on this system, then don't take it.
00404             //
00405             if (::IsValidCodePage(CPId))
00406             {
00407                 theSize = sizeof(unsigned int);
00408                 if (::RegQueryValueExA
00409                 (
00410                     encodingKey
00411                     , "InternetEncoding"
00412                     , 0
00413                     , &theType
00414                     , (unsigned char*)&IEId
00415                     , &theSize) != ERROR_SUCCESS)
00416                 {
00417                     ::RegCloseKey(encodingKey);
00418                     continue;
00419                 }
00420 
00421                 CPMapEntry* newEntry = new (fManager) CPMapEntry(nameBuf, IEId, fManager);
00422                 fCPMap->put((void*)newEntry->getEncodingName(), newEntry);
00423             }
00424         }
00425 
00426         // And close the subkey handle
00427         ::RegCloseKey(encodingKey);
00428     }
00429 
00430     //
00431     //  Now loop one more time and this time we do just the aliases. For
00432     //  each one we find, we look up that name in the map we've already
00433     //  built and add a new entry with this new name and the same id
00434     //  values we stored for the original.
00435     //
00436     char aliasBuf[nameBufSz + 1];
00437     for (subIndex = 0;;++subIndex)
00438     {
00439         // Get the name of the next key
00440         theSize = nameBufSz;
00441         if (::RegEnumKeyExA
00442         (
00443             charsetKey
00444             , subIndex
00445             , nameBuf
00446             , &theSize
00447             , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS)
00448         {
00449             break;
00450         }
00451 
00452         // Open this subkey
00453         HKEY encodingKey;
00454         if (::RegOpenKeyExA
00455         (
00456             charsetKey
00457             , nameBuf
00458             , 0
00459             , KEY_READ
00460             , &encodingKey))
00461         {
00462             continue;
00463         }
00464 
00465         //
00466         //  If its an alias, look up the name in the map. If we find it,
00467         //  then construct a new one with the new name and the aliased
00468         //  ids.
00469         //
00470         if (isAlias(encodingKey, aliasBuf, nameBufSz))
00471         {
00472             int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, NULL, 0);
00473             if(targetLen!=0)
00474             {
00475                 XMLCh* uniAlias = (XMLCh*) fManager->allocate
00476                 (
00477                     (targetLen + 1) * sizeof(XMLCh)
00478                 );//new XMLCh[targetLen + 1];
00479                 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, (LPWSTR)uniAlias, targetLen);
00480                 uniAlias[targetLen] = 0;
00481                 _wcsupr(uniAlias);
00482 
00483                 // Look up the alias name
00484                 CPMapEntry* aliasedEntry = fCPMap->get(uniAlias);
00485                 if (aliasedEntry)
00486                 {
00487                     int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, NULL, 0);
00488                     if(targetLen!=0)
00489                     {
00490                         XMLCh* uniName = (XMLCh*) fManager->allocate
00491                         (
00492                             (targetLen + 1) * sizeof(XMLCh)
00493                         );//new XMLCh[targetLen + 1];
00494                         ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, (LPWSTR)uniName, targetLen);
00495                         uniName[targetLen] = 0;
00496                         _wcsupr(uniName);
00497 
00498                         //
00499                         //  If the name is actually different, then take it.
00500                         //  Otherwise, don't take it. They map aliases that are
00501                         //  just different case.
00502                         //
00503                                                 if (!XMLString::equals(uniName, aliasedEntry->getEncodingName()))
00504                         {
00505                             CPMapEntry* newEntry = new (fManager) CPMapEntry(uniName, aliasedEntry->getIEEncoding(), fManager);
00506                             fCPMap->put((void*)newEntry->getEncodingName(), newEntry);
00507                         }
00508 
00509                         fManager->deallocate(uniName);//delete [] uniName;
00510                     }
00511                 }
00512                 fManager->deallocate(uniAlias);//delete [] uniAlias;
00513             }
00514         }
00515 
00516         // And close the subkey handle
00517         ::RegCloseKey(encodingKey);
00518     }
00519 
00520     // And close the main key handle
00521     ::RegCloseKey(charsetKey);
00522 }
00523 
00524 Win32TransService::~Win32TransService()
00525 {
00526     delete fCPMap;
00527 }
00528 
00529 
00530 // ---------------------------------------------------------------------------
00531 //  Win32TransService: The virtual transcoding service API
00532 // ---------------------------------------------------------------------------
00533 int Win32TransService::compareIString(  const   XMLCh* const    comp1
00534                                         , const XMLCh* const    comp2)
00535 {
00536     return _wcsicmp(comp1, comp2);
00537 }
00538 
00539 
00540 int Win32TransService::compareNIString( const   XMLCh* const    comp1
00541                                         , const XMLCh* const    comp2
00542                                         , const XMLSize_t       maxChars)
00543 {
00544     return _wcsnicmp(comp1, comp2, maxChars);
00545 }
00546 
00547 
00548 const XMLCh* Win32TransService::getId() const
00549 {
00550     return gMyServiceId;
00551 }
00552 
00553 XMLLCPTranscoder* Win32TransService::makeNewLCPTranscoder(MemoryManager* manager)
00554 {
00555     // Just allocate a new LCP transcoder of our type
00556     return new (manager) Win32LCPTranscoder;
00557 }
00558 
00559 
00560 bool Win32TransService::supportsSrcOfs() const
00561 {
00562     //
00563     //  Since the only mechanism we have to translate XML text in this
00564     //  transcoder basically require us to do work that allows us to support
00565     //  source offsets, we might as well do it.
00566     //
00567     return true;
00568 }
00569 
00570 
00571 void Win32TransService::upperCase(XMLCh* const toUpperCase)
00572 {
00573     _wcsupr(toUpperCase);
00574 }
00575 
00576 void Win32TransService::lowerCase(XMLCh* const toLowerCase)
00577 {
00578     _wcslwr(toLowerCase);
00579 }
00580 
00581 XMLTranscoder*
00582 Win32TransService::makeNewXMLTranscoder(const   XMLCh* const            encodingName
00583                                         ,       XMLTransService::Codes& resValue
00584                                         , const XMLSize_t               blockSize
00585                                         ,       MemoryManager* const    manager)
00586 {
00587     const XMLSize_t upLen = 1024;
00588     XMLCh upEncoding[upLen + 1];
00589 
00590     //
00591     //  Get an upper cased copy of the encoding name, since we use a hash
00592     //  table and we store them all in upper case.
00593     //
00594     XMLString::copyNString(upEncoding, encodingName, upLen);
00595     _wcsupr(upEncoding);
00596 
00597     // Now to try to find this guy in the CP map
00598     CPMapEntry* theEntry = fCPMap->get(upEncoding);
00599 
00600     // If not found, then return a null pointer
00601     if (!theEntry)
00602     {
00603         resValue = XMLTransService::UnsupportedEncoding;
00604         return 0;
00605     }
00606 
00607     // We found it, so return a Win32 transcoder for this encoding
00608     return new (manager) Win32Transcoder
00609     (
00610         encodingName
00611         , theEntry->getIEEncoding()
00612         , blockSize
00613         , manager
00614     );
00615 }
00616 
00617 
00618 
00619 
00620 
00621 
00622 
00623 
00624 //---------------------------------------------------------------------------
00625 //
00626 //  class Win32Transcoder Implementation ...
00627 //
00628 //---------------------------------------------------------------------------
00629 
00630 
00631 inline DWORD
00632 getFlagsValue(
00633             UINT    idCP,
00634             DWORD   desiredFlags)
00635 {
00636     if (idCP == 50220 ||
00637         idCP == 50227 ||
00638         (idCP >= 57002 &&
00639          idCP <= 57011))
00640     {
00641         // These code pages do not support any
00642         // flag options.
00643         return 0;
00644     }
00645     else if (idCP == 65001)
00646     {
00647         // UTF-8 only supports MB_ERR_INVALID_CHARS on
00648         // versions of Windows since XP
00649         if (!onXPOrLater)
00650         {
00651             return 0;
00652         }
00653         else
00654         {
00655             return desiredFlags & MB_ERR_INVALID_CHARS ?
00656                         MB_ERR_INVALID_CHARS : 0;
00657         }
00658     }
00659     else
00660     {
00661         return desiredFlags;
00662     }
00663 }
00664 
00665 
00666 
00667 // ---------------------------------------------------------------------------
00668 //  Win32Transcoder: Constructors and Destructor
00669 // ---------------------------------------------------------------------------
00670 Win32Transcoder::Win32Transcoder(const  XMLCh* const   encodingName
00671                                 , const unsigned int   ieCP
00672                                 , const XMLSize_t      blockSize
00673                                 , MemoryManager* const manager) :
00674 
00675     XMLTranscoder(encodingName, blockSize, manager)
00676     , fIECP(ieCP)
00677     , fUsedDef(FALSE)
00678     , fPtrUsedDef(0)
00679     , fFromFlags(getFlagsValue(ieCP, MB_PRECOMPOSED | MB_ERR_INVALID_CHARS))
00680 #if defined(WC_NO_BEST_FIT_CHARS)
00681     , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS | WC_NO_BEST_FIT_CHARS))
00682 #else
00683     , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS))
00684 #endif
00685 {
00686     // Some code pages require that MultiByteToWideChar and WideCharToMultiByte
00687     // be passed 0 for their second parameters (dwFlags).  If that's the case,
00688     // it's also necessary to pass null pointers for the last two parameters
00689     // to WideCharToMultiByte.  This is apparently because it's impossible to
00690     // determine whether or not a substitution (replacement) character was used.
00691     if (fToFlags)
00692     {
00693         fPtrUsedDef = &fUsedDef;
00694     }
00695 }
00696 
00697 Win32Transcoder::~Win32Transcoder()
00698 {
00699 }
00700 
00701 
00702 // ---------------------------------------------------------------------------
00703 //  Win32Transcoder: The virtual transcoder API
00704 // ---------------------------------------------------------------------------
00705 XMLSize_t
00706 Win32Transcoder::transcodeFrom( const   XMLByte* const      srcData
00707                                 , const XMLSize_t           srcCount
00708                                 ,       XMLCh* const        toFill
00709                                 , const XMLSize_t           maxChars
00710                                 ,       XMLSize_t&          bytesEaten
00711                                 ,       unsigned char* const charSizes)
00712 {
00713     // Get temp pointers to the in and out buffers, and the chars sizes one
00714     XMLCh*          outPtr = toFill;
00715     const XMLByte*  inPtr  = srcData;
00716     unsigned char*  sizesPtr = charSizes;
00717 
00718     // Calc end pointers for each of them
00719     XMLCh*          outEnd = toFill + maxChars;
00720     const XMLByte*  inEnd  = srcData + srcCount;
00721 
00722     //
00723     //  Now loop until we either get our max chars, or cannot get a whole
00724     //  character from the input buffer.
00725     //
00726     bytesEaten = 0;
00727     while ((outPtr < outEnd) && (inPtr < inEnd))
00728     {
00729         //
00730         //  If we are looking at a leading byte of a multibyte sequence,
00731         //  then we are going to eat 2 bytes, else 1.
00732         //
00733         unsigned char toEat = ::IsDBCSLeadByteEx(fIECP, *inPtr) ?
00734                                     2 : 1;
00735 
00736         // Make sure a whole char is in the source
00737         if (inPtr + toEat > inEnd)
00738             break;
00739 
00740         // Try to translate this next char and check for an error
00741         const unsigned int converted = ::MultiByteToWideChar
00742         (
00743             fIECP
00744             , fFromFlags
00745             , (const char*)inPtr
00746             , toEat
00747             , outPtr
00748             , 1
00749         );
00750 
00751         if (converted != 1)
00752         {
00753             if (toEat == 1)
00754             {
00755                 XMLCh tmpBuf[17];
00756                 XMLString::binToText((unsigned int)(*inPtr), tmpBuf, 16, 16, getMemoryManager());
00757                 ThrowXMLwithMemMgr2
00758                 (
00759                     TranscodingException
00760                     , XMLExcepts::Trans_BadSrcCP
00761                     , tmpBuf
00762                     , getEncodingName()
00763                     , getMemoryManager()
00764                 );
00765             }
00766              else
00767             {
00768                 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
00769             }
00770         }
00771 
00772         // Update the char sizes array for this round
00773         *sizesPtr++ = toEat;
00774 
00775         // And update the bytes eaten count
00776         bytesEaten += toEat;
00777 
00778         // And update our in/out ptrs
00779         inPtr += toEat;
00780         outPtr++;
00781     }
00782 
00783     // Return the chars we output
00784     return (outPtr - toFill);
00785 }
00786 
00787 
00788 XMLSize_t
00789 Win32Transcoder::transcodeTo(const  XMLCh* const    srcData
00790                             , const XMLSize_t       srcCount
00791                             ,       XMLByte* const  toFill
00792                             , const XMLSize_t       maxBytes
00793                             ,       XMLSize_t&      charsEaten
00794                             , const UnRepOpts       options)
00795 {
00796     // Get pointers to the start and end of each buffer
00797     const XMLCh*    srcPtr = srcData;
00798     const XMLCh*    srcEnd = srcData + srcCount;
00799     XMLByte*        outPtr = toFill;
00800     XMLByte*        outEnd = toFill + maxBytes;
00801 
00802     //
00803     //  Now loop until we either get our max chars, or cannot get a whole
00804     //  character from the input buffer.
00805     //
00806     //  NOTE: We have to use a loop for this unfortunately because the
00807     //  conversion API is too dumb to tell us how many chars it converted if
00808     //  it couldn't do the whole source.
00809     //
00810     fUsedDef = FALSE;
00811     while ((outPtr < outEnd) && (srcPtr < srcEnd))
00812     {
00813         //
00814         //  Do one char and see if it made it.
00815         const int bytesStored = ::WideCharToMultiByte
00816         (
00817             fIECP
00818             , fToFlags
00819             , srcPtr
00820             , 1
00821             , (char*)outPtr
00822             , (int)(outEnd - outPtr)
00823             , 0
00824             , fPtrUsedDef
00825         );
00826 
00827         // If we didn't transcode anything, then we are done
00828         if (!bytesStored)
00829             break;
00830 
00831         //
00832         //  If the defaault char was used and the options indicate that
00833         //  this isn't allowed, then throw.
00834         //
00835         if (fUsedDef && (options == UnRep_Throw))
00836         {
00837             XMLCh tmpBuf[17];
00838             XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager());
00839             ThrowXMLwithMemMgr2
00840             (
00841                 TranscodingException
00842                 , XMLExcepts::Trans_Unrepresentable
00843                 , tmpBuf
00844                 , getEncodingName()
00845                 , getMemoryManager()
00846             );
00847         }
00848 
00849         // Update our pointers
00850         outPtr += bytesStored;
00851         srcPtr++;
00852     }
00853 
00854     // Update the chars eaten
00855     charsEaten = srcPtr - srcData;
00856 
00857     // And return the bytes we stored
00858     return outPtr - toFill;
00859 }
00860 
00861 
00862 bool Win32Transcoder::canTranscodeTo(const unsigned int toCheck)
00863 {
00864     //
00865     //  If the passed value is really a surrogate embedded together, then
00866     //  we need to break it out into its two chars. Else just one.
00867     //
00868     XMLCh           srcBuf[2];
00869     unsigned int    srcCount = 1;
00870     if (toCheck & 0xFFFF0000)
00871     {
00872         srcBuf[0] = XMLCh((toCheck >> 10) + 0xD800);
00873         srcBuf[1] = XMLCh((toCheck & 0x3FF) + 0xDC00);
00874         srcCount++;
00875     }
00876      else
00877     {
00878         srcBuf[0] = XMLCh(toCheck);
00879     }
00880 
00881     //
00882     //  Use a local temp buffer that would hold any sane multi-byte char
00883     //  sequence and try to transcode this guy into it.
00884     //
00885     char tmpBuf[64];
00886 
00887     fUsedDef = FALSE;
00888 
00889     const unsigned int bytesStored = ::WideCharToMultiByte
00890     (
00891         fIECP
00892         , fToFlags
00893         , srcBuf
00894         , srcCount
00895         , tmpBuf
00896         , 64
00897         , 0
00898         , fPtrUsedDef
00899     );
00900 
00901     if (!bytesStored || fUsedDef)
00902         return false;
00903 
00904     return true;
00905 }
00906 
00907 
00908 
00909 
00910 //---------------------------------------------------------------------------
00911 //
00912 //  class Win32LCPTranscoder Implementation ...
00913 //
00914 //---------------------------------------------------------------------------
00915 
00916 // ---------------------------------------------------------------------------
00917 //  Win32LCPTranscoder: Constructors and Destructor
00918 // ---------------------------------------------------------------------------
00919 Win32LCPTranscoder::Win32LCPTranscoder()
00920 {
00921 }
00922 
00923 Win32LCPTranscoder::~Win32LCPTranscoder()
00924 {
00925 }
00926 
00927 
00928 // ---------------------------------------------------------------------------
00929 //  Win32LCPTranscoder: Implementation of the virtual transcoder interface
00930 // ---------------------------------------------------------------------------
00931 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const char* const srcText
00932                                                   , MemoryManager* const /*manager*/)
00933 {
00934     if (!srcText)
00935         return 0;
00936 
00937     return ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, srcText, -1, NULL, 0);
00938 }
00939 
00940 
00941 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const XMLCh* const srcText
00942                                                   , MemoryManager* const /*manager*/)
00943 {
00944     if (!srcText)
00945         return 0;
00946 
00947     return ::WideCharToMultiByte(CP_ACP, 0, srcText, -1, NULL, 0, NULL, NULL);
00948 }
00949 
00950 char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode,
00951                                     MemoryManager* const manager)
00952 {
00953     if (!toTranscode)
00954         return 0;
00955 
00956     char* retVal = 0;
00957     if (*toTranscode)
00958     {
00959         // Calc the needed size
00960         const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager);
00961 
00962         // Allocate a buffer of that size plus one for the null and transcode
00963         retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char)); //new char[neededLen + 1];
00964         ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, retVal, (int)neededLen+1, NULL, NULL);
00965 
00966         // And cap it off anyway just to make sure
00967         retVal[neededLen] = 0;
00968     }
00969      else
00970     {
00971         retVal = (char*) manager->allocate(sizeof(char)); //new char[1];
00972         retVal[0] = 0;
00973     }
00974     return retVal;
00975 }
00976 
00977 XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode,
00978                                      MemoryManager* const manager)
00979 {
00980     if (!toTranscode)
00981         return 0;
00982 
00983     XMLCh* retVal = 0;
00984     if (*toTranscode)
00985     {
00986         // Calculate the buffer size required
00987         const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager);
00988         if (neededLen == 0)
00989         {
00990             retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1];
00991             retVal[0] = 0;
00992             return retVal;
00993         }
00994 
00995         // Allocate a buffer of that size plus one for the null and transcode
00996         retVal = (XMLCh*) manager->allocate((neededLen + 1) * sizeof(XMLCh)); //new XMLCh[neededLen + 1];
00997         ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)retVal, (int)neededLen + 1);
00998 
00999         // Cap it off just to make sure. We are so paranoid!
01000         retVal[neededLen] = 0;
01001     }
01002      else
01003     {
01004         retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1];
01005         retVal[0] = 0;
01006     }
01007     return retVal;
01008 }
01009 
01010 
01011 bool Win32LCPTranscoder::transcode( const   char* const     toTranscode
01012                                     ,       XMLCh* const    toFill
01013                                     , const XMLSize_t       maxChars
01014                                     , MemoryManager* const  /*manager*/)
01015 {
01016     // Check for a couple of psycho corner cases
01017     if (!toTranscode || !maxChars)
01018     {
01019         toFill[0] = 0;
01020         return true;
01021     }
01022 
01023     if (!*toTranscode)
01024     {
01025         toFill[0] = 0;
01026         return true;
01027     }
01028 
01029     // This one has a fixed size output, so try it and if it fails it fails
01030     if ( 0 == ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)toFill, (int)(maxChars + 1)) )
01031         return false;
01032     return true;
01033 }
01034 
01035 
01036 bool Win32LCPTranscoder::transcode( const   XMLCh* const    toTranscode
01037                                     ,       char* const     toFill
01038                                     , const XMLSize_t       maxBytes
01039                                     , MemoryManager* const  /*manager*/)
01040 {
01041     // Watch for a couple of pyscho corner cases
01042     if (!toTranscode || !maxBytes)
01043     {
01044         toFill[0] = 0;
01045         return true;
01046     }
01047 
01048     if (!*toTranscode)
01049     {
01050         toFill[0] = 0;
01051         return true;
01052     }
01053 
01054     // This one has a fixed size output, so try it and if it fails it fails
01055     if ( 0 == ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, toFill, (int)(maxBytes + 1), NULL, NULL) )
01056         return false;
01057 
01058     // Cap it off just in case
01059     toFill[maxBytes] = 0;
01060     return true;
01061 }
01062 
01063 
01064 XERCES_CPP_NAMESPACE_END