GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: Win32TransService.cpp 676954 2008-07-15 16:29:19Z dbertoni $ 00020 */ 00021 00022 00023 // --------------------------------------------------------------------------- 00024 // Includes 00025 // --------------------------------------------------------------------------- 00026 #if HAVE_CONFIG_H 00027 # include <config.h> 00028 #endif 00029 00030 #include <xercesc/util/PlatformUtils.hpp> 00031 #include <xercesc/util/TranscodingException.hpp> 00032 #include <xercesc/util/XMLException.hpp> 00033 #include <xercesc/util/XMLString.hpp> 00034 #include <xercesc/util/XMLUniDefs.hpp> 00035 #include <xercesc/util/XMLUni.hpp> 00036 #include <xercesc/util/RefHashTableOf.hpp> 00037 #include "Win32TransService.hpp" 00038 00039 XERCES_CPP_NAMESPACE_BEGIN 00040 00041 00042 // --------------------------------------------------------------------------- 00043 // Local, const data 00044 // --------------------------------------------------------------------------- 00045 static const XMLCh gMyServiceId[] = 00046 { 00047 chLatin_W, chLatin_i, chLatin_n, chDigit_3, chDigit_2, chNull 00048 }; 00049 00050 00051 #if !HAVE_WCSUPR 00052 void _wcsupr(LPWSTR str) 00053 { 00054 int nLen=XMLString::stringLen(str); 00055 ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, str, nLen, str, nLen); 00056 } 00057 #endif 00058 00059 #if !HAVE_WCSLWR 00060 void _wcslwr(LPWSTR str) 00061 { 00062 int nLen=XMLString::stringLen(str); 00063 ::LCMapStringW( GetThreadLocale(), LCMAP_LOWERCASE, str, nLen, str, nLen); 00064 } 00065 #endif 00066 00067 #if !HAVE_WCSNICMP 00068 int _wcsnicmp(LPCWSTR comp1, LPCWSTR comp2, unsigned int nLen) 00069 { 00070 unsigned int len = XMLString::stringLen( comp1); 00071 unsigned int otherLen = XMLString::stringLen( comp2); 00072 unsigned int countChar = 0; 00073 unsigned int maxChars; 00074 int theResult = 0; 00075 00076 // Determine at what string index the comparison stops. 00077 len = ( len > nLen ) ? nLen : len; 00078 otherLen = ( otherLen > nLen ) ? nLen : otherLen; 00079 maxChars = ( len > otherLen ) ? otherLen : len; 00080 00081 // Handle situation when one argument or the other is NULL 00082 // by returning +/- string length of non-NULL argument (inferred 00083 // from XMLString::CompareNString). 00084 00085 // Obs. Definition of stringLen(XMLCh*) implies NULL ptr and ptr 00086 // to Empty String are equivalent. It handles NULL args, BTW. 00087 00088 if ( !comp1 ) 00089 { 00090 // Negative because null ptr (c1) less than string (c2). 00091 return ( 0 - otherLen ); 00092 } 00093 00094 if ( !comp2 ) 00095 { 00096 // Positive because string (c1) still greater than null ptr (c2). 00097 return len; 00098 } 00099 00100 // Copy const parameter strings (plus terminating nul) into locals. 00101 XMLCh* firstBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++len) * sizeof(XMLCh) );//new XMLCh[ ++len]; 00102 XMLCh* secondBuf = (XMLCh*) XMLPlatformUtils::fgMemoryManager->allocate( (++otherLen) * sizeof(XMLCh) );//new XMLCh[ ++otherLen]; 00103 memcpy( firstBuf, comp1, len * sizeof(XMLCh)); 00104 memcpy( secondBuf, comp2, otherLen * sizeof(XMLCh)); 00105 00106 // Then uppercase both strings, losing their case info. 00107 ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)firstBuf, len, (LPWSTR)firstBuf, len); 00108 ::LCMapStringW( GetThreadLocale(), LCMAP_UPPERCASE, (LPWSTR)secondBuf, otherLen, (LPWSTR)secondBuf, otherLen); 00109 00110 // Strings are equal until proven otherwise. 00111 while ( ( countChar < maxChars ) && ( !theResult ) ) 00112 { 00113 theResult = (int)(firstBuf[countChar]) - (int)(secondBuf[countChar]); 00114 ++countChar; 00115 } 00116 00117 XMLPlatformUtils::fgMemoryManager->deallocate(firstBuf);//delete [] firstBuf; 00118 XMLPlatformUtils::fgMemoryManager->deallocate(secondBuf);//delete [] secondBuf; 00119 00120 return theResult; 00121 } 00122 #endif 00123 00124 #if !HAVE_WCSICMP 00125 int _wcsicmp(LPCWSTR comp1, LPCWSTR comp2) 00126 { 00127 unsigned int len = XMLString::stringLen( comp1); 00128 unsigned int otherLen = XMLString::stringLen( comp2); 00129 // Must compare terminating NUL to return difference if one string is shorter than the other. 00130 unsigned int maxChars = ( len > otherLen ) ? otherLen : len; 00131 return _wcsnicmp(comp1, comp2, maxChars+1); 00132 } 00133 #endif 00134 00135 // it's a local function (instead of a static function) so that we are not 00136 // forced to include <windows.h> in the header 00137 bool isAlias(const HKEY encodingKey 00138 , char* const aliasBuf = 0 00139 , const unsigned int nameBufSz = 0) 00140 { 00141 unsigned long theType; 00142 unsigned long theSize = nameBufSz; 00143 return (::RegQueryValueExA 00144 ( 00145 encodingKey 00146 , "AliasForCharset" 00147 , 0 00148 , &theType 00149 , (unsigned char*)aliasBuf 00150 , &theSize 00151 ) == ERROR_SUCCESS); 00152 } 00153 00154 // --------------------------------------------------------------------------- 00155 // This is the simple CPMapEntry class. It just contains an encoding name 00156 // and a code page for that encoding. 00157 // --------------------------------------------------------------------------- 00158 class CPMapEntry : public XMemory 00159 { 00160 public : 00161 // ----------------------------------------------------------------------- 00162 // Constructors and Destructor 00163 // ----------------------------------------------------------------------- 00164 CPMapEntry 00165 ( 00166 const XMLCh* const encodingName 00167 , const unsigned int ieId 00168 , MemoryManager* manager 00169 ); 00170 00171 CPMapEntry 00172 ( 00173 const char* const encodingName 00174 , const unsigned int ieId 00175 , MemoryManager* manager 00176 ); 00177 00178 ~CPMapEntry(); 00179 00180 00181 // ----------------------------------------------------------------------- 00182 // Getter methods 00183 // ----------------------------------------------------------------------- 00184 const XMLCh* getEncodingName() const; 00185 const XMLCh* getKey() const; 00186 unsigned int getIEEncoding() const; 00187 00188 00189 private : 00190 // ----------------------------------------------------------------------- 00191 // Unimplemented constructors and operators 00192 // ----------------------------------------------------------------------- 00193 CPMapEntry(); 00194 CPMapEntry(const CPMapEntry&); 00195 CPMapEntry& operator=(const CPMapEntry&); 00196 00197 00198 // ----------------------------------------------------------------------- 00199 // Private data members 00200 // 00201 // fEncodingName 00202 // This is the encoding name for the code page that this instance 00203 // represents. 00204 // 00205 // fIEId 00206 // This is the code page id. 00207 // ----------------------------------------------------------------------- 00208 XMLCh* fEncodingName; 00209 unsigned int fIEId; 00210 MemoryManager* fManager; 00211 }; 00212 00213 // --------------------------------------------------------------------------- 00214 // CPMapEntry: Constructors and Destructor 00215 // --------------------------------------------------------------------------- 00216 CPMapEntry::CPMapEntry( const char* const encodingName 00217 , const unsigned int ieId 00218 , MemoryManager* manager) : 00219 fEncodingName(0) 00220 , fIEId(ieId) 00221 , fManager(manager) 00222 { 00223 // Transcode the name to Unicode and store that copy 00224 int targetLen=::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, NULL, 0); 00225 if(targetLen!=0) 00226 { 00227 fEncodingName = (XMLCh*) fManager->allocate 00228 ( 00229 (targetLen + 1) * sizeof(XMLCh) 00230 );//new XMLCh[targetLen + 1]; 00231 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, encodingName, -1, (LPWSTR)fEncodingName, targetLen); 00232 fEncodingName[targetLen] = 0; 00233 00234 // 00235 // Upper case it because we are using a hash table and need to be 00236 // sure that we find all case combinations. 00237 // 00238 _wcsupr(fEncodingName); 00239 } 00240 } 00241 00242 CPMapEntry::CPMapEntry( const XMLCh* const encodingName 00243 , const unsigned int ieId 00244 , MemoryManager* manager) : 00245 00246 fEncodingName(0) 00247 , fIEId(ieId) 00248 , fManager(manager) 00249 { 00250 fEncodingName = XMLString::replicate(encodingName, fManager); 00251 00252 // 00253 // Upper case it because we are using a hash table and need to be 00254 // sure that we find all case combinations. 00255 // 00256 _wcsupr(fEncodingName); 00257 } 00258 00259 CPMapEntry::~CPMapEntry() 00260 { 00261 fManager->deallocate(fEncodingName);//delete [] fEncodingName; 00262 } 00263 00264 00265 // --------------------------------------------------------------------------- 00266 // CPMapEntry: Getter methods 00267 // --------------------------------------------------------------------------- 00268 const XMLCh* CPMapEntry::getEncodingName() const 00269 { 00270 return fEncodingName; 00271 } 00272 00273 unsigned int CPMapEntry::getIEEncoding() const 00274 { 00275 return fIEId; 00276 } 00277 00278 00279 static bool onXPOrLater = false; 00280 00281 00282 //--------------------------------------------------------------------------- 00283 // 00284 // class Win32TransService Implementation ... 00285 // 00286 //--------------------------------------------------------------------------- 00287 00288 00289 // --------------------------------------------------------------------------- 00290 // Win32TransService: Constructors and Destructor 00291 // --------------------------------------------------------------------------- 00292 Win32TransService::Win32TransService(MemoryManager* manager) : 00293 fCPMap(NULL) 00294 , fManager(manager) 00295 { 00296 // Figure out if we are on XP or later and save that flag for later use. 00297 // We need this because of certain code page conversion calls. 00298 OSVERSIONINFO OSVer; 00299 OSVer.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); 00300 ::GetVersionEx(&OSVer); 00301 00302 if ((OSVer.dwPlatformId == VER_PLATFORM_WIN32_NT) && 00303 ((OSVer.dwMajorVersion == 5) && (OSVer.dwMinorVersion > 0))) 00304 { 00305 onXPOrLater = true; 00306 } 00307 00308 fCPMap = new RefHashTableOf<CPMapEntry>(109); 00309 00310 // 00311 // Open up the registry key that contains the info we want. Note that, 00312 // if this key does not exist, then we just return. It will just mean 00313 // that we don't have any support except for intrinsic encodings supported 00314 // by the parser itself (and the LCP support of course. 00315 // 00316 HKEY charsetKey; 00317 if (::RegOpenKeyExA 00318 ( 00319 HKEY_CLASSES_ROOT 00320 , "MIME\\Database\\Charset" 00321 , 0 00322 , KEY_READ 00323 , &charsetKey)) 00324 { 00325 return; 00326 } 00327 00328 // 00329 // Read in the registry keys that hold the code page ids. Skip for now 00330 // those entries which indicate that they are aliases for some other 00331 // encodings. We'll come back and do a second round for those and look 00332 // up the original name and get the code page id. 00333 // 00334 // Note that we have to use A versions here so that this will run on 00335 // 98, and transcode the strings to Unicode. 00336 // 00337 const unsigned int nameBufSz = 1024; 00338 char nameBuf[nameBufSz + 1]; 00339 unsigned int subIndex; 00340 unsigned long theSize; 00341 for (subIndex = 0;;++subIndex) 00342 { 00343 // Get the name of the next key 00344 theSize = nameBufSz; 00345 if (::RegEnumKeyExA 00346 ( 00347 charsetKey 00348 , subIndex 00349 , nameBuf 00350 , &theSize 00351 , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS) 00352 { 00353 break; 00354 } 00355 00356 // Open this subkey 00357 HKEY encodingKey; 00358 if (::RegOpenKeyExA 00359 ( 00360 charsetKey 00361 , nameBuf 00362 , 0 00363 , KEY_READ 00364 , &encodingKey)) 00365 { 00366 continue; 00367 } 00368 00369 // 00370 // Lts see if its an alias. If so, then ignore it in this first 00371 // loop. Else, we'll add a new entry for this one. 00372 // 00373 if (!isAlias(encodingKey)) 00374 { 00375 // 00376 // Lets get the two values out of this key that we are 00377 // interested in. There should be a code page entry and an 00378 // IE entry. 00379 // 00380 // The Codepage entry is the default code page for a computer using that charset 00381 // while the InternetEncoding holds the code page that represents that charset 00382 // 00383 unsigned long theType; 00384 unsigned int CPId; 00385 unsigned int IEId; 00386 00387 theSize = sizeof(unsigned int); 00388 if (::RegQueryValueExA 00389 ( 00390 encodingKey 00391 , "Codepage" 00392 , 0 00393 , &theType 00394 , (unsigned char*)&CPId 00395 , &theSize) != ERROR_SUCCESS) 00396 { 00397 ::RegCloseKey(encodingKey); 00398 continue; 00399 } 00400 00401 // 00402 // If this is not a valid Id, and it might not be because its 00403 // not loaded on this system, then don't take it. 00404 // 00405 if (::IsValidCodePage(CPId)) 00406 { 00407 theSize = sizeof(unsigned int); 00408 if (::RegQueryValueExA 00409 ( 00410 encodingKey 00411 , "InternetEncoding" 00412 , 0 00413 , &theType 00414 , (unsigned char*)&IEId 00415 , &theSize) != ERROR_SUCCESS) 00416 { 00417 ::RegCloseKey(encodingKey); 00418 continue; 00419 } 00420 00421 CPMapEntry* newEntry = new (fManager) CPMapEntry(nameBuf, IEId, fManager); 00422 fCPMap->put((void*)newEntry->getEncodingName(), newEntry); 00423 } 00424 } 00425 00426 // And close the subkey handle 00427 ::RegCloseKey(encodingKey); 00428 } 00429 00430 // 00431 // Now loop one more time and this time we do just the aliases. For 00432 // each one we find, we look up that name in the map we've already 00433 // built and add a new entry with this new name and the same id 00434 // values we stored for the original. 00435 // 00436 char aliasBuf[nameBufSz + 1]; 00437 for (subIndex = 0;;++subIndex) 00438 { 00439 // Get the name of the next key 00440 theSize = nameBufSz; 00441 if (::RegEnumKeyExA 00442 ( 00443 charsetKey 00444 , subIndex 00445 , nameBuf 00446 , &theSize 00447 , 0, 0, 0, 0) == ERROR_NO_MORE_ITEMS) 00448 { 00449 break; 00450 } 00451 00452 // Open this subkey 00453 HKEY encodingKey; 00454 if (::RegOpenKeyExA 00455 ( 00456 charsetKey 00457 , nameBuf 00458 , 0 00459 , KEY_READ 00460 , &encodingKey)) 00461 { 00462 continue; 00463 } 00464 00465 // 00466 // If its an alias, look up the name in the map. If we find it, 00467 // then construct a new one with the new name and the aliased 00468 // ids. 00469 // 00470 if (isAlias(encodingKey, aliasBuf, nameBufSz)) 00471 { 00472 int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, NULL, 0); 00473 if(targetLen!=0) 00474 { 00475 XMLCh* uniAlias = (XMLCh*) fManager->allocate 00476 ( 00477 (targetLen + 1) * sizeof(XMLCh) 00478 );//new XMLCh[targetLen + 1]; 00479 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, aliasBuf, -1, (LPWSTR)uniAlias, targetLen); 00480 uniAlias[targetLen] = 0; 00481 _wcsupr(uniAlias); 00482 00483 // Look up the alias name 00484 CPMapEntry* aliasedEntry = fCPMap->get(uniAlias); 00485 if (aliasedEntry) 00486 { 00487 int targetLen = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, NULL, 0); 00488 if(targetLen!=0) 00489 { 00490 XMLCh* uniName = (XMLCh*) fManager->allocate 00491 ( 00492 (targetLen + 1) * sizeof(XMLCh) 00493 );//new XMLCh[targetLen + 1]; 00494 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, nameBuf, -1, (LPWSTR)uniName, targetLen); 00495 uniName[targetLen] = 0; 00496 _wcsupr(uniName); 00497 00498 // 00499 // If the name is actually different, then take it. 00500 // Otherwise, don't take it. They map aliases that are 00501 // just different case. 00502 // 00503 if (!XMLString::equals(uniName, aliasedEntry->getEncodingName())) 00504 { 00505 CPMapEntry* newEntry = new (fManager) CPMapEntry(uniName, aliasedEntry->getIEEncoding(), fManager); 00506 fCPMap->put((void*)newEntry->getEncodingName(), newEntry); 00507 } 00508 00509 fManager->deallocate(uniName);//delete [] uniName; 00510 } 00511 } 00512 fManager->deallocate(uniAlias);//delete [] uniAlias; 00513 } 00514 } 00515 00516 // And close the subkey handle 00517 ::RegCloseKey(encodingKey); 00518 } 00519 00520 // And close the main key handle 00521 ::RegCloseKey(charsetKey); 00522 } 00523 00524 Win32TransService::~Win32TransService() 00525 { 00526 delete fCPMap; 00527 } 00528 00529 00530 // --------------------------------------------------------------------------- 00531 // Win32TransService: The virtual transcoding service API 00532 // --------------------------------------------------------------------------- 00533 int Win32TransService::compareIString( const XMLCh* const comp1 00534 , const XMLCh* const comp2) 00535 { 00536 return _wcsicmp(comp1, comp2); 00537 } 00538 00539 00540 int Win32TransService::compareNIString( const XMLCh* const comp1 00541 , const XMLCh* const comp2 00542 , const XMLSize_t maxChars) 00543 { 00544 return _wcsnicmp(comp1, comp2, maxChars); 00545 } 00546 00547 00548 const XMLCh* Win32TransService::getId() const 00549 { 00550 return gMyServiceId; 00551 } 00552 00553 XMLLCPTranscoder* Win32TransService::makeNewLCPTranscoder(MemoryManager* manager) 00554 { 00555 // Just allocate a new LCP transcoder of our type 00556 return new (manager) Win32LCPTranscoder; 00557 } 00558 00559 00560 bool Win32TransService::supportsSrcOfs() const 00561 { 00562 // 00563 // Since the only mechanism we have to translate XML text in this 00564 // transcoder basically require us to do work that allows us to support 00565 // source offsets, we might as well do it. 00566 // 00567 return true; 00568 } 00569 00570 00571 void Win32TransService::upperCase(XMLCh* const toUpperCase) 00572 { 00573 _wcsupr(toUpperCase); 00574 } 00575 00576 void Win32TransService::lowerCase(XMLCh* const toLowerCase) 00577 { 00578 _wcslwr(toLowerCase); 00579 } 00580 00581 XMLTranscoder* 00582 Win32TransService::makeNewXMLTranscoder(const XMLCh* const encodingName 00583 , XMLTransService::Codes& resValue 00584 , const XMLSize_t blockSize 00585 , MemoryManager* const manager) 00586 { 00587 const XMLSize_t upLen = 1024; 00588 XMLCh upEncoding[upLen + 1]; 00589 00590 // 00591 // Get an upper cased copy of the encoding name, since we use a hash 00592 // table and we store them all in upper case. 00593 // 00594 XMLString::copyNString(upEncoding, encodingName, upLen); 00595 _wcsupr(upEncoding); 00596 00597 // Now to try to find this guy in the CP map 00598 CPMapEntry* theEntry = fCPMap->get(upEncoding); 00599 00600 // If not found, then return a null pointer 00601 if (!theEntry) 00602 { 00603 resValue = XMLTransService::UnsupportedEncoding; 00604 return 0; 00605 } 00606 00607 // We found it, so return a Win32 transcoder for this encoding 00608 return new (manager) Win32Transcoder 00609 ( 00610 encodingName 00611 , theEntry->getIEEncoding() 00612 , blockSize 00613 , manager 00614 ); 00615 } 00616 00617 00618 00619 00620 00621 00622 00623 00624 //--------------------------------------------------------------------------- 00625 // 00626 // class Win32Transcoder Implementation ... 00627 // 00628 //--------------------------------------------------------------------------- 00629 00630 00631 inline DWORD 00632 getFlagsValue( 00633 UINT idCP, 00634 DWORD desiredFlags) 00635 { 00636 if (idCP == 50220 || 00637 idCP == 50227 || 00638 (idCP >= 57002 && 00639 idCP <= 57011)) 00640 { 00641 // These code pages do not support any 00642 // flag options. 00643 return 0; 00644 } 00645 else if (idCP == 65001) 00646 { 00647 // UTF-8 only supports MB_ERR_INVALID_CHARS on 00648 // versions of Windows since XP 00649 if (!onXPOrLater) 00650 { 00651 return 0; 00652 } 00653 else 00654 { 00655 return desiredFlags & MB_ERR_INVALID_CHARS ? 00656 MB_ERR_INVALID_CHARS : 0; 00657 } 00658 } 00659 else 00660 { 00661 return desiredFlags; 00662 } 00663 } 00664 00665 00666 00667 // --------------------------------------------------------------------------- 00668 // Win32Transcoder: Constructors and Destructor 00669 // --------------------------------------------------------------------------- 00670 Win32Transcoder::Win32Transcoder(const XMLCh* const encodingName 00671 , const unsigned int ieCP 00672 , const XMLSize_t blockSize 00673 , MemoryManager* const manager) : 00674 00675 XMLTranscoder(encodingName, blockSize, manager) 00676 , fIECP(ieCP) 00677 , fUsedDef(FALSE) 00678 , fPtrUsedDef(0) 00679 , fFromFlags(getFlagsValue(ieCP, MB_PRECOMPOSED | MB_ERR_INVALID_CHARS)) 00680 #if defined(WC_NO_BEST_FIT_CHARS) 00681 , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS | WC_NO_BEST_FIT_CHARS)) 00682 #else 00683 , fToFlags(getFlagsValue(ieCP, WC_COMPOSITECHECK | WC_SEPCHARS)) 00684 #endif 00685 { 00686 // Some code pages require that MultiByteToWideChar and WideCharToMultiByte 00687 // be passed 0 for their second parameters (dwFlags). If that's the case, 00688 // it's also necessary to pass null pointers for the last two parameters 00689 // to WideCharToMultiByte. This is apparently because it's impossible to 00690 // determine whether or not a substitution (replacement) character was used. 00691 if (fToFlags) 00692 { 00693 fPtrUsedDef = &fUsedDef; 00694 } 00695 } 00696 00697 Win32Transcoder::~Win32Transcoder() 00698 { 00699 } 00700 00701 00702 // --------------------------------------------------------------------------- 00703 // Win32Transcoder: The virtual transcoder API 00704 // --------------------------------------------------------------------------- 00705 XMLSize_t 00706 Win32Transcoder::transcodeFrom( const XMLByte* const srcData 00707 , const XMLSize_t srcCount 00708 , XMLCh* const toFill 00709 , const XMLSize_t maxChars 00710 , XMLSize_t& bytesEaten 00711 , unsigned char* const charSizes) 00712 { 00713 // Get temp pointers to the in and out buffers, and the chars sizes one 00714 XMLCh* outPtr = toFill; 00715 const XMLByte* inPtr = srcData; 00716 unsigned char* sizesPtr = charSizes; 00717 00718 // Calc end pointers for each of them 00719 XMLCh* outEnd = toFill + maxChars; 00720 const XMLByte* inEnd = srcData + srcCount; 00721 00722 // 00723 // Now loop until we either get our max chars, or cannot get a whole 00724 // character from the input buffer. 00725 // 00726 bytesEaten = 0; 00727 while ((outPtr < outEnd) && (inPtr < inEnd)) 00728 { 00729 // 00730 // If we are looking at a leading byte of a multibyte sequence, 00731 // then we are going to eat 2 bytes, else 1. 00732 // 00733 unsigned char toEat = ::IsDBCSLeadByteEx(fIECP, *inPtr) ? 00734 2 : 1; 00735 00736 // Make sure a whole char is in the source 00737 if (inPtr + toEat > inEnd) 00738 break; 00739 00740 // Try to translate this next char and check for an error 00741 const unsigned int converted = ::MultiByteToWideChar 00742 ( 00743 fIECP 00744 , fFromFlags 00745 , (const char*)inPtr 00746 , toEat 00747 , outPtr 00748 , 1 00749 ); 00750 00751 if (converted != 1) 00752 { 00753 if (toEat == 1) 00754 { 00755 XMLCh tmpBuf[17]; 00756 XMLString::binToText((unsigned int)(*inPtr), tmpBuf, 16, 16, getMemoryManager()); 00757 ThrowXMLwithMemMgr2 00758 ( 00759 TranscodingException 00760 , XMLExcepts::Trans_BadSrcCP 00761 , tmpBuf 00762 , getEncodingName() 00763 , getMemoryManager() 00764 ); 00765 } 00766 else 00767 { 00768 ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager()); 00769 } 00770 } 00771 00772 // Update the char sizes array for this round 00773 *sizesPtr++ = toEat; 00774 00775 // And update the bytes eaten count 00776 bytesEaten += toEat; 00777 00778 // And update our in/out ptrs 00779 inPtr += toEat; 00780 outPtr++; 00781 } 00782 00783 // Return the chars we output 00784 return (outPtr - toFill); 00785 } 00786 00787 00788 XMLSize_t 00789 Win32Transcoder::transcodeTo(const XMLCh* const srcData 00790 , const XMLSize_t srcCount 00791 , XMLByte* const toFill 00792 , const XMLSize_t maxBytes 00793 , XMLSize_t& charsEaten 00794 , const UnRepOpts options) 00795 { 00796 // Get pointers to the start and end of each buffer 00797 const XMLCh* srcPtr = srcData; 00798 const XMLCh* srcEnd = srcData + srcCount; 00799 XMLByte* outPtr = toFill; 00800 XMLByte* outEnd = toFill + maxBytes; 00801 00802 // 00803 // Now loop until we either get our max chars, or cannot get a whole 00804 // character from the input buffer. 00805 // 00806 // NOTE: We have to use a loop for this unfortunately because the 00807 // conversion API is too dumb to tell us how many chars it converted if 00808 // it couldn't do the whole source. 00809 // 00810 fUsedDef = FALSE; 00811 while ((outPtr < outEnd) && (srcPtr < srcEnd)) 00812 { 00813 // 00814 // Do one char and see if it made it. 00815 const int bytesStored = ::WideCharToMultiByte 00816 ( 00817 fIECP 00818 , fToFlags 00819 , srcPtr 00820 , 1 00821 , (char*)outPtr 00822 , (int)(outEnd - outPtr) 00823 , 0 00824 , fPtrUsedDef 00825 ); 00826 00827 // If we didn't transcode anything, then we are done 00828 if (!bytesStored) 00829 break; 00830 00831 // 00832 // If the defaault char was used and the options indicate that 00833 // this isn't allowed, then throw. 00834 // 00835 if (fUsedDef && (options == UnRep_Throw)) 00836 { 00837 XMLCh tmpBuf[17]; 00838 XMLString::binToText((unsigned int)*srcPtr, tmpBuf, 16, 16, getMemoryManager()); 00839 ThrowXMLwithMemMgr2 00840 ( 00841 TranscodingException 00842 , XMLExcepts::Trans_Unrepresentable 00843 , tmpBuf 00844 , getEncodingName() 00845 , getMemoryManager() 00846 ); 00847 } 00848 00849 // Update our pointers 00850 outPtr += bytesStored; 00851 srcPtr++; 00852 } 00853 00854 // Update the chars eaten 00855 charsEaten = srcPtr - srcData; 00856 00857 // And return the bytes we stored 00858 return outPtr - toFill; 00859 } 00860 00861 00862 bool Win32Transcoder::canTranscodeTo(const unsigned int toCheck) 00863 { 00864 // 00865 // If the passed value is really a surrogate embedded together, then 00866 // we need to break it out into its two chars. Else just one. 00867 // 00868 XMLCh srcBuf[2]; 00869 unsigned int srcCount = 1; 00870 if (toCheck & 0xFFFF0000) 00871 { 00872 srcBuf[0] = XMLCh((toCheck >> 10) + 0xD800); 00873 srcBuf[1] = XMLCh((toCheck & 0x3FF) + 0xDC00); 00874 srcCount++; 00875 } 00876 else 00877 { 00878 srcBuf[0] = XMLCh(toCheck); 00879 } 00880 00881 // 00882 // Use a local temp buffer that would hold any sane multi-byte char 00883 // sequence and try to transcode this guy into it. 00884 // 00885 char tmpBuf[64]; 00886 00887 fUsedDef = FALSE; 00888 00889 const unsigned int bytesStored = ::WideCharToMultiByte 00890 ( 00891 fIECP 00892 , fToFlags 00893 , srcBuf 00894 , srcCount 00895 , tmpBuf 00896 , 64 00897 , 0 00898 , fPtrUsedDef 00899 ); 00900 00901 if (!bytesStored || fUsedDef) 00902 return false; 00903 00904 return true; 00905 } 00906 00907 00908 00909 00910 //--------------------------------------------------------------------------- 00911 // 00912 // class Win32Transcoder Implementation ... 00913 // 00914 //--------------------------------------------------------------------------- 00915 00916 // --------------------------------------------------------------------------- 00917 // Win32LCPTranscoder: Constructors and Destructor 00918 // --------------------------------------------------------------------------- 00919 Win32LCPTranscoder::Win32LCPTranscoder() 00920 { 00921 } 00922 00923 Win32LCPTranscoder::~Win32LCPTranscoder() 00924 { 00925 } 00926 00927 00928 // --------------------------------------------------------------------------- 00929 // Win32LCPTranscoder: Implementation of the virtual transcoder interface 00930 // --------------------------------------------------------------------------- 00931 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const char* const srcText 00932 , MemoryManager* const /*manager*/) 00933 { 00934 if (!srcText) 00935 return 0; 00936 00937 return ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, srcText, -1, NULL, 0); 00938 } 00939 00940 00941 XMLSize_t Win32LCPTranscoder::calcRequiredSize(const XMLCh* const srcText 00942 , MemoryManager* const /*manager*/) 00943 { 00944 if (!srcText) 00945 return 0; 00946 00947 return ::WideCharToMultiByte(CP_ACP, 0, srcText, -1, NULL, 0, NULL, NULL); 00948 } 00949 00950 char* Win32LCPTranscoder::transcode(const XMLCh* const toTranscode, 00951 MemoryManager* const manager) 00952 { 00953 if (!toTranscode) 00954 return 0; 00955 00956 char* retVal = 0; 00957 if (*toTranscode) 00958 { 00959 // Calc the needed size 00960 const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager); 00961 00962 // Allocate a buffer of that size plus one for the null and transcode 00963 retVal = (char*) manager->allocate((neededLen + 1) * sizeof(char)); //new char[neededLen + 1]; 00964 ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, retVal, (int)neededLen+1, NULL, NULL); 00965 00966 // And cap it off anyway just to make sure 00967 retVal[neededLen] = 0; 00968 } 00969 else 00970 { 00971 retVal = (char*) manager->allocate(sizeof(char)); //new char[1]; 00972 retVal[0] = 0; 00973 } 00974 return retVal; 00975 } 00976 00977 XMLCh* Win32LCPTranscoder::transcode(const char* const toTranscode, 00978 MemoryManager* const manager) 00979 { 00980 if (!toTranscode) 00981 return 0; 00982 00983 XMLCh* retVal = 0; 00984 if (*toTranscode) 00985 { 00986 // Calculate the buffer size required 00987 const XMLSize_t neededLen = calcRequiredSize(toTranscode, manager); 00988 if (neededLen == 0) 00989 { 00990 retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1]; 00991 retVal[0] = 0; 00992 return retVal; 00993 } 00994 00995 // Allocate a buffer of that size plus one for the null and transcode 00996 retVal = (XMLCh*) manager->allocate((neededLen + 1) * sizeof(XMLCh)); //new XMLCh[neededLen + 1]; 00997 ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)retVal, (int)neededLen + 1); 00998 00999 // Cap it off just to make sure. We are so paranoid! 01000 retVal[neededLen] = 0; 01001 } 01002 else 01003 { 01004 retVal = (XMLCh*) manager->allocate(sizeof(XMLCh)); //new XMLCh[1]; 01005 retVal[0] = 0; 01006 } 01007 return retVal; 01008 } 01009 01010 01011 bool Win32LCPTranscoder::transcode( const char* const toTranscode 01012 , XMLCh* const toFill 01013 , const XMLSize_t maxChars 01014 , MemoryManager* const /*manager*/) 01015 { 01016 // Check for a couple of psycho corner cases 01017 if (!toTranscode || !maxChars) 01018 { 01019 toFill[0] = 0; 01020 return true; 01021 } 01022 01023 if (!*toTranscode) 01024 { 01025 toFill[0] = 0; 01026 return true; 01027 } 01028 01029 // This one has a fixed size output, so try it and if it fails it fails 01030 if ( 0 == ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, toTranscode, -1, (LPWSTR)toFill, (int)(maxChars + 1)) ) 01031 return false; 01032 return true; 01033 } 01034 01035 01036 bool Win32LCPTranscoder::transcode( const XMLCh* const toTranscode 01037 , char* const toFill 01038 , const XMLSize_t maxBytes 01039 , MemoryManager* const /*manager*/) 01040 { 01041 // Watch for a couple of pyscho corner cases 01042 if (!toTranscode || !maxBytes) 01043 { 01044 toFill[0] = 0; 01045 return true; 01046 } 01047 01048 if (!*toTranscode) 01049 { 01050 toFill[0] = 0; 01051 return true; 01052 } 01053 01054 // This one has a fixed size output, so try it and if it fails it fails 01055 if ( 0 == ::WideCharToMultiByte(CP_ACP, 0, (LPCWSTR)toTranscode, -1, toFill, (int)(maxBytes + 1), NULL, NULL) ) 01056 return false; 01057 01058 // Cap it off just in case 01059 toFill[maxBytes] = 0; 01060 return true; 01061 } 01062 01063 01064 XERCES_CPP_NAMESPACE_END