GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: ReaderMgr.cpp 833045 2009-11-05 13:21:27Z borisk $ 00020 */ 00021 00022 // --------------------------------------------------------------------------- 00023 // Includes 00024 // --------------------------------------------------------------------------- 00025 #include <xercesc/util/BinMemInputStream.hpp> 00026 #include <xercesc/util/Janitor.hpp> 00027 #include <xercesc/util/PlatformUtils.hpp> 00028 #include <xercesc/util/RuntimeException.hpp> 00029 #include <xercesc/util/UnexpectedEOFException.hpp> 00030 #include <xercesc/util/XMLURL.hpp> 00031 #include <xercesc/util/XMLUniDefs.hpp> 00032 #include <xercesc/util/XMLUni.hpp> 00033 #include <xercesc/util/XMLUri.hpp> 00034 #include <xercesc/sax/InputSource.hpp> 00035 #include <xercesc/framework/LocalFileInputSource.hpp> 00036 #include <xercesc/framework/URLInputSource.hpp> 00037 #include <xercesc/framework/XMLBuffer.hpp> 00038 #include <xercesc/framework/XMLDocumentHandler.hpp> 00039 #include <xercesc/framework/XMLEntityDecl.hpp> 00040 #include <xercesc/framework/XMLEntityHandler.hpp> 00041 #include <xercesc/internal/EndOfEntityException.hpp> 00042 #include <xercesc/internal/ReaderMgr.hpp> 00043 #include <xercesc/util/OutOfMemoryException.hpp> 00044 #include <xercesc/util/XMLResourceIdentifier.hpp> 00045 00046 XERCES_CPP_NAMESPACE_BEGIN 00047 00048 // --------------------------------------------------------------------------- 00049 // ReaderMgr: Constructors and Destructor 00050 // --------------------------------------------------------------------------- 00051 ReaderMgr::ReaderMgr(MemoryManager* const manager) : 00052 00053 fCurEntity(0) 00054 , fCurReader(0) 00055 , fEntityHandler(0) 00056 , fEntityStack(0) 00057 , fNextReaderNum(1) 00058 , fReaderStack(0) 00059 , fThrowEOE(false) 00060 , fXMLVersion(XMLReader::XMLV1_0) 00061 , fStandardUriConformant(false) 00062 , fMemoryManager(manager) 00063 { 00064 } 00065 00066 ReaderMgr::~ReaderMgr() 00067 { 00068 // 00069 // Clean up the reader and entity stacks. Note that we don't own the 00070 // entities, so we don't delete the current entity (and the entity stack 00071 // does not own its elements either, so deleting it will not delete the 00072 // entities it still references!) 00073 // 00074 delete fCurReader; 00075 delete fReaderStack; 00076 delete fEntityStack; 00077 } 00078 00079 00080 // --------------------------------------------------------------------------- 00081 // ReaderMgr: Getter methods 00082 // --------------------------------------------------------------------------- 00083 bool ReaderMgr::isEmpty() const 00084 { 00085 return fReaderStack->empty(); 00086 } 00087 00088 00089 // --------------------------------------------------------------------------- 00090 // ReaderMgr: Scanning APIs 00091 // --------------------------------------------------------------------------- 00092 XMLCh ReaderMgr::getNextChar() 00093 { 00094 XMLCh chRet; 00095 if (fCurReader->getNextChar(chRet)) 00096 return chRet; 00097 00098 // 00099 // Didn't get anything back so this reader is hosed. So lets move to 00100 // the next reader on the stack. If this fails, it will be because 00101 // its the end of the original file, and we just return zero. 00102 // 00103 // If its the end of an entity and fThrowEOE is set, it will throw out 00104 // of here. Otherwise, it will take us down to the next reader and 00105 // we'll have more chars. 00106 // 00107 if (!popReader()) 00108 return XMLCh(0); 00109 00110 // Else try again and return the new character 00111 fCurReader->getNextChar(chRet); 00112 return chRet; 00113 } 00114 00115 00116 void ReaderMgr::getSpaces(XMLBuffer& toFill) 00117 { 00118 // Reset the buffer before we start 00119 toFill.reset(); 00120 00121 // 00122 // Get all the spaces from the current reader. If it returns true, 00123 // it hit a non-space and we are done. Else we have to pop a reader 00124 // and keep going. 00125 // 00126 while (!fCurReader->getSpaces(toFill)) 00127 { 00128 // We wore that one out, so lets pop a reader and try again 00129 if (!popReader()) 00130 break; 00131 } 00132 } 00133 00134 00135 void ReaderMgr::getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck) 00136 { 00137 // Reset the target buffer before we start 00138 toFill.reset(); 00139 00140 // 00141 // Ok, enter a loop where we ask the current reader to get chars until 00142 // it meets the criteria. It returns false if it came back due to eating 00143 // up all of its data. Else it returned because something matched, and 00144 // we are done. 00145 // 00146 while (!fCurReader->getUpToCharOrWS(toFill, toCheck)) 00147 { 00148 // We ate that one up, lets try to pop another. If not, break out 00149 if (!popReader()) 00150 break; 00151 } 00152 } 00153 00154 00155 XMLCh ReaderMgr::peekNextChar() 00156 { 00157 XMLCh chRet; 00158 if (fCurReader->peekNextChar(chRet)) 00159 return chRet; 00160 00161 // 00162 // Didn't get anything back so this reader is hosed. So lets move to 00163 // the next reader on the stack. If this fails, it will be because 00164 // its the end of the original file, and we just return zero. 00165 // 00166 if (!popReader()) 00167 return XMLCh(0); 00168 00169 // Else peek again and return the character 00170 fCurReader->peekNextChar(chRet); 00171 return chRet; 00172 } 00173 00174 00175 bool ReaderMgr::skippedChar(const XMLCh toCheck) 00176 { 00177 while (true) 00178 { 00179 // If we get it, then just return true now 00180 if (fCurReader->skippedChar(toCheck)) 00181 return true; 00182 00183 // 00184 // Check to see if we hit end of input on this reader. If so, then 00185 // lets pop and try again. Else, we failed. If we cannot pop another 00186 // then we failed. 00187 // 00188 if (!fCurReader->getNoMoreFlag()) 00189 break; 00190 00191 if (!popReader()) 00192 break; 00193 } 00194 return false; 00195 } 00196 00197 00198 bool ReaderMgr::skippedSpace() 00199 { 00200 while (true) 00201 { 00202 // If we get it, then just return true now 00203 if (fCurReader->skippedSpace()) 00204 return true; 00205 00206 // 00207 // Check to see if we hit end of input on this reader. If so, then 00208 // lets pop and try again. Else, we failed. If we cannot pop another 00209 // then we failed. 00210 // 00211 if (!fCurReader->getNoMoreFlag()) 00212 break; 00213 00214 if (!popReader()) 00215 break; 00216 } 00217 return false; 00218 } 00219 00220 00221 bool ReaderMgr::skipIfQuote(XMLCh& chGotten) 00222 { 00223 while (true) 00224 { 00225 // If we get it, then just return true now 00226 if (fCurReader->skipIfQuote(chGotten)) 00227 return true; 00228 00229 // 00230 // Check to see if we hit end of input on this reader. If so, then 00231 // lets pop and try again. Else, we failed. If we cannot pop another 00232 // then we failed. 00233 // 00234 if (!fCurReader->getNoMoreFlag()) 00235 break; 00236 00237 if (!popReader()) 00238 break; 00239 } 00240 return false; 00241 } 00242 00243 void ReaderMgr::skipPastSpaces(bool& skippedSomething, bool inDecl /* = false */) 00244 { 00245 // we rely on the fact that fCurReader->skipSpaces will NOT reset the flag to false, but only 00246 // set it to true if a space is found 00247 skippedSomething = false; 00248 // 00249 // Skip all the spaces in the current reader. If it returned because 00250 // it hit a non-space, break out. Else we have to pop another entity 00251 // and keep going. 00252 // 00253 while (!fCurReader->skipSpaces(skippedSomething, inDecl)) 00254 { 00255 // Try to pop another entity. If we can't then we are done 00256 if (!popReader()) 00257 break; 00258 } 00259 } 00260 00261 void ReaderMgr::skipPastSpaces() 00262 { 00263 // we are not using it, so we don't care to initialize it 00264 bool tmpFlag; 00265 // 00266 // Skip all the spaces in the current reader. If it returned because 00267 // it hit a non-space, break out. Else we have to pop another entity 00268 // and keep going. 00269 // 00270 while (!fCurReader->skipSpaces(tmpFlag, false)) 00271 { 00272 // Try to pop another entity. If we can't then we are done 00273 if (!popReader()) 00274 break; 00275 } 00276 } 00277 00278 void ReaderMgr::skipQuotedString(const XMLCh quoteCh) 00279 { 00280 XMLCh nextCh; 00281 // If we get an end of file char, then return 00282 while ((nextCh = getNextChar())!=0) 00283 { 00284 // If we get the quote char, then break out 00285 if (nextCh == quoteCh) 00286 break; 00287 } 00288 } 00289 00290 00291 XMLCh ReaderMgr::skipUntilIn(const XMLCh* const listToSkip) 00292 { 00293 XMLCh nextCh; 00294 // If we get an end of file char, then return 00295 while ((nextCh = peekNextChar())!=0) 00296 { 00297 if (XMLString::indexOf(listToSkip, nextCh) != -1) 00298 break; 00299 00300 // Its one of ours so eat it 00301 getNextChar(); 00302 } 00303 return nextCh; 00304 } 00305 00306 00307 XMLCh ReaderMgr::skipUntilInOrWS(const XMLCh* const listToSkip) 00308 { 00309 XMLCh nextCh; 00310 // If we get an end of file char, then return 00311 while ((nextCh = peekNextChar())!=0) 00312 { 00313 if (fCurReader->isWhitespace(nextCh)) 00314 break; 00315 00316 if (XMLString::indexOf(listToSkip, nextCh) != -1) 00317 break; 00318 00319 // Its one of ours, so eat it 00320 getNextChar(); 00321 } 00322 return nextCh; 00323 } 00324 00325 00326 00327 // --------------------------------------------------------------------------- 00328 // ReaderMgr: Control methods 00329 // --------------------------------------------------------------------------- 00330 00331 // 00332 // If the reader stack is empty, then there is only the original main XML 00333 // entity left. If its empty, then we have no more input. 00334 // 00335 bool ReaderMgr::atEOF() const 00336 { 00337 return fReaderStack->empty() && fCurReader->getNoMoreFlag(); 00338 } 00339 00340 00341 // 00342 // This method is called in the case of errors to clean up the stack when 00343 // entities have been incorrectly left on the stack due to syntax errors. 00344 // It just cleans back the stack, and sends no entity events. 00345 // 00346 void ReaderMgr::cleanStackBackTo(const XMLSize_t readerNum) 00347 { 00348 // 00349 // Just start popping readers until we find the one with the indicated 00350 // reader number. 00351 // 00352 while (true) 00353 { 00354 if (fCurReader->getReaderNum() == readerNum) 00355 break; 00356 00357 if (fReaderStack->empty()) 00358 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::RdrMgr_ReaderIdNotFound, fMemoryManager); 00359 00360 delete fCurReader; 00361 fCurReader = fReaderStack->pop(); 00362 fCurEntity = fEntityStack->pop(); 00363 } 00364 } 00365 00366 00367 XMLReader* ReaderMgr::createReader( const InputSource& src 00368 , const bool 00369 , const XMLReader::RefFrom refFrom 00370 , const XMLReader::Types type 00371 , const XMLReader::Sources source 00372 , const bool calcSrcOfs 00373 , XMLSize_t lowWaterMark) 00374 { 00375 // 00376 // Ask the input source to create us an input stream. The particular 00377 // type of input source will know what kind to create. 00378 // 00379 BinInputStream* newStream = src.makeStream(); 00380 if (!newStream) 00381 return 0; 00382 00383 Janitor<BinInputStream> streamJanitor(newStream); 00384 00385 // 00386 // Create a new reader and return it. If the source has an encoding that 00387 // it wants to force, then we call the constructor that does that. 00388 // Otherwise, we just call the one that provides the provisional encoding 00389 // to be possibly updated later by the encoding="" setting. 00390 // 00391 XMLReader* retVal = 0; 00392 00393 // XMLReader ctor invokes refreshRawBuffer() which calls 00394 // newStream->readBytes(). 00395 // This readBytes() may throw exception, which neither 00396 // refresRawBuffer(), nor XMLReader ctor catches. 00397 // We need to handle this exception to avoid leak on newStream. 00398 00399 try { 00400 if (src.getEncoding()) 00401 { 00402 retVal = new (fMemoryManager) XMLReader 00403 ( 00404 src.getPublicId() 00405 , src.getSystemId() 00406 , newStream 00407 , src.getEncoding() 00408 , refFrom 00409 , type 00410 , source 00411 , false 00412 , calcSrcOfs 00413 , lowWaterMark 00414 , fXMLVersion 00415 , fMemoryManager 00416 ); 00417 } 00418 else 00419 { 00420 retVal = new (fMemoryManager) XMLReader 00421 ( 00422 src.getPublicId() 00423 , src.getSystemId() 00424 , newStream 00425 , refFrom 00426 , type 00427 , source 00428 , false 00429 , calcSrcOfs 00430 , lowWaterMark 00431 , fXMLVersion 00432 , fMemoryManager 00433 ); 00434 } 00435 } 00436 catch(const OutOfMemoryException&) 00437 { 00438 streamJanitor.release(); 00439 00440 throw; 00441 } 00442 00443 assert(retVal); 00444 00445 streamJanitor.release(); 00446 00447 // Set the next available reader number on this reader 00448 retVal->setReaderNum(fNextReaderNum++); 00449 return retVal; 00450 } 00451 00452 00453 XMLReader* ReaderMgr::createReader( const XMLCh* const sysId 00454 , const XMLCh* const pubId 00455 , const bool xmlDecl 00456 , const XMLReader::RefFrom refFrom 00457 , const XMLReader::Types type 00458 , const XMLReader::Sources source 00459 , InputSource*& srcToFill 00460 , const bool calcSrcOfs 00461 , XMLSize_t lowWaterMark 00462 , const bool disableDefaultEntityResolution) 00463 { 00464 //Normalize sysId 00465 XMLBuffer normalizedSysId(1023, fMemoryManager); 00466 if(sysId) 00467 XMLString::removeChar(sysId, 0xFFFF, normalizedSysId); 00468 const XMLCh* normalizedURI = normalizedSysId.getRawBuffer(); 00469 00470 // Create a buffer for expanding the system id 00471 XMLBuffer expSysId(1023, fMemoryManager); 00472 00473 // 00474 // Allow the entity handler to expand the system id if they choose 00475 // to do so. 00476 // 00477 if (fEntityHandler) 00478 { 00479 if (!fEntityHandler->expandSystemId(normalizedURI, expSysId)) 00480 expSysId.set(normalizedURI); 00481 } 00482 else 00483 { 00484 expSysId.set(normalizedURI); 00485 } 00486 00487 // Call the entity resolver interface to get an input source 00488 srcToFill = 0; 00489 if (fEntityHandler) 00490 { 00491 LastExtEntityInfo lastInfo; 00492 getLastExtEntityInfo(lastInfo); 00493 XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity, 00494 expSysId.getRawBuffer(), XMLUni::fgZeroLenString, pubId, lastInfo.systemId, 00495 this); 00496 srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier); 00497 } 00498 00499 // 00500 // If they didn't create a source via the entity resolver, then we 00501 // have to create one on our own. 00502 // 00503 if (!srcToFill) 00504 { 00505 if (disableDefaultEntityResolution) 00506 return 0; 00507 00508 LastExtEntityInfo lastInfo; 00509 getLastExtEntityInfo(lastInfo); 00510 00511 // Keep this #if 0 block as it was exposing a threading problem on AIX. 00512 // Got rid of the problem by changing XMLURL to not throw malformedurl 00513 // exceptions. 00514 #if 0 00515 try 00516 { 00517 XMLURL urlTmp(lastInfo.systemId, expSysId.getRawBuffer(), fMemoryManager); 00518 if (urlTmp.isRelative()) 00519 { 00520 ThrowXMLwithMemMgr 00521 ( 00522 MalformedURLException 00523 , XMLExcepts::URL_NoProtocolPresent 00524 , fMemoryManager 00525 ); 00526 } 00527 else { 00528 if (fStandardUriConformant && urlTmp.hasInvalidChar()) 00529 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 00530 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager); 00531 } 00532 } 00533 00534 catch(const MalformedURLException& e) 00535 { 00536 // Its not a URL, so lets assume its a local file name if non-standard uri is allowed 00537 if (!fStandardUriConformant) 00538 srcToFill = new (fMemoryManager) LocalFileInputSource 00539 ( 00540 lastInfo.systemId 00541 , expSysId.getRawBuffer() 00542 , fMemoryManager 00543 ); 00544 else 00545 throw e; 00546 } 00547 #else 00548 XMLURL urlTmp(fMemoryManager); 00549 if ((!urlTmp.setURL(lastInfo.systemId, expSysId.getRawBuffer(), urlTmp)) || 00550 (urlTmp.isRelative())) 00551 { 00552 if (!fStandardUriConformant) 00553 { 00554 XMLBuffer resolvedSysId(1023, fMemoryManager); 00555 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId); 00556 00557 srcToFill = new (fMemoryManager) LocalFileInputSource 00558 ( 00559 lastInfo.systemId 00560 , resolvedSysId.getRawBuffer() 00561 , fMemoryManager 00562 ); 00563 } 00564 else 00565 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 00566 } 00567 else 00568 { 00569 if (fStandardUriConformant && urlTmp.hasInvalidChar()) 00570 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 00571 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager); 00572 } 00573 #endif 00574 } 00575 00576 // Put a janitor on the input source 00577 Janitor<InputSource> janSrc(srcToFill); 00578 00579 // 00580 // Now call the other version with the input source that we have, and 00581 // return the resulting reader. 00582 // 00583 XMLReader* retVal = createReader 00584 ( 00585 *srcToFill 00586 , xmlDecl 00587 , refFrom 00588 , type 00589 , source 00590 , calcSrcOfs 00591 , lowWaterMark 00592 ); 00593 00594 // Either way, we can release the input source now 00595 janSrc.orphan(); 00596 00597 // If it failed for any reason, then return zero. 00598 if (!retVal) 00599 return 0; 00600 00601 // Give this reader the next available reader number and return it 00602 retVal->setReaderNum(fNextReaderNum++); 00603 return retVal; 00604 } 00605 00606 00607 XMLReader* ReaderMgr::createReader( const XMLCh* const baseURI 00608 , const XMLCh* const sysId 00609 , const XMLCh* const pubId 00610 , const bool xmlDecl 00611 , const XMLReader::RefFrom refFrom 00612 , const XMLReader::Types type 00613 , const XMLReader::Sources source 00614 , InputSource*& srcToFill 00615 , const bool calcSrcOfs 00616 , XMLSize_t lowWaterMark 00617 , const bool disableDefaultEntityResolution) 00618 { 00619 //Normalize sysId 00620 XMLBuffer normalizedSysId(1023, fMemoryManager); 00621 XMLString::removeChar(sysId, 0xFFFF, normalizedSysId); 00622 const XMLCh* normalizedURI = normalizedSysId.getRawBuffer(); 00623 00624 // Create a buffer for expanding the system id 00625 XMLBuffer expSysId(1023, fMemoryManager); 00626 00627 // 00628 // Allow the entity handler to expand the system id if they choose 00629 // to do so. 00630 // 00631 if (fEntityHandler) 00632 { 00633 if (!fEntityHandler->expandSystemId(normalizedURI, expSysId)) 00634 expSysId.set(normalizedURI); 00635 } 00636 else 00637 { 00638 expSysId.set(normalizedURI); 00639 } 00640 00641 // Call the entity resolver interface to get an input source 00642 srcToFill = 0; 00643 if (fEntityHandler) 00644 { 00645 XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity, 00646 expSysId.getRawBuffer(), XMLUni::fgZeroLenString, pubId, baseURI, 00647 this); 00648 srcToFill = fEntityHandler->resolveEntity(&resourceIdentifier); 00649 } 00650 00651 // 00652 // If they didn't create a source via the entity resolver, then we 00653 // have to create one on our own. 00654 // 00655 if (!srcToFill) 00656 { 00657 if (disableDefaultEntityResolution) 00658 return 0; 00659 00660 LastExtEntityInfo lastInfo; 00661 00662 const XMLCh* baseuri=baseURI; 00663 if(!baseuri || !*baseuri) 00664 { 00665 getLastExtEntityInfo(lastInfo); 00666 baseuri = lastInfo.systemId; 00667 } 00668 00669 XMLURL urlTmp(fMemoryManager); 00670 if ((!urlTmp.setURL(baseuri, expSysId.getRawBuffer(), urlTmp)) || 00671 (urlTmp.isRelative())) 00672 { 00673 if (!fStandardUriConformant) 00674 { 00675 XMLBuffer resolvedSysId(1023, fMemoryManager); 00676 XMLUri::normalizeURI(expSysId.getRawBuffer(), resolvedSysId); 00677 00678 srcToFill = new (fMemoryManager) LocalFileInputSource 00679 ( 00680 baseuri 00681 , resolvedSysId.getRawBuffer() 00682 , fMemoryManager 00683 ); 00684 } 00685 else 00686 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 00687 } 00688 else 00689 { 00690 if (fStandardUriConformant && urlTmp.hasInvalidChar()) 00691 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_MalformedURL, fMemoryManager); 00692 srcToFill = new (fMemoryManager) URLInputSource(urlTmp, fMemoryManager); 00693 } 00694 } 00695 00696 // Put a janitor on the input source 00697 Janitor<InputSource> janSrc(srcToFill); 00698 00699 // 00700 // Now call the other version with the input source that we have, and 00701 // return the resulting reader. 00702 // 00703 XMLReader* retVal = createReader 00704 ( 00705 *srcToFill 00706 , xmlDecl 00707 , refFrom 00708 , type 00709 , source 00710 , calcSrcOfs 00711 , lowWaterMark 00712 ); 00713 00714 // Either way, we can release the input source now 00715 janSrc.orphan(); 00716 00717 // If it failed for any reason, then return zero. 00718 if (!retVal) 00719 return 0; 00720 00721 // Give this reader the next available reader number and return it 00722 retVal->setReaderNum(fNextReaderNum++); 00723 return retVal; 00724 } 00725 00726 00727 XMLReader* 00728 ReaderMgr::createIntEntReader( const XMLCh* const sysId 00729 , const XMLReader::RefFrom refFrom 00730 , const XMLReader::Types type 00731 , const XMLCh* const dataBuf 00732 , const XMLSize_t dataLen 00733 , const bool copyBuf 00734 , const bool calcSrcOfs 00735 , XMLSize_t lowWaterMark) 00736 { 00737 // 00738 // This one is easy, we just create an input stream for the data and 00739 // provide a few extra goodies. 00740 // 00741 // NOTE: We use a special encoding string that will be recognized 00742 // as a 'do nothing' transcoder for the already internalized XMLCh 00743 // data that makes up an internal entity. 00744 // 00745 BinMemInputStream* newStream = new (fMemoryManager) BinMemInputStream 00746 ( 00747 (const XMLByte*)dataBuf 00748 , dataLen * sizeof(XMLCh) 00749 , copyBuf ? BinMemInputStream::BufOpt_Copy 00750 : BinMemInputStream::BufOpt_Reference 00751 , fMemoryManager 00752 ); 00753 if (!newStream) 00754 return 0; 00755 00756 XMLReader* retVal = new (fMemoryManager) XMLReader 00757 ( 00758 sysId 00759 , 0 00760 , newStream 00761 , XMLRecognizer::XERCES_XMLCH 00762 , refFrom 00763 , type 00764 , XMLReader::Source_Internal 00765 , false 00766 , calcSrcOfs 00767 , lowWaterMark 00768 , fXMLVersion 00769 , fMemoryManager 00770 ); 00771 00772 // If it failed for any reason, then return zero. 00773 if (!retVal) { 00774 delete newStream; 00775 return 0; 00776 } 00777 00778 // Set the reader number to the next available number 00779 retVal->setReaderNum(fNextReaderNum++); 00780 return retVal; 00781 } 00782 00783 00784 const XMLCh* ReaderMgr::getCurrentEncodingStr() const 00785 { 00786 const XMLEntityDecl* theEntity; 00787 const XMLReader* theReader = getLastExtEntity(theEntity); 00788 00789 return theReader->getEncodingStr(); 00790 } 00791 00792 00793 const XMLEntityDecl* ReaderMgr::getCurrentEntity() const 00794 { 00795 return fCurEntity; 00796 } 00797 00798 00799 XMLEntityDecl* ReaderMgr::getCurrentEntity() 00800 { 00801 return fCurEntity; 00802 } 00803 00804 00805 XMLSize_t ReaderMgr::getReaderDepth() const 00806 { 00807 // If the stack doesn't exist, its obviously zero 00808 if (!fEntityStack) 00809 return 0; 00810 00811 // 00812 // The return is the stack size, plus one if there is a current 00813 // reader. So if there is no current reader and none on the stack, 00814 // its zero, else its some non-zero value. 00815 // 00816 XMLSize_t retVal = fEntityStack->size(); 00817 if (fCurReader) 00818 retVal++; 00819 return retVal; 00820 } 00821 00822 void ReaderMgr::getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const 00823 { 00824 // 00825 // If the reader stack never got created or we've not managed to open any 00826 // main entity yet, then we can't give this information. 00827 // 00828 if (!fReaderStack || !fCurReader) 00829 { 00830 lastInfo.systemId = XMLUni::fgZeroLenString; 00831 lastInfo.publicId = XMLUni::fgZeroLenString; 00832 lastInfo.lineNumber = 0; 00833 lastInfo.colNumber = 0; 00834 return; 00835 } 00836 00837 // We have at least one entity so get the data 00838 const XMLEntityDecl* theEntity; 00839 const XMLReader* theReader = getLastExtEntity(theEntity); 00840 00841 // Fill in the info structure with the reader we found 00842 lastInfo.systemId = theReader->getSystemId(); 00843 lastInfo.publicId = theReader->getPublicId(); 00844 lastInfo.lineNumber = theReader->getLineNumber(); 00845 lastInfo.colNumber = theReader->getColumnNumber(); 00846 } 00847 00848 00849 bool ReaderMgr::isScanningPERefOutOfLiteral() const 00850 { 00851 // If the current reader is not for an entity, then definitely not 00852 if (!fCurEntity) 00853 return false; 00854 00855 // 00856 // If this is a PE entity, and its not being expanded in a literal 00857 // then its true. 00858 // 00859 if ((fCurReader->getType() == XMLReader::Type_PE) 00860 && (fCurReader->getRefFrom() == XMLReader::RefFrom_NonLiteral)) 00861 { 00862 return true; 00863 } 00864 return false; 00865 } 00866 00867 00868 bool ReaderMgr::pushReader( XMLReader* const reader 00869 , XMLEntityDecl* const entity) 00870 { 00871 // 00872 // First, if an entity was passed, we have to confirm that this entity 00873 // is not already on the entity stack. If so, then this is a recursive 00874 // entity expansion, so we issue an error and refuse to put the reader 00875 // on the stack. 00876 // 00877 // If there is no entity passed, then its not an entity being pushed, so 00878 // nothing to do. If there is no entity stack yet, then of coures it 00879 // cannot already be there. 00880 // 00881 if (entity && fEntityStack) 00882 { 00883 const XMLSize_t count = fEntityStack->size(); 00884 const XMLCh* const theName = entity->getName(); 00885 for (XMLSize_t index = 0; index < count; index++) 00886 { 00887 const XMLEntityDecl* curDecl = fEntityStack->elementAt(index); 00888 if (curDecl) 00889 { 00890 if (XMLString::equals(theName, curDecl->getName())) 00891 { 00892 // Oops, already there so delete reader and return 00893 delete reader; 00894 return false; 00895 } 00896 } 00897 } 00898 } 00899 00900 // 00901 // Fault in the reader stack. Give it an initial capacity of 16, and 00902 // tell it it does own its elements. 00903 // 00904 if (!fReaderStack) 00905 fReaderStack = new (fMemoryManager) RefStackOf<XMLReader>(16, true, fMemoryManager); 00906 00907 // And the entity stack, which does not own its elements 00908 if (!fEntityStack) 00909 fEntityStack = new (fMemoryManager) RefStackOf<XMLEntityDecl>(16, false, fMemoryManager); 00910 00911 // 00912 // Push the current reader and entity onto their respective stacks. 00913 // Note that the the current entity can be null if the current reader 00914 // is not for an entity. 00915 // 00916 if (fCurReader) 00917 { 00918 fReaderStack->push(fCurReader); 00919 fEntityStack->push(fCurEntity); 00920 } 00921 00922 // 00923 // Make the passed reader and entity the current top of stack. The 00924 // passed entity can (and often is) null. 00925 // 00926 fCurReader = reader; 00927 fCurEntity = entity; 00928 00929 return true; 00930 } 00931 00932 00933 void ReaderMgr::reset() 00934 { 00935 // Reset all of the flags 00936 fThrowEOE = false; 00937 00938 // Delete the current reader and flush the reader stack 00939 delete fCurReader; 00940 fCurReader = 0; 00941 if (fReaderStack) 00942 fReaderStack->removeAllElements(); 00943 00944 // 00945 // And do the same for the entity stack, but don't delete the current 00946 // entity (if any) since we don't own them. 00947 // 00948 fCurEntity = 0; 00949 if (fEntityStack) 00950 fEntityStack->removeAllElements(); 00951 } 00952 00953 00954 // --------------------------------------------------------------------------- 00955 // ReaderMgr: Implement the SAX Locator interface 00956 // --------------------------------------------------------------------------- 00957 const XMLCh* ReaderMgr::getPublicId() const 00958 { 00959 if (!fReaderStack && !fCurReader) 00960 return XMLUni::fgZeroLenString; 00961 00962 const XMLEntityDecl* theEntity; 00963 return getLastExtEntity(theEntity)->getPublicId(); 00964 } 00965 00966 const XMLCh* ReaderMgr::getSystemId() const 00967 { 00968 if (!fReaderStack && !fCurReader) 00969 return XMLUni::fgZeroLenString; 00970 00971 const XMLEntityDecl* theEntity; 00972 return getLastExtEntity(theEntity)->getSystemId(); 00973 } 00974 00975 XMLFileLoc ReaderMgr::getColumnNumber() const 00976 { 00977 if (!fReaderStack && !fCurReader) 00978 return 0; 00979 00980 const XMLEntityDecl* theEntity; 00981 return getLastExtEntity(theEntity)->getColumnNumber(); 00982 } 00983 00984 XMLFileLoc ReaderMgr::getLineNumber() const 00985 { 00986 if (!fReaderStack && !fCurReader) 00987 return 0; 00988 00989 const XMLEntityDecl* theEntity; 00990 return getLastExtEntity(theEntity)->getLineNumber(); 00991 } 00992 00993 00994 00995 // --------------------------------------------------------------------------- 00996 // ReaderMgr: Private helper methods 00997 // --------------------------------------------------------------------------- 00998 const XMLReader* 00999 ReaderMgr::getLastExtEntity(const XMLEntityDecl*& itsEntity) const 01000 { 01001 // 01002 // Scan down the reader stack until we find a reader for an entity that 01003 // is external. First check that there is anything in the stack at all, 01004 // in which case the current reader is the main file and that's the one 01005 // that we want. 01006 // 01007 const XMLReader* theReader = fCurReader; 01008 01009 // 01010 // If there is a current entity and it is not an external entity, then 01011 // search the stack; else, keep the reader that we've got since its 01012 // either an external entity reader or the main file reader. 01013 // 01014 const XMLEntityDecl* curEntity = fCurEntity; 01015 if (curEntity && !curEntity->isExternal()) 01016 { 01017 XMLSize_t index = fReaderStack->size(); 01018 if (index) 01019 { 01020 while (true) 01021 { 01022 // Move down to the previous element and get a pointer to it 01023 index--; 01024 curEntity = fEntityStack->elementAt(index); 01025 01026 // 01027 // If its null or its an external entity, then this reader 01028 // is what we want, so break out with that one. 01029 // 01030 if (!curEntity) 01031 { 01032 theReader = fReaderStack->elementAt(index); 01033 break; 01034 } 01035 else if (curEntity->isExternal()) 01036 { 01037 theReader = fReaderStack->elementAt(index); 01038 break; 01039 } 01040 01041 // We hit the end, so leave the main file reader as the one 01042 if (!index) 01043 break; 01044 } 01045 } 01046 } 01047 01048 itsEntity = curEntity; 01049 return theReader; 01050 } 01051 01052 01053 bool ReaderMgr::popReader() 01054 { 01055 // 01056 // We didn't get any more, so try to pop off a reader. If the reader 01057 // stack is empty, then we are at the end, so return false. 01058 // 01059 if (fReaderStack->empty()) 01060 return false; 01061 01062 // 01063 // Remember the current entity, before we pop off a new one. We might 01064 // need this to throw the end of entity exception at the end. 01065 // 01066 XMLEntityDecl* prevEntity = fCurEntity; 01067 const bool prevReaderThrowAtEnd = fCurReader->getThrowAtEnd(); 01068 const XMLSize_t readerNum = fCurReader->getReaderNum(); 01069 01070 // 01071 // Delete the current reader and pop a new reader and entity off 01072 // the stacks. 01073 // 01074 delete fCurReader; 01075 fCurReader = fReaderStack->pop(); 01076 fCurEntity = fEntityStack->pop(); 01077 01078 // 01079 // If there was a previous entity, and either the fThrowEOE flag is set 01080 // or reader was marked as such, then throw an end of entity. 01081 // 01082 if (prevEntity && (fThrowEOE || prevReaderThrowAtEnd)) 01083 throw EndOfEntityException(prevEntity, readerNum); 01084 01085 while (true) 01086 { 01087 // 01088 // They don't want us to throw, so lets just return with a new 01089 // reader. Here we have to do a loop because we might have multiple 01090 // readers on these stack that are empty (i.e. the last char in them 01091 // was the ';' at the end of the entity ref that caused the next 01092 // entity to be pushed. 01093 // 01094 // So we loop until we find a non-empty reader, or hit the main 01095 // file entity. If we find one with some chars available, then break 01096 // out and take that one. 01097 // 01098 if (fCurReader->charsLeftInBuffer()) 01099 break; 01100 01101 fCurReader->refreshCharBuffer(); 01102 if (fCurReader->charsLeftInBuffer()) 01103 break; 01104 01105 // 01106 // The current one is hosed. So, if the reader stack is empty we 01107 // are dead meat and can give up now. 01108 // 01109 if (fReaderStack->empty()) 01110 return false; 01111 01112 // Else pop again and try it one more time 01113 delete fCurReader; 01114 fCurReader = fReaderStack->pop(); 01115 fCurEntity = fEntityStack->pop(); 01116 } 01117 return true; 01118 } 01119 01120 XERCES_CPP_NAMESPACE_END