GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: CurlURLInputStream.cpp 936316 2010-04-21 14:19:58Z borisk $ 00020 */ 00021 00022 #if HAVE_CONFIG_H 00023 #include <config.h> 00024 #endif 00025 00026 #include <stdio.h> 00027 #include <stdlib.h> 00028 #include <string.h> 00029 #if HAVE_ERRNO_H 00030 #include <errno.h> 00031 #endif 00032 #if HAVE_UNISTD_H 00033 #include <unistd.h> 00034 #endif 00035 #if HAVE_SYS_TYPES_H 00036 #include <sys/types.h> 00037 #endif 00038 #if HAVE_SYS_TIME_H 00039 #include <sys/time.h> 00040 #endif 00041 00042 #include <xercesc/util/XercesDefs.hpp> 00043 #include <xercesc/util/XMLNetAccessor.hpp> 00044 #include <xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp> 00045 #include <xercesc/util/XMLString.hpp> 00046 #include <xercesc/util/XMLExceptMsgs.hpp> 00047 #include <xercesc/util/Janitor.hpp> 00048 #include <xercesc/util/XMLUniDefs.hpp> 00049 #include <xercesc/util/TransService.hpp> 00050 #include <xercesc/util/TranscodingException.hpp> 00051 #include <xercesc/util/PlatformUtils.hpp> 00052 00053 XERCES_CPP_NAMESPACE_BEGIN 00054 00055 00056 CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/) 00057 : fMulti(0) 00058 , fEasy(0) 00059 , fMemoryManager(urlSource.getMemoryManager()) 00060 , fURLSource(urlSource) 00061 , fTotalBytesRead(0) 00062 , fWritePtr(0) 00063 , fBytesRead(0) 00064 , fBytesToRead(0) 00065 , fDataAvailable(false) 00066 , fBufferHeadPtr(fBuffer) 00067 , fBufferTailPtr(fBuffer) 00068 , fPayload(0) 00069 , fPayloadLen(0) 00070 , fContentType(0) 00071 { 00072 // Allocate the curl multi handle 00073 fMulti = curl_multi_init(); 00074 00075 // Allocate the curl easy handle 00076 fEasy = curl_easy_init(); 00077 00078 // Set URL option 00079 TranscodeToStr url(fURLSource.getURLText(), "ISO8859-1", fMemoryManager); 00080 curl_easy_setopt(fEasy, CURLOPT_URL, (char*)url.str()); 00081 00082 // Set up a way to recieve the data 00083 curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this); // Pass this pointer to write function 00084 curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback); // Our static write function 00085 00086 // Do redirects 00087 curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, (long)1); 00088 curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, (long)6); 00089 00090 // Add username and password if authentication is required 00091 const XMLCh *username = urlSource.getUser(); 00092 const XMLCh *password = urlSource.getPassword(); 00093 if(username && password) { 00094 XMLBuffer userPassBuf(256, fMemoryManager); 00095 userPassBuf.append(username); 00096 userPassBuf.append(chColon); 00097 userPassBuf.append(password); 00098 00099 TranscodeToStr userPass(userPassBuf.getRawBuffer(), "ISO8859-1", fMemoryManager); 00100 00101 curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY); 00102 curl_easy_setopt(fEasy, CURLOPT_USERPWD, (char*)userPass.str()); 00103 } 00104 00105 if(httpInfo) { 00106 // Set the correct HTTP method 00107 switch(httpInfo->fHTTPMethod) { 00108 case XMLNetHTTPInfo::GET: 00109 break; 00110 case XMLNetHTTPInfo::PUT: 00111 curl_easy_setopt(fEasy, CURLOPT_UPLOAD, (long)1); 00112 break; 00113 case XMLNetHTTPInfo::POST: 00114 curl_easy_setopt(fEasy, CURLOPT_POST, (long)1); 00115 break; 00116 } 00117 00118 // Add custom headers 00119 if(httpInfo->fHeaders) { 00120 struct curl_slist *headersList = 0; 00121 00122 const char *headersBuf = httpInfo->fHeaders; 00123 const char *headersBufEnd = httpInfo->fHeaders + httpInfo->fHeadersLen; 00124 00125 const char *headerStart = headersBuf; 00126 while(headersBuf < headersBufEnd) { 00127 if(*headersBuf == '\r' && (headersBuf + 1) < headersBufEnd && 00128 *(headersBuf + 1) == '\n') { 00129 00130 XMLSize_t length = headersBuf - headerStart; 00131 ArrayJanitor<char> header((char*)fMemoryManager->allocate((length + 1) * sizeof(char)), 00132 fMemoryManager); 00133 memcpy(header.get(), headerStart, length); 00134 header.get()[length] = 0; 00135 00136 headersList = curl_slist_append(headersList, header.get()); 00137 00138 headersBuf += 2; 00139 headerStart = headersBuf; 00140 continue; 00141 } 00142 ++headersBuf; 00143 } 00144 curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, headersList); 00145 curl_slist_free_all(headersList); 00146 } 00147 00148 // Set up the payload 00149 if(httpInfo->fPayload) { 00150 fPayload = httpInfo->fPayload; 00151 fPayloadLen = httpInfo->fPayloadLen; 00152 curl_easy_setopt(fEasy, CURLOPT_READDATA, this); 00153 curl_easy_setopt(fEasy, CURLOPT_READFUNCTION, staticReadCallback); 00154 curl_easy_setopt(fEasy, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fPayloadLen); 00155 } 00156 } 00157 00158 // Add easy handle to the multi stack 00159 curl_multi_add_handle(fMulti, fEasy); 00160 00161 // Start reading, to get the content type 00162 while(fBufferHeadPtr == fBuffer) 00163 { 00164 int runningHandles = 0; 00165 readMore(&runningHandles); 00166 if(runningHandles == 0) break; 00167 } 00168 00169 // Find the content type 00170 char *contentType8 = 0; 00171 curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8); 00172 if(contentType8) 00173 fContentType = TranscodeFromStr((XMLByte*)contentType8, XMLString::stringLen(contentType8), "ISO8859-1", fMemoryManager).adopt(); 00174 } 00175 00176 00177 CurlURLInputStream::~CurlURLInputStream() 00178 { 00179 // Remove the easy handle from the multi stack 00180 curl_multi_remove_handle(fMulti, fEasy); 00181 00182 // Cleanup the easy handle 00183 curl_easy_cleanup(fEasy); 00184 00185 // Cleanup the multi handle 00186 curl_multi_cleanup(fMulti); 00187 00188 if(fContentType) fMemoryManager->deallocate(fContentType); 00189 } 00190 00191 00192 size_t 00193 CurlURLInputStream::staticWriteCallback(char *buffer, 00194 size_t size, 00195 size_t nitems, 00196 void *outstream) 00197 { 00198 return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems); 00199 } 00200 00201 size_t 00202 CurlURLInputStream::staticReadCallback(char *buffer, 00203 size_t size, 00204 size_t nitems, 00205 void *stream) 00206 { 00207 return ((CurlURLInputStream*)stream)->readCallback(buffer, size, nitems); 00208 } 00209 00210 size_t 00211 CurlURLInputStream::writeCallback(char *buffer, 00212 size_t size, 00213 size_t nitems) 00214 { 00215 XMLSize_t cnt = size * nitems; 00216 XMLSize_t totalConsumed = 0; 00217 00218 // Consume as many bytes as possible immediately into the buffer 00219 XMLSize_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt; 00220 memcpy(fWritePtr, buffer, consume); 00221 fWritePtr += consume; 00222 fBytesRead += consume; 00223 fTotalBytesRead += consume; 00224 fBytesToRead -= consume; 00225 00226 //printf("write callback consuming %d bytes\n", consume); 00227 00228 // If bytes remain, rebuffer as many as possible into our holding buffer 00229 buffer += consume; 00230 totalConsumed += consume; 00231 cnt -= consume; 00232 if (cnt > 0) 00233 { 00234 XMLSize_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer); 00235 consume = (cnt > bufAvail) ? bufAvail : cnt; 00236 memcpy(fBufferHeadPtr, buffer, consume); 00237 fBufferHeadPtr += consume; 00238 buffer += consume; 00239 totalConsumed += consume; 00240 //printf("write callback rebuffering %d bytes\n", consume); 00241 } 00242 00243 // Return the total amount we've consumed. If we don't consume all the bytes 00244 // then an error will be generated. Since our buffer size is equal to the 00245 // maximum size that curl will write, this should never happen unless there 00246 // is a logic error somewhere here. 00247 return totalConsumed; 00248 } 00249 00250 size_t 00251 CurlURLInputStream::readCallback(char *buffer, 00252 size_t size, 00253 size_t nitems) 00254 { 00255 XMLSize_t len = size * nitems; 00256 if(len > fPayloadLen) len = fPayloadLen; 00257 00258 memcpy(buffer, fPayload, len); 00259 00260 fPayload += len; 00261 fPayloadLen -= len; 00262 00263 return len; 00264 } 00265 00266 bool CurlURLInputStream::readMore(int *runningHandles) 00267 { 00268 // Ask the curl to do some work 00269 CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles); 00270 00271 // Process messages from curl 00272 int msgsInQueue = 0; 00273 for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; ) 00274 { 00275 //printf("msg %d, %d from curl\n", msg->msg, msg->data.result); 00276 00277 if (msg->msg != CURLMSG_DONE) 00278 return true; 00279 00280 switch (msg->data.result) 00281 { 00282 case CURLE_OK: 00283 // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below... 00284 break; 00285 00286 case CURLE_UNSUPPORTED_PROTOCOL: 00287 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager); 00288 break; 00289 00290 case CURLE_COULDNT_RESOLVE_HOST: 00291 case CURLE_COULDNT_RESOLVE_PROXY: 00292 { 00293 if (fURLSource.getHost()) 00294 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager); 00295 else 00296 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::File_CouldNotOpenFile, fURLSource.getURLText(), fMemoryManager); 00297 break; 00298 } 00299 00300 case CURLE_COULDNT_CONNECT: 00301 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager); 00302 break; 00303 00304 case CURLE_RECV_ERROR: 00305 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager); 00306 break; 00307 00308 default: 00309 ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager); 00310 break; 00311 } 00312 } 00313 00314 // If nothing is running any longer, bail out 00315 if(*runningHandles == 0) 00316 return false; 00317 00318 // If there is no further data to read, and we haven't 00319 // read any yet on this invocation, call select to wait for data 00320 if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0) 00321 { 00322 fd_set readSet; 00323 fd_set writeSet; 00324 fd_set exceptSet; 00325 int fdcnt=0; 00326 00327 FD_ZERO(&readSet); 00328 FD_ZERO(&writeSet); 00329 FD_ZERO(&exceptSet); 00330 00331 // Ask curl for the file descriptors to wait on 00332 curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt); 00333 00334 // Wait on the file descriptors 00335 timeval tv; 00336 tv.tv_sec = 2; 00337 tv.tv_usec = 0; 00338 select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv); 00339 } 00340 00341 return curlResult == CURLM_CALL_MULTI_PERFORM; 00342 } 00343 00344 XMLSize_t 00345 CurlURLInputStream::readBytes(XMLByte* const toFill 00346 , const XMLSize_t maxToRead) 00347 { 00348 fBytesRead = 0; 00349 fBytesToRead = maxToRead; 00350 fWritePtr = toFill; 00351 00352 for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); ) 00353 { 00354 // First, any buffered data we have available 00355 XMLSize_t bufCnt = fBufferHeadPtr - fBufferTailPtr; 00356 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt; 00357 if (bufCnt > 0) 00358 { 00359 memcpy(fWritePtr, fBufferTailPtr, bufCnt); 00360 fWritePtr += bufCnt; 00361 fBytesRead += bufCnt; 00362 fTotalBytesRead += bufCnt; 00363 fBytesToRead -= bufCnt; 00364 00365 fBufferTailPtr += bufCnt; 00366 if (fBufferTailPtr == fBufferHeadPtr) 00367 fBufferHeadPtr = fBufferTailPtr = fBuffer; 00368 00369 //printf("consuming %d buffered bytes\n", bufCnt); 00370 00371 tryAgain = true; 00372 continue; 00373 } 00374 00375 // Ask the curl to do some work 00376 int runningHandles = 0; 00377 tryAgain = readMore(&runningHandles); 00378 00379 // If nothing is running any longer, bail out 00380 if (runningHandles == 0) 00381 break; 00382 } 00383 00384 return fBytesRead; 00385 } 00386 00387 const XMLCh *CurlURLInputStream::getContentType() const 00388 { 00389 return fContentType; 00390 } 00391 00392 XERCES_CPP_NAMESPACE_END