GME  13
CurlURLInputStream.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: CurlURLInputStream.cpp 936316 2010-04-21 14:19:58Z borisk $
00020  */
00021 
00022 #if HAVE_CONFIG_H
00023   #include <config.h>
00024 #endif
00025 
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #if HAVE_ERRNO_H
00030   #include <errno.h>
00031 #endif
00032 #if HAVE_UNISTD_H
00033   #include <unistd.h>
00034 #endif
00035 #if HAVE_SYS_TYPES_H
00036   #include <sys/types.h>
00037 #endif
00038 #if HAVE_SYS_TIME_H
00039   #include <sys/time.h>
00040 #endif
00041 
00042 #include <xercesc/util/XercesDefs.hpp>
00043 #include <xercesc/util/XMLNetAccessor.hpp>
00044 #include <xercesc/util/NetAccessors/Curl/CurlURLInputStream.hpp>
00045 #include <xercesc/util/XMLString.hpp>
00046 #include <xercesc/util/XMLExceptMsgs.hpp>
00047 #include <xercesc/util/Janitor.hpp>
00048 #include <xercesc/util/XMLUniDefs.hpp>
00049 #include <xercesc/util/TransService.hpp>
00050 #include <xercesc/util/TranscodingException.hpp>
00051 #include <xercesc/util/PlatformUtils.hpp>
00052 
00053 XERCES_CPP_NAMESPACE_BEGIN
00054 
00055 
00056 CurlURLInputStream::CurlURLInputStream(const XMLURL& urlSource, const XMLNetHTTPInfo* httpInfo/*=0*/)
00057       : fMulti(0)
00058       , fEasy(0)
00059       , fMemoryManager(urlSource.getMemoryManager())
00060       , fURLSource(urlSource)
00061       , fTotalBytesRead(0)
00062       , fWritePtr(0)
00063       , fBytesRead(0)
00064       , fBytesToRead(0)
00065       , fDataAvailable(false)
00066       , fBufferHeadPtr(fBuffer)
00067       , fBufferTailPtr(fBuffer)
00068       , fPayload(0)
00069       , fPayloadLen(0)
00070       , fContentType(0)
00071 {
00072         // Allocate the curl multi handle
00073         fMulti = curl_multi_init();
00074 
00075         // Allocate the curl easy handle
00076         fEasy = curl_easy_init();
00077 
00078         // Set URL option
00079     TranscodeToStr url(fURLSource.getURLText(), "ISO8859-1", fMemoryManager);
00080         curl_easy_setopt(fEasy, CURLOPT_URL, (char*)url.str());
00081 
00082     // Set up a way to recieve the data
00083         curl_easy_setopt(fEasy, CURLOPT_WRITEDATA, this);                                               // Pass this pointer to write function
00084         curl_easy_setopt(fEasy, CURLOPT_WRITEFUNCTION, staticWriteCallback);    // Our static write function
00085 
00086         // Do redirects
00087         curl_easy_setopt(fEasy, CURLOPT_FOLLOWLOCATION, (long)1);
00088         curl_easy_setopt(fEasy, CURLOPT_MAXREDIRS, (long)6);
00089 
00090     // Add username and password if authentication is required
00091     const XMLCh *username = urlSource.getUser();
00092     const XMLCh *password = urlSource.getPassword();
00093     if(username && password) {
00094         XMLBuffer userPassBuf(256, fMemoryManager);
00095         userPassBuf.append(username);
00096         userPassBuf.append(chColon);
00097         userPassBuf.append(password);
00098 
00099         TranscodeToStr userPass(userPassBuf.getRawBuffer(), "ISO8859-1", fMemoryManager);
00100 
00101         curl_easy_setopt(fEasy, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
00102         curl_easy_setopt(fEasy, CURLOPT_USERPWD, (char*)userPass.str());
00103     }
00104 
00105     if(httpInfo) {
00106         // Set the correct HTTP method
00107         switch(httpInfo->fHTTPMethod) {
00108         case XMLNetHTTPInfo::GET:
00109             break;
00110         case XMLNetHTTPInfo::PUT:
00111             curl_easy_setopt(fEasy, CURLOPT_UPLOAD, (long)1);
00112             break;
00113         case XMLNetHTTPInfo::POST:
00114             curl_easy_setopt(fEasy, CURLOPT_POST, (long)1);
00115             break;
00116         }
00117 
00118         // Add custom headers
00119         if(httpInfo->fHeaders) {
00120             struct curl_slist *headersList = 0;
00121 
00122             const char *headersBuf = httpInfo->fHeaders;
00123             const char *headersBufEnd = httpInfo->fHeaders + httpInfo->fHeadersLen;
00124 
00125             const char *headerStart = headersBuf;
00126             while(headersBuf < headersBufEnd) {
00127                 if(*headersBuf == '\r' && (headersBuf + 1) < headersBufEnd &&
00128                    *(headersBuf + 1) == '\n') {
00129 
00130                     XMLSize_t length = headersBuf - headerStart;
00131                     ArrayJanitor<char> header((char*)fMemoryManager->allocate((length + 1) * sizeof(char)),
00132                                               fMemoryManager);
00133                     memcpy(header.get(), headerStart, length);
00134                     header.get()[length] = 0;
00135 
00136                     headersList = curl_slist_append(headersList, header.get());
00137 
00138                     headersBuf += 2;
00139                     headerStart = headersBuf;
00140                     continue;
00141                 }
00142                 ++headersBuf;
00143             }
00144             curl_easy_setopt(fEasy, CURLOPT_HTTPHEADER, headersList);
00145             curl_slist_free_all(headersList);
00146         }
00147 
00148         // Set up the payload
00149         if(httpInfo->fPayload) {
00150             fPayload = httpInfo->fPayload;
00151             fPayloadLen = httpInfo->fPayloadLen;
00152             curl_easy_setopt(fEasy, CURLOPT_READDATA, this);
00153             curl_easy_setopt(fEasy, CURLOPT_READFUNCTION, staticReadCallback);
00154             curl_easy_setopt(fEasy, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fPayloadLen);
00155         }
00156     }
00157 
00158         // Add easy handle to the multi stack
00159         curl_multi_add_handle(fMulti, fEasy);
00160 
00161     // Start reading, to get the content type
00162         while(fBufferHeadPtr == fBuffer)
00163         {
00164                 int runningHandles = 0;
00165         readMore(&runningHandles);
00166                 if(runningHandles == 0) break;
00167         }
00168 
00169     // Find the content type
00170     char *contentType8 = 0;
00171     curl_easy_getinfo(fEasy, CURLINFO_CONTENT_TYPE, &contentType8);
00172     if(contentType8)
00173         fContentType = TranscodeFromStr((XMLByte*)contentType8, XMLString::stringLen(contentType8), "ISO8859-1", fMemoryManager).adopt();
00174 }
00175 
00176 
00177 CurlURLInputStream::~CurlURLInputStream()
00178 {
00179         // Remove the easy handle from the multi stack
00180         curl_multi_remove_handle(fMulti, fEasy);
00181 
00182         // Cleanup the easy handle
00183         curl_easy_cleanup(fEasy);
00184 
00185         // Cleanup the multi handle
00186         curl_multi_cleanup(fMulti);
00187 
00188     if(fContentType) fMemoryManager->deallocate(fContentType);
00189 }
00190 
00191 
00192 size_t
00193 CurlURLInputStream::staticWriteCallback(char *buffer,
00194                                         size_t size,
00195                                         size_t nitems,
00196                                         void *outstream)
00197 {
00198         return ((CurlURLInputStream*)outstream)->writeCallback(buffer, size, nitems);
00199 }
00200 
00201 size_t
00202 CurlURLInputStream::staticReadCallback(char *buffer,
00203                                        size_t size,
00204                                        size_t nitems,
00205                                        void *stream)
00206 {
00207     return ((CurlURLInputStream*)stream)->readCallback(buffer, size, nitems);
00208 }
00209 
00210 size_t
00211 CurlURLInputStream::writeCallback(char *buffer,
00212                                   size_t size,
00213                                   size_t nitems)
00214 {
00215         XMLSize_t cnt = size * nitems;
00216         XMLSize_t totalConsumed = 0;
00217 
00218         // Consume as many bytes as possible immediately into the buffer
00219         XMLSize_t consume = (cnt > fBytesToRead) ? fBytesToRead : cnt;
00220         memcpy(fWritePtr, buffer, consume);
00221         fWritePtr               += consume;
00222         fBytesRead              += consume;
00223         fTotalBytesRead += consume;
00224         fBytesToRead    -= consume;
00225 
00226         //printf("write callback consuming %d bytes\n", consume);
00227 
00228         // If bytes remain, rebuffer as many as possible into our holding buffer
00229         buffer                  += consume;
00230         totalConsumed   += consume;
00231         cnt                             -= consume;
00232         if (cnt > 0)
00233         {
00234                 XMLSize_t bufAvail = sizeof(fBuffer) - (fBufferHeadPtr - fBuffer);
00235                 consume = (cnt > bufAvail) ? bufAvail : cnt;
00236                 memcpy(fBufferHeadPtr, buffer, consume);
00237                 fBufferHeadPtr  += consume;
00238                 buffer                  += consume;
00239                 totalConsumed   += consume;
00240                 //printf("write callback rebuffering %d bytes\n", consume);
00241         }
00242 
00243         // Return the total amount we've consumed. If we don't consume all the bytes
00244         // then an error will be generated. Since our buffer size is equal to the
00245         // maximum size that curl will write, this should never happen unless there
00246         // is a logic error somewhere here.
00247         return totalConsumed;
00248 }
00249 
00250 size_t
00251 CurlURLInputStream::readCallback(char *buffer,
00252                                  size_t size,
00253                                  size_t nitems)
00254 {
00255     XMLSize_t len = size * nitems;
00256     if(len > fPayloadLen) len = fPayloadLen;
00257 
00258     memcpy(buffer, fPayload, len);
00259 
00260     fPayload += len;
00261     fPayloadLen -= len;
00262 
00263     return len;
00264 }
00265 
00266 bool CurlURLInputStream::readMore(int *runningHandles)
00267 {
00268     // Ask the curl to do some work
00269     CURLMcode curlResult = curl_multi_perform(fMulti, runningHandles);
00270 
00271     // Process messages from curl
00272     int msgsInQueue = 0;
00273     for (CURLMsg* msg = NULL; (msg = curl_multi_info_read(fMulti, &msgsInQueue)) != NULL; )
00274     {
00275         //printf("msg %d, %d from curl\n", msg->msg, msg->data.result);
00276 
00277         if (msg->msg != CURLMSG_DONE)
00278             return true;
00279 
00280         switch (msg->data.result)
00281         {
00282         case CURLE_OK:
00283             // We completed successfully. runningHandles should have dropped to zero, so we'll bail out below...
00284             break;
00285 
00286         case CURLE_UNSUPPORTED_PROTOCOL:
00287             ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::URL_UnsupportedProto, fMemoryManager);
00288             break;
00289 
00290         case CURLE_COULDNT_RESOLVE_HOST:
00291         case CURLE_COULDNT_RESOLVE_PROXY:
00292           {
00293             if (fURLSource.getHost())
00294               ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_TargetResolution, fURLSource.getHost(), fMemoryManager);
00295             else
00296               ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::File_CouldNotOpenFile, fURLSource.getURLText(), fMemoryManager);
00297             break;
00298           }
00299 
00300         case CURLE_COULDNT_CONNECT:
00301             ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ConnSocket, fURLSource.getURLText(), fMemoryManager);
00302             break;
00303 
00304         case CURLE_RECV_ERROR:
00305             ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_ReadSocket, fURLSource.getURLText(), fMemoryManager);
00306             break;
00307 
00308         default:
00309             ThrowXMLwithMemMgr1(NetAccessorException, XMLExcepts::NetAcc_InternalError, fURLSource.getURLText(), fMemoryManager);
00310             break;
00311         }
00312     }
00313 
00314     // If nothing is running any longer, bail out
00315     if(*runningHandles == 0)
00316         return false;
00317 
00318     // If there is no further data to read, and we haven't
00319     // read any yet on this invocation, call select to wait for data
00320     if (curlResult != CURLM_CALL_MULTI_PERFORM && fBytesRead == 0)
00321     {
00322         fd_set readSet;
00323         fd_set writeSet;
00324         fd_set exceptSet;
00325         int fdcnt=0;
00326 
00327         FD_ZERO(&readSet);
00328         FD_ZERO(&writeSet);
00329         FD_ZERO(&exceptSet);
00330 
00331         // Ask curl for the file descriptors to wait on
00332         curl_multi_fdset(fMulti, &readSet, &writeSet, &exceptSet, &fdcnt);
00333 
00334         // Wait on the file descriptors
00335         timeval tv;
00336         tv.tv_sec  = 2;
00337         tv.tv_usec = 0;
00338         select(fdcnt+1, &readSet, &writeSet, &exceptSet, &tv);
00339     }
00340 
00341     return curlResult == CURLM_CALL_MULTI_PERFORM;
00342 }
00343 
00344 XMLSize_t
00345 CurlURLInputStream::readBytes(XMLByte* const          toFill
00346                                      , const XMLSize_t maxToRead)
00347 {
00348         fBytesRead = 0;
00349         fBytesToRead = maxToRead;
00350         fWritePtr = toFill;
00351 
00352         for (bool tryAgain = true; fBytesToRead > 0 && (tryAgain || fBytesRead == 0); )
00353         {
00354                 // First, any buffered data we have available
00355                 XMLSize_t bufCnt = fBufferHeadPtr - fBufferTailPtr;
00356                 bufCnt = (bufCnt > fBytesToRead) ? fBytesToRead : bufCnt;
00357                 if (bufCnt > 0)
00358                 {
00359                         memcpy(fWritePtr, fBufferTailPtr, bufCnt);
00360                         fWritePtr               += bufCnt;
00361                         fBytesRead              += bufCnt;
00362                         fTotalBytesRead += bufCnt;
00363                         fBytesToRead    -= bufCnt;
00364 
00365                         fBufferTailPtr  += bufCnt;
00366                         if (fBufferTailPtr == fBufferHeadPtr)
00367                                 fBufferHeadPtr = fBufferTailPtr = fBuffer;
00368 
00369                         //printf("consuming %d buffered bytes\n", bufCnt);
00370 
00371                         tryAgain = true;
00372                         continue;
00373                 }
00374 
00375                 // Ask the curl to do some work
00376                 int runningHandles = 0;
00377         tryAgain = readMore(&runningHandles);
00378 
00379                 // If nothing is running any longer, bail out
00380                 if (runningHandles == 0)
00381                         break;
00382         }
00383 
00384         return fBytesRead;
00385 }
00386 
00387 const XMLCh *CurlURLInputStream::getContentType() const
00388 {
00389     return fContentType;
00390 }
00391 
00392 XERCES_CPP_NAMESPACE_END