GME  13
XMLUri.cpp
Go to the documentation of this file.
00001 /*
00002  * Licensed to the Apache Software Foundation (ASF) under one or more
00003  * contributor license agreements.  See the NOTICE file distributed with
00004  * this work for additional information regarding copyright ownership.
00005  * The ASF licenses this file to You under the Apache License, Version 2.0
00006  * (the "License"); you may not use this file except in compliance with
00007  * the License.  You may obtain a copy of the License at
00008  *
00009  *      http://www.apache.org/licenses/LICENSE-2.0
00010  *
00011  * Unless required by applicable law or agreed to in writing, software
00012  * distributed under the License is distributed on an "AS IS" BASIS,
00013  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  * See the License for the specific language governing permissions and
00015  * limitations under the License.
00016  */
00017 
00018 /*
00019  * $Id: XMLUri.cpp 881714 2009-11-18 10:39:06Z borisk $
00020  */
00021 
00022 // ---------------------------------------------------------------------------
00023 //  Includes
00024 // ---------------------------------------------------------------------------
00025 #include <xercesc/util/Janitor.hpp>
00026 #include <xercesc/util/XMLURL.hpp>
00027 #include <xercesc/util/XMLUri.hpp>
00028 #include <xercesc/util/XMLChar.hpp>
00029 #include <xercesc/util/OutOfMemoryException.hpp>
00030 
00031 XERCES_CPP_NAMESPACE_BEGIN
00032 
00033 // ---------------------------------------------------------------------------
00034 //  XMLUri: static data
00035 // ---------------------------------------------------------------------------
00036 
00037 //      Amended by RFC2732
00038 //      reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
00039 //                      "$" | "," | "[" | "]"
00040 //
00041 const XMLCh XMLUri::RESERVED_CHARACTERS[] =
00042 {
00043     chSemiColon, chForwardSlash, chQuestion, chColon, chAt,
00044     chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare,
00045     chCloseSquare, chNull
00046 };
00047 
00048 //
00049 //      mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
00050 //                      "(" | ")"
00051 //
00052 const XMLCh XMLUri::MARK_CHARACTERS[] =
00053 {
00054     chDash, chUnderscore, chPeriod, chBang, chTilde,
00055     chAsterisk, chSingleQuote, chOpenParen, chCloseParen, chNull
00056 };
00057 
00058 // combination of MARK and RESERVED
00059 const XMLCh XMLUri::MARK_OR_RESERVED_CHARACTERS[] =
00060 {
00061     chDash, chUnderscore, chPeriod, chBang, chTilde,
00062     chAsterisk, chSingleQuote, chOpenParen, chCloseParen,
00063     chSemiColon, chForwardSlash, chQuestion, chColon, chAt,
00064     chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare,
00065     chCloseSquare, chNull
00066 };
00067 
00068 //
00069 //      scheme        = alpha *( alpha | digit | "+" | "-" | "." )
00070 //
00071 const XMLCh XMLUri::SCHEME_CHARACTERS[] =
00072 {
00073     chPlus, chDash, chPeriod, chNull
00074 };
00075 
00076 //
00077 //      userinfo      = *( unreserved | escaped |
00078 //                         ";" | ":" | "&" | "=" | "+" | "$" | "," )
00079 //
00080 const XMLCh XMLUri::USERINFO_CHARACTERS[] =
00081 {
00082     chSemiColon, chColon, chAmpersand, chEqual, chPlus,
00083     chDollarSign, chPeriod, chNull
00084 };
00085 
00086 //
00087 //      reg_name     = 1*( unreserved | escaped | "$" | "," |
00088 //                         ";" | ":" | "@" | "&" | "=" | "+" )
00089 //
00090 const XMLCh XMLUri::REG_NAME_CHARACTERS[] =
00091 {
00092     chDollarSign, chComma, chSemiColon, chColon, chAt,
00093     chAmpersand, chEqual, chPlus, chNull
00094 };
00095 
00096 //      pchar plus ';' and '/'.
00097 //      pchar         = unreserved | escaped |
00098 //                      ":" | "@" | "&" | "=" | "+" | "$" | ","
00099 const XMLCh XMLUri::PATH_CHARACTERS[] =
00100 {
00101     chSemiColon, chForwardSlash, chColon, chAt, chAmpersand,
00102     chEqual, chPlus, chDollarSign, chComma, chNull
00103 };
00104 
00105 
00106 // ---------------------------------------------------------------------------
00107 //  Local methods and data
00108 // ---------------------------------------------------------------------------
00109 static const int BUF_LEN = 64;
00110 
00111 //
00112 // "Scheme"
00113 // "SchemeSpecificPart"
00114 // "Parameters"
00115 // "UserInfo"
00116 // "Host"
00117 // "Port"
00118 // "RegName"
00119 // "Path"
00120 // "Query"
00121 // "Fragment"
00122 //
00123 static const XMLCh errMsg_SCHEME[] =
00124 {
00125     chLatin_s, chLatin_c, chLatin_h, chLatin_e,
00126     chLatin_m, chLatin_e, chNull
00127 };
00128 
00129 static const XMLCh errMsg_SCHEMESPART[] =
00130 {
00131     chLatin_s, chLatin_c, chLatin_h, chLatin_e, chLatin_m, chLatin_e,
00132     chLatin_S, chLatin_p, chLatin_e, chLatin_c, chLatin_i, chLatin_f,
00133     chLatin_i, chLatin_c, chLatin_P, chLatin_a, chLatin_r, chLatin_t,
00134     chNull
00135 };
00136 
00137 static const XMLCh errMsg_PARAMS[] =
00138 {
00139     chLatin_p, chLatin_a, chLatin_r, chLatin_a, chLatin_m,
00140     chLatin_e, chLatin_t, chLatin_e, chLatin_r, chLatin_s, chNull
00141 };
00142 
00143 static const XMLCh errMsg_USERINFO[] =
00144 {
00145     chLatin_u, chLatin_s, chLatin_e, chLatin_r,
00146     chLatin_i, chLatin_n, chLatin_f, chLatin_o, chNull
00147 };
00148 
00149 static const XMLCh errMsg_HOST[] =
00150 {
00151     chLatin_h, chLatin_o, chLatin_s, chLatin_t, chNull
00152 };
00153 
00154 static const XMLCh errMsg_PORT[] =
00155 {
00156     chLatin_p, chLatin_o, chLatin_r, chLatin_t, chNull
00157 };
00158 
00159 static const XMLCh errMsg_REGNAME[] =
00160 {
00161     chLatin_R, chLatin_e, chLatin_g,
00162     chLatin_N, chLatin_a, chLatin_m, chLatin_e, chNull
00163 };
00164 
00165 static const XMLCh errMsg_PATH[] =
00166 {
00167     chLatin_p, chLatin_a, chLatin_t, chLatin_h, chNull
00168 };
00169 
00170 static const XMLCh errMsg_QUERY[] =
00171 {
00172     chLatin_q, chLatin_u, chLatin_e, chLatin_r, chLatin_y, chNull
00173 };
00174 
00175 static const XMLCh errMsg_FRAGMENT[] =
00176 {
00177     chLatin_f, chLatin_r, chLatin_a, chLatin_g,
00178     chLatin_m, chLatin_e, chLatin_n, chLatin_t, chNull
00179 };
00180 
00181 //
00182 //  "//"
00183 //  "/"
00184 //  "./"
00185 //  "/."
00186 //  "/../"
00187 //  "/.."
00188 //
00189 static const XMLCh DOUBLE_SLASH[] =
00190 {
00191     chForwardSlash, chForwardSlash, chNull
00192 };
00193 
00194 static const XMLCh SINGLE_SLASH[] =
00195 {
00196     chForwardSlash, chNull
00197 };
00198 
00199 static const XMLCh SLASH_DOT_SLASH[] =
00200 {
00201     chForwardSlash, chPeriod, chForwardSlash, chNull
00202 };
00203 
00204 static const XMLCh SLASH_DOT[] =
00205 {
00206     chForwardSlash, chPeriod, chNull
00207 };
00208 
00209 static const XMLCh SLASH_DOTDOT_SLASH[] =
00210 {
00211     chForwardSlash, chPeriod, chPeriod, chForwardSlash, chNull
00212 };
00213 
00214 static const XMLCh SLASH_DOTDOT[] =
00215 {
00216     chForwardSlash, chPeriod, chPeriod, chNull
00217 };
00218 
00219 //
00220 //  ":/?#"
00221 //
00222 // REVISIT: why?
00223 static const XMLCh SCHEME_SEPARATORS[] =
00224 {
00225     chColon, chForwardSlash, chQuestion, chPound, chNull
00226 };
00227 
00228 //
00229 //  "?#"
00230 //
00231 static const XMLCh PATH_SEPARATORS[] =
00232 {
00233     chQuestion, chPound, chNull
00234 };
00235 
00236 // ---------------------------------------------------------------------------
00237 //  XMLUri: Constructors and Helper methods
00238 // ---------------------------------------------------------------------------
00239 // ctor# 2
00240 
00241 typedef JanitorMemFunCall<XMLUri>   CleanupType;
00242 
00243 XMLUri::XMLUri(const XMLCh* const uriSpec,
00244                MemoryManager* const manager)
00245 : fPort(-1)
00246 , fScheme(0)
00247 , fUserInfo(0)
00248 , fHost(0)
00249 , fRegAuth(0)
00250 , fPath(0)
00251 , fQueryString(0)
00252 , fFragment(0)
00253 , fURIText(0)
00254 , fMemoryManager(manager)
00255 {
00256     CleanupType cleanup(this, &XMLUri::cleanUp);
00257 
00258     try {
00259         initialize((XMLUri *)0, uriSpec);
00260     }
00261     catch(const OutOfMemoryException&)
00262     {
00263         cleanup.release();
00264 
00265         throw;
00266     }
00267 
00268     cleanup.release();
00269 }
00270 
00271 // ctor# 7 relative ctor
00272 XMLUri::XMLUri(const XMLUri* const      baseURI
00273               , const XMLCh* const   uriSpec
00274               , MemoryManager* const manager)
00275 : fPort(-1)
00276 , fScheme(0)
00277 , fUserInfo(0)
00278 , fHost(0)
00279 , fRegAuth(0)
00280 , fPath(0)
00281 , fQueryString(0)
00282 , fFragment(0)
00283 , fURIText(0)
00284 , fMemoryManager(manager)
00285 {
00286     CleanupType cleanup(this, &XMLUri::cleanUp);
00287 
00288     try {
00289         initialize(baseURI, uriSpec);
00290     }
00291     catch(const OutOfMemoryException&)
00292     {
00293         cleanup.release();
00294 
00295         throw;
00296     }
00297 
00298     cleanup.release();
00299 }
00300 
00301 //Copy constructor
00302 XMLUri::XMLUri(const XMLUri& toCopy)
00303 : XSerializable(toCopy)
00304 , XMemory(toCopy)
00305 , fPort(-1)
00306 , fScheme(0)
00307 , fUserInfo(0)
00308 , fHost(0)
00309 , fRegAuth(0)
00310 , fPath(0)
00311 , fQueryString(0)
00312 , fFragment(0)
00313 , fURIText(0)
00314 , fMemoryManager(toCopy.fMemoryManager)
00315 {
00316     CleanupType cleanup(this, &XMLUri::cleanUp);
00317 
00318     try {
00319         initialize(toCopy);
00320     }
00321     catch(const OutOfMemoryException&)
00322     {
00323         cleanup.release();
00324 
00325         throw;
00326     }
00327 
00328     cleanup.release();
00329 }
00330 
00331 XMLUri& XMLUri::operator=(const XMLUri& toAssign)
00332 {
00333     cleanUp();
00334 
00335     CleanupType cleanup(this, &XMLUri::cleanUp);
00336 
00337     try {
00338         initialize(toAssign);
00339     }
00340     catch(const OutOfMemoryException&)
00341     {
00342         cleanup.release();
00343 
00344         throw;
00345     }
00346 
00347     cleanup.release();
00348 
00349     return *this;
00350 }
00351 
00352 XMLUri::~XMLUri()
00353 {
00354     cleanUp();
00355 }
00356 
00357 void XMLUri::cleanUp()
00358 {
00359     if (fScheme)
00360         XMLString::release(&fScheme, fMemoryManager);//delete[] fScheme;
00361 
00362     if (fUserInfo)
00363         XMLString::release(&fUserInfo, fMemoryManager);//delete[] fUserInfo;
00364 
00365     if (fHost)
00366         XMLString::release(&fHost, fMemoryManager);//delete[] fHost;
00367 
00368     if (fRegAuth)
00369         XMLString::release(&fRegAuth, fMemoryManager);//delete[] fRegAuth;
00370 
00371     if (fPath)
00372         XMLString::release(&fPath, fMemoryManager);//delete[] fPath;
00373 
00374     if (fQueryString)
00375         XMLString::release(&fQueryString, fMemoryManager);//delete[] fQueryString;
00376 
00377     if (fFragment)
00378         XMLString::release(&fFragment, fMemoryManager);//delete[] fFragment;
00379 
00380     XMLString::release(&fURIText, fMemoryManager);//delete[] fURIText;
00381 }
00382 
00383 void XMLUri::initialize(const XMLUri& toCopy)
00384 {
00385     //
00386     // assuming that all fields from the toCopy are valid,
00387     // therefore need NOT to go through various setXXX() methods
00388     //
00389     fMemoryManager = toCopy.fMemoryManager;
00390     fScheme = XMLString::replicate(toCopy.fScheme, fMemoryManager);
00391     fUserInfo = XMLString::replicate(toCopy.fUserInfo, fMemoryManager);
00392     fHost = XMLString::replicate(toCopy.fHost, fMemoryManager);
00393     fPort = toCopy.fPort;
00394     fRegAuth = XMLString::replicate(toCopy.fRegAuth, fMemoryManager);
00395     fPath = XMLString::replicate(toCopy.fPath, fMemoryManager);
00396     fQueryString = XMLString::replicate(toCopy.fQueryString, fMemoryManager);
00397     fFragment = XMLString::replicate(toCopy.fFragment, fMemoryManager);
00398 }
00399 
00400 void XMLUri::initialize(const XMLUri* const baseURI
00401                       , const XMLCh*  const uriSpec)
00402 {
00403 
00404     // get a trimmed version of uriSpec
00405     // uriSpec will NO LONGER be used in this function.
00406     //
00407     XMLCh* trimmedUriSpec = XMLString::replicate(uriSpec, fMemoryManager);
00408     XMLString::trim(trimmedUriSpec);
00409     ArrayJanitor<XMLCh> janName(trimmedUriSpec, fMemoryManager);
00410     XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec);
00411 
00412     if ( !baseURI &&
00413         (!trimmedUriSpec || trimmedUriSpecLen == 0))
00414     {
00415         ThrowXMLwithMemMgr1(MalformedURLException
00416                , XMLExcepts::XMLNUM_URI_Component_Empty
00417                , errMsg_PARAMS
00418                , fMemoryManager);
00419     }
00420 
00421         // just make a copy of the base if spec is empty
00422         if (!trimmedUriSpec || trimmedUriSpecLen == 0)
00423     {
00424         initialize(*baseURI);
00425         return;
00426         }
00427 
00428         XMLSize_t index = 0;
00429         bool foundScheme = false;
00430 
00431         // Check for scheme, which must be before `/', '?' or '#'.
00432         int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon);
00433         int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash);
00434         int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion);
00435         int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound);
00436 
00437         if ((colonIdx <= 0) ||
00438             (colonIdx > slashIdx && slashIdx != -1) ||
00439             (colonIdx > queryIdx && queryIdx != -1) ||
00440             (colonIdx > fragmentIdx && fragmentIdx != -1))
00441         {
00442             // A standalone base is a valid URI according to spec
00443             if ( colonIdx == 0 || (!baseURI && fragmentIdx != 0) )
00444             {
00445                 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme, fMemoryManager);
00446             }
00447         }
00448         else
00449         {
00450             foundScheme = true;
00451             initializeScheme(trimmedUriSpec);
00452             index = XMLString::stringLen(fScheme)+1;
00453         }
00454 
00455     // It's an error if we stop here
00456     if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound)))
00457     {
00458         ThrowXMLwithMemMgr1(MalformedURLException
00459                 , XMLExcepts::XMLNUM_URI_Component_Empty
00460                 , errMsg_PATH
00461                 , fMemoryManager);
00462     }
00463 
00464         // two slashes means generic URI syntax, so we get the authority
00465     XMLCh* authUriSpec = (XMLCh*) fMemoryManager->allocate
00466     (
00467         (trimmedUriSpecLen+1) * sizeof(XMLCh)
00468     );//new XMLCh[trimmedUriSpecLen+1];
00469     ArrayJanitor<XMLCh> authName(authUriSpec, fMemoryManager);
00470     XMLString::subString(authUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager);
00471 
00472     if (((index+1) < trimmedUriSpecLen) &&
00473         XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
00474     {
00475         index += 2;
00476         XMLSize_t startPos = index;
00477 
00478         // get authority - everything up to path, query or fragment
00479         XMLCh testChar;
00480         while (index < trimmedUriSpecLen)
00481         {
00482             testChar = trimmedUriSpec[index];
00483             if (testChar == chForwardSlash ||
00484                 testChar == chQuestion     ||
00485                 testChar == chPound         )
00486             {
00487                 break;
00488             }
00489 
00490             index++;
00491         }
00492 
00493         // if we found authority, parse it out, otherwise we set the
00494         // host to empty string
00495         if (index > startPos)
00496         {
00497             XMLString::subString(authUriSpec, trimmedUriSpec, startPos, index, fMemoryManager);
00498             initializeAuthority(authUriSpec);
00499         }
00500         else
00501         {
00502             //fHost = 0;
00503             setHost(XMLUni::fgZeroLenString);
00504         }
00505     }
00506 
00507     // we need to check if index has exceed the lenght or not
00508     if (index >= trimmedUriSpecLen)
00509         return;
00510 
00511     XMLCh* pathUriSpec = (XMLCh*) fMemoryManager->allocate
00512     (
00513         (trimmedUriSpecLen+1) * sizeof(XMLCh)
00514     );//new XMLCh[trimmedUriSpecLen+1];
00515     ArrayJanitor<XMLCh> pathUriSpecName(pathUriSpec, fMemoryManager);
00516     XMLString::subString(pathUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager);
00517 
00518         initializePath(pathUriSpec);
00519 
00520         // Resolve relative URI to base URI - see RFC 2396 Section 5.2
00521         // In some cases, it might make more sense to throw an exception
00522         // (when scheme is specified is the string spec and the base URI
00523         // is also specified, for example), but we're just following the
00524         // RFC specifications
00525         if ( baseURI )
00526     {
00527         // check to see if this is the current doc - RFC 2396 5.2 #2
00528         // note that this is slightly different from the RFC spec in that
00529         // we don't include the check for query string being null
00530         // - this handles cases where the urispec is just a query
00531         // string or a fragment (e.g. "?y" or "#s") -
00532         // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
00533         // identified this as a bug in the RFC
00534         if ((!fPath || !*fPath) &&
00535             fScheme == 0 &&
00536             fHost == 0 && fRegAuth == 0)
00537         {
00538             fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager);
00539             fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
00540             fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager);
00541             fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager);
00542             fPort = baseURI->getPort();
00543             fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager);
00544             fMemoryManager->deallocate(fPath);//delete [] fPath;
00545             fPath = XMLString::replicate(baseURI->getPath(), fMemoryManager);
00546 
00547             if ( !fQueryString )
00548             {
00549                 fQueryString = XMLString::replicate(baseURI->getQueryString(), fMemoryManager);
00550             }
00551             return;
00552         }
00553 
00554         // check for scheme - RFC 2396 5.2 #3
00555         // if we found a scheme, it means absolute URI, so we're done
00556         if (fScheme == 0)
00557         {
00558             fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager);
00559         }
00560         else
00561         {
00562             return;
00563         }
00564 
00565         // check for authority - RFC 2396 5.2 #4
00566         // if we found a host, then we've got a network path, so we're done
00567         if (fHost == 0 && fRegAuth == 0)
00568         {
00569             fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
00570             fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager);
00571             fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager);
00572             fPort = baseURI->getPort();
00573             fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager);
00574         }
00575         else
00576         {
00577             return;
00578         }
00579 
00580         // check for absolute path - RFC 2396 5.2 #5
00581         if ((fPath && *fPath) &&
00582             XMLString::startsWith(fPath, SINGLE_SLASH))
00583         {
00584             return;
00585         }
00586 
00587         // if we get to this point, we need to resolve relative path
00588         // RFC 2396 5.2 #6
00589 
00590         XMLCh* basePath = XMLString::replicate(baseURI->getPath(), fMemoryManager);
00591         ArrayJanitor<XMLCh> basePathName(basePath, fMemoryManager);
00592 
00593         XMLSize_t bufLen = trimmedUriSpecLen+XMLString::stringLen(fPath)+XMLString::stringLen(basePath)+1;
00594         XMLCh* path = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
00595         ArrayJanitor<XMLCh> pathName(path, fMemoryManager);
00596         path[0] = 0;
00597 
00598         XMLCh* tmp1 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
00599         ArrayJanitor<XMLCh> tmp1Name(tmp1, fMemoryManager);
00600         XMLCh* tmp2 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen];
00601         ArrayJanitor<XMLCh> tmp2Name(tmp2, fMemoryManager);
00602 
00603         // 6a - get all but the last segment of the base URI path
00604         if (basePath)
00605         {
00606             int lastSlash = XMLString::lastIndexOf(basePath, chForwardSlash);
00607             if (lastSlash != -1)
00608             {
00609                 XMLString::subString(path, basePath, 0, lastSlash+1, fMemoryManager);
00610             }
00611         }
00612 
00613         // 6b - append the relative URI path
00614         XMLString::catString(path, fPath);
00615 
00616         // 6c - remove all "./" where "." is a complete path segment
00617         int iIndex = -1;
00618         while ((iIndex = XMLString::patternMatch(path, SLASH_DOT_SLASH)) != -1)
00619         {
00620             XMLString::subString(tmp1, path, 0, iIndex, fMemoryManager);
00621             XMLString::subString(tmp2, path, iIndex+2, XMLString::stringLen(path), fMemoryManager);
00622 
00623             path[0] = 0;
00624             XMLString::catString(path, tmp1);
00625             XMLString::catString(path, tmp2);
00626         }
00627 
00628         // 6d - remove "." if path ends with "." as a complete path segment
00629         if (XMLString::endsWith(path, SLASH_DOT))
00630         {
00631             path[XMLString::stringLen(path) - 1] = chNull;
00632         }
00633 
00634         // 6e - remove all "<segment>/../" where "<segment>" is a complete
00635         // path segment not equal to ".."
00636         iIndex = -1;
00637         int segIndex = -1;
00638         int offset = 1;
00639 
00640         while ((iIndex = XMLString::patternMatch(&(path[offset]), SLASH_DOTDOT_SLASH)) != -1)
00641         {
00642                         // Undo offset
00643                         iIndex += offset;
00644 
00645                         // Find start of <segment> within substring ending at found point.
00646                         XMLString::subString(tmp1, path, 0, iIndex-1, fMemoryManager);
00647                         segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash);
00648 
00649                         // Ensure <segment> exists and != ".."
00650             if (segIndex != -1                &&
00651                 (path[segIndex+1] != chPeriod ||
00652                  path[segIndex+2] != chPeriod ||
00653                                  segIndex + 3 != iIndex))
00654             {
00655 
00656                 XMLString::subString(tmp1, path, 0, segIndex, fMemoryManager);
00657                 XMLString::subString(tmp2, path, iIndex+3, XMLString::stringLen(path), fMemoryManager);
00658 
00659                 path[0] = 0;
00660                 XMLString::catString(path, tmp1);
00661                 XMLString::catString(path, tmp2);
00662 
00663                 offset = (segIndex == 0 ? 1 : segIndex);
00664             }
00665             else
00666             {
00667                 offset += 4;
00668             }
00669         }// while
00670 
00671         // 6f - remove ending "<segment>/.." where "<segment>" is a
00672         // complete path segment
00673         if (XMLString::endsWith(path, SLASH_DOTDOT))
00674         {
00675                         // Find start of <segment> within substring ending at found point.
00676             index = XMLString::stringLen(path) - 3;
00677                         XMLString::subString(tmp1, path, 0, index-1, fMemoryManager);
00678                         segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash);
00679 
00680             if (segIndex != -1                &&
00681                 (path[segIndex+1] != chPeriod ||
00682                  path[segIndex+2] != chPeriod ||
00683                                  segIndex + 3 != (int)index))
00684             {
00685                 path[segIndex+1] = chNull;
00686             }
00687         }
00688 
00689         if (getPath())
00690             fMemoryManager->deallocate(fPath);//delete [] fPath;
00691 
00692         fPath = XMLString::replicate(path, fMemoryManager);
00693 
00694     }
00695 }
00696 
00697 // ---------------------------------------------------------------------------
00698 //  Components initialization
00699 // ---------------------------------------------------------------------------
00700 
00701 //
00702 // authority     = server | reg_name
00703 // server        = [ [ userinfo "@" ] hostport ]
00704 // hostport      = host [ ":" port ]
00705 //
00706 // reg_name      = 1*( unreserved | escaped | "$" | "," |
00707 //                    ";" | ":" | "@" | "&" | "=" | "+" )
00708 //
00709 // userinfo      = *( unreserved | escaped |
00710 //                 ";" | ":" | "&" | "=" | "+" | "$" | "," )
00711 //
00712 
00713 void XMLUri::initializeAuthority(const XMLCh* const uriSpec)
00714 {
00715 
00716     int index = 0;
00717     XMLSize_t start = 0;
00718     const XMLSize_t end = XMLString::stringLen(uriSpec);
00719 
00720     //
00721     // server = [ [ userinfo "@" ] hostport ]
00722     // userinfo is everything up @,
00723     //
00724     XMLCh* userinfo = (XMLCh*) fMemoryManager->allocate
00725     (
00726         (end+1) * sizeof(XMLCh)
00727     );//new XMLCh[end+1];
00728     ArrayJanitor<XMLCh> userName(userinfo, fMemoryManager);
00729     index = XMLString::indexOf(&(uriSpec[start]), chAt);
00730 
00731     if ( index != -1)
00732     {
00733         XMLString::subString(userinfo, &(uriSpec[start]), 0, index, fMemoryManager);
00734         index++; // skip the @
00735         start += index;
00736     }
00737     else
00738     {
00739         userinfo = 0;
00740     }
00741 
00742     //
00743     // hostport = host [ ":" port ]
00744     // host is everything up to ':', or up to
00745     // and including ']' if followed by ':'.
00746     //
00747     XMLCh* host = (XMLCh*) fMemoryManager->allocate
00748     (
00749         (end+1) * sizeof(XMLCh)
00750     );//new XMLCh[end+1];
00751     ArrayJanitor<XMLCh> hostName(host, fMemoryManager);
00752 
00753     // Search for port boundary.
00754     if (start < end && uriSpec[start] == chOpenSquare)
00755     {
00756         index = XMLString::indexOf(&(uriSpec[start]), chCloseSquare);
00757         if (index != -1)
00758         {
00759             // skip the ']'
00760             index = ((start + index + 1) < end
00761               && uriSpec[start + index + 1] == chColon) ? index+1 : -1;
00762         }
00763     }
00764     else
00765     {
00766         index = XMLString::indexOf(&(uriSpec[start]), chColon);
00767     }
00768 
00769     if ( index != -1 )
00770     {
00771         XMLString::subString(host, &(uriSpec[start]), 0, index, fMemoryManager);
00772         index++;  // skip the :
00773         start +=index;
00774     }
00775     else
00776     {
00777         XMLString::subString(host, &(uriSpec[start]), 0, end-start, fMemoryManager);
00778         start = end;
00779     }
00780 
00781     // port is everything after ":"
00782 
00783     XMLCh* portStr = (XMLCh*) fMemoryManager->allocate
00784     (
00785         (end+1) * sizeof(XMLCh)
00786     );//new XMLCh[end+1];
00787     ArrayJanitor<XMLCh> portName(portStr, fMemoryManager);
00788     int port = -1;
00789 
00790     if ((host && *host) &&   // non empty host
00791         (index != -1)                    &&   // ":" found
00792         (start < end)                     )   // ":" is not the last
00793     {
00794         XMLString::subString(portStr, &(uriSpec[start]), 0, end-start, fMemoryManager);
00795 
00796         if (portStr && *portStr)
00797         {
00798             port = XMLString::parseInt(portStr, fMemoryManager);
00799         }
00800     } // if > 0
00801 
00802     // Check if we have server based authority.
00803     if (isValidServerBasedAuthority(host, port, userinfo, fMemoryManager))
00804     {
00805         if (fHost)
00806             fMemoryManager->deallocate(fHost);//delete [] fHost;
00807 
00808         if (fUserInfo)
00809             fMemoryManager->deallocate(fUserInfo);//delete[] fUserInfo;
00810 
00811         fHost = XMLString::replicate(host, fMemoryManager);
00812         fPort = port;
00813         fUserInfo = XMLString::replicate(userinfo, fMemoryManager);
00814 
00815         return;
00816     }
00817     // This must be registry based authority or the URI is malformed.
00818     setRegBasedAuthority(uriSpec);
00819 }
00820 
00821 // scheme = alpha *( alpha | digit | "+" | "-" | "." )
00822 void XMLUri::initializeScheme(const XMLCh* const uriSpec)
00823 {
00824     const XMLCh* tmpPtr = XMLString::findAny(uriSpec, SCHEME_SEPARATORS);
00825 
00826     if ( !tmpPtr )
00827     {
00828         ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme, fMemoryManager);
00829     }
00830         else
00831     {
00832         XMLCh* scheme = (XMLCh*) fMemoryManager->allocate
00833         (
00834             (XMLString::stringLen(uriSpec) + 1) * sizeof(XMLCh)
00835         );//new XMLCh[XMLString::stringLen(uriSpec)+1];
00836         ArrayJanitor<XMLCh> tmpName(scheme, fMemoryManager);
00837         XMLString::subString(scheme, uriSpec, 0, (tmpPtr - uriSpec), fMemoryManager);
00838         setScheme(scheme);
00839         }
00840 
00841 }
00842 
00843 void XMLUri::initializePath(const XMLCh* const uriSpec)
00844 {
00845     if ( !uriSpec )
00846     {
00847         ThrowXMLwithMemMgr1(MalformedURLException
00848                 , XMLExcepts::XMLNUM_URI_Component_Empty
00849                 , errMsg_PATH
00850                 , fMemoryManager);
00851     }
00852 
00853     XMLSize_t index = 0;
00854     XMLSize_t start = 0;
00855     XMLSize_t end = XMLString::stringLen(uriSpec);
00856     XMLCh testChar = 0;
00857 
00858     // path - everything up to query string or fragment
00859     if (start < end)
00860     {
00861         // RFC 2732 only allows '[' and ']' to appear in the opaque part.
00862         if (!getScheme() || uriSpec[start] == chForwardSlash)
00863         {
00864             // Scan path.
00865             // abs_path = "/"  path_segments
00866             // rel_path = rel_segment [ abs_path ]
00867             while (index < end)
00868             {
00869                 testChar = uriSpec[index];
00870                 if (testChar == chQuestion || testChar == chPound)
00871                 {
00872                     break;
00873                 }
00874 
00875                 // check for valid escape sequence
00876                 if (testChar == chPercent)
00877                 {
00878                     if (index+2 >= end ||
00879                         !XMLString::isHex(uriSpec[index+1]) ||
00880                         !XMLString::isHex(uriSpec[index+2]))
00881                     {
00882                         XMLCh value1[BUF_LEN+1];
00883                         XMLString::moveChars(value1, &(uriSpec[index]), 3);
00884                         value1[3] = chNull;
00885                         ThrowXMLwithMemMgr2(MalformedURLException
00886                                 , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
00887                                 , errMsg_PATH
00888                                 , value1
00889                                 , fMemoryManager);
00890                     }
00891                 }
00892                 else if (!isUnreservedCharacter(testChar) &&
00893                          !isPathCharacter(testChar))
00894                 {
00895                     XMLCh value1[BUF_LEN+1];
00896                     value1[0] = testChar;
00897                     value1[1] = chNull;
00898                     ThrowXMLwithMemMgr2(MalformedURLException
00899                             , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
00900                             , errMsg_PATH
00901                             , value1
00902                             , fMemoryManager);
00903                 }
00904 
00905                 index++;
00906             }//while (index < end)
00907         }
00908         else
00909         {
00910             // Scan opaque part.
00911             // opaque_part = uric_no_slash *uric
00912             while (index < end)
00913             {
00914                 testChar = uriSpec[index];
00915                 if (testChar == chQuestion || testChar == chPound)
00916                 {
00917                     break;
00918                 }
00919 
00920                 // check for valid escape sequence
00921                 if (testChar == chPercent)
00922                 {
00923                     if (index+2 >= end ||
00924                         !XMLString::isHex(uriSpec[index+1]) ||
00925                         !XMLString::isHex(uriSpec[index+2]))
00926                     {
00927                         XMLCh value1[BUF_LEN+1];
00928                         XMLString::moveChars(value1, &(uriSpec[index]), 3);
00929                         value1[3] = chNull;
00930                         ThrowXMLwithMemMgr2(MalformedURLException
00931                                 , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
00932                                 , errMsg_PATH
00933                                 , value1
00934                                 , fMemoryManager);
00935                     }
00936                 }
00937                 // If the scheme specific part is opaque, it can contain '['
00938                 // and ']'. uric_no_slash wasn't modified by RFC 2732, which
00939                 // I've interpreted as an error in the spec, since the
00940                 // production should be equivalent to (uric - '/'), and uric
00941                 // contains '[' and ']'.
00942                 else if (!isReservedOrUnreservedCharacter(testChar))
00943                 {
00944                     XMLCh value1[BUF_LEN+1];
00945                     value1[0] = testChar;
00946                     value1[1] = chNull;
00947                     ThrowXMLwithMemMgr2(MalformedURLException
00948                             , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
00949                             , errMsg_PATH
00950                             , value1
00951                             , fMemoryManager);
00952                 }
00953 
00954                 index++;
00955             }//while (index < end)
00956         }
00957     } //if (start < end)
00958 
00959     if (getPath())
00960     {
00961         fMemoryManager->deallocate(fPath);//delete [] fPath;
00962     }
00963 
00964     fPath = (XMLCh*) fMemoryManager->allocate((index+1) * sizeof(XMLCh));//new XMLCh[index+1];
00965     XMLString::subString(fPath, uriSpec, start, index, fMemoryManager);
00966 
00967     // query - starts with ? and up to fragment or end
00968     if (testChar == chQuestion)
00969     {
00970         index++;
00971         start = index;
00972         while (index < end)
00973         {
00974             testChar = uriSpec[index];
00975             if (testChar == chPound)
00976             {
00977                 break;
00978             }
00979 
00980             if (testChar == chPercent)
00981             {
00982                 if (index+2 >= end ||
00983                     !XMLString::isHex(uriSpec[index+1]) ||
00984                     !XMLString::isHex(uriSpec[index+2]))
00985                 {
00986                     XMLCh value1[BUF_LEN+1];
00987                     XMLString::moveChars(value1, &(uriSpec[index]), 3);
00988                     value1[3] = chNull;
00989                     ThrowXMLwithMemMgr2(MalformedURLException
00990                             , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
00991                             , errMsg_QUERY
00992                             , value1
00993                             , fMemoryManager);
00994                 }
00995             }
00996             else if (!isReservedOrUnreservedCharacter(testChar))
00997             {
00998                 XMLCh value1[BUF_LEN+1];
00999                 value1[0] = testChar;
01000                 value1[1] = chNull;
01001                 ThrowXMLwithMemMgr2(MalformedURLException
01002                         , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
01003                         , errMsg_QUERY
01004                         , value1
01005                         , fMemoryManager);
01006             }
01007             index++;
01008         }
01009 
01010         if (getQueryString())
01011         {
01012             fMemoryManager->deallocate(fQueryString);//delete [] fQueryString;
01013         }
01014 
01015         fQueryString = (XMLCh*) fMemoryManager->allocate
01016         (
01017             (index - start + 1) * sizeof(XMLCh)
01018         );//new XMLCh[index - start + 1];
01019         XMLString::subString(fQueryString, uriSpec, start, index, fMemoryManager);
01020     }
01021 
01022     // fragment - starts with #
01023     if (testChar == chPound)
01024     {
01025         index++;
01026         start = index;
01027         while (index < end)
01028         {
01029             testChar = uriSpec[index];
01030 
01031             if (testChar == chPercent)
01032             {
01033                 if (index+2 >= end ||
01034                     !XMLString::isHex(uriSpec[index+1]) ||
01035                     !XMLString::isHex(uriSpec[index+2]))
01036                 {
01037                     XMLCh value1[BUF_LEN+1];
01038                     XMLString::moveChars(value1, &(uriSpec[index]), 3);
01039                     value1[3] = chNull;
01040                     ThrowXMLwithMemMgr2(MalformedURLException
01041                             , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
01042                             , errMsg_FRAGMENT
01043                             , value1
01044                             , fMemoryManager);
01045                 }
01046             }
01047             else if (!isReservedOrUnreservedCharacter(testChar))
01048             {
01049                 XMLCh value1[BUF_LEN+1];
01050                 value1[0] = testChar;
01051                 value1[1] = chNull;
01052                 ThrowXMLwithMemMgr2(MalformedURLException
01053                         , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
01054                         , errMsg_FRAGMENT
01055                         , value1
01056                         , fMemoryManager);
01057             }
01058 
01059             index++;
01060 
01061         }
01062 
01063         if (getFragment())
01064             fMemoryManager->deallocate(fFragment);//delete [] fFragment;
01065 
01066         //make sure that there is something following the '#'
01067         if (index > start)
01068         {
01069             fFragment = (XMLCh*) fMemoryManager->allocate
01070             (
01071                 (index - start + 1) * sizeof(XMLCh)
01072             );//new XMLCh[index - start + 1];
01073             XMLString::subString(fFragment, uriSpec, start, index, fMemoryManager);
01074         }
01075         else
01076         {
01077             // RFC 2396, 4.0. URI Reference
01078             // URI-reference = [absoulteURI | relativeURI] [# fragment]
01079             //
01080             // RFC 2396, 4.1. Fragment Identifier
01081             // fragment = *uric
01082             //
01083             // empty fragment is valid
01084             fFragment = 0;
01085         }
01086     }
01087 
01088 }
01089 
01090 // ---------------------------------------------------------------------------
01091 //  Setter
01092 // ---------------------------------------------------------------------------
01093 void XMLUri::setScheme(const XMLCh* const newScheme)
01094 {
01095     if ( !newScheme )
01096     {
01097         ThrowXMLwithMemMgr1(MalformedURLException
01098                 , XMLExcepts::XMLNUM_URI_Component_Set_Null
01099                 , errMsg_SCHEME
01100                 , fMemoryManager);
01101     }
01102 
01103     if (!isConformantSchemeName(newScheme))
01104     {
01105         ThrowXMLwithMemMgr2(MalformedURLException
01106                 , XMLExcepts::XMLNUM_URI_Component_Not_Conformant
01107                 , errMsg_SCHEME
01108                 , newScheme
01109                 , fMemoryManager);
01110     }
01111 
01112     if (getScheme())
01113     {
01114         fMemoryManager->deallocate(fScheme);//delete [] fScheme;
01115     }
01116 
01117     fScheme = XMLString::replicate(newScheme, fMemoryManager);
01118     XMLString::lowerCase(fScheme);
01119 }
01120 
01121 //
01122 // server = [ [ userinfo "@" ] hostport ]
01123 // hostport = host [":" port]
01124 //
01125 // setUserInfo(), setHost() and setPort() are three closely
01126 // related methods: userinfo and port both depend
01127 // on host.
01128 //
01129 // If host is not present, userinfo must be null and port must be -1.
01130 //
01131 void XMLUri::setUserInfo(const XMLCh* const newUserInfo)
01132 {
01133     if ( newUserInfo &&
01134          !getHost()    )
01135     {
01136         ThrowXMLwithMemMgr2(MalformedURLException
01137                 , XMLExcepts::XMLNUM_URI_NullHost
01138                 , errMsg_USERINFO
01139                 , newUserInfo
01140                 , fMemoryManager);
01141     }
01142 
01143     isConformantUserInfo(newUserInfo, fMemoryManager);
01144 
01145     if (getUserInfo())
01146     {
01147         fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo;
01148     }
01149 
01150     //sometimes we get passed a empty string rather than a null.
01151     //Other procedures rely on it being null
01152     if(newUserInfo && *newUserInfo) {
01153         fUserInfo = XMLString::replicate(newUserInfo, fMemoryManager);
01154     }
01155     else
01156         fUserInfo = 0;
01157 
01158 }
01159 
01160 void XMLUri::setHost(const XMLCh* const newHost)
01161 {
01162     if ( !newHost )
01163     {
01164         if (getHost())
01165             fMemoryManager->deallocate(fHost);//delete [] fHost;
01166 
01167         fHost = 0;
01168         setUserInfo(0);
01169         setPort(-1);
01170 
01171         return;
01172     }
01173 
01174     if ( *newHost && !isWellFormedAddress(newHost, fMemoryManager))
01175     {
01176         ThrowXMLwithMemMgr2(MalformedURLException
01177                 , XMLExcepts::XMLNUM_URI_Component_Not_Conformant
01178                 , errMsg_HOST
01179                 , newHost
01180                 , fMemoryManager);
01181     }
01182 
01183     if (getHost())
01184     {
01185         fMemoryManager->deallocate(fHost);//delete [] fHost;
01186     }
01187 
01188     fHost = XMLString::replicate(newHost, fMemoryManager);
01189     setRegBasedAuthority(0);
01190 }
01191 
01192 void XMLUri::setPort(int newPort)
01193 {
01194     if (newPort >= 0 && newPort <= 65535)
01195     {
01196         if (!getHost())
01197         {
01198             XMLCh value1[BUF_LEN+1];
01199             XMLString::binToText(newPort, value1, BUF_LEN, 10, fMemoryManager);
01200             ThrowXMLwithMemMgr2(MalformedURLException
01201                     , XMLExcepts::XMLNUM_URI_NullHost
01202                     , errMsg_PORT
01203                     , value1
01204                     , fMemoryManager);
01205         }
01206     }
01207     else if (newPort != -1)
01208     {
01209         XMLCh value1[BUF_LEN+1];
01210         XMLString::binToText(newPort, value1, BUF_LEN, 10, fMemoryManager);
01211         ThrowXMLwithMemMgr1(MalformedURLException
01212                 , XMLExcepts::XMLNUM_URI_PortNo_Invalid
01213                 , value1
01214                 , fMemoryManager);
01215     }
01216 
01217     fPort = newPort;
01218 }
01219 
01220 void XMLUri::setRegBasedAuthority(const XMLCh* const newRegAuth)
01221 {
01222     if ( !newRegAuth )
01223     {
01224         if (getRegBasedAuthority())
01225             fMemoryManager->deallocate(fRegAuth);//delete [] fRegAuth;
01226 
01227         fRegAuth = 0;
01228         return;
01229     }
01230     // reg_name = 1*( unreserved | escaped | "$" | "," |
01231     //            ";" | ":" | "@" | "&" | "=" | "+" )
01232     else if ( !*newRegAuth || !isValidRegistryBasedAuthority(newRegAuth) )
01233     {
01234         ThrowXMLwithMemMgr2(MalformedURLException
01235                 , XMLExcepts::XMLNUM_URI_Component_Not_Conformant
01236                 , errMsg_REGNAME
01237                 , newRegAuth
01238                 , fMemoryManager);
01239     }
01240 
01241     if (getRegBasedAuthority())
01242         fMemoryManager->deallocate(fRegAuth);//delete [] fRegAuth;
01243 
01244     fRegAuth = XMLString::replicate(newRegAuth, fMemoryManager);
01245     setHost(0);
01246 }
01247 
01248 //
01249 // setPath(), setQueryString() and setFragment() are three
01250 // closely related methods as well.
01251 //
01252 void XMLUri::setPath(const XMLCh* const newPath)
01253 {
01254     if (!newPath)
01255     {
01256         if (getPath())
01257             fMemoryManager->deallocate(fPath);//delete [] fPath;
01258 
01259         fPath = 0;
01260         setQueryString(0);
01261         setFragment(0);
01262     }
01263     else
01264     {
01265         initializePath(newPath);
01266     }
01267 }
01268 
01269 //
01270 // fragment = *uric
01271 //
01272 void XMLUri::setFragment(const XMLCh* const newFragment)
01273 {
01274         if ( !newFragment )
01275     {
01276         if (getFragment())
01277             fMemoryManager->deallocate(fFragment);//delete [] fFragment;
01278 
01279         fFragment = 0;
01280         }
01281         else if (!isGenericURI())
01282     {
01283         ThrowXMLwithMemMgr2(MalformedURLException
01284                 , XMLExcepts::XMLNUM_URI_Component_for_GenURI_Only
01285                 , errMsg_FRAGMENT
01286                 , newFragment
01287                 , fMemoryManager);
01288         }
01289         else if ( !getPath() )
01290     {
01291         ThrowXMLwithMemMgr2(MalformedURLException
01292                , XMLExcepts::XMLNUM_URI_NullPath
01293                , errMsg_FRAGMENT
01294                , newFragment
01295                , fMemoryManager);
01296         }
01297         else if (!isURIString(newFragment))
01298     {
01299         ThrowXMLwithMemMgr1(MalformedURLException
01300                 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
01301                 , errMsg_FRAGMENT
01302                 , fMemoryManager);
01303         }
01304         else
01305     {
01306         if (getFragment())
01307         {
01308             fMemoryManager->deallocate(fFragment);//delete [] fFragment;
01309         }
01310 
01311         fFragment = XMLString::replicate(newFragment, fMemoryManager);
01312         }
01313 }
01314 
01315 //
01316 // query = *uric
01317 //
01318 void XMLUri::setQueryString(const XMLCh* const newQueryString)
01319 {
01320         if ( !newQueryString )
01321     {
01322         if (getQueryString())
01323             fMemoryManager->deallocate(fQueryString);//delete [] fQueryString;
01324 
01325         fQueryString = 0;
01326         }
01327         else if (!isGenericURI())
01328     {
01329         ThrowXMLwithMemMgr2(MalformedURLException
01330                 , XMLExcepts::XMLNUM_URI_Component_for_GenURI_Only
01331                 , errMsg_QUERY
01332                 , newQueryString
01333                 , fMemoryManager);
01334         }
01335         else if ( !getPath() )
01336     {
01337         ThrowXMLwithMemMgr2(MalformedURLException
01338                 , XMLExcepts::XMLNUM_URI_NullPath
01339                 , errMsg_QUERY
01340                 , newQueryString
01341                 , fMemoryManager);
01342         }
01343         else if (!isURIString(newQueryString))
01344     {
01345         ThrowXMLwithMemMgr2(MalformedURLException
01346                , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
01347                , errMsg_QUERY
01348                , newQueryString
01349                , fMemoryManager);
01350         }
01351         else
01352     {
01353         if (getQueryString())
01354         {
01355             fMemoryManager->deallocate(fQueryString);//delete [] fQueryString;
01356         }
01357 
01358         fQueryString = XMLString::replicate(newQueryString, fMemoryManager);
01359         }
01360 }
01361 
01362 // ---------------------------------------------------------------------------
01363 //  XMLUri: Public, static methods
01364 // ---------------------------------------------------------------------------
01365 
01366 //
01367 //  scheme = alpha *( alpha | digit | "+" | "-" | "." )
01368 //  alphanum = alpha | digit
01369 //
01370 bool XMLUri::isConformantSchemeName(const XMLCh* const scheme)
01371 {
01372         if ( !scheme )
01373         return false;
01374 
01375     const XMLCh* tmpStr = scheme;
01376     if (!XMLString::isAlpha(*tmpStr))     // first: alpha
01377         return false;
01378 
01379     // second onwards: ( alpha | digit | "+" | "-" | "." )
01380     tmpStr++;
01381     while (*tmpStr)
01382     {
01383         if ( !XMLString::isAlphaNum(*tmpStr) &&
01384              (XMLString::indexOf(SCHEME_CHARACTERS, *tmpStr) == -1))
01385             return false;
01386 
01387         tmpStr++;
01388     }
01389 
01390     return true;
01391 }
01392 
01393 //
01394 // userinfo = *( unreserved | escaped |
01395 //              ";" | ":" | "&" | "=" | "+" | "$" | "," )
01396 //
01397 void XMLUri::isConformantUserInfo(const XMLCh* const userInfo
01398                                   , MemoryManager* const manager)
01399 {
01400         if ( !userInfo )
01401         return;
01402 
01403     const XMLCh* tmpStr = userInfo;
01404     while (*tmpStr)
01405     {
01406         if ( isUnreservedCharacter(*tmpStr) ||
01407             (XMLString::indexOf(USERINFO_CHARACTERS, *tmpStr) != -1))
01408         {
01409             tmpStr++;
01410         }
01411         else if (*tmpStr == chPercent)               // '%'
01412         {
01413             if (XMLString::isHex(*(tmpStr+1)) &&     // 1st hex
01414                 XMLString::isHex(*(tmpStr+2))  )     // 2nd hex
01415             {
01416                 tmpStr+=3;
01417             }
01418             else
01419             {
01420                 XMLCh value1[BUF_LEN+1];
01421                 value1[0] = chPercent;
01422                 value1[1] = *(tmpStr+1);
01423                 value1[2] = *(tmpStr+2);
01424                 value1[3] = chNull;
01425 
01426                 ThrowXMLwithMemMgr2(MalformedURLException
01427                         , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence
01428                         , errMsg_USERINFO
01429                         , value1
01430                         , manager);
01431             }
01432         }
01433         else
01434         {
01435             ThrowXMLwithMemMgr2(MalformedURLException
01436                     , XMLExcepts::XMLNUM_URI_Component_Invalid_Char
01437                     , errMsg_USERINFO
01438                     , userInfo
01439                     , manager);
01440         }
01441     } //while
01442 
01443     return;
01444 }
01445 
01446 bool XMLUri::isValidServerBasedAuthority(const XMLCh* const host,
01447                                          const XMLSize_t hostLen,
01448                                          const int port,
01449                                          const XMLCh* const userinfo,
01450                                          const XMLSize_t userLen)
01451 {
01452     // The order is important, do not change
01453     if (!isWellFormedAddress(host, hostLen))
01454         return false;
01455 
01456     // check port number
01457     if ((port > 65535) || (port < 0 && port != -1))
01458         return false;
01459 
01460     // check userinfo
01461     XMLSize_t index = 0;
01462     while (index < userLen)
01463     {
01464         if (isUnreservedCharacter(userinfo[index]) ||
01465             (XMLString::indexOf(USERINFO_CHARACTERS, userinfo[index]) != -1))
01466         {
01467             index++;
01468         }
01469         else if (userinfo[index] == chPercent)               // '%'
01470         {
01471             if (XMLString::isHex(userinfo[index+1]) &&     // 1st hex
01472                 XMLString::isHex(userinfo[index+2])  )     // 2nd hex
01473                 index +=3;
01474             else
01475                 return false;
01476         }
01477         else
01478             return false;
01479     } //while
01480 
01481     return true;
01482 }
01483 
01484 bool XMLUri::isValidServerBasedAuthority(const XMLCh* const host
01485                                          , const int port
01486                                          , const XMLCh* const userinfo
01487                                          , MemoryManager* const manager)
01488 {
01489     // The order is important, do not change
01490     if (!isWellFormedAddress(host, manager))
01491         return false;
01492 
01493     // check port number
01494     if ((port > 65535) || (port < 0 && port != -1))
01495         return false;
01496 
01497     // check userinfo
01498     if (!userinfo)
01499         return true;
01500 
01501     const XMLCh* tmpStr = userinfo;
01502     while (*tmpStr)
01503     {
01504         if ( isUnreservedCharacter(*tmpStr) ||
01505             (XMLString::indexOf(USERINFO_CHARACTERS, *tmpStr) != -1))
01506         {
01507             tmpStr++;
01508         }
01509         else if (*tmpStr == chPercent)               // '%'
01510         {
01511             if (XMLString::isHex(*(tmpStr+1)) &&     // 1st hex
01512                 XMLString::isHex(*(tmpStr+2))  )     // 2nd hex
01513             {
01514                 tmpStr+=3;
01515             }
01516             else
01517                 return false;
01518         }
01519         else
01520             return false;
01521     } //while
01522 
01523     return true;
01524 }
01525 
01526 bool XMLUri::isValidRegistryBasedAuthority(const XMLCh* const authority,
01527                                            const XMLSize_t authLen)
01528 {
01529     // check authority
01530     XMLSize_t index = 0;
01531     while (index < authLen)
01532     {
01533         if (isUnreservedCharacter(authority[index]) ||
01534             (XMLString::indexOf(REG_NAME_CHARACTERS, authority[index]) != -1))
01535         {
01536             index++;
01537         }
01538         else if (authority[index] == chPercent)               // '%'
01539         {
01540             if (XMLString::isHex(authority[index+1]) &&     // 1st hex
01541                 XMLString::isHex(authority[index+2])  )     // 2nd hex
01542                 index +=3;
01543             else
01544                 return false;
01545         }
01546         else
01547             return false;
01548     } //while
01549 
01550     return true;
01551 }
01552 
01553 bool XMLUri::isValidRegistryBasedAuthority(const XMLCh* const authority)
01554 {
01555     // check authority
01556     if (!authority)
01557         return false;
01558 
01559     const XMLCh* tmpStr = authority;
01560     while (*tmpStr)
01561     {
01562         if (isUnreservedCharacter(*tmpStr) ||
01563             (XMLString::indexOf(REG_NAME_CHARACTERS, *tmpStr) != -1))
01564         {
01565             tmpStr++;
01566         }
01567         else if (*tmpStr == chPercent)               // '%'
01568         {
01569             if (XMLString::isHex(*(tmpStr+1)) &&     // 1st hex
01570                 XMLString::isHex(*(tmpStr+2))  )     // 2nd hex
01571             {
01572                 tmpStr+=3;
01573             }
01574             else
01575                 return false;
01576         }
01577         else
01578             return false;
01579     } //while
01580 
01581     return true;
01582 }
01583 
01584 //
01585 // uric     = reserved | unreserved | escaped
01586 // escaped  = "%" hex hex
01587 // hex      = digit | "A" | "B" | "C" | "D" | "E" | "F" |
01588 //                    "a" | "b" | "c" | "d" | "e" | "f"
01589 //
01590 bool XMLUri::isURIString(const XMLCh* const uricString)
01591 {
01592         if (!uricString || !*uricString)
01593         return false;
01594 
01595     const XMLCh* tmpStr = uricString;
01596 
01597     while (*tmpStr)
01598     {
01599         if (isReservedOrUnreservedCharacter(*tmpStr))
01600         {
01601             tmpStr++;
01602         }
01603         else if (*tmpStr == chPercent)               // '%'
01604         {
01605             if (XMLString::isHex(*(tmpStr+1)) &&     // 1st hex
01606                 XMLString::isHex(*(tmpStr+2))  )     // 2nd hex
01607             {
01608                 tmpStr+=3;
01609             }
01610             else
01611             {
01612                 return false;
01613             }
01614         }
01615         else
01616         {
01617             return false;
01618         }
01619     }
01620 
01621     return true;
01622 }
01623 
01624 //
01625 //  host          = hostname | IPv4address
01626 //
01627 //  hostname      = *( domainlabel "." ) toplabel [ "." ]
01628 //  domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
01629 //  toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
01630 //
01631 //  IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
01632 //
01633 bool XMLUri::isWellFormedAddress(const XMLCh* const addrString
01634                                  , MemoryManager* const manager)
01635 {
01636     // Check that we have a non-zero length string.
01637     if (!addrString || !*addrString)
01638         return false;
01639 
01640     // Get address length.
01641     XMLSize_t addrStrLen = XMLString::stringLen(addrString);
01642 
01643     // Check if the host is a valid IPv6reference.
01644     if (*addrString == chOpenSquare)
01645     {
01646         return isWellFormedIPv6Reference(addrString, addrStrLen);
01647     }
01648 
01649     //
01650     // Cannot start with a '.', '-', or end with a '-'.
01651     //
01652     if (*addrString == chPeriod ||
01653         *addrString == chDash ||
01654         addrString[addrStrLen-1] == chDash)
01655         return false;
01656 
01657     // rightmost domain label starting with digit indicates IP address
01658     // since top level domain label can only start with an alpha
01659     // see RFC 2396 Section 3.2.2
01660 
01661     int lastPeriodPos = XMLString::lastIndexOf(addrString, chPeriod);
01662 
01663     // if the string ends with "."
01664     // get the second last "."
01665     if (XMLSize_t(lastPeriodPos + 1) == addrStrLen)
01666     {
01667         XMLCh* tmp2 = (XMLCh*) manager->allocate
01668         (
01669             addrStrLen * sizeof(XMLCh)
01670         );//new XMLCh[addrStrLen];
01671         XMLString::subString(tmp2, addrString, 0, lastPeriodPos, manager);
01672         lastPeriodPos = XMLString::lastIndexOf(tmp2, chPeriod);
01673         manager->deallocate(tmp2);//delete [] tmp2;
01674 
01675         if ( XMLString::isDigit(addrString[lastPeriodPos + 1]))
01676                         return false;
01677     }
01678 
01679     if (XMLString::isDigit(addrString[lastPeriodPos + 1]))
01680     {
01681         return isWellFormedIPv4Address(addrString, addrStrLen);
01682     } // end of IPv4address
01683     else
01684     {
01685         //
01686         //  hostname      = *( domainlabel "." ) toplabel [ "." ]
01687         //  domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
01688         //  toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
01689 
01690         // RFC 2396 states that hostnames take the form described in
01691         // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
01692         // to RFC 1034, hostnames are limited to 255 characters.
01693         if (addrStrLen > 255) {
01694             return false;
01695         }
01696 
01697         unsigned int labelCharCount = 0;
01698 
01699         // domain labels can contain alphanumerics and '-"
01700         // but must start and end with an alphanumeric
01701         for (XMLSize_t i = 0; i < addrStrLen; i++)
01702         {
01703             if (addrString[i] == chPeriod)
01704             {
01705               if (((i > 0)  &&
01706                    (!XMLString::isAlphaNum(addrString[i-1]))) ||
01707                   ((i + 1 < addrStrLen) &&
01708                    (!XMLString::isAlphaNum(addrString[i+1])))  )
01709                 {
01710                     return false;
01711                 }
01712                 labelCharCount = 0;
01713             }
01714             else if (!XMLString::isAlphaNum(addrString[i]) &&
01715                       addrString[i] != chDash)
01716             {
01717                 return false;
01718             }
01719             // RFC 1034: Labels must be 63 characters or less.
01720             else if (++labelCharCount > 63) {
01721                 return false;
01722             }
01723         } //for
01724     }
01725 
01726     return true;
01727 }
01728 
01729 //
01730 //  RFC 2732 amended RFC 2396 by replacing the definition
01731 //  of IPv4address with the one defined by RFC 2373.
01732 //
01733 //  IPv4address   = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
01734 //
01735 bool XMLUri::isWellFormedIPv4Address(const XMLCh* const addr, const XMLSize_t length)
01736 {
01737     int numDots = 0;
01738     int numDigits = 0;
01739 
01740     // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
01741     //
01742     // make sure that
01743     // 1) we see only digits and dot separators,
01744     // 2) that any dot separator is preceded and followed by a digit
01745     // 3) that we find 3 dots
01746     // 4) that each segment contains 1 to 3 digits.
01747     // 5) that each segment is not greater than 255.
01748     for (XMLSize_t i = 0; i < length; ++i)
01749     {
01750         if (addr[i] == chPeriod)
01751         {
01752             if ((i == 0) ||
01753                 (i+1 == length) ||
01754                 !XMLString::isDigit(addr[i+1]))
01755             {
01756                return false;
01757             }
01758             numDigits = 0;
01759             if (++numDots > 3)
01760                 return false;
01761         }
01762         else if (!XMLString::isDigit(addr[i]))
01763         {
01764             return false;
01765         }
01766         // Check that that there are no more than three digits
01767         // in this segment.
01768         else if (++numDigits > 3)
01769         {
01770             return false;
01771         }
01772         // Check that this segment is not greater than 255.
01773         else if (numDigits == 3)
01774         {
01775             XMLCh first = addr[i-2];
01776             XMLCh second = addr[i-1];
01777             XMLCh last = addr[i];
01778             if (!(first < chDigit_2 ||
01779                  (first == chDigit_2 &&
01780                  (second < chDigit_5 ||
01781                  (second == chDigit_5 && last <= chDigit_5)))))
01782             {
01783                 return false;
01784             }
01785         }
01786     } //for
01787     return (numDots == 3);
01788 }
01789 
01790 //
01791 //  IPv6reference = "[" IPv6address "]"
01792 //
01793 bool XMLUri::isWellFormedIPv6Reference(const XMLCh* const addr, const XMLSize_t length)
01794 {
01795     XMLSize_t end = length-1;
01796 
01797     // Check if string is a potential match for IPv6reference.
01798     if (!(length > 2 && addr[0] == chOpenSquare && addr[end] == chCloseSquare))
01799     {
01800         return false;
01801     }
01802 
01803     // Counter for the number of 16-bit sections read in the address.
01804     int counter = 0;
01805 
01806     // Scan hex sequence before possible '::' or IPv4 address.
01807     int iIndex = scanHexSequence(addr, 1, end, counter);
01808     if (iIndex == -1)
01809         return false;
01810 
01811     XMLSize_t index=(XMLSize_t)iIndex;
01812     // Address must contain 128-bits of information.
01813     if (index == end)
01814     {
01815        return (counter == 8);
01816     }
01817 
01818     if (index+1 < end && addr[index] == chColon)
01819     {
01820         if (addr[index+1] == chColon)
01821         {
01822             // '::' represents at least one 16-bit group of zeros.
01823             if (++counter > 8)
01824             {
01825                 return false;
01826             }
01827             index += 2;
01828             // Trailing zeros will fill out the rest of the address.
01829             if (index == end)
01830             {
01831                 return true;
01832             }
01833         }
01834         // If the second character wasn't ':', in order to be valid,
01835         // the remainder of the string must match IPv4Address,
01836         // and we must have read exactly 6 16-bit groups.
01837         else
01838         {
01839             if (counter == 6)
01840                 return isWellFormedIPv4Address(addr+index+1, end-index-1);
01841             else
01842                 return false;
01843         }
01844     }
01845     else
01846     {
01847        return false;
01848     }
01849 
01850     // 3. Scan hex sequence after '::'.
01851     int prevCount = counter;
01852     iIndex = scanHexSequence(addr, index, end, counter);
01853     if (iIndex == -1)
01854         return false;
01855 
01856     index=(XMLSize_t)iIndex;
01857     // If this is the end of the address then
01858     // we've got 128-bits of information.
01859     if (index == end)
01860     {
01861         return true;
01862     }
01863 
01864     // The address ends in an IPv4 address, or it is invalid.
01865     // scanHexSequence has already made sure that we have the right number of bits.
01866     XMLSize_t shiftCount = (counter > prevCount) ? index+1 : index;
01867     return isWellFormedIPv4Address(addr + shiftCount, end - shiftCount);
01868 }
01869 
01870 //
01871 //  For use with isWellFormedIPv6Reference only.
01872 //
01873 int XMLUri::scanHexSequence (const XMLCh* const addr, XMLSize_t index, XMLSize_t end, int& counter)
01874 {
01875     XMLCh testChar = chNull;
01876     int numDigits = 0;
01877     XMLSize_t start = index;
01878 
01879     // Trying to match the following productions:
01880     // hexseq = hex4 *( ":" hex4)
01881     // hex4   = 1*4HEXDIG
01882     for (; index < end; ++index)
01883     {
01884         testChar = addr[index];
01885         if (testChar == chColon)
01886         {
01887             // IPv6 addresses are 128-bit, so there can be at most eight sections.
01888             if (numDigits > 0 && ++counter > 8)
01889             {
01890                 return -1;
01891             }
01892             // This could be '::'.
01893             if (numDigits == 0 || ((index+1 < end) && addr[index+1] == chColon))
01894             {
01895                 return (int)index;
01896             }
01897             numDigits = 0;
01898         }
01899         // This might be invalid or an IPv4address. If it's potentially an IPv4address,
01900         // backup to just after the last valid character that matches hexseq.
01901         else if (!XMLString::isHex(testChar))
01902         {
01903             if (testChar == chPeriod && numDigits < 4 && numDigits > 0 && counter <= 6)
01904             {
01905                 int back = (int)index - numDigits - 1;
01906                 return (back >= (int)start) ? back : (int)start;
01907             }
01908             return -1;
01909         }
01910         // There can be at most 4 hex digits per group.
01911         else if (++numDigits > 4)
01912         {
01913             return -1;
01914         }
01915     }
01916     return (numDigits > 0 && ++counter <= 8) ? (int)end : -1;
01917 }
01918 
01919 bool XMLUri::isGenericURI()
01920 {
01921     return (getHost() != 0);
01922 }
01923 
01924 
01925 //
01926 //  This method will take the broken out parts of the URI and build up the
01927 //  full text. We don't do this unless someone asks us to, since its often
01928 //  never required.
01929 //
01930 void XMLUri::buildFullText()
01931 {
01932     // Calculate the worst case size of the buffer required
01933     XMLSize_t bufSize = XMLString::stringLen(fScheme) + 1
01934                            + XMLString::stringLen(fFragment) + 1
01935                            + XMLString::stringLen(fHost ? fHost : fRegAuth) + 2
01936                            + XMLString::stringLen(fPath)
01937                            + XMLString::stringLen(fQueryString) + 1
01938                            + XMLString::stringLen(fUserInfo) + 1
01939                            + 32;
01940 
01941     // Clean up the existing buffer and allocate another
01942     fMemoryManager->deallocate(fURIText);//delete [] fURIText;
01943     fURIText = (XMLCh*) fMemoryManager->allocate(bufSize * sizeof(XMLCh));//new XMLCh[bufSize];
01944     *fURIText = 0;
01945 
01946     XMLCh* outPtr = fURIText;
01947     if (fScheme != 0)
01948     {
01949         XMLString::catString(fURIText, getScheme());
01950         outPtr += XMLString::stringLen(fURIText);
01951         *outPtr++ = chColon;
01952     }
01953 
01954     // Authority
01955     if (fHost || fRegAuth)
01956     {
01957         *outPtr++ = chForwardSlash;
01958         *outPtr++ = chForwardSlash;
01959 
01960         // Server based authority.
01961         if (fHost)
01962         {
01963             if (fUserInfo)
01964             {
01965                 XMLString::copyString(outPtr, fUserInfo);
01966                 outPtr += XMLString::stringLen(fUserInfo);
01967                 *outPtr++ = chAt;
01968             }
01969 
01970             XMLString::copyString(outPtr, fHost);
01971             outPtr += XMLString::stringLen(fHost);
01972 
01973             //
01974             //  If the port is -1, then we don't put it in. Else we need
01975             //  to because it was explicitly provided.
01976             //
01977             if (fPort != -1)
01978             {
01979                 *outPtr++ = chColon;
01980 
01981                 XMLCh tmpBuf[17];
01982                 XMLString::binToText(fPort, tmpBuf, 16, 10, fMemoryManager);
01983                 XMLString::copyString(outPtr, tmpBuf);
01984                 outPtr += XMLString::stringLen(tmpBuf);
01985             }
01986         }
01987         // Registry based authority.
01988         else {
01989             XMLString::copyString(outPtr, fRegAuth);
01990             outPtr += XMLString::stringLen(fRegAuth);
01991         }
01992     }
01993 
01994     if (fPath)
01995     {
01996         XMLString::copyString(outPtr, fPath);
01997         outPtr += XMLString::stringLen(fPath);
01998     }
01999 
02000     if (fQueryString)
02001     {
02002         *outPtr++ = chQuestion;
02003         XMLString::copyString(outPtr, fQueryString);
02004         outPtr += XMLString::stringLen(fQueryString);
02005     }
02006 
02007     if (fFragment)
02008     {
02009         *outPtr++ = chPound;
02010         XMLString::copyString(outPtr, fFragment);
02011         outPtr += XMLString::stringLen(fFragment);
02012     }
02013 
02014     // Cap it off in case the last op was not a string copy
02015     *outPtr = 0;
02016 }
02017 
02018 // NOTE: no check for NULL value of uriStr (caller responsibility)
02019 bool XMLUri::isValidURI(const XMLUri* const baseURI
02020                        , const XMLCh* const uriStr
02021                        , bool bAllowSpaces/*=false*/)
02022 {
02023     // get a trimmed version of uriStr
02024     // uriStr will NO LONGER be used in this function.
02025     const XMLCh* trimmedUriSpec = uriStr;
02026 
02027     while (XMLChar1_0::isWhitespace(*trimmedUriSpec))
02028         trimmedUriSpec++;
02029 
02030     XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec);
02031 
02032     while (trimmedUriSpecLen) {
02033         if (XMLChar1_0::isWhitespace(trimmedUriSpec[trimmedUriSpecLen-1]))
02034             trimmedUriSpecLen--;
02035         else
02036             break;
02037     }
02038 
02039     if (trimmedUriSpecLen == 0)
02040     {
02041         if (!baseURI)
02042             return false;
02043         else
02044             return true;
02045     }
02046 
02047     XMLSize_t index = 0;
02048     bool foundScheme = false;
02049 
02050     // Check for scheme, which must be before `/', '?' or '#'.
02051     int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon);
02052     int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash);
02053     int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion);
02054     int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound);
02055 
02056     if ((colonIdx <= 0) ||
02057         (colonIdx > slashIdx && slashIdx != -1) ||
02058         (colonIdx > queryIdx && queryIdx != -1) ||
02059         (colonIdx > fragmentIdx && fragmentIdx != -1))
02060     {
02061         // A standalone base is a valid URI according to spec
02062         if (colonIdx == 0 || (!baseURI && fragmentIdx != 0))
02063             return false;
02064     }
02065     else
02066     {
02067         if (!processScheme(trimmedUriSpec, index))
02068             return false;
02069         foundScheme = true;
02070         ++index;
02071     }
02072 
02073     // It's an error if we stop here
02074     if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound)))
02075         return false;
02076 
02077         // two slashes means generic URI syntax, so we get the authority
02078     const XMLCh* authUriSpec = trimmedUriSpec +  index;
02079     if (((index+1) < trimmedUriSpecLen) &&
02080         XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
02081     {
02082         index += 2;
02083         XMLSize_t startPos = index;
02084 
02085         // get authority - everything up to path, query or fragment
02086         XMLCh testChar;
02087         while (index < trimmedUriSpecLen)
02088         {
02089             testChar = trimmedUriSpec[index];
02090             if (testChar == chForwardSlash ||
02091                 testChar == chQuestion     ||
02092                 testChar == chPound         )
02093             {
02094                 break;
02095             }
02096 
02097             index++;
02098         }
02099 
02100         // if we found authority, parse it out, otherwise we set the
02101         // host to empty string
02102         if (index > startPos)
02103         {
02104             if (!processAuthority(trimmedUriSpec + startPos, index - startPos))
02105                 return false;
02106         }
02107     }
02108 
02109     // we need to check if index has exceed the lenght or not
02110     if (index < trimmedUriSpecLen)
02111     {
02112             if (!processPath(trimmedUriSpec + index, trimmedUriSpecLen - index, foundScheme, bAllowSpaces))
02113             return false;
02114     }
02115 
02116     return true;
02117 }
02118 
02119 // NOTE: no check for NULL value of uriStr (caller responsibility)
02120 // NOTE: this routine is the same as above, but it uses a flag to
02121 //       indicate the existence of a baseURI rather than an XMLUri.
02122 bool XMLUri::isValidURI(bool haveBaseURI, const XMLCh* const uriStr, bool bAllowSpaces/*=false*/)
02123 {
02124     // get a trimmed version of uriStr
02125     // uriStr will NO LONGER be used in this function.
02126     const XMLCh* trimmedUriSpec = uriStr;
02127 
02128     while (XMLChar1_0::isWhitespace(*trimmedUriSpec))
02129         trimmedUriSpec++;
02130 
02131     XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec);
02132 
02133     while (trimmedUriSpecLen) {
02134         if (XMLChar1_0::isWhitespace(trimmedUriSpec[trimmedUriSpecLen-1]))
02135             trimmedUriSpecLen--;
02136         else
02137             break;
02138     }
02139 
02140     if (trimmedUriSpecLen == 0)
02141     {
02142         if (!haveBaseURI)
02143             return false;
02144         return true;
02145     }
02146 
02147     XMLSize_t index = 0;
02148     bool foundScheme = false;
02149 
02150     // Check for scheme, which must be before `/', '?' or '#'.
02151     int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon);
02152     int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash);
02153     int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion);
02154     int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound);
02155 
02156     if ((colonIdx <= 0) ||
02157         (colonIdx > slashIdx && slashIdx != -1) ||
02158         (colonIdx > queryIdx && queryIdx != -1) ||
02159         (colonIdx > fragmentIdx && fragmentIdx != -1))
02160     {
02161         // A standalone base is a valid URI according to spec
02162         if (colonIdx == 0 || (!haveBaseURI && fragmentIdx != 0))
02163             return false;
02164     }
02165     else
02166     {
02167         if (!processScheme(trimmedUriSpec, index))
02168             return false;
02169         foundScheme = true;
02170         ++index;
02171     }
02172 
02173     // It's an error if we stop here
02174     if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound)))
02175         return false;
02176 
02177         // two slashes means generic URI syntax, so we get the authority
02178     const XMLCh* authUriSpec = trimmedUriSpec +  index;
02179     if (((index+1) < trimmedUriSpecLen) &&
02180         XMLString::startsWith(authUriSpec, DOUBLE_SLASH))
02181     {
02182         index += 2;
02183         XMLSize_t startPos = index;
02184 
02185         // get authority - everything up to path, query or fragment
02186         XMLCh testChar;
02187         while (index < trimmedUriSpecLen)
02188         {
02189             testChar = trimmedUriSpec[index];
02190             if (testChar == chForwardSlash ||
02191                 testChar == chQuestion     ||
02192                 testChar == chPound         )
02193             {
02194                 break;
02195             }
02196 
02197             index++;
02198         }
02199 
02200         // if we found authority, parse it out, otherwise we set the
02201         // host to empty string
02202         if (index > startPos)
02203         {
02204             if (!processAuthority(trimmedUriSpec + startPos, index - startPos))
02205                 return false;
02206         }
02207     }
02208 
02209     // we need to check if index has exceed the length or not
02210     if (index < trimmedUriSpecLen)
02211     {
02212         if (!processPath(trimmedUriSpec + index, trimmedUriSpecLen - index, foundScheme, bAllowSpaces))
02213             return false;
02214     }
02215 
02216     return true;
02217 }
02218 
02219 bool XMLUri::isWellFormedAddress(const XMLCh* const addrString,
02220                                  const XMLSize_t addrStrLen)
02221 {
02222     // Check that we have a non-zero length string.
02223     if (addrStrLen == 0)
02224         return false;
02225 
02226     // Check if the host is a valid IPv6reference.
02227     if (*addrString == chOpenSquare)
02228     {
02229         return isWellFormedIPv6Reference(addrString, addrStrLen);
02230     }
02231 
02232     //
02233     // Cannot start with a '.', '-', or end with a '-'.
02234     //
02235     if (*addrString == chPeriod ||
02236         *addrString == chDash ||
02237         addrString[addrStrLen-1] == chDash)
02238         return false;
02239 
02240     // rightmost domain label starting with digit indicates IP address
02241     // since top level domain label can only start with an alpha
02242     // see RFC 2396 Section 3.2.2
02243 
02244     int lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, addrStrLen);
02245 
02246     // if the string ends with "."
02247     // get the second last "."
02248     if (XMLSize_t(lastPeriodPos + 1) == addrStrLen)
02249     {
02250         lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, lastPeriodPos);
02251 
02252         if ( XMLString::isDigit(addrString[lastPeriodPos + 1]))
02253                         return false;
02254     }
02255 
02256     if (XMLString::isDigit(addrString[lastPeriodPos + 1]))
02257     {
02258         return isWellFormedIPv4Address(addrString, addrStrLen);
02259     } // end of IPv4address
02260     else
02261     {
02262         //
02263         //  hostname      = *( domainlabel "." ) toplabel [ "." ]
02264         //  domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
02265         //  toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
02266 
02267         // RFC 2396 states that hostnames take the form described in
02268         // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
02269         // to RFC 1034, hostnames are limited to 255 characters.
02270         if (addrStrLen > 255) {
02271             return false;
02272         }
02273 
02274         unsigned int labelCharCount = 0;
02275 
02276         // domain labels can contain alphanumerics and '-"
02277         // but must start and end with an alphanumeric
02278         for (XMLSize_t i = 0; i < addrStrLen; i++)
02279         {
02280             if (addrString[i] == chPeriod)
02281             {
02282               if (((i > 0)  &&
02283                    (!XMLString::isAlphaNum(addrString[i-1]))) ||
02284                   ((i + 1 < addrStrLen) &&
02285                    (!XMLString::isAlphaNum(addrString[i+1])))  )
02286                 {
02287                     return false;
02288                 }
02289                 labelCharCount = 0;
02290             }
02291             else if (!XMLString::isAlphaNum(addrString[i]) &&
02292                       addrString[i] != chDash)
02293             {
02294                 return false;
02295             }
02296             // RFC 1034: Labels must be 63 characters or less.
02297             else if (++labelCharCount > 63) {
02298                 return false;
02299             }
02300         } //for
02301     }
02302 
02303     return true;
02304 }
02305 
02306 bool XMLUri::processScheme(const XMLCh* const schemeStr, XMLSize_t& index)
02307 {
02308     const XMLCh* tmpPtr = XMLString::findAny(schemeStr, SCHEME_SEPARATORS);
02309 
02310     if (tmpPtr) {
02311         index = tmpPtr - schemeStr;
02312         return isConformantSchemeName(schemeStr, index);
02313     }
02314     else {
02315         return false;
02316     }
02317 }
02318 
02319 
02320 bool XMLUri::isConformantSchemeName( const XMLCh* const scheme
02321                                    , const XMLSize_t schemeLen)
02322 {
02323     if (!XMLString::isAlpha(*scheme))     // first: alpha
02324         return false;
02325 
02326     // second onwards: ( alpha | digit | "+" | "-" | "." )
02327     for (XMLSize_t i=1; i<schemeLen; i++)
02328     {
02329         if ( !XMLString::isAlphaNum(scheme[i]) &&
02330              (XMLString::indexOf(SCHEME_CHARACTERS, scheme[i]) == -1))
02331             return false;
02332     }
02333 
02334     return true;
02335 }
02336 
02337 bool XMLUri::processAuthority( const XMLCh* const authSpec
02338                              , const XMLSize_t authLen)
02339 {
02340     int index = XMLString::indexOf(authSpec, chAt);
02341     XMLSize_t start = 0;
02342 
02343     // server = [ [ userinfo "@" ] hostport ]
02344     // userinfo is everything up @,
02345     const XMLCh* userinfo;
02346     int userInfoLen = 0;
02347     if ((index != -1) && (XMLSize_t(index) < authLen))
02348     {
02349         userinfo = authSpec;
02350         userInfoLen = index;
02351         start = index + 1;
02352     }
02353     else
02354     {
02355         userinfo = XMLUni::fgZeroLenString;
02356     }
02357 
02358     // hostport = host [ ":" port ]
02359     // host is everything up to ':', or up to
02360     // and including ']' if followed by ':'.
02361     //
02362     // Search for port boundary.
02363     const XMLCh* host;
02364     XMLSize_t hostLen = 0;
02365     if ((start < authLen) && (authSpec[start] == chOpenSquare))
02366     {
02367         index = XMLString::indexOf(&(authSpec[start]), chCloseSquare);
02368         if ((index != -1) && (XMLSize_t(index) < authLen))
02369         {
02370             // skip the ']'
02371             index = ((start + index + 1) < authLen
02372               && authSpec[start + index + 1] == chColon) ? index+1 : -1;
02373         }
02374     }
02375     else
02376     {
02377         index = XMLString::indexOf(&(authSpec[start]), chColon);
02378         if (index!=-1 && XMLSize_t(index) >= authLen)
02379             index = -1;
02380     }
02381 
02382     host = &(authSpec[start]);
02383     if (index != -1)
02384     {
02385         hostLen = index;
02386         start += index + 1;  // skip the :
02387     }
02388     else
02389     {
02390         hostLen = authLen - start;
02391         start = authLen;
02392     }
02393 
02394     // port is everything after ":"
02395     int port = -1;
02396     if ((hostLen) &&   // non empty host
02397         (index != -1)                    &&   // ":" found
02398         (start < authLen)                     )   // ":" is not the last
02399     {
02400         const XMLCh* portStr = &(authSpec[start]);
02401         if (*portStr)
02402         {
02403             port = 0;
02404             for (XMLSize_t i=0; i<(authLen - start); i++)
02405             {
02406                 if (portStr[i] < chDigit_0 || portStr[i] > chDigit_9)
02407                 {
02408                   // Assume this is a registry-based authority.
02409                   //
02410                   port = -1;
02411                   hostLen = 0;
02412                   host = XMLUni::fgZeroLenString;
02413                   userInfoLen = 0;
02414                   userinfo = XMLUni::fgZeroLenString;
02415                   break;
02416                 }
02417 
02418                 port = (port * 10) + (int) (portStr[i] - chDigit_0);
02419             }
02420         }
02421     }
02422 
02423     return isValidServerBasedAuthority(host, hostLen, port, userinfo, userInfoLen)
02424       || isValidRegistryBasedAuthority(authSpec, authLen);
02425 }
02426 
02427 bool XMLUri::processPath(const XMLCh* const pathStr,
02428                          const XMLSize_t pathStrLen,
02429                          const bool isSchemePresent,
02430                          const bool bAllowSpaces/*=false*/)
02431 {
02432     if (pathStrLen != 0)
02433     {
02434         XMLSize_t index = 0;
02435         XMLCh testChar = chNull;
02436         bool isOpaque = (!isSchemePresent || *pathStr == chForwardSlash);
02437 
02438         // path - everything up to query string or fragment
02439         //
02440         // RFC 2732 only allows '[' and ']' to appear in the opaque part.
02441         while (index < pathStrLen)
02442         {
02443             testChar = pathStr[index];
02444             if (testChar == chQuestion || testChar == chPound)
02445                 break;
02446 
02447             if (testChar == chPercent)
02448             {
02449                 if (index+2 >= pathStrLen ||
02450                     !XMLString::isHex(pathStr[index+1]) ||
02451                     !XMLString::isHex(pathStr[index+2]))
02452                         return false;
02453             }
02454             else if (testChar==chSpace)
02455             {
02456                 if(!bAllowSpaces)
02457                     return false;
02458             }
02459             else if (!isUnreservedCharacter(testChar) &&
02460                      ((isOpaque && !isPathCharacter(testChar)) ||
02461                       (!isOpaque && !isReservedCharacter(testChar))))
02462             {
02463                 return false;
02464             }
02465 
02466             index++;
02467         }
02468 
02469         // query - starts with ? and up to fragment or end
02470         // fragment - starts with #
02471         bool isQuery = (testChar == chQuestion);
02472         if (isQuery || testChar == chPound)
02473         {
02474             index++;
02475             while (index < pathStrLen)
02476             {
02477                 testChar = pathStr[index];
02478                 if (testChar == chPound && isQuery) {
02479                     isQuery = false;
02480                     index++;
02481                     continue;
02482                 }
02483 
02484                 if (testChar == chPercent)
02485                 {
02486                     if (index+2 >= pathStrLen ||
02487                         !XMLString::isHex(pathStr[index+1]) ||
02488                         !XMLString::isHex(pathStr[index+2]))
02489                         return false;
02490                 }
02491                 else if (testChar==chSpace)
02492                 {
02493                     if(!bAllowSpaces)
02494                         return false;
02495                 }
02496                 else if (!isReservedOrUnreservedCharacter(testChar))
02497                 {
02498                     return false;
02499                 }
02500                 index++;
02501             }
02502         }
02503     } //if (pathStrLen...)
02504 
02505     return true;
02506 }
02507 
02508 /***
02509  * [Bug7698]: filenames with embedded spaces in schemaLocation strings not handled properly
02510  *
02511  * This method is called when Scanner/TraverseSchema knows that the URI reference is
02512  * for local file.
02513  *
02514  ***/
02515 void XMLUri::normalizeURI(const XMLCh*     const systemURI,
02516                                 XMLBuffer&       normalizedURI)
02517 {
02518     const XMLCh* pszSrc = systemURI;
02519 
02520     normalizedURI.reset();
02521 
02522     while (*pszSrc) {
02523 
02524         if ((*(pszSrc) == chPercent)
02525         &&  (*(pszSrc+1) == chDigit_2)
02526         &&  (*(pszSrc+2) == chDigit_0))
02527         {
02528             pszSrc += 3;
02529             normalizedURI.append(chSpace);
02530         }
02531         else
02532         {
02533             normalizedURI.append(*pszSrc);
02534             pszSrc++;
02535         }
02536     }
02537 }
02538 
02539 /***
02540  * Support for Serialization/De-serialization
02541  ***/
02542 
02543 IMPL_XSERIALIZABLE_TOCREATE(XMLUri)
02544 
02545 void XMLUri::serialize(XSerializeEngine& serEng)
02546 {
02547 
02548     if (serEng.isStoring())
02549     {
02550         serEng<<fPort;
02551         serEng.writeString(fScheme);
02552         serEng.writeString(fUserInfo);
02553         serEng.writeString(fHost);
02554         serEng.writeString(fRegAuth);
02555         serEng.writeString(fPath);
02556         serEng.writeString(fQueryString);
02557         serEng.writeString(fFragment);
02558         serEng.writeString(fURIText);
02559     }
02560     else
02561     {
02562         serEng>>fPort;
02563         serEng.readString(fScheme);
02564         serEng.readString(fUserInfo);
02565         serEng.readString(fHost);
02566         serEng.readString(fRegAuth);
02567         serEng.readString(fPath);
02568         serEng.readString(fQueryString);
02569         serEng.readString(fFragment);
02570         serEng.readString(fURIText);
02571     }
02572 
02573 }
02574 
02575 XMLUri::XMLUri(MemoryManager* const manager)
02576 : fPort(-1)
02577 , fScheme(0)
02578 , fUserInfo(0)
02579 , fHost(0)
02580 , fRegAuth(0)
02581 , fPath(0)
02582 , fQueryString(0)
02583 , fFragment(0)
02584 , fURIText(0)
02585 , fMemoryManager(manager)
02586 {
02587 }
02588 
02589 XERCES_CPP_NAMESPACE_END