GME
13
|
00001 /* 00002 * Licensed to the Apache Software Foundation (ASF) under one or more 00003 * contributor license agreements. See the NOTICE file distributed with 00004 * this work for additional information regarding copyright ownership. 00005 * The ASF licenses this file to You under the Apache License, Version 2.0 00006 * (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 */ 00017 00018 /* 00019 * $Id: XMLUri.cpp 881714 2009-11-18 10:39:06Z borisk $ 00020 */ 00021 00022 // --------------------------------------------------------------------------- 00023 // Includes 00024 // --------------------------------------------------------------------------- 00025 #include <xercesc/util/Janitor.hpp> 00026 #include <xercesc/util/XMLURL.hpp> 00027 #include <xercesc/util/XMLUri.hpp> 00028 #include <xercesc/util/XMLChar.hpp> 00029 #include <xercesc/util/OutOfMemoryException.hpp> 00030 00031 XERCES_CPP_NAMESPACE_BEGIN 00032 00033 // --------------------------------------------------------------------------- 00034 // XMLUri: static data 00035 // --------------------------------------------------------------------------- 00036 00037 // Amended by RFC2732 00038 // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 00039 // "$" | "," | "[" | "]" 00040 // 00041 const XMLCh XMLUri::RESERVED_CHARACTERS[] = 00042 { 00043 chSemiColon, chForwardSlash, chQuestion, chColon, chAt, 00044 chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare, 00045 chCloseSquare, chNull 00046 }; 00047 00048 // 00049 // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | 00050 // "(" | ")" 00051 // 00052 const XMLCh XMLUri::MARK_CHARACTERS[] = 00053 { 00054 chDash, chUnderscore, chPeriod, chBang, chTilde, 00055 chAsterisk, chSingleQuote, chOpenParen, chCloseParen, chNull 00056 }; 00057 00058 // combination of MARK and RESERVED 00059 const XMLCh XMLUri::MARK_OR_RESERVED_CHARACTERS[] = 00060 { 00061 chDash, chUnderscore, chPeriod, chBang, chTilde, 00062 chAsterisk, chSingleQuote, chOpenParen, chCloseParen, 00063 chSemiColon, chForwardSlash, chQuestion, chColon, chAt, 00064 chAmpersand, chEqual, chPlus, chDollarSign, chComma, chOpenSquare, 00065 chCloseSquare, chNull 00066 }; 00067 00068 // 00069 // scheme = alpha *( alpha | digit | "+" | "-" | "." ) 00070 // 00071 const XMLCh XMLUri::SCHEME_CHARACTERS[] = 00072 { 00073 chPlus, chDash, chPeriod, chNull 00074 }; 00075 00076 // 00077 // userinfo = *( unreserved | escaped | 00078 // ";" | ":" | "&" | "=" | "+" | "$" | "," ) 00079 // 00080 const XMLCh XMLUri::USERINFO_CHARACTERS[] = 00081 { 00082 chSemiColon, chColon, chAmpersand, chEqual, chPlus, 00083 chDollarSign, chPeriod, chNull 00084 }; 00085 00086 // 00087 // reg_name = 1*( unreserved | escaped | "$" | "," | 00088 // ";" | ":" | "@" | "&" | "=" | "+" ) 00089 // 00090 const XMLCh XMLUri::REG_NAME_CHARACTERS[] = 00091 { 00092 chDollarSign, chComma, chSemiColon, chColon, chAt, 00093 chAmpersand, chEqual, chPlus, chNull 00094 }; 00095 00096 // pchar plus ';' and '/'. 00097 // pchar = unreserved | escaped | 00098 // ":" | "@" | "&" | "=" | "+" | "$" | "," 00099 const XMLCh XMLUri::PATH_CHARACTERS[] = 00100 { 00101 chSemiColon, chForwardSlash, chColon, chAt, chAmpersand, 00102 chEqual, chPlus, chDollarSign, chComma, chNull 00103 }; 00104 00105 00106 // --------------------------------------------------------------------------- 00107 // Local methods and data 00108 // --------------------------------------------------------------------------- 00109 static const int BUF_LEN = 64; 00110 00111 // 00112 // "Scheme" 00113 // "SchemeSpecificPart" 00114 // "Parameters" 00115 // "UserInfo" 00116 // "Host" 00117 // "Port" 00118 // "RegName" 00119 // "Path" 00120 // "Query" 00121 // "Fragment" 00122 // 00123 static const XMLCh errMsg_SCHEME[] = 00124 { 00125 chLatin_s, chLatin_c, chLatin_h, chLatin_e, 00126 chLatin_m, chLatin_e, chNull 00127 }; 00128 00129 static const XMLCh errMsg_SCHEMESPART[] = 00130 { 00131 chLatin_s, chLatin_c, chLatin_h, chLatin_e, chLatin_m, chLatin_e, 00132 chLatin_S, chLatin_p, chLatin_e, chLatin_c, chLatin_i, chLatin_f, 00133 chLatin_i, chLatin_c, chLatin_P, chLatin_a, chLatin_r, chLatin_t, 00134 chNull 00135 }; 00136 00137 static const XMLCh errMsg_PARAMS[] = 00138 { 00139 chLatin_p, chLatin_a, chLatin_r, chLatin_a, chLatin_m, 00140 chLatin_e, chLatin_t, chLatin_e, chLatin_r, chLatin_s, chNull 00141 }; 00142 00143 static const XMLCh errMsg_USERINFO[] = 00144 { 00145 chLatin_u, chLatin_s, chLatin_e, chLatin_r, 00146 chLatin_i, chLatin_n, chLatin_f, chLatin_o, chNull 00147 }; 00148 00149 static const XMLCh errMsg_HOST[] = 00150 { 00151 chLatin_h, chLatin_o, chLatin_s, chLatin_t, chNull 00152 }; 00153 00154 static const XMLCh errMsg_PORT[] = 00155 { 00156 chLatin_p, chLatin_o, chLatin_r, chLatin_t, chNull 00157 }; 00158 00159 static const XMLCh errMsg_REGNAME[] = 00160 { 00161 chLatin_R, chLatin_e, chLatin_g, 00162 chLatin_N, chLatin_a, chLatin_m, chLatin_e, chNull 00163 }; 00164 00165 static const XMLCh errMsg_PATH[] = 00166 { 00167 chLatin_p, chLatin_a, chLatin_t, chLatin_h, chNull 00168 }; 00169 00170 static const XMLCh errMsg_QUERY[] = 00171 { 00172 chLatin_q, chLatin_u, chLatin_e, chLatin_r, chLatin_y, chNull 00173 }; 00174 00175 static const XMLCh errMsg_FRAGMENT[] = 00176 { 00177 chLatin_f, chLatin_r, chLatin_a, chLatin_g, 00178 chLatin_m, chLatin_e, chLatin_n, chLatin_t, chNull 00179 }; 00180 00181 // 00182 // "//" 00183 // "/" 00184 // "./" 00185 // "/." 00186 // "/../" 00187 // "/.." 00188 // 00189 static const XMLCh DOUBLE_SLASH[] = 00190 { 00191 chForwardSlash, chForwardSlash, chNull 00192 }; 00193 00194 static const XMLCh SINGLE_SLASH[] = 00195 { 00196 chForwardSlash, chNull 00197 }; 00198 00199 static const XMLCh SLASH_DOT_SLASH[] = 00200 { 00201 chForwardSlash, chPeriod, chForwardSlash, chNull 00202 }; 00203 00204 static const XMLCh SLASH_DOT[] = 00205 { 00206 chForwardSlash, chPeriod, chNull 00207 }; 00208 00209 static const XMLCh SLASH_DOTDOT_SLASH[] = 00210 { 00211 chForwardSlash, chPeriod, chPeriod, chForwardSlash, chNull 00212 }; 00213 00214 static const XMLCh SLASH_DOTDOT[] = 00215 { 00216 chForwardSlash, chPeriod, chPeriod, chNull 00217 }; 00218 00219 // 00220 // ":/?#" 00221 // 00222 // REVISIT: why? 00223 static const XMLCh SCHEME_SEPARATORS[] = 00224 { 00225 chColon, chForwardSlash, chQuestion, chPound, chNull 00226 }; 00227 00228 // 00229 // "?#" 00230 // 00231 static const XMLCh PATH_SEPARATORS[] = 00232 { 00233 chQuestion, chPound, chNull 00234 }; 00235 00236 // --------------------------------------------------------------------------- 00237 // XMLUri: Constructors and Helper methods 00238 // --------------------------------------------------------------------------- 00239 // ctor# 2 00240 00241 typedef JanitorMemFunCall<XMLUri> CleanupType; 00242 00243 XMLUri::XMLUri(const XMLCh* const uriSpec, 00244 MemoryManager* const manager) 00245 : fPort(-1) 00246 , fScheme(0) 00247 , fUserInfo(0) 00248 , fHost(0) 00249 , fRegAuth(0) 00250 , fPath(0) 00251 , fQueryString(0) 00252 , fFragment(0) 00253 , fURIText(0) 00254 , fMemoryManager(manager) 00255 { 00256 CleanupType cleanup(this, &XMLUri::cleanUp); 00257 00258 try { 00259 initialize((XMLUri *)0, uriSpec); 00260 } 00261 catch(const OutOfMemoryException&) 00262 { 00263 cleanup.release(); 00264 00265 throw; 00266 } 00267 00268 cleanup.release(); 00269 } 00270 00271 // ctor# 7 relative ctor 00272 XMLUri::XMLUri(const XMLUri* const baseURI 00273 , const XMLCh* const uriSpec 00274 , MemoryManager* const manager) 00275 : fPort(-1) 00276 , fScheme(0) 00277 , fUserInfo(0) 00278 , fHost(0) 00279 , fRegAuth(0) 00280 , fPath(0) 00281 , fQueryString(0) 00282 , fFragment(0) 00283 , fURIText(0) 00284 , fMemoryManager(manager) 00285 { 00286 CleanupType cleanup(this, &XMLUri::cleanUp); 00287 00288 try { 00289 initialize(baseURI, uriSpec); 00290 } 00291 catch(const OutOfMemoryException&) 00292 { 00293 cleanup.release(); 00294 00295 throw; 00296 } 00297 00298 cleanup.release(); 00299 } 00300 00301 //Copy constructor 00302 XMLUri::XMLUri(const XMLUri& toCopy) 00303 : XSerializable(toCopy) 00304 , XMemory(toCopy) 00305 , fPort(-1) 00306 , fScheme(0) 00307 , fUserInfo(0) 00308 , fHost(0) 00309 , fRegAuth(0) 00310 , fPath(0) 00311 , fQueryString(0) 00312 , fFragment(0) 00313 , fURIText(0) 00314 , fMemoryManager(toCopy.fMemoryManager) 00315 { 00316 CleanupType cleanup(this, &XMLUri::cleanUp); 00317 00318 try { 00319 initialize(toCopy); 00320 } 00321 catch(const OutOfMemoryException&) 00322 { 00323 cleanup.release(); 00324 00325 throw; 00326 } 00327 00328 cleanup.release(); 00329 } 00330 00331 XMLUri& XMLUri::operator=(const XMLUri& toAssign) 00332 { 00333 cleanUp(); 00334 00335 CleanupType cleanup(this, &XMLUri::cleanUp); 00336 00337 try { 00338 initialize(toAssign); 00339 } 00340 catch(const OutOfMemoryException&) 00341 { 00342 cleanup.release(); 00343 00344 throw; 00345 } 00346 00347 cleanup.release(); 00348 00349 return *this; 00350 } 00351 00352 XMLUri::~XMLUri() 00353 { 00354 cleanUp(); 00355 } 00356 00357 void XMLUri::cleanUp() 00358 { 00359 if (fScheme) 00360 XMLString::release(&fScheme, fMemoryManager);//delete[] fScheme; 00361 00362 if (fUserInfo) 00363 XMLString::release(&fUserInfo, fMemoryManager);//delete[] fUserInfo; 00364 00365 if (fHost) 00366 XMLString::release(&fHost, fMemoryManager);//delete[] fHost; 00367 00368 if (fRegAuth) 00369 XMLString::release(&fRegAuth, fMemoryManager);//delete[] fRegAuth; 00370 00371 if (fPath) 00372 XMLString::release(&fPath, fMemoryManager);//delete[] fPath; 00373 00374 if (fQueryString) 00375 XMLString::release(&fQueryString, fMemoryManager);//delete[] fQueryString; 00376 00377 if (fFragment) 00378 XMLString::release(&fFragment, fMemoryManager);//delete[] fFragment; 00379 00380 XMLString::release(&fURIText, fMemoryManager);//delete[] fURIText; 00381 } 00382 00383 void XMLUri::initialize(const XMLUri& toCopy) 00384 { 00385 // 00386 // assuming that all fields from the toCopy are valid, 00387 // therefore need NOT to go through various setXXX() methods 00388 // 00389 fMemoryManager = toCopy.fMemoryManager; 00390 fScheme = XMLString::replicate(toCopy.fScheme, fMemoryManager); 00391 fUserInfo = XMLString::replicate(toCopy.fUserInfo, fMemoryManager); 00392 fHost = XMLString::replicate(toCopy.fHost, fMemoryManager); 00393 fPort = toCopy.fPort; 00394 fRegAuth = XMLString::replicate(toCopy.fRegAuth, fMemoryManager); 00395 fPath = XMLString::replicate(toCopy.fPath, fMemoryManager); 00396 fQueryString = XMLString::replicate(toCopy.fQueryString, fMemoryManager); 00397 fFragment = XMLString::replicate(toCopy.fFragment, fMemoryManager); 00398 } 00399 00400 void XMLUri::initialize(const XMLUri* const baseURI 00401 , const XMLCh* const uriSpec) 00402 { 00403 00404 // get a trimmed version of uriSpec 00405 // uriSpec will NO LONGER be used in this function. 00406 // 00407 XMLCh* trimmedUriSpec = XMLString::replicate(uriSpec, fMemoryManager); 00408 XMLString::trim(trimmedUriSpec); 00409 ArrayJanitor<XMLCh> janName(trimmedUriSpec, fMemoryManager); 00410 XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec); 00411 00412 if ( !baseURI && 00413 (!trimmedUriSpec || trimmedUriSpecLen == 0)) 00414 { 00415 ThrowXMLwithMemMgr1(MalformedURLException 00416 , XMLExcepts::XMLNUM_URI_Component_Empty 00417 , errMsg_PARAMS 00418 , fMemoryManager); 00419 } 00420 00421 // just make a copy of the base if spec is empty 00422 if (!trimmedUriSpec || trimmedUriSpecLen == 0) 00423 { 00424 initialize(*baseURI); 00425 return; 00426 } 00427 00428 XMLSize_t index = 0; 00429 bool foundScheme = false; 00430 00431 // Check for scheme, which must be before `/', '?' or '#'. 00432 int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon); 00433 int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash); 00434 int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion); 00435 int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound); 00436 00437 if ((colonIdx <= 0) || 00438 (colonIdx > slashIdx && slashIdx != -1) || 00439 (colonIdx > queryIdx && queryIdx != -1) || 00440 (colonIdx > fragmentIdx && fragmentIdx != -1)) 00441 { 00442 // A standalone base is a valid URI according to spec 00443 if ( colonIdx == 0 || (!baseURI && fragmentIdx != 0) ) 00444 { 00445 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme, fMemoryManager); 00446 } 00447 } 00448 else 00449 { 00450 foundScheme = true; 00451 initializeScheme(trimmedUriSpec); 00452 index = XMLString::stringLen(fScheme)+1; 00453 } 00454 00455 // It's an error if we stop here 00456 if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound))) 00457 { 00458 ThrowXMLwithMemMgr1(MalformedURLException 00459 , XMLExcepts::XMLNUM_URI_Component_Empty 00460 , errMsg_PATH 00461 , fMemoryManager); 00462 } 00463 00464 // two slashes means generic URI syntax, so we get the authority 00465 XMLCh* authUriSpec = (XMLCh*) fMemoryManager->allocate 00466 ( 00467 (trimmedUriSpecLen+1) * sizeof(XMLCh) 00468 );//new XMLCh[trimmedUriSpecLen+1]; 00469 ArrayJanitor<XMLCh> authName(authUriSpec, fMemoryManager); 00470 XMLString::subString(authUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager); 00471 00472 if (((index+1) < trimmedUriSpecLen) && 00473 XMLString::startsWith(authUriSpec, DOUBLE_SLASH)) 00474 { 00475 index += 2; 00476 XMLSize_t startPos = index; 00477 00478 // get authority - everything up to path, query or fragment 00479 XMLCh testChar; 00480 while (index < trimmedUriSpecLen) 00481 { 00482 testChar = trimmedUriSpec[index]; 00483 if (testChar == chForwardSlash || 00484 testChar == chQuestion || 00485 testChar == chPound ) 00486 { 00487 break; 00488 } 00489 00490 index++; 00491 } 00492 00493 // if we found authority, parse it out, otherwise we set the 00494 // host to empty string 00495 if (index > startPos) 00496 { 00497 XMLString::subString(authUriSpec, trimmedUriSpec, startPos, index, fMemoryManager); 00498 initializeAuthority(authUriSpec); 00499 } 00500 else 00501 { 00502 //fHost = 0; 00503 setHost(XMLUni::fgZeroLenString); 00504 } 00505 } 00506 00507 // we need to check if index has exceed the lenght or not 00508 if (index >= trimmedUriSpecLen) 00509 return; 00510 00511 XMLCh* pathUriSpec = (XMLCh*) fMemoryManager->allocate 00512 ( 00513 (trimmedUriSpecLen+1) * sizeof(XMLCh) 00514 );//new XMLCh[trimmedUriSpecLen+1]; 00515 ArrayJanitor<XMLCh> pathUriSpecName(pathUriSpec, fMemoryManager); 00516 XMLString::subString(pathUriSpec, trimmedUriSpec, index, trimmedUriSpecLen, fMemoryManager); 00517 00518 initializePath(pathUriSpec); 00519 00520 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 00521 // In some cases, it might make more sense to throw an exception 00522 // (when scheme is specified is the string spec and the base URI 00523 // is also specified, for example), but we're just following the 00524 // RFC specifications 00525 if ( baseURI ) 00526 { 00527 // check to see if this is the current doc - RFC 2396 5.2 #2 00528 // note that this is slightly different from the RFC spec in that 00529 // we don't include the check for query string being null 00530 // - this handles cases where the urispec is just a query 00531 // string or a fragment (e.g. "?y" or "#s") - 00532 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 00533 // identified this as a bug in the RFC 00534 if ((!fPath || !*fPath) && 00535 fScheme == 0 && 00536 fHost == 0 && fRegAuth == 0) 00537 { 00538 fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager); 00539 fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo; 00540 fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager); 00541 fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager); 00542 fPort = baseURI->getPort(); 00543 fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager); 00544 fMemoryManager->deallocate(fPath);//delete [] fPath; 00545 fPath = XMLString::replicate(baseURI->getPath(), fMemoryManager); 00546 00547 if ( !fQueryString ) 00548 { 00549 fQueryString = XMLString::replicate(baseURI->getQueryString(), fMemoryManager); 00550 } 00551 return; 00552 } 00553 00554 // check for scheme - RFC 2396 5.2 #3 00555 // if we found a scheme, it means absolute URI, so we're done 00556 if (fScheme == 0) 00557 { 00558 fScheme = XMLString::replicate(baseURI->getScheme(), fMemoryManager); 00559 } 00560 else 00561 { 00562 return; 00563 } 00564 00565 // check for authority - RFC 2396 5.2 #4 00566 // if we found a host, then we've got a network path, so we're done 00567 if (fHost == 0 && fRegAuth == 0) 00568 { 00569 fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo; 00570 fUserInfo = XMLString::replicate(baseURI->getUserInfo(), fMemoryManager); 00571 fHost = XMLString::replicate(baseURI->getHost(), fMemoryManager); 00572 fPort = baseURI->getPort(); 00573 fRegAuth = XMLString::replicate(baseURI->getRegBasedAuthority(), fMemoryManager); 00574 } 00575 else 00576 { 00577 return; 00578 } 00579 00580 // check for absolute path - RFC 2396 5.2 #5 00581 if ((fPath && *fPath) && 00582 XMLString::startsWith(fPath, SINGLE_SLASH)) 00583 { 00584 return; 00585 } 00586 00587 // if we get to this point, we need to resolve relative path 00588 // RFC 2396 5.2 #6 00589 00590 XMLCh* basePath = XMLString::replicate(baseURI->getPath(), fMemoryManager); 00591 ArrayJanitor<XMLCh> basePathName(basePath, fMemoryManager); 00592 00593 XMLSize_t bufLen = trimmedUriSpecLen+XMLString::stringLen(fPath)+XMLString::stringLen(basePath)+1; 00594 XMLCh* path = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen]; 00595 ArrayJanitor<XMLCh> pathName(path, fMemoryManager); 00596 path[0] = 0; 00597 00598 XMLCh* tmp1 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen]; 00599 ArrayJanitor<XMLCh> tmp1Name(tmp1, fMemoryManager); 00600 XMLCh* tmp2 = (XMLCh*) fMemoryManager->allocate(bufLen * sizeof(XMLCh));//new XMLCh[bufLen]; 00601 ArrayJanitor<XMLCh> tmp2Name(tmp2, fMemoryManager); 00602 00603 // 6a - get all but the last segment of the base URI path 00604 if (basePath) 00605 { 00606 int lastSlash = XMLString::lastIndexOf(basePath, chForwardSlash); 00607 if (lastSlash != -1) 00608 { 00609 XMLString::subString(path, basePath, 0, lastSlash+1, fMemoryManager); 00610 } 00611 } 00612 00613 // 6b - append the relative URI path 00614 XMLString::catString(path, fPath); 00615 00616 // 6c - remove all "./" where "." is a complete path segment 00617 int iIndex = -1; 00618 while ((iIndex = XMLString::patternMatch(path, SLASH_DOT_SLASH)) != -1) 00619 { 00620 XMLString::subString(tmp1, path, 0, iIndex, fMemoryManager); 00621 XMLString::subString(tmp2, path, iIndex+2, XMLString::stringLen(path), fMemoryManager); 00622 00623 path[0] = 0; 00624 XMLString::catString(path, tmp1); 00625 XMLString::catString(path, tmp2); 00626 } 00627 00628 // 6d - remove "." if path ends with "." as a complete path segment 00629 if (XMLString::endsWith(path, SLASH_DOT)) 00630 { 00631 path[XMLString::stringLen(path) - 1] = chNull; 00632 } 00633 00634 // 6e - remove all "<segment>/../" where "<segment>" is a complete 00635 // path segment not equal to ".." 00636 iIndex = -1; 00637 int segIndex = -1; 00638 int offset = 1; 00639 00640 while ((iIndex = XMLString::patternMatch(&(path[offset]), SLASH_DOTDOT_SLASH)) != -1) 00641 { 00642 // Undo offset 00643 iIndex += offset; 00644 00645 // Find start of <segment> within substring ending at found point. 00646 XMLString::subString(tmp1, path, 0, iIndex-1, fMemoryManager); 00647 segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash); 00648 00649 // Ensure <segment> exists and != ".." 00650 if (segIndex != -1 && 00651 (path[segIndex+1] != chPeriod || 00652 path[segIndex+2] != chPeriod || 00653 segIndex + 3 != iIndex)) 00654 { 00655 00656 XMLString::subString(tmp1, path, 0, segIndex, fMemoryManager); 00657 XMLString::subString(tmp2, path, iIndex+3, XMLString::stringLen(path), fMemoryManager); 00658 00659 path[0] = 0; 00660 XMLString::catString(path, tmp1); 00661 XMLString::catString(path, tmp2); 00662 00663 offset = (segIndex == 0 ? 1 : segIndex); 00664 } 00665 else 00666 { 00667 offset += 4; 00668 } 00669 }// while 00670 00671 // 6f - remove ending "<segment>/.." where "<segment>" is a 00672 // complete path segment 00673 if (XMLString::endsWith(path, SLASH_DOTDOT)) 00674 { 00675 // Find start of <segment> within substring ending at found point. 00676 index = XMLString::stringLen(path) - 3; 00677 XMLString::subString(tmp1, path, 0, index-1, fMemoryManager); 00678 segIndex = XMLString::lastIndexOf(tmp1, chForwardSlash); 00679 00680 if (segIndex != -1 && 00681 (path[segIndex+1] != chPeriod || 00682 path[segIndex+2] != chPeriod || 00683 segIndex + 3 != (int)index)) 00684 { 00685 path[segIndex+1] = chNull; 00686 } 00687 } 00688 00689 if (getPath()) 00690 fMemoryManager->deallocate(fPath);//delete [] fPath; 00691 00692 fPath = XMLString::replicate(path, fMemoryManager); 00693 00694 } 00695 } 00696 00697 // --------------------------------------------------------------------------- 00698 // Components initialization 00699 // --------------------------------------------------------------------------- 00700 00701 // 00702 // authority = server | reg_name 00703 // server = [ [ userinfo "@" ] hostport ] 00704 // hostport = host [ ":" port ] 00705 // 00706 // reg_name = 1*( unreserved | escaped | "$" | "," | 00707 // ";" | ":" | "@" | "&" | "=" | "+" ) 00708 // 00709 // userinfo = *( unreserved | escaped | 00710 // ";" | ":" | "&" | "=" | "+" | "$" | "," ) 00711 // 00712 00713 void XMLUri::initializeAuthority(const XMLCh* const uriSpec) 00714 { 00715 00716 int index = 0; 00717 XMLSize_t start = 0; 00718 const XMLSize_t end = XMLString::stringLen(uriSpec); 00719 00720 // 00721 // server = [ [ userinfo "@" ] hostport ] 00722 // userinfo is everything up @, 00723 // 00724 XMLCh* userinfo = (XMLCh*) fMemoryManager->allocate 00725 ( 00726 (end+1) * sizeof(XMLCh) 00727 );//new XMLCh[end+1]; 00728 ArrayJanitor<XMLCh> userName(userinfo, fMemoryManager); 00729 index = XMLString::indexOf(&(uriSpec[start]), chAt); 00730 00731 if ( index != -1) 00732 { 00733 XMLString::subString(userinfo, &(uriSpec[start]), 0, index, fMemoryManager); 00734 index++; // skip the @ 00735 start += index; 00736 } 00737 else 00738 { 00739 userinfo = 0; 00740 } 00741 00742 // 00743 // hostport = host [ ":" port ] 00744 // host is everything up to ':', or up to 00745 // and including ']' if followed by ':'. 00746 // 00747 XMLCh* host = (XMLCh*) fMemoryManager->allocate 00748 ( 00749 (end+1) * sizeof(XMLCh) 00750 );//new XMLCh[end+1]; 00751 ArrayJanitor<XMLCh> hostName(host, fMemoryManager); 00752 00753 // Search for port boundary. 00754 if (start < end && uriSpec[start] == chOpenSquare) 00755 { 00756 index = XMLString::indexOf(&(uriSpec[start]), chCloseSquare); 00757 if (index != -1) 00758 { 00759 // skip the ']' 00760 index = ((start + index + 1) < end 00761 && uriSpec[start + index + 1] == chColon) ? index+1 : -1; 00762 } 00763 } 00764 else 00765 { 00766 index = XMLString::indexOf(&(uriSpec[start]), chColon); 00767 } 00768 00769 if ( index != -1 ) 00770 { 00771 XMLString::subString(host, &(uriSpec[start]), 0, index, fMemoryManager); 00772 index++; // skip the : 00773 start +=index; 00774 } 00775 else 00776 { 00777 XMLString::subString(host, &(uriSpec[start]), 0, end-start, fMemoryManager); 00778 start = end; 00779 } 00780 00781 // port is everything after ":" 00782 00783 XMLCh* portStr = (XMLCh*) fMemoryManager->allocate 00784 ( 00785 (end+1) * sizeof(XMLCh) 00786 );//new XMLCh[end+1]; 00787 ArrayJanitor<XMLCh> portName(portStr, fMemoryManager); 00788 int port = -1; 00789 00790 if ((host && *host) && // non empty host 00791 (index != -1) && // ":" found 00792 (start < end) ) // ":" is not the last 00793 { 00794 XMLString::subString(portStr, &(uriSpec[start]), 0, end-start, fMemoryManager); 00795 00796 if (portStr && *portStr) 00797 { 00798 port = XMLString::parseInt(portStr, fMemoryManager); 00799 } 00800 } // if > 0 00801 00802 // Check if we have server based authority. 00803 if (isValidServerBasedAuthority(host, port, userinfo, fMemoryManager)) 00804 { 00805 if (fHost) 00806 fMemoryManager->deallocate(fHost);//delete [] fHost; 00807 00808 if (fUserInfo) 00809 fMemoryManager->deallocate(fUserInfo);//delete[] fUserInfo; 00810 00811 fHost = XMLString::replicate(host, fMemoryManager); 00812 fPort = port; 00813 fUserInfo = XMLString::replicate(userinfo, fMemoryManager); 00814 00815 return; 00816 } 00817 // This must be registry based authority or the URI is malformed. 00818 setRegBasedAuthority(uriSpec); 00819 } 00820 00821 // scheme = alpha *( alpha | digit | "+" | "-" | "." ) 00822 void XMLUri::initializeScheme(const XMLCh* const uriSpec) 00823 { 00824 const XMLCh* tmpPtr = XMLString::findAny(uriSpec, SCHEME_SEPARATORS); 00825 00826 if ( !tmpPtr ) 00827 { 00828 ThrowXMLwithMemMgr(MalformedURLException, XMLExcepts::XMLNUM_URI_No_Scheme, fMemoryManager); 00829 } 00830 else 00831 { 00832 XMLCh* scheme = (XMLCh*) fMemoryManager->allocate 00833 ( 00834 (XMLString::stringLen(uriSpec) + 1) * sizeof(XMLCh) 00835 );//new XMLCh[XMLString::stringLen(uriSpec)+1]; 00836 ArrayJanitor<XMLCh> tmpName(scheme, fMemoryManager); 00837 XMLString::subString(scheme, uriSpec, 0, (tmpPtr - uriSpec), fMemoryManager); 00838 setScheme(scheme); 00839 } 00840 00841 } 00842 00843 void XMLUri::initializePath(const XMLCh* const uriSpec) 00844 { 00845 if ( !uriSpec ) 00846 { 00847 ThrowXMLwithMemMgr1(MalformedURLException 00848 , XMLExcepts::XMLNUM_URI_Component_Empty 00849 , errMsg_PATH 00850 , fMemoryManager); 00851 } 00852 00853 XMLSize_t index = 0; 00854 XMLSize_t start = 0; 00855 XMLSize_t end = XMLString::stringLen(uriSpec); 00856 XMLCh testChar = 0; 00857 00858 // path - everything up to query string or fragment 00859 if (start < end) 00860 { 00861 // RFC 2732 only allows '[' and ']' to appear in the opaque part. 00862 if (!getScheme() || uriSpec[start] == chForwardSlash) 00863 { 00864 // Scan path. 00865 // abs_path = "/" path_segments 00866 // rel_path = rel_segment [ abs_path ] 00867 while (index < end) 00868 { 00869 testChar = uriSpec[index]; 00870 if (testChar == chQuestion || testChar == chPound) 00871 { 00872 break; 00873 } 00874 00875 // check for valid escape sequence 00876 if (testChar == chPercent) 00877 { 00878 if (index+2 >= end || 00879 !XMLString::isHex(uriSpec[index+1]) || 00880 !XMLString::isHex(uriSpec[index+2])) 00881 { 00882 XMLCh value1[BUF_LEN+1]; 00883 XMLString::moveChars(value1, &(uriSpec[index]), 3); 00884 value1[3] = chNull; 00885 ThrowXMLwithMemMgr2(MalformedURLException 00886 , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence 00887 , errMsg_PATH 00888 , value1 00889 , fMemoryManager); 00890 } 00891 } 00892 else if (!isUnreservedCharacter(testChar) && 00893 !isPathCharacter(testChar)) 00894 { 00895 XMLCh value1[BUF_LEN+1]; 00896 value1[0] = testChar; 00897 value1[1] = chNull; 00898 ThrowXMLwithMemMgr2(MalformedURLException 00899 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char 00900 , errMsg_PATH 00901 , value1 00902 , fMemoryManager); 00903 } 00904 00905 index++; 00906 }//while (index < end) 00907 } 00908 else 00909 { 00910 // Scan opaque part. 00911 // opaque_part = uric_no_slash *uric 00912 while (index < end) 00913 { 00914 testChar = uriSpec[index]; 00915 if (testChar == chQuestion || testChar == chPound) 00916 { 00917 break; 00918 } 00919 00920 // check for valid escape sequence 00921 if (testChar == chPercent) 00922 { 00923 if (index+2 >= end || 00924 !XMLString::isHex(uriSpec[index+1]) || 00925 !XMLString::isHex(uriSpec[index+2])) 00926 { 00927 XMLCh value1[BUF_LEN+1]; 00928 XMLString::moveChars(value1, &(uriSpec[index]), 3); 00929 value1[3] = chNull; 00930 ThrowXMLwithMemMgr2(MalformedURLException 00931 , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence 00932 , errMsg_PATH 00933 , value1 00934 , fMemoryManager); 00935 } 00936 } 00937 // If the scheme specific part is opaque, it can contain '[' 00938 // and ']'. uric_no_slash wasn't modified by RFC 2732, which 00939 // I've interpreted as an error in the spec, since the 00940 // production should be equivalent to (uric - '/'), and uric 00941 // contains '[' and ']'. 00942 else if (!isReservedOrUnreservedCharacter(testChar)) 00943 { 00944 XMLCh value1[BUF_LEN+1]; 00945 value1[0] = testChar; 00946 value1[1] = chNull; 00947 ThrowXMLwithMemMgr2(MalformedURLException 00948 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char 00949 , errMsg_PATH 00950 , value1 00951 , fMemoryManager); 00952 } 00953 00954 index++; 00955 }//while (index < end) 00956 } 00957 } //if (start < end) 00958 00959 if (getPath()) 00960 { 00961 fMemoryManager->deallocate(fPath);//delete [] fPath; 00962 } 00963 00964 fPath = (XMLCh*) fMemoryManager->allocate((index+1) * sizeof(XMLCh));//new XMLCh[index+1]; 00965 XMLString::subString(fPath, uriSpec, start, index, fMemoryManager); 00966 00967 // query - starts with ? and up to fragment or end 00968 if (testChar == chQuestion) 00969 { 00970 index++; 00971 start = index; 00972 while (index < end) 00973 { 00974 testChar = uriSpec[index]; 00975 if (testChar == chPound) 00976 { 00977 break; 00978 } 00979 00980 if (testChar == chPercent) 00981 { 00982 if (index+2 >= end || 00983 !XMLString::isHex(uriSpec[index+1]) || 00984 !XMLString::isHex(uriSpec[index+2])) 00985 { 00986 XMLCh value1[BUF_LEN+1]; 00987 XMLString::moveChars(value1, &(uriSpec[index]), 3); 00988 value1[3] = chNull; 00989 ThrowXMLwithMemMgr2(MalformedURLException 00990 , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence 00991 , errMsg_QUERY 00992 , value1 00993 , fMemoryManager); 00994 } 00995 } 00996 else if (!isReservedOrUnreservedCharacter(testChar)) 00997 { 00998 XMLCh value1[BUF_LEN+1]; 00999 value1[0] = testChar; 01000 value1[1] = chNull; 01001 ThrowXMLwithMemMgr2(MalformedURLException 01002 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char 01003 , errMsg_QUERY 01004 , value1 01005 , fMemoryManager); 01006 } 01007 index++; 01008 } 01009 01010 if (getQueryString()) 01011 { 01012 fMemoryManager->deallocate(fQueryString);//delete [] fQueryString; 01013 } 01014 01015 fQueryString = (XMLCh*) fMemoryManager->allocate 01016 ( 01017 (index - start + 1) * sizeof(XMLCh) 01018 );//new XMLCh[index - start + 1]; 01019 XMLString::subString(fQueryString, uriSpec, start, index, fMemoryManager); 01020 } 01021 01022 // fragment - starts with # 01023 if (testChar == chPound) 01024 { 01025 index++; 01026 start = index; 01027 while (index < end) 01028 { 01029 testChar = uriSpec[index]; 01030 01031 if (testChar == chPercent) 01032 { 01033 if (index+2 >= end || 01034 !XMLString::isHex(uriSpec[index+1]) || 01035 !XMLString::isHex(uriSpec[index+2])) 01036 { 01037 XMLCh value1[BUF_LEN+1]; 01038 XMLString::moveChars(value1, &(uriSpec[index]), 3); 01039 value1[3] = chNull; 01040 ThrowXMLwithMemMgr2(MalformedURLException 01041 , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence 01042 , errMsg_FRAGMENT 01043 , value1 01044 , fMemoryManager); 01045 } 01046 } 01047 else if (!isReservedOrUnreservedCharacter(testChar)) 01048 { 01049 XMLCh value1[BUF_LEN+1]; 01050 value1[0] = testChar; 01051 value1[1] = chNull; 01052 ThrowXMLwithMemMgr2(MalformedURLException 01053 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char 01054 , errMsg_FRAGMENT 01055 , value1 01056 , fMemoryManager); 01057 } 01058 01059 index++; 01060 01061 } 01062 01063 if (getFragment()) 01064 fMemoryManager->deallocate(fFragment);//delete [] fFragment; 01065 01066 //make sure that there is something following the '#' 01067 if (index > start) 01068 { 01069 fFragment = (XMLCh*) fMemoryManager->allocate 01070 ( 01071 (index - start + 1) * sizeof(XMLCh) 01072 );//new XMLCh[index - start + 1]; 01073 XMLString::subString(fFragment, uriSpec, start, index, fMemoryManager); 01074 } 01075 else 01076 { 01077 // RFC 2396, 4.0. URI Reference 01078 // URI-reference = [absoulteURI | relativeURI] [# fragment] 01079 // 01080 // RFC 2396, 4.1. Fragment Identifier 01081 // fragment = *uric 01082 // 01083 // empty fragment is valid 01084 fFragment = 0; 01085 } 01086 } 01087 01088 } 01089 01090 // --------------------------------------------------------------------------- 01091 // Setter 01092 // --------------------------------------------------------------------------- 01093 void XMLUri::setScheme(const XMLCh* const newScheme) 01094 { 01095 if ( !newScheme ) 01096 { 01097 ThrowXMLwithMemMgr1(MalformedURLException 01098 , XMLExcepts::XMLNUM_URI_Component_Set_Null 01099 , errMsg_SCHEME 01100 , fMemoryManager); 01101 } 01102 01103 if (!isConformantSchemeName(newScheme)) 01104 { 01105 ThrowXMLwithMemMgr2(MalformedURLException 01106 , XMLExcepts::XMLNUM_URI_Component_Not_Conformant 01107 , errMsg_SCHEME 01108 , newScheme 01109 , fMemoryManager); 01110 } 01111 01112 if (getScheme()) 01113 { 01114 fMemoryManager->deallocate(fScheme);//delete [] fScheme; 01115 } 01116 01117 fScheme = XMLString::replicate(newScheme, fMemoryManager); 01118 XMLString::lowerCase(fScheme); 01119 } 01120 01121 // 01122 // server = [ [ userinfo "@" ] hostport ] 01123 // hostport = host [":" port] 01124 // 01125 // setUserInfo(), setHost() and setPort() are closely related 01126 // three methods, in a word, userinfo and port has dependency 01127 // on host. 01128 // 01129 // if host is not present, userinfo must be null and port = -1 01130 // 01131 void XMLUri::setUserInfo(const XMLCh* const newUserInfo) 01132 { 01133 if ( newUserInfo && 01134 !getHost() ) 01135 { 01136 ThrowXMLwithMemMgr2(MalformedURLException 01137 , XMLExcepts::XMLNUM_URI_NullHost 01138 , errMsg_USERINFO 01139 , newUserInfo 01140 , fMemoryManager); 01141 } 01142 01143 isConformantUserInfo(newUserInfo, fMemoryManager); 01144 01145 if (getUserInfo()) 01146 { 01147 fMemoryManager->deallocate(fUserInfo);//delete [] fUserInfo; 01148 } 01149 01150 //sometimes we get passed a empty string rather than a null. 01151 //Other procedures rely on it being null 01152 if(newUserInfo && *newUserInfo) { 01153 fUserInfo = XMLString::replicate(newUserInfo, fMemoryManager); 01154 } 01155 else 01156 fUserInfo = 0; 01157 01158 } 01159 01160 void XMLUri::setHost(const XMLCh* const newHost) 01161 { 01162 if ( !newHost ) 01163 { 01164 if (getHost()) 01165 fMemoryManager->deallocate(fHost);//delete [] fHost; 01166 01167 fHost = 0; 01168 setUserInfo(0); 01169 setPort(-1); 01170 01171 return; 01172 } 01173 01174 if ( *newHost && !isWellFormedAddress(newHost, fMemoryManager)) 01175 { 01176 ThrowXMLwithMemMgr2(MalformedURLException 01177 , XMLExcepts::XMLNUM_URI_Component_Not_Conformant 01178 , errMsg_HOST 01179 , newHost 01180 , fMemoryManager); 01181 } 01182 01183 if (getHost()) 01184 { 01185 fMemoryManager->deallocate(fHost);//delete [] fHost; 01186 } 01187 01188 fHost = XMLString::replicate(newHost, fMemoryManager); 01189 setRegBasedAuthority(0); 01190 } 01191 01192 void XMLUri::setPort(int newPort) 01193 { 01194 if (newPort >= 0 && newPort <= 65535) 01195 { 01196 if (!getHost()) 01197 { 01198 XMLCh value1[BUF_LEN+1]; 01199 XMLString::binToText(newPort, value1, BUF_LEN, 10, fMemoryManager); 01200 ThrowXMLwithMemMgr2(MalformedURLException 01201 , XMLExcepts::XMLNUM_URI_NullHost 01202 , errMsg_PORT 01203 , value1 01204 , fMemoryManager); 01205 } 01206 } 01207 else if (newPort != -1) 01208 { 01209 XMLCh value1[BUF_LEN+1]; 01210 XMLString::binToText(newPort, value1, BUF_LEN, 10, fMemoryManager); 01211 ThrowXMLwithMemMgr1(MalformedURLException 01212 , XMLExcepts::XMLNUM_URI_PortNo_Invalid 01213 , value1 01214 , fMemoryManager); 01215 } 01216 01217 fPort = newPort; 01218 } 01219 01220 void XMLUri::setRegBasedAuthority(const XMLCh* const newRegAuth) 01221 { 01222 if ( !newRegAuth ) 01223 { 01224 if (getRegBasedAuthority()) 01225 fMemoryManager->deallocate(fRegAuth);//delete [] fRegAuth; 01226 01227 fRegAuth = 0; 01228 return; 01229 } 01230 // reg_name = 1*( unreserved | escaped | "$" | "," | 01231 // ";" | ":" | "@" | "&" | "=" | "+" ) 01232 else if ( !*newRegAuth || !isValidRegistryBasedAuthority(newRegAuth) ) 01233 { 01234 ThrowXMLwithMemMgr2(MalformedURLException 01235 , XMLExcepts::XMLNUM_URI_Component_Not_Conformant 01236 , errMsg_REGNAME 01237 , newRegAuth 01238 , fMemoryManager); 01239 } 01240 01241 if (getRegBasedAuthority()) 01242 fMemoryManager->deallocate(fRegAuth);//delete [] fRegAuth; 01243 01244 fRegAuth = XMLString::replicate(newRegAuth, fMemoryManager); 01245 setHost(0); 01246 } 01247 01248 // 01249 // setPath(), setQueryString() and setFragment() are closely 01250 // related three methods as well. 01251 // 01252 void XMLUri::setPath(const XMLCh* const newPath) 01253 { 01254 if (!newPath) 01255 { 01256 if (getPath()) 01257 fMemoryManager->deallocate(fPath);//delete [] fPath; 01258 01259 fPath = 0; 01260 setQueryString(0); 01261 setFragment(0); 01262 } 01263 else 01264 { 01265 initializePath(newPath); 01266 } 01267 } 01268 01269 // 01270 // fragment = *uric 01271 // 01272 void XMLUri::setFragment(const XMLCh* const newFragment) 01273 { 01274 if ( !newFragment ) 01275 { 01276 if (getFragment()) 01277 fMemoryManager->deallocate(fFragment);//delete [] fFragment; 01278 01279 fFragment = 0; 01280 } 01281 else if (!isGenericURI()) 01282 { 01283 ThrowXMLwithMemMgr2(MalformedURLException 01284 , XMLExcepts::XMLNUM_URI_Component_for_GenURI_Only 01285 , errMsg_FRAGMENT 01286 , newFragment 01287 , fMemoryManager); 01288 } 01289 else if ( !getPath() ) 01290 { 01291 ThrowXMLwithMemMgr2(MalformedURLException 01292 , XMLExcepts::XMLNUM_URI_NullPath 01293 , errMsg_FRAGMENT 01294 , newFragment 01295 , fMemoryManager); 01296 } 01297 else if (!isURIString(newFragment)) 01298 { 01299 ThrowXMLwithMemMgr1(MalformedURLException 01300 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char 01301 , errMsg_FRAGMENT 01302 , fMemoryManager); 01303 } 01304 else 01305 { 01306 if (getFragment()) 01307 { 01308 fMemoryManager->deallocate(fFragment);//delete [] fFragment; 01309 } 01310 01311 fFragment = XMLString::replicate(newFragment, fMemoryManager); 01312 } 01313 } 01314 01315 // 01316 // query = *uric 01317 // 01318 void XMLUri::setQueryString(const XMLCh* const newQueryString) 01319 { 01320 if ( !newQueryString ) 01321 { 01322 if (getQueryString()) 01323 fMemoryManager->deallocate(fQueryString);//delete [] fQueryString; 01324 01325 fQueryString = 0; 01326 } 01327 else if (!isGenericURI()) 01328 { 01329 ThrowXMLwithMemMgr2(MalformedURLException 01330 , XMLExcepts::XMLNUM_URI_Component_for_GenURI_Only 01331 , errMsg_QUERY 01332 , newQueryString 01333 , fMemoryManager); 01334 } 01335 else if ( !getPath() ) 01336 { 01337 ThrowXMLwithMemMgr2(MalformedURLException 01338 , XMLExcepts::XMLNUM_URI_NullPath 01339 , errMsg_QUERY 01340 , newQueryString 01341 , fMemoryManager); 01342 } 01343 else if (!isURIString(newQueryString)) 01344 { 01345 ThrowXMLwithMemMgr2(MalformedURLException 01346 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char 01347 , errMsg_QUERY 01348 , newQueryString 01349 , fMemoryManager); 01350 } 01351 else 01352 { 01353 if (getQueryString()) 01354 { 01355 fMemoryManager->deallocate(fQueryString);//delete [] fQueryString; 01356 } 01357 01358 fQueryString = XMLString::replicate(newQueryString, fMemoryManager); 01359 } 01360 } 01361 01362 // --------------------------------------------------------------------------- 01363 // XMLUri: Public, static methods 01364 // --------------------------------------------------------------------------- 01365 01366 // 01367 // scheme = alpha *( alpha | digit | "+" | "-" | "." ) 01368 // alphanum = alpha | digit 01369 // 01370 bool XMLUri::isConformantSchemeName(const XMLCh* const scheme) 01371 { 01372 if ( !scheme ) 01373 return false; 01374 01375 const XMLCh* tmpStr = scheme; 01376 if (!XMLString::isAlpha(*tmpStr)) // first: alpha 01377 return false; 01378 01379 // second onwards: ( alpha | digit | "+" | "-" | "." ) 01380 tmpStr++; 01381 while (*tmpStr) 01382 { 01383 if ( !XMLString::isAlphaNum(*tmpStr) && 01384 (XMLString::indexOf(SCHEME_CHARACTERS, *tmpStr) == -1)) 01385 return false; 01386 01387 tmpStr++; 01388 } 01389 01390 return true; 01391 } 01392 01393 // 01394 // userinfo = *( unreserved | escaped | 01395 // ";" | ":" | "&" | "=" | "+" | "$" | "," ) 01396 // 01397 void XMLUri::isConformantUserInfo(const XMLCh* const userInfo 01398 , MemoryManager* const manager) 01399 { 01400 if ( !userInfo ) 01401 return; 01402 01403 const XMLCh* tmpStr = userInfo; 01404 while (*tmpStr) 01405 { 01406 if ( isUnreservedCharacter(*tmpStr) || 01407 (XMLString::indexOf(USERINFO_CHARACTERS, *tmpStr) != -1)) 01408 { 01409 tmpStr++; 01410 } 01411 else if (*tmpStr == chPercent) // '%' 01412 { 01413 if (XMLString::isHex(*(tmpStr+1)) && // 1st hex 01414 XMLString::isHex(*(tmpStr+2)) ) // 2nd hex 01415 { 01416 tmpStr+=3; 01417 } 01418 else 01419 { 01420 XMLCh value1[BUF_LEN+1]; 01421 value1[0] = chPercent; 01422 value1[1] = *(tmpStr+1); 01423 value1[2] = *(tmpStr+2); 01424 value1[3] = chNull; 01425 01426 ThrowXMLwithMemMgr2(MalformedURLException 01427 , XMLExcepts::XMLNUM_URI_Component_Invalid_EscapeSequence 01428 , errMsg_USERINFO 01429 , value1 01430 , manager); 01431 } 01432 } 01433 else 01434 { 01435 ThrowXMLwithMemMgr2(MalformedURLException 01436 , XMLExcepts::XMLNUM_URI_Component_Invalid_Char 01437 , errMsg_USERINFO 01438 , userInfo 01439 , manager); 01440 } 01441 } //while 01442 01443 return; 01444 } 01445 01446 bool XMLUri::isValidServerBasedAuthority(const XMLCh* const host, 01447 const XMLSize_t hostLen, 01448 const int port, 01449 const XMLCh* const userinfo, 01450 const XMLSize_t userLen) 01451 { 01452 // The order is important, do not change 01453 if (!isWellFormedAddress(host, hostLen)) 01454 return false; 01455 01456 // check port number 01457 if ((port > 65535) || (port < 0 && port != -1)) 01458 return false; 01459 01460 // check userinfo 01461 XMLSize_t index = 0; 01462 while (index < userLen) 01463 { 01464 if (isUnreservedCharacter(userinfo[index]) || 01465 (XMLString::indexOf(USERINFO_CHARACTERS, userinfo[index]) != -1)) 01466 { 01467 index++; 01468 } 01469 else if (userinfo[index] == chPercent) // '%' 01470 { 01471 if (XMLString::isHex(userinfo[index+1]) && // 1st hex 01472 XMLString::isHex(userinfo[index+2]) ) // 2nd hex 01473 index +=3; 01474 else 01475 return false; 01476 } 01477 else 01478 return false; 01479 } //while 01480 01481 return true; 01482 } 01483 01484 bool XMLUri::isValidServerBasedAuthority(const XMLCh* const host 01485 , const int port 01486 , const XMLCh* const userinfo 01487 , MemoryManager* const manager) 01488 { 01489 // The order is important, do not change 01490 if (!isWellFormedAddress(host, manager)) 01491 return false; 01492 01493 // check port number 01494 if ((port > 65535) || (port < 0 && port != -1)) 01495 return false; 01496 01497 // check userinfo 01498 if (!userinfo) 01499 return true; 01500 01501 const XMLCh* tmpStr = userinfo; 01502 while (*tmpStr) 01503 { 01504 if ( isUnreservedCharacter(*tmpStr) || 01505 (XMLString::indexOf(USERINFO_CHARACTERS, *tmpStr) != -1)) 01506 { 01507 tmpStr++; 01508 } 01509 else if (*tmpStr == chPercent) // '%' 01510 { 01511 if (XMLString::isHex(*(tmpStr+1)) && // 1st hex 01512 XMLString::isHex(*(tmpStr+2)) ) // 2nd hex 01513 { 01514 tmpStr+=3; 01515 } 01516 else 01517 return false; 01518 } 01519 else 01520 return false; 01521 } //while 01522 01523 return true; 01524 } 01525 01526 bool XMLUri::isValidRegistryBasedAuthority(const XMLCh* const authority, 01527 const XMLSize_t authLen) 01528 { 01529 // check authority 01530 XMLSize_t index = 0; 01531 while (index < authLen) 01532 { 01533 if (isUnreservedCharacter(authority[index]) || 01534 (XMLString::indexOf(REG_NAME_CHARACTERS, authority[index]) != -1)) 01535 { 01536 index++; 01537 } 01538 else if (authority[index] == chPercent) // '%' 01539 { 01540 if (XMLString::isHex(authority[index+1]) && // 1st hex 01541 XMLString::isHex(authority[index+2]) ) // 2nd hex 01542 index +=3; 01543 else 01544 return false; 01545 } 01546 else 01547 return false; 01548 } //while 01549 01550 return true; 01551 } 01552 01553 bool XMLUri::isValidRegistryBasedAuthority(const XMLCh* const authority) 01554 { 01555 // check authority 01556 if (!authority) 01557 return false; 01558 01559 const XMLCh* tmpStr = authority; 01560 while (*tmpStr) 01561 { 01562 if (isUnreservedCharacter(*tmpStr) || 01563 (XMLString::indexOf(REG_NAME_CHARACTERS, *tmpStr) != -1)) 01564 { 01565 tmpStr++; 01566 } 01567 else if (*tmpStr == chPercent) // '%' 01568 { 01569 if (XMLString::isHex(*(tmpStr+1)) && // 1st hex 01570 XMLString::isHex(*(tmpStr+2)) ) // 2nd hex 01571 { 01572 tmpStr+=3; 01573 } 01574 else 01575 return false; 01576 } 01577 else 01578 return false; 01579 } //while 01580 01581 return true; 01582 } 01583 01584 // 01585 // uric = reserved | unreserved | escaped 01586 // escaped = "%" hex hex 01587 // hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | 01588 // "a" | "b" | "c" | "d" | "e" | "f" 01589 // 01590 bool XMLUri::isURIString(const XMLCh* const uricString) 01591 { 01592 if (!uricString || !*uricString) 01593 return false; 01594 01595 const XMLCh* tmpStr = uricString; 01596 01597 while (*tmpStr) 01598 { 01599 if (isReservedOrUnreservedCharacter(*tmpStr)) 01600 { 01601 tmpStr++; 01602 } 01603 else if (*tmpStr == chPercent) // '%' 01604 { 01605 if (XMLString::isHex(*(tmpStr+1)) && // 1st hex 01606 XMLString::isHex(*(tmpStr+2)) ) // 2nd hex 01607 { 01608 tmpStr+=3; 01609 } 01610 else 01611 { 01612 return false; 01613 } 01614 } 01615 else 01616 { 01617 return false; 01618 } 01619 } 01620 01621 return true; 01622 } 01623 01624 // 01625 // host = hostname | IPv4address 01626 // 01627 // hostname = *( domainlabel "." ) toplabel [ "." ] 01628 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 01629 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum 01630 // 01631 // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT 01632 // 01633 bool XMLUri::isWellFormedAddress(const XMLCh* const addrString 01634 , MemoryManager* const manager) 01635 { 01636 // Check that we have a non-zero length string. 01637 if (!addrString || !*addrString) 01638 return false; 01639 01640 // Get address length. 01641 XMLSize_t addrStrLen = XMLString::stringLen(addrString); 01642 01643 // Check if the host is a valid IPv6reference. 01644 if (*addrString == chOpenSquare) 01645 { 01646 return isWellFormedIPv6Reference(addrString, addrStrLen); 01647 } 01648 01649 // 01650 // Cannot start with a '.', '-', or end with a '-'. 01651 // 01652 if (*addrString == chPeriod || 01653 *addrString == chDash || 01654 addrString[addrStrLen-1] == chDash) 01655 return false; 01656 01657 // rightmost domain label starting with digit indicates IP address 01658 // since top level domain label can only start with an alpha 01659 // see RFC 2396 Section 3.2.2 01660 01661 int lastPeriodPos = XMLString::lastIndexOf(addrString, chPeriod); 01662 01663 // if the string ends with "." 01664 // get the second last "." 01665 if (XMLSize_t(lastPeriodPos + 1) == addrStrLen) 01666 { 01667 XMLCh* tmp2 = (XMLCh*) manager->allocate 01668 ( 01669 addrStrLen * sizeof(XMLCh) 01670 );//new XMLCh[addrStrLen]; 01671 XMLString::subString(tmp2, addrString, 0, lastPeriodPos, manager); 01672 lastPeriodPos = XMLString::lastIndexOf(tmp2, chPeriod); 01673 manager->deallocate(tmp2);//delete [] tmp2; 01674 01675 if ( XMLString::isDigit(addrString[lastPeriodPos + 1])) 01676 return false; 01677 } 01678 01679 if (XMLString::isDigit(addrString[lastPeriodPos + 1])) 01680 { 01681 return isWellFormedIPv4Address(addrString, addrStrLen); 01682 } // end of IPv4address 01683 else 01684 { 01685 // 01686 // hostname = *( domainlabel "." ) toplabel [ "." ] 01687 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 01688 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum 01689 01690 // RFC 2396 states that hostnames take the form described in 01691 // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According 01692 // to RFC 1034, hostnames are limited to 255 characters. 01693 if (addrStrLen > 255) { 01694 return false; 01695 } 01696 01697 unsigned int labelCharCount = 0; 01698 01699 // domain labels can contain alphanumerics and '-" 01700 // but must start and end with an alphanumeric 01701 for (XMLSize_t i = 0; i < addrStrLen; i++) 01702 { 01703 if (addrString[i] == chPeriod) 01704 { 01705 if (((i > 0) && 01706 (!XMLString::isAlphaNum(addrString[i-1]))) || 01707 ((i + 1 < addrStrLen) && 01708 (!XMLString::isAlphaNum(addrString[i+1]))) ) 01709 { 01710 return false; 01711 } 01712 labelCharCount = 0; 01713 } 01714 else if (!XMLString::isAlphaNum(addrString[i]) && 01715 addrString[i] != chDash) 01716 { 01717 return false; 01718 } 01719 // RFC 1034: Labels must be 63 characters or less. 01720 else if (++labelCharCount > 63) { 01721 return false; 01722 } 01723 } //for 01724 } 01725 01726 return true; 01727 } 01728 01729 // 01730 // RFC 2732 amended RFC 2396 by replacing the definition 01731 // of IPv4address with the one defined by RFC 2373. 01732 // 01733 // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT 01734 // 01735 bool XMLUri::isWellFormedIPv4Address(const XMLCh* const addr, const XMLSize_t length) 01736 { 01737 int numDots = 0; 01738 int numDigits = 0; 01739 01740 // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT 01741 // 01742 // make sure that 01743 // 1) we see only digits and dot separators, 01744 // 2) that any dot separator is preceded and followed by a digit 01745 // 3) that we find 3 dots 01746 // 4) that each segment contains 1 to 3 digits. 01747 // 5) that each segment is not greater than 255. 01748 for (XMLSize_t i = 0; i < length; ++i) 01749 { 01750 if (addr[i] == chPeriod) 01751 { 01752 if ((i == 0) || 01753 (i+1 == length) || 01754 !XMLString::isDigit(addr[i+1])) 01755 { 01756 return false; 01757 } 01758 numDigits = 0; 01759 if (++numDots > 3) 01760 return false; 01761 } 01762 else if (!XMLString::isDigit(addr[i])) 01763 { 01764 return false; 01765 } 01766 // Check that that there are no more than three digits 01767 // in this segment. 01768 else if (++numDigits > 3) 01769 { 01770 return false; 01771 } 01772 // Check that this segment is not greater than 255. 01773 else if (numDigits == 3) 01774 { 01775 XMLCh first = addr[i-2]; 01776 XMLCh second = addr[i-1]; 01777 XMLCh last = addr[i]; 01778 if (!(first < chDigit_2 || 01779 (first == chDigit_2 && 01780 (second < chDigit_5 || 01781 (second == chDigit_5 && last <= chDigit_5))))) 01782 { 01783 return false; 01784 } 01785 } 01786 } //for 01787 return (numDots == 3); 01788 } 01789 01790 // 01791 // IPv6reference = "[" IPv6address "]" 01792 // 01793 bool XMLUri::isWellFormedIPv6Reference(const XMLCh* const addr, const XMLSize_t length) 01794 { 01795 XMLSize_t end = length-1; 01796 01797 // Check if string is a potential match for IPv6reference. 01798 if (!(length > 2 && addr[0] == chOpenSquare && addr[end] == chCloseSquare)) 01799 { 01800 return false; 01801 } 01802 01803 // Counter for the number of 16-bit sections read in the address. 01804 int counter = 0; 01805 01806 // Scan hex sequence before possible '::' or IPv4 address. 01807 int iIndex = scanHexSequence(addr, 1, end, counter); 01808 if (iIndex == -1) 01809 return false; 01810 01811 XMLSize_t index=(XMLSize_t)iIndex; 01812 // Address must contain 128-bits of information. 01813 if (index == end) 01814 { 01815 return (counter == 8); 01816 } 01817 01818 if (index+1 < end && addr[index] == chColon) 01819 { 01820 if (addr[index+1] == chColon) 01821 { 01822 // '::' represents at least one 16-bit group of zeros. 01823 if (++counter > 8) 01824 { 01825 return false; 01826 } 01827 index += 2; 01828 // Trailing zeros will fill out the rest of the address. 01829 if (index == end) 01830 { 01831 return true; 01832 } 01833 } 01834 // If the second character wasn't ':', in order to be valid, 01835 // the remainder of the string must match IPv4Address, 01836 // and we must have read exactly 6 16-bit groups. 01837 else 01838 { 01839 if (counter == 6) 01840 return isWellFormedIPv4Address(addr+index+1, end-index-1); 01841 else 01842 return false; 01843 } 01844 } 01845 else 01846 { 01847 return false; 01848 } 01849 01850 // 3. Scan hex sequence after '::'. 01851 int prevCount = counter; 01852 iIndex = scanHexSequence(addr, index, end, counter); 01853 if (iIndex == -1) 01854 return false; 01855 01856 index=(XMLSize_t)iIndex; 01857 // If this is the end of the address then 01858 // we've got 128-bits of information. 01859 if (index == end) 01860 { 01861 return true; 01862 } 01863 01864 // The address ends in an IPv4 address, or it is invalid. 01865 // scanHexSequence has already made sure that we have the right number of bits. 01866 XMLSize_t shiftCount = (counter > prevCount) ? index+1 : index; 01867 return isWellFormedIPv4Address(addr + shiftCount, end - shiftCount); 01868 } 01869 01870 // 01871 // For use with isWellFormedIPv6Reference only. 01872 // 01873 int XMLUri::scanHexSequence (const XMLCh* const addr, XMLSize_t index, XMLSize_t end, int& counter) 01874 { 01875 XMLCh testChar = chNull; 01876 int numDigits = 0; 01877 XMLSize_t start = index; 01878 01879 // Trying to match the following productions: 01880 // hexseq = hex4 *( ":" hex4) 01881 // hex4 = 1*4HEXDIG 01882 for (; index < end; ++index) 01883 { 01884 testChar = addr[index]; 01885 if (testChar == chColon) 01886 { 01887 // IPv6 addresses are 128-bit, so there can be at most eight sections. 01888 if (numDigits > 0 && ++counter > 8) 01889 { 01890 return -1; 01891 } 01892 // This could be '::'. 01893 if (numDigits == 0 || ((index+1 < end) && addr[index+1] == chColon)) 01894 { 01895 return (int)index; 01896 } 01897 numDigits = 0; 01898 } 01899 // This might be invalid or an IPv4address. If it's potentially an IPv4address, 01900 // backup to just after the last valid character that matches hexseq. 01901 else if (!XMLString::isHex(testChar)) 01902 { 01903 if (testChar == chPeriod && numDigits < 4 && numDigits > 0 && counter <= 6) 01904 { 01905 int back = (int)index - numDigits - 1; 01906 return (back >= (int)start) ? back : (int)start; 01907 } 01908 return -1; 01909 } 01910 // There can be at most 4 hex digits per group. 01911 else if (++numDigits > 4) 01912 { 01913 return -1; 01914 } 01915 } 01916 return (numDigits > 0 && ++counter <= 8) ? (int)end : -1; 01917 } 01918 01919 bool XMLUri::isGenericURI() 01920 { 01921 return (getHost() != 0); 01922 } 01923 01924 01925 // 01926 // This method will take the broken out parts of the URI and build up the 01927 // full text. We don't do this unless someone asks us to, since its often 01928 // never required. 01929 // 01930 void XMLUri::buildFullText() 01931 { 01932 // Calculate the worst case size of the buffer required 01933 XMLSize_t bufSize = XMLString::stringLen(fScheme) + 1 01934 + XMLString::stringLen(fFragment) + 1 01935 + XMLString::stringLen(fHost ? fHost : fRegAuth) + 2 01936 + XMLString::stringLen(fPath) 01937 + XMLString::stringLen(fQueryString) + 1 01938 + XMLString::stringLen(fUserInfo) + 1 01939 + 32; 01940 01941 // Clean up the existing buffer and allocate another 01942 fMemoryManager->deallocate(fURIText);//delete [] fURIText; 01943 fURIText = (XMLCh*) fMemoryManager->allocate(bufSize * sizeof(XMLCh));//new XMLCh[bufSize]; 01944 *fURIText = 0; 01945 01946 XMLCh* outPtr = fURIText; 01947 if (fScheme != 0) 01948 { 01949 XMLString::catString(fURIText, getScheme()); 01950 outPtr += XMLString::stringLen(fURIText); 01951 *outPtr++ = chColon; 01952 } 01953 01954 // Authority 01955 if (fHost || fRegAuth) 01956 { 01957 *outPtr++ = chForwardSlash; 01958 *outPtr++ = chForwardSlash; 01959 01960 // Server based authority. 01961 if (fHost) 01962 { 01963 if (fUserInfo) 01964 { 01965 XMLString::copyString(outPtr, fUserInfo); 01966 outPtr += XMLString::stringLen(fUserInfo); 01967 *outPtr++ = chAt; 01968 } 01969 01970 XMLString::copyString(outPtr, fHost); 01971 outPtr += XMLString::stringLen(fHost); 01972 01973 // 01974 // If the port is -1, then we don't put it in. Else we need 01975 // to because it was explicitly provided. 01976 // 01977 if (fPort != -1) 01978 { 01979 *outPtr++ = chColon; 01980 01981 XMLCh tmpBuf[17]; 01982 XMLString::binToText(fPort, tmpBuf, 16, 10, fMemoryManager); 01983 XMLString::copyString(outPtr, tmpBuf); 01984 outPtr += XMLString::stringLen(tmpBuf); 01985 } 01986 } 01987 // Registry based authority. 01988 else { 01989 XMLString::copyString(outPtr, fRegAuth); 01990 outPtr += XMLString::stringLen(fRegAuth); 01991 } 01992 } 01993 01994 if (fPath) 01995 { 01996 XMLString::copyString(outPtr, fPath); 01997 outPtr += XMLString::stringLen(fPath); 01998 } 01999 02000 if (fQueryString) 02001 { 02002 *outPtr++ = chQuestion; 02003 XMLString::copyString(outPtr, fQueryString); 02004 outPtr += XMLString::stringLen(fQueryString); 02005 } 02006 02007 if (fFragment) 02008 { 02009 *outPtr++ = chPound; 02010 XMLString::copyString(outPtr, fFragment); 02011 outPtr += XMLString::stringLen(fFragment); 02012 } 02013 02014 // Cap it off in case the last op was not a string copy 02015 *outPtr = 0; 02016 } 02017 02018 // NOTE: no check for NULL value of uriStr (caller responsiblilty) 02019 bool XMLUri::isValidURI(const XMLUri* const baseURI 02020 , const XMLCh* const uriStr 02021 , bool bAllowSpaces/*=false*/) 02022 { 02023 // get a trimmed version of uriStr 02024 // uriStr will NO LONGER be used in this function. 02025 const XMLCh* trimmedUriSpec = uriStr; 02026 02027 while (XMLChar1_0::isWhitespace(*trimmedUriSpec)) 02028 trimmedUriSpec++; 02029 02030 XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec); 02031 02032 while (trimmedUriSpecLen) { 02033 if (XMLChar1_0::isWhitespace(trimmedUriSpec[trimmedUriSpecLen-1])) 02034 trimmedUriSpecLen--; 02035 else 02036 break; 02037 } 02038 02039 if (trimmedUriSpecLen == 0) 02040 { 02041 if (!baseURI) 02042 return false; 02043 else 02044 return true; 02045 } 02046 02047 XMLSize_t index = 0; 02048 bool foundScheme = false; 02049 02050 // Check for scheme, which must be before `/', '?' or '#'. 02051 int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon); 02052 int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash); 02053 int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion); 02054 int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound); 02055 02056 if ((colonIdx <= 0) || 02057 (colonIdx > slashIdx && slashIdx != -1) || 02058 (colonIdx > queryIdx && queryIdx != -1) || 02059 (colonIdx > fragmentIdx && fragmentIdx != -1)) 02060 { 02061 // A standalone base is a valid URI according to spec 02062 if (colonIdx == 0 || (!baseURI && fragmentIdx != 0)) 02063 return false; 02064 } 02065 else 02066 { 02067 if (!processScheme(trimmedUriSpec, index)) 02068 return false; 02069 foundScheme = true; 02070 ++index; 02071 } 02072 02073 // It's an error if we stop here 02074 if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound))) 02075 return false; 02076 02077 // two slashes means generic URI syntax, so we get the authority 02078 const XMLCh* authUriSpec = trimmedUriSpec + index; 02079 if (((index+1) < trimmedUriSpecLen) && 02080 XMLString::startsWith(authUriSpec, DOUBLE_SLASH)) 02081 { 02082 index += 2; 02083 XMLSize_t startPos = index; 02084 02085 // get authority - everything up to path, query or fragment 02086 XMLCh testChar; 02087 while (index < trimmedUriSpecLen) 02088 { 02089 testChar = trimmedUriSpec[index]; 02090 if (testChar == chForwardSlash || 02091 testChar == chQuestion || 02092 testChar == chPound ) 02093 { 02094 break; 02095 } 02096 02097 index++; 02098 } 02099 02100 // if we found authority, parse it out, otherwise we set the 02101 // host to empty string 02102 if (index > startPos) 02103 { 02104 if (!processAuthority(trimmedUriSpec + startPos, index - startPos)) 02105 return false; 02106 } 02107 } 02108 02109 // we need to check if index has exceed the lenght or not 02110 if (index < trimmedUriSpecLen) 02111 { 02112 if (!processPath(trimmedUriSpec + index, trimmedUriSpecLen - index, foundScheme, bAllowSpaces)) 02113 return false; 02114 } 02115 02116 return true; 02117 } 02118 02119 // NOTE: no check for NULL value of uriStr (caller responsiblilty) 02120 // NOTE: this routine is the same as above, but it uses a flag to 02121 // indicate the existance of a baseURI rather than an XMLuri. 02122 bool XMLUri::isValidURI(bool haveBaseURI, const XMLCh* const uriStr, bool bAllowSpaces/*=false*/) 02123 { 02124 // get a trimmed version of uriStr 02125 // uriStr will NO LONGER be used in this function. 02126 const XMLCh* trimmedUriSpec = uriStr; 02127 02128 while (XMLChar1_0::isWhitespace(*trimmedUriSpec)) 02129 trimmedUriSpec++; 02130 02131 XMLSize_t trimmedUriSpecLen = XMLString::stringLen(trimmedUriSpec); 02132 02133 while (trimmedUriSpecLen) { 02134 if (XMLChar1_0::isWhitespace(trimmedUriSpec[trimmedUriSpecLen-1])) 02135 trimmedUriSpecLen--; 02136 else 02137 break; 02138 } 02139 02140 if (trimmedUriSpecLen == 0) 02141 { 02142 if (!haveBaseURI) 02143 return false; 02144 return true; 02145 } 02146 02147 XMLSize_t index = 0; 02148 bool foundScheme = false; 02149 02150 // Check for scheme, which must be before `/', '?' or '#'. 02151 int colonIdx = XMLString::indexOf(trimmedUriSpec, chColon); 02152 int slashIdx = XMLString::indexOf(trimmedUriSpec, chForwardSlash); 02153 int queryIdx = XMLString::indexOf(trimmedUriSpec, chQuestion); 02154 int fragmentIdx = XMLString::indexOf(trimmedUriSpec, chPound); 02155 02156 if ((colonIdx <= 0) || 02157 (colonIdx > slashIdx && slashIdx != -1) || 02158 (colonIdx > queryIdx && queryIdx != -1) || 02159 (colonIdx > fragmentIdx && fragmentIdx != -1)) 02160 { 02161 // A standalone base is a valid URI according to spec 02162 if (colonIdx == 0 || (!haveBaseURI && fragmentIdx != 0)) 02163 return false; 02164 } 02165 else 02166 { 02167 if (!processScheme(trimmedUriSpec, index)) 02168 return false; 02169 foundScheme = true; 02170 ++index; 02171 } 02172 02173 // It's an error if we stop here 02174 if (index == trimmedUriSpecLen || (foundScheme && (trimmedUriSpec[index] == chPound))) 02175 return false; 02176 02177 // two slashes means generic URI syntax, so we get the authority 02178 const XMLCh* authUriSpec = trimmedUriSpec + index; 02179 if (((index+1) < trimmedUriSpecLen) && 02180 XMLString::startsWith(authUriSpec, DOUBLE_SLASH)) 02181 { 02182 index += 2; 02183 XMLSize_t startPos = index; 02184 02185 // get authority - everything up to path, query or fragment 02186 XMLCh testChar; 02187 while (index < trimmedUriSpecLen) 02188 { 02189 testChar = trimmedUriSpec[index]; 02190 if (testChar == chForwardSlash || 02191 testChar == chQuestion || 02192 testChar == chPound ) 02193 { 02194 break; 02195 } 02196 02197 index++; 02198 } 02199 02200 // if we found authority, parse it out, otherwise we set the 02201 // host to empty string 02202 if (index > startPos) 02203 { 02204 if (!processAuthority(trimmedUriSpec + startPos, index - startPos)) 02205 return false; 02206 } 02207 } 02208 02209 // we need to check if index has exceed the length or not 02210 if (index < trimmedUriSpecLen) 02211 { 02212 if (!processPath(trimmedUriSpec + index, trimmedUriSpecLen - index, foundScheme, bAllowSpaces)) 02213 return false; 02214 } 02215 02216 return true; 02217 } 02218 02219 bool XMLUri::isWellFormedAddress(const XMLCh* const addrString, 02220 const XMLSize_t addrStrLen) 02221 { 02222 // Check that we have a non-zero length string. 02223 if (addrStrLen == 0) 02224 return false; 02225 02226 // Check if the host is a valid IPv6reference. 02227 if (*addrString == chOpenSquare) 02228 { 02229 return isWellFormedIPv6Reference(addrString, addrStrLen); 02230 } 02231 02232 // 02233 // Cannot start with a '.', '-', or end with a '-'. 02234 // 02235 if (*addrString == chPeriod || 02236 *addrString == chDash || 02237 addrString[addrStrLen-1] == chDash) 02238 return false; 02239 02240 // rightmost domain label starting with digit indicates IP address 02241 // since top level domain label can only start with an alpha 02242 // see RFC 2396 Section 3.2.2 02243 02244 int lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, addrStrLen); 02245 02246 // if the string ends with "." 02247 // get the second last "." 02248 if (XMLSize_t(lastPeriodPos + 1) == addrStrLen) 02249 { 02250 lastPeriodPos = XMLString::lastIndexOf(chPeriod, addrString, lastPeriodPos); 02251 02252 if ( XMLString::isDigit(addrString[lastPeriodPos + 1])) 02253 return false; 02254 } 02255 02256 if (XMLString::isDigit(addrString[lastPeriodPos + 1])) 02257 { 02258 return isWellFormedIPv4Address(addrString, addrStrLen); 02259 } // end of IPv4address 02260 else 02261 { 02262 // 02263 // hostname = *( domainlabel "." ) toplabel [ "." ] 02264 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 02265 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum 02266 02267 // RFC 2396 states that hostnames take the form described in 02268 // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According 02269 // to RFC 1034, hostnames are limited to 255 characters. 02270 if (addrStrLen > 255) { 02271 return false; 02272 } 02273 02274 unsigned int labelCharCount = 0; 02275 02276 // domain labels can contain alphanumerics and '-" 02277 // but must start and end with an alphanumeric 02278 for (XMLSize_t i = 0; i < addrStrLen; i++) 02279 { 02280 if (addrString[i] == chPeriod) 02281 { 02282 if (((i > 0) && 02283 (!XMLString::isAlphaNum(addrString[i-1]))) || 02284 ((i + 1 < addrStrLen) && 02285 (!XMLString::isAlphaNum(addrString[i+1]))) ) 02286 { 02287 return false; 02288 } 02289 labelCharCount = 0; 02290 } 02291 else if (!XMLString::isAlphaNum(addrString[i]) && 02292 addrString[i] != chDash) 02293 { 02294 return false; 02295 } 02296 // RFC 1034: Labels must be 63 characters or less. 02297 else if (++labelCharCount > 63) { 02298 return false; 02299 } 02300 } //for 02301 } 02302 02303 return true; 02304 } 02305 02306 bool XMLUri::processScheme(const XMLCh* const schemeStr, XMLSize_t& index) 02307 { 02308 const XMLCh* tmpPtr = XMLString::findAny(schemeStr, SCHEME_SEPARATORS); 02309 02310 if (tmpPtr) { 02311 index = tmpPtr - schemeStr; 02312 return isConformantSchemeName(schemeStr, index); 02313 } 02314 else { 02315 return false; 02316 } 02317 } 02318 02319 02320 bool XMLUri::isConformantSchemeName( const XMLCh* const scheme 02321 , const XMLSize_t schemeLen) 02322 { 02323 if (!XMLString::isAlpha(*scheme)) // first: alpha 02324 return false; 02325 02326 // second onwards: ( alpha | digit | "+" | "-" | "." ) 02327 for (XMLSize_t i=1; i<schemeLen; i++) 02328 { 02329 if ( !XMLString::isAlphaNum(scheme[i]) && 02330 (XMLString::indexOf(SCHEME_CHARACTERS, scheme[i]) == -1)) 02331 return false; 02332 } 02333 02334 return true; 02335 } 02336 02337 bool XMLUri::processAuthority( const XMLCh* const authSpec 02338 , const XMLSize_t authLen) 02339 { 02340 int index = XMLString::indexOf(authSpec, chAt); 02341 XMLSize_t start = 0; 02342 02343 // server = [ [ userinfo "@" ] hostport ] 02344 // userinfo is everything up @, 02345 const XMLCh* userinfo; 02346 int userInfoLen = 0; 02347 if ((index != -1) && (XMLSize_t(index) < authLen)) 02348 { 02349 userinfo = authSpec; 02350 userInfoLen = index; 02351 start = index + 1; 02352 } 02353 else 02354 { 02355 userinfo = XMLUni::fgZeroLenString; 02356 } 02357 02358 // hostport = host [ ":" port ] 02359 // host is everything up to ':', or up to 02360 // and including ']' if followed by ':'. 02361 // 02362 // Search for port boundary. 02363 const XMLCh* host; 02364 XMLSize_t hostLen = 0; 02365 if ((start < authLen) && (authSpec[start] == chOpenSquare)) 02366 { 02367 index = XMLString::indexOf(&(authSpec[start]), chCloseSquare); 02368 if ((index != -1) && (XMLSize_t(index) < authLen)) 02369 { 02370 // skip the ']' 02371 index = ((start + index + 1) < authLen 02372 && authSpec[start + index + 1] == chColon) ? index+1 : -1; 02373 } 02374 } 02375 else 02376 { 02377 index = XMLString::indexOf(&(authSpec[start]), chColon); 02378 if (index!=-1 && XMLSize_t(index) >= authLen) 02379 index = -1; 02380 } 02381 02382 host = &(authSpec[start]); 02383 if (index != -1) 02384 { 02385 hostLen = index; 02386 start += index + 1; // skip the : 02387 } 02388 else 02389 { 02390 hostLen = authLen - start; 02391 start = authLen; 02392 } 02393 02394 // port is everything after ":" 02395 int port = -1; 02396 if ((hostLen) && // non empty host 02397 (index != -1) && // ":" found 02398 (start < authLen) ) // ":" is not the last 02399 { 02400 const XMLCh* portStr = &(authSpec[start]); 02401 if (*portStr) 02402 { 02403 port = 0; 02404 for (XMLSize_t i=0; i<(authLen - start); i++) 02405 { 02406 if (portStr[i] < chDigit_0 || portStr[i] > chDigit_9) 02407 { 02408 // Assume this is a registry-based authority. 02409 // 02410 port = -1; 02411 hostLen = 0; 02412 host = XMLUni::fgZeroLenString; 02413 userInfoLen = 0; 02414 userinfo = XMLUni::fgZeroLenString; 02415 break; 02416 } 02417 02418 port = (port * 10) + (int) (portStr[i] - chDigit_0); 02419 } 02420 } 02421 } 02422 02423 return isValidServerBasedAuthority(host, hostLen, port, userinfo, userInfoLen) 02424 || isValidRegistryBasedAuthority(authSpec, authLen); 02425 } 02426 02427 bool XMLUri::processPath(const XMLCh* const pathStr, 02428 const XMLSize_t pathStrLen, 02429 const bool isSchemePresent, 02430 const bool bAllowSpaces/*=false*/) 02431 { 02432 if (pathStrLen != 0) 02433 { 02434 XMLSize_t index = 0; 02435 XMLCh testChar = chNull; 02436 bool isOpaque = (!isSchemePresent || *pathStr == chForwardSlash); 02437 02438 // path - everything up to query string or fragment 02439 // 02440 // RFC 2732 only allows '[' and ']' to appear in the opaque part. 02441 while (index < pathStrLen) 02442 { 02443 testChar = pathStr[index]; 02444 if (testChar == chQuestion || testChar == chPound) 02445 break; 02446 02447 if (testChar == chPercent) 02448 { 02449 if (index+2 >= pathStrLen || 02450 !XMLString::isHex(pathStr[index+1]) || 02451 !XMLString::isHex(pathStr[index+2])) 02452 return false; 02453 } 02454 else if (testChar==chSpace) 02455 { 02456 if(!bAllowSpaces) 02457 return false; 02458 } 02459 else if (!isUnreservedCharacter(testChar) && 02460 ((isOpaque && !isPathCharacter(testChar)) || 02461 (!isOpaque && !isReservedCharacter(testChar)))) 02462 { 02463 return false; 02464 } 02465 02466 index++; 02467 } 02468 02469 // query - starts with ? and up to fragment or end 02470 // fragment - starts with # 02471 bool isQuery = (testChar == chQuestion); 02472 if (isQuery || testChar == chPound) 02473 { 02474 index++; 02475 while (index < pathStrLen) 02476 { 02477 testChar = pathStr[index]; 02478 if (testChar == chPound && isQuery) { 02479 isQuery = false; 02480 index++; 02481 continue; 02482 } 02483 02484 if (testChar == chPercent) 02485 { 02486 if (index+2 >= pathStrLen || 02487 !XMLString::isHex(pathStr[index+1]) || 02488 !XMLString::isHex(pathStr[index+2])) 02489 return false; 02490 } 02491 else if (testChar==chSpace) 02492 { 02493 if(!bAllowSpaces) 02494 return false; 02495 } 02496 else if (!isReservedOrUnreservedCharacter(testChar)) 02497 { 02498 return false; 02499 } 02500 index++; 02501 } 02502 } 02503 } //if (pathStrLen...) 02504 02505 return true; 02506 } 02507 02508 /*** 02509 * [Bug7698]: filenames with embedded spaces in schemaLocation strings not handled properly 02510 * 02511 * This method is called when Scanner/TraverseSchema knows that the URI reference is 02512 * for local file. 02513 * 02514 ***/ 02515 void XMLUri::normalizeURI(const XMLCh* const systemURI, 02516 XMLBuffer& normalizedURI) 02517 { 02518 const XMLCh* pszSrc = systemURI; 02519 02520 normalizedURI.reset(); 02521 02522 while (*pszSrc) { 02523 02524 if ((*(pszSrc) == chPercent) 02525 && (*(pszSrc+1) == chDigit_2) 02526 && (*(pszSrc+2) == chDigit_0)) 02527 { 02528 pszSrc += 3; 02529 normalizedURI.append(chSpace); 02530 } 02531 else 02532 { 02533 normalizedURI.append(*pszSrc); 02534 pszSrc++; 02535 } 02536 } 02537 } 02538 02539 /*** 02540 * Support for Serialization/De-serialization 02541 ***/ 02542 02543 IMPL_XSERIALIZABLE_TOCREATE(XMLUri) 02544 02545 void XMLUri::serialize(XSerializeEngine& serEng) 02546 { 02547 02548 if (serEng.isStoring()) 02549 { 02550 serEng<<fPort; 02551 serEng.writeString(fScheme); 02552 serEng.writeString(fUserInfo); 02553 serEng.writeString(fHost); 02554 serEng.writeString(fRegAuth); 02555 serEng.writeString(fPath); 02556 serEng.writeString(fQueryString); 02557 serEng.writeString(fFragment); 02558 serEng.writeString(fURIText); 02559 } 02560 else 02561 { 02562 serEng>>fPort; 02563 serEng.readString(fScheme); 02564 serEng.readString(fUserInfo); 02565 serEng.readString(fHost); 02566 serEng.readString(fRegAuth); 02567 serEng.readString(fPath); 02568 serEng.readString(fQueryString); 02569 serEng.readString(fFragment); 02570 serEng.readString(fURIText); 02571 } 02572 02573 } 02574 02575 XMLUri::XMLUri(MemoryManager* const manager) 02576 : fPort(-1) 02577 , fScheme(0) 02578 , fUserInfo(0) 02579 , fHost(0) 02580 , fRegAuth(0) 02581 , fPath(0) 02582 , fQueryString(0) 02583 , fFragment(0) 02584 , fURIText(0) 02585 , fMemoryManager(manager) 02586 { 02587 } 02588 02589 XERCES_CPP_NAMESPACE_END