001 /* URI.java -- An URI class
002 Copyright (C) 2002, 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
003
004 This file is part of GNU Classpath.
005
006 GNU Classpath is free software; you can redistribute it and/or modify
007 it under the terms of the GNU General Public License as published by
008 the Free Software Foundation; either version 2, or (at your option)
009 any later version.
010
011 GNU Classpath is distributed in the hope that it will be useful, but
012 WITHOUT ANY WARRANTY; without even the implied warranty of
013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 General Public License for more details.
015
016 You should have received a copy of the GNU General Public License
017 along with GNU Classpath; see the file COPYING. If not, write to the
018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019 02110-1301 USA.
020
021 Linking this library statically or dynamically with other modules is
022 making a combined work based on this library. Thus, the terms and
023 conditions of the GNU General Public License cover the whole
024 combination.
025
026 As a special exception, the copyright holders of this library give you
027 permission to link this library with independent modules to produce an
028 executable, regardless of the license terms of these independent
029 modules, and to copy and distribute the resulting executable under
030 terms of your choice, provided that you also meet, for each linked
031 independent module, the terms and conditions of the license of that
032 module. An independent module is a module which is not derived from
033 or based on this library. If you modify this library, you may extend
034 this exception to your version of the library, but you are not
035 obligated to do so. If you do not wish to do so, delete this
036 exception statement from your version. */
037
038
039 package java.net;
040
041 import gnu.java.lang.CPStringBuilder;
042
043 import java.io.IOException;
044 import java.io.ObjectInputStream;
045 import java.io.ObjectOutputStream;
046 import java.io.Serializable;
047 import java.util.regex.Matcher;
048 import java.util.regex.Pattern;
049
050 /**
051 * <p>
052 * A URI instance represents that defined by
053 * <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC3986</a>,
054 * with some deviations.
055 * </p>
056 * <p>
057 * At its highest level, a URI consists of:
058 * </p>
 * <p>
 * <code>[<em>scheme</em><strong>:</strong>]<em>scheme-specific-part</em>
 * [<strong>#</strong><em>fragment</em>]</code>
 * </p>
062 * <p>
063 * where <strong>#</strong> and <strong>:</strong> are literal characters,
064 * and those parts enclosed in square brackets are optional.
065 * </p>
066 * <p>
067 * There are two main types of URI. An <em>opaque</em> URI is one
068 * which just consists of the above three parts, and is not further
069 * defined. An example of such a URI would be <em>mailto:</em> URI.
 * In contrast, <em>hierarchical</em> URIs give further definition
 * to the scheme-specific part, so as to represent some part of a
 * hierarchical structure:
073 * </p>
074 * <p>
075 * <code>[<strong>//</strong><em>authority</em>][<em>path</em>]
076 * [<strong>?</strong><em>query</em>]</code>
077 * </p>
078 * <p>
079 * with <strong>/</strong> and <strong>?</strong> being literal characters.
080 * When server-based, the authority section is further subdivided into:
081 * </p>
082 * <p>
083 * <code>[<em>user-info</em><strong>@</strong>]<em>host</em>
084 * [<strong>:</strong><em>port</em>]</code>
085 * </p>
086 * <p>
087 * with <strong>@</strong> and <strong>:</strong> as literal characters.
088 * Authority sections that are not server-based are said to be registry-based.
089 * </p>
090 * <p>
 * Hierarchical URIs can be either relative or absolute. Absolute
 * hierarchical URIs specify a scheme and have a scheme-specific part that
 * starts with a `<strong>/</strong>', while relative URIs don't
 * specify a scheme. Opaque URIs are always absolute.
094 * </p>
095 * <p>
096 * Each part of the URI may have one of three states: undefined, empty
097 * or containing some content. The former two of these are represented
098 * by <code>null</code> and the empty string in Java, respectively.
099 * The scheme-specific part may never be undefined. It also follows from
100 * this that the path sub-part may also not be undefined, so as to ensure
101 * the former.
102 * </p>
103 * <h2>Character Escaping and Quoting</h2>
104 * <p>
105 * The characters that can be used within a valid URI are restricted.
106 * There are two main classes of characters which can't be used as is
107 * within the URI:
108 * </p>
109 * <ol>
110 * <li><strong>Characters outside the US-ASCII character set</strong>.
111 * These have to be <strong>escaped</strong> in order to create
112 * an RFC-compliant URI; this means replacing the character with the
113 * appropriate hexadecimal value, preceded by a `%'.</li>
114 * <li><strong>Illegal characters</strong> (e.g. space characters,
115 * control characters) are quoted, which results in them being encoded
116 * in the same way as non-US-ASCII characters.</li>
117 * </ol>
118 * <p>
119 * The set of valid characters differs depending on the section of the URI:
120 * </p>
121 * <ul>
122 * <li><strong>Scheme</strong>: Must be an alphanumeric, `-', `.' or '+'.</li>
123 * <li><strong>Authority</strong>:Composed of the username, host, port, `@'
124 * and `:'.</li>
125 * <li><strong>Username</strong>: Allows unreserved or percent-encoded
126 * characters, sub-delimiters and `:'.</li>
127 * <li><strong>Host</strong>: Allows unreserved or percent-encoded
128 * characters, sub-delimiters and square brackets (`[' and `]') for IPv6
129 * addresses.</li>
130 * <li><strong>Port</strong>: Digits only.</li>
 * <li><strong>Path</strong>: Allows the path characters and `/'.</li>
 * <li><strong>Query</strong>: Allows the path characters, `?' and '/'.</li>
 * <li><strong>Fragment</strong>: Allows the path characters, `?' and '/'.</li>
134 * </ul>
135 * <p>
136 * These definitions reference the following sets of characters:
137 * </p>
138 * <ul>
139 * <li><strong>Unreserved characters</strong>: The alphanumerics plus
140 * `-', `.', `_', and `~'.</li>
141 * <li><strong>Sub-delimiters</strong>: `!', `$', `&', `(', `)', `*',
142 * `+', `,', `;', `=' and the single-quote itself.</li>
143 * <li><strong>Path characters</strong>: Unreserved and percent-encoded
144 * characters and the sub-delimiters along with `@' and `:'.</li>
145 * </ul>
146 * <p>
147 * The constructors and accessor methods allow the use and retrieval of
148 * URI components which contain non-US-ASCII characters directly.
149 * They are only escaped when the <code>toASCIIString()</code> method
150 * is used. In contrast, illegal characters are always quoted, with the
151 * exception of the return values of the non-raw accessors.
152 * </p>
153 *
154 * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
155 * @author Dalibor Topic (robilad@kaffe.org)
156 * @author Michael Koch (konqueror@gmx.de)
157 * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
158 * @since 1.4
159 */
160 public final class URI
161 implements Comparable<URI>, Serializable
162 {
/**
 * For serialization compatibility.
 */
static final long serialVersionUID = -6052424284110960213L;
167
/**
 * Regular expression for parsing URIs.
 *
 * Taken from RFC 2396, Appendix B.
 * This expression doesn't parse IPv6 addresses.
 * The capturing groups read by the parser are named by the
 * <code>*_GROUP</code> constants: 2 (scheme), 3 (scheme-specific part),
 * 5 (authority), 6 (path), 8 (query) and 10 (fragment).  Groups 1, 4, 7
 * and 9 are the same components together with their delimiter.
 */
private static final String URI_REGEXP =
  "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";

/**
 * Regular expression for parsing the authority segment.
 *
 * Group 2 is the user information (everything before `@'), group 3
 * the host and group 5 the port digits.  The host group excludes `:',
 * which is why a bracketed IPv6 literal cannot be matched.
 */
private static final String AUTHORITY_REGEXP =
  "(([^?#]*)@)?([^?#:]*)(:([0-9]*))?";
182
/**
 * Valid characters (taken from rfc2396/3986).
 *
 * Each constant is a string used as a character set: a character is
 * legal for a component if it occurs in the corresponding string.
 * The sets that contain `%' let existing percent-escapes pass through
 * the quoting methods unchanged.
 */
private static final String RFC2396_DIGIT = "0123456789";
private static final String RFC2396_LOWALPHA = "abcdefghijklmnopqrstuvwxyz";
private static final String RFC2396_UPALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
private static final String RFC2396_ALPHA =
  RFC2396_LOWALPHA + RFC2396_UPALPHA;
private static final String RFC2396_ALPHANUM = RFC2396_DIGIT + RFC2396_ALPHA;
// Unreserved characters: alphanumerics plus `-', `.', `_' and `~'.
private static final String RFC3986_UNRESERVED = RFC2396_ALPHANUM + "-._~";
// Sub-delimiters.
private static final String RFC3986_SUBDELIMS = "!$&'()*+,;=";
// Registered name: unreserved, sub-delimiters and the escape character.
private static final String RFC3986_REG_NAME =
  RFC3986_UNRESERVED + RFC3986_SUBDELIMS + "%";
// Path characters: as above plus `:' and `@'.
private static final String RFC3986_PCHAR = RFC3986_UNRESERVED +
  RFC3986_SUBDELIMS + ":@%";
private static final String RFC3986_SEGMENT = RFC3986_PCHAR;
// A whole path: segments separated by `/'.
private static final String RFC3986_PATH_SEGMENTS = RFC3986_SEGMENT + "/";
// Set used by quote(String) for scheme-specific parts, queries and fragments.
private static final String RFC3986_SSP = RFC3986_PCHAR + "?/";
// Host: registered name plus the square brackets.
private static final String RFC3986_HOST = RFC3986_REG_NAME + "[]";
// User information: registered name plus `:'.
private static final String RFC3986_USERINFO = RFC3986_REG_NAME + ":";
203
/**
 * Index of the capturing group of {@link #URI_REGEXP} that holds the
 * scheme component.
 */
private static final int SCHEME_GROUP = 2;

/**
 * Index of the capturing group of {@link #URI_REGEXP} that holds the
 * scheme-specific part.
 */
private static final int SCHEME_SPEC_PART_GROUP = 3;

/**
 * Index of the capturing group of {@link #URI_REGEXP} that holds the
 * authority component (without the leading `//').
 */
private static final int AUTHORITY_GROUP = 5;

/**
 * Index of the capturing group of {@link #URI_REGEXP} that holds the
 * path component.
 */
private static final int PATH_GROUP = 6;

/**
 * Index of the capturing group of {@link #URI_REGEXP} that holds the
 * query component (without the leading `?').
 */
private static final int QUERY_GROUP = 8;

/**
 * Index of the capturing group of {@link #URI_REGEXP} that holds the
 * fragment component (without the leading `#').
 */
private static final int FRAGMENT_GROUP = 10;

/**
 * Index of the capturing group of {@link #AUTHORITY_REGEXP} that holds
 * the user information (without the trailing `@').
 */
private static final int AUTHORITY_USERINFO_GROUP = 2;

/**
 * Index of the capturing group of {@link #AUTHORITY_REGEXP} that holds
 * the host.
 */
private static final int AUTHORITY_HOST_GROUP = 3;

/**
 * Index of the capturing group of {@link #AUTHORITY_REGEXP} that holds
 * the port digits (without the leading `:').
 */
private static final int AUTHORITY_PORT_GROUP = 5;
248
/**
 * The compiled version of the URI regular expression.
 * Assigned once by the static initializer.
 */
private static final Pattern URI_PATTERN;

/**
 * The compiled version of the authority regular expression.
 * Assigned once by the static initializer.
 */
private static final Pattern AUTHORITY_PATTERN;

/**
 * The set of valid hexadecimal characters, in upper case.  The quoting
 * code indexes this string with a 4-bit value to obtain its digit.
 */
private static final String HEX = "0123456789ABCDEF";
263
// Parsed components.  For most of them both the raw (still escaped) and
// the unquoted form are kept; a null value means the component is
// undefined.  All of them are transient: writeObject() stores only
// `string' and readObject() rebuilds the rest by parsing it again.
private transient String scheme;
private transient String rawSchemeSpecificPart;
private transient String schemeSpecificPart;
private transient String rawAuthority;
private transient String authority;
private transient String rawUserInfo;
private transient String userInfo;
private transient String rawHost;
private transient String host;
// Stays at -1 unless the authority contains a non-empty port.
private transient int port = -1;
private transient String rawPath;
private transient String path;
private transient String rawQuery;
private transient String query;
private transient String rawFragment;
private transient String fragment;
// The string this URI was parsed from; the only state that is serialized.
private String string;
281
/**
 * Static initializer to pre-compile the regular expressions, so that
 * every parse reuses the same two {@link Pattern} instances.
 */
static
{
  URI_PATTERN = Pattern.compile(URI_REGEXP);
  AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEXP);
}
290
291 private void readObject(ObjectInputStream is)
292 throws ClassNotFoundException, IOException
293 {
294 this.string = (String) is.readObject();
295 try
296 {
297 parseURI(this.string);
298 }
299 catch (URISyntaxException x)
300 {
301 // Should not happen.
302 throw new RuntimeException(x);
303 }
304 }
305
306 private void writeObject(ObjectOutputStream os) throws IOException
307 {
308 if (string == null)
309 string = toString();
310 os.writeObject(string);
311 }
312
313 /**
314 * <p>
315 * Returns the string content of the specified group of the supplied
316 * matcher. The returned value is modified according to the following:
317 * </p>
318 * <ul>
319 * <li>If the resulting string has a length greater than 0, then
320 * that string is returned.</li>
321 * <li>If a string of zero length, is matched, then the content
322 * of the preceding group is considered. If this is also an empty
323 * string, then <code>null</code> is returned to indicate an undefined
324 * value. Otherwise, the value is truly the empty string and this is
325 * the returned value.</li>
326 * </ul>
327 * <p>
328 * This method is used for matching against all parts of the URI
329 * that may be either undefined or empty (i.e. all those but the
330 * scheme-specific part and the path). In each case, the preceding
331 * group is the content of the original group, along with some
332 * additional distinguishing feature. For example, the preceding
333 * group for the query includes the preceding question mark,
334 * while that of the fragment includes the hash symbol. The presence
335 * of these features enables disambiguation between the two cases
336 * of a completely unspecified value and a simple non-existant value.
337 * The scheme differs in that it will never return an empty string;
338 * the delimiter follows the scheme rather than preceding it, so
339 * it becomes part of the following section. The same is true
340 * of the user information.
341 * </p>
342 *
343 * @param match the matcher, which contains the results of the URI
344 * matched against the URI regular expression.
345 * @return either the matched content, <code>null</code> for undefined
346 * values, or an empty string for a URI part with empty content.
347 */
348 private static String getURIGroup(Matcher match, int group)
349 {
350 String matched = match.group(group);
351 if (matched == null || matched.length() == 0)
352 {
353 String prevMatched = match.group(group -1);
354 if (prevMatched == null || prevMatched.length() == 0)
355 return null;
356 else
357 return "";
358 }
359 return matched;
360 }
361
362 /**
363 * Sets fields of this URI by parsing the given string.
364 *
365 * @param str The string to parse
366 *
367 * @exception URISyntaxException If the given string violates RFC 2396
368 */
369 private void parseURI(String str) throws URISyntaxException
370 {
371 Matcher matcher = URI_PATTERN.matcher(str);
372
373 if (matcher.matches())
374 {
375 scheme = getURIGroup(matcher, SCHEME_GROUP);
376 rawSchemeSpecificPart = matcher.group(SCHEME_SPEC_PART_GROUP);
377 schemeSpecificPart = unquote(rawSchemeSpecificPart);
378 if (!isOpaque())
379 {
380 rawAuthority = getURIGroup(matcher, AUTHORITY_GROUP);
381 rawPath = matcher.group(PATH_GROUP);
382 rawQuery = getURIGroup(matcher, QUERY_GROUP);
383 }
384 rawFragment = getURIGroup(matcher, FRAGMENT_GROUP);
385 }
386 else
387 throw new URISyntaxException(str,
388 "doesn't match URI regular expression");
389 parseServerAuthority();
390
391 // We must eagerly unquote the parts, because this is the only time
392 // we may throw an exception.
393 authority = unquote(rawAuthority);
394 userInfo = unquote(rawUserInfo);
395 host = unquote(rawHost);
396 path = unquote(rawPath);
397 query = unquote(rawQuery);
398 fragment = unquote(rawFragment);
399 }
400
401 /**
402 * Unquote "%" + hex quotes characters
403 *
404 * @param str The string to unquote or null.
405 *
406 * @return The unquoted string or null if str was null.
407 *
408 * @exception URISyntaxException If the given string contains invalid
409 * escape sequences.
410 */
411 private static String unquote(String str) throws URISyntaxException
412 {
413 if (str == null)
414 return null;
415 byte[] buf = new byte[str.length()];
416 int pos = 0;
417 for (int i = 0; i < str.length(); i++)
418 {
419 char c = str.charAt(i);
420 if (c == '%')
421 {
422 if (i + 2 >= str.length())
423 throw new URISyntaxException(str, "Invalid quoted character");
424 int hi = Character.digit(str.charAt(++i), 16);
425 int lo = Character.digit(str.charAt(++i), 16);
426 if (lo < 0 || hi < 0)
427 throw new URISyntaxException(str, "Invalid quoted character");
428 buf[pos++] = (byte) (hi * 16 + lo);
429 }
430 else
431 buf[pos++] = (byte) c;
432 }
433 try
434 {
435 return new String(buf, 0, pos, "utf-8");
436 }
437 catch (java.io.UnsupportedEncodingException x2)
438 {
439 throw (Error) new InternalError().initCause(x2);
440 }
441 }
442
/**
 * Quote characters illegal in URIs in given string.
 *
 * Delegates to {@link #quote(String, String)} with the character set
 * used for scheme-specific parts, queries and fragments.  Each illegal
 * US-ASCII character is replaced by "%" followed by its two-digit
 * hexadecimal code; characters above 127 are left as they are.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quote(String str)
{
  return quote(str, RFC3986_SSP);
}
458
459 /**
460 * Quote characters illegal in URI authorities in given string.
461 *
462 * Replace illegal characters by encoding their UTF-8
463 * representation as "%" + hex code for each resulting
464 * UTF-8 character.
465 *
466 * @param str The string to quote
467 *
468 * @return The quoted string.
469 */
470 private static String quoteAuthority(String str)
471 {
472 // Technically, we should be using RFC2396_AUTHORITY, but
473 // it contains no additional characters.
474 return quote(str, RFC3986_REG_NAME);
475 }
476
477 /**
478 * Quotes the characters in the supplied string that are not part of
479 * the specified set of legal characters.
480 *
481 * @param str the string to quote
482 * @param legalCharacters the set of legal characters
483 *
484 * @return the quoted string.
485 */
486 private static String quote(String str, String legalCharacters)
487 {
488 CPStringBuilder sb = new CPStringBuilder(str.length());
489 for (int i = 0; i < str.length(); i++)
490 {
491 char c = str.charAt(i);
492 if ((legalCharacters.indexOf(c) == -1)
493 && (c <= 127))
494 {
495 sb.append('%');
496 sb.append(HEX.charAt(c / 16));
497 sb.append(HEX.charAt(c % 16));
498 }
499 else
500 sb.append(c);
501 }
502 return sb.toString();
503 }
504
/**
 * Quote characters illegal in URI hosts in given string.
 *
 * Delegates to {@link #quote(String, String)} with the host character
 * set.  Each illegal US-ASCII character is replaced by "%" followed by
 * its two-digit hexadecimal code; characters above 127 are left as
 * they are.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quoteHost(String str)
{
  return quote(str, RFC3986_HOST);
}
520
/**
 * Quote characters illegal in URI paths in given string.
 *
 * Delegates to {@link #quote(String, String)} with the path character
 * set, which includes the segment separator `/'.  Each illegal
 * US-ASCII character is replaced by "%" followed by its two-digit
 * hexadecimal code; characters above 127 are left as they are.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quotePath(String str)
{
  // Technically, we should be using RFC2396_PATH, but
  // it contains no additional characters.
  return quote(str, RFC3986_PATH_SEGMENTS);
}
538
/**
 * Quote characters illegal in URI user infos in given string.
 *
 * Delegates to {@link #quote(String, String)} with the user-information
 * character set, which includes `:'.  Each illegal US-ASCII character
 * is replaced by "%" followed by its two-digit hexadecimal code;
 * characters above 127 are left as they are.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quoteUserInfo(String str)
{
  return quote(str, RFC3986_USERINFO);
}
554
/**
 * Creates an URI from the given string.  The string is remembered
 * as given and then parsed into its components.
 *
 * @param str The string to create the URI from
 *
 * @exception URISyntaxException If the given string cannot be parsed
 *            as a URI
 * @exception NullPointerException If str is null
 */
public URI(String str) throws URISyntaxException
{
  this.string = str;
  parseURI(str);
}
568
/**
 * Create an URI from the given components.  The components are quoted
 * where necessary, joined into a single string and that string is then
 * parsed.  A <code>null</code> component (or a port of -1) is left
 * out; the `//' prefix is emitted as soon as any one of user
 * information, host and port is given.
 *
 * @param scheme The scheme name
 * @param userInfo The username and authorization info
 * @param host The hostname
 * @param port The port number, or -1 for none
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the resulting string cannot be
 *            parsed as a URI
 */
public URI(String scheme, String userInfo, String host, int port,
           String path, String query, String fragment)
  throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + ((userInfo != null || host != null || port != -1) ? "//" : "")
       + (userInfo != null ? quoteUserInfo(userInfo) + "@" : "")
       + (host != null ? quoteHost(host) : "")
       + (port != -1 ? ":" + port : "")
       + (path != null ? quotePath(path) : "")
       + (query != null ? "?" + quote(query) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
595
/**
 * Create an URI from the given components.  The components are quoted
 * where necessary, joined into a single string and that string is then
 * parsed.  A <code>null</code> component is left out.
 *
 * @param scheme The scheme name
 * @param authority The authority
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the resulting string cannot be
 *            parsed as a URI
 */
public URI(String scheme, String authority, String path, String query,
           String fragment) throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + (authority != null ? "//" + quoteAuthority(authority) : "")
       + (path != null ? quotePath(path) : "")
       + (query != null ? "?" + quote(query) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
616
/**
 * Create an URI from the given components.  This is a shorthand for
 * the seven-argument constructor with no user information, no port
 * (-1) and no query.
 *
 * @param scheme The scheme name
 * @param host The hostname
 * @param path The path
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the resulting string cannot be
 *            parsed as a URI
 */
public URI(String scheme, String host, String path, String fragment)
  throws URISyntaxException
{
  this(scheme, null, host, -1, path, null, fragment);
}
632
/**
 * Create an URI from the given components.  The scheme-specific part
 * and the fragment are quoted where necessary, joined with the scheme
 * into a single string and that string is then parsed.  A
 * <code>null</code> component is left out.
 *
 * @param scheme The scheme name
 * @param ssp The scheme specific part
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the resulting string cannot be
 *            parsed as a URI
 */
public URI(String scheme, String ssp, String fragment)
  throws URISyntaxException
{
  this((scheme != null ? scheme + ":" : "")
       + (ssp != null ? quote(ssp) : "")
       + (fragment != null ? "#" + quote(fragment) : ""));
}
649
650 /**
651 * Create an URI from the given string
652 *
653 * @param str The string to create the URI from
654 *
655 * @exception IllegalArgumentException If the given string violates RFC 2396
656 * @exception NullPointerException If str is null
657 */
658 public static URI create(String str)
659 {
660 try
661 {
662 return new URI(str);
663 }
664 catch (URISyntaxException e)
665 {
666 throw (IllegalArgumentException) new IllegalArgumentException()
667 .initCause(e);
668 }
669 }
670
671 /**
672 * Attempts to parse this URI's authority component, if defined,
673 * into user-information, host, and port components. The purpose
674 * of this method was to disambiguate between some authority sections,
675 * which form invalid server-based authories, but valid registry
676 * based authorities. In the updated RFC 3986, the authority section
677 * is defined differently, with registry-based authorities part of
678 * the host section. Thus, this method is now simply an explicit
679 * way of parsing any authority section.
680 *
681 * @return the URI, with the authority section parsed into user
682 * information, host and port components.
683 * @throws URISyntaxException if the given string violates RFC 2396
684 */
685 public URI parseServerAuthority() throws URISyntaxException
686 {
687 if (rawAuthority != null)
688 {
689 Matcher matcher = AUTHORITY_PATTERN.matcher(rawAuthority);
690
691 if (matcher.matches())
692 {
693 rawUserInfo = getURIGroup(matcher, AUTHORITY_USERINFO_GROUP);
694 rawHost = getURIGroup(matcher, AUTHORITY_HOST_GROUP);
695
696 String portStr = getURIGroup(matcher, AUTHORITY_PORT_GROUP);
697
698 if (portStr != null && ! portStr.isEmpty())
699 try
700 {
701 port = Integer.parseInt(portStr);
702 }
703 catch (NumberFormatException e)
704 {
705 URISyntaxException use =
706 new URISyntaxException
707 (string, "doesn't match URI regular expression");
708 use.initCause(e);
709 throw use;
710 }
711 }
712 else
713 throw new URISyntaxException(string,
714 "doesn't match URI regular expression");
715 }
716 return this;
717 }
718
719 /**
720 * <p>
721 * Returns a normalized version of the URI. If the URI is opaque,
722 * or its path is already in normal form, then this URI is simply
723 * returned. Otherwise, the following transformation of the path
724 * element takes place:
725 * </p>
726 * <ol>
727 * <li>All `.' segments are removed.</li>
728 * <li>Each `..' segment which can be paired with a prior non-`..' segment
729 * is removed along with the preceding segment.</li>
730 * <li>A `.' segment is added to the front if the first segment contains
731 * a colon (`:'). This is a deviation from the RFC, which prevents
732 * confusion between the path and the scheme.</li>
733 * </ol>
734 * <p>
735 * The resulting URI will be free of `.' and `..' segments, barring those
736 * that were prepended or which couldn't be paired, respectively.
737 * </p>
738 *
739 * @return the normalized URI.
740 */
741 public URI normalize()
742 {
743 if (isOpaque() || path.indexOf("/./") == -1 && path.indexOf("/../") == -1)
744 return this;
745 try
746 {
747 return new URI(scheme, authority, normalizePath(path), query,
748 fragment);
749 }
750 catch (URISyntaxException e)
751 {
752 throw (Error) new InternalError("Normalized URI variant could not "+
753 "be constructed").initCause(e);
754 }
755 }
756
757 /**
758 * <p>
759 * Normalize the given path. The following transformation takes place:
760 * </p>
761 * <ol>
762 * <li>All `.' segments are removed.</li>
763 * <li>Each `..' segment which can be paired with a prior non-`..' segment
764 * is removed along with the preceding segment.</li>
765 * <li>A `.' segment is added to the front if the first segment contains
766 * a colon (`:'). This is a deviation from the RFC, which prevents
767 * confusion between the path and the scheme.</li>
768 * </ol>
769 * <p>
770 * The resulting URI will be free of `.' and `..' segments, barring those
771 * that were prepended or which couldn't be paired, respectively.
772 * </p>
773 *
774 * @param relativePath the relative path to be normalized.
775 * @return the normalized path.
776 */
777 private String normalizePath(String relativePath)
778 {
779 /*
780 This follows the algorithm in section 5.2.4. of RFC3986,
781 but doesn't modify the input buffer.
782 */
783 CPStringBuilder input = new CPStringBuilder(relativePath);
784 CPStringBuilder output = new CPStringBuilder();
785 int start = 0;
786 while (start < input.length())
787 {
788 /* A */
789 if (input.indexOf("../",start) == start)
790 {
791 start += 3;
792 continue;
793 }
794 if (input.indexOf("./",start) == start)
795 {
796 start += 2;
797 continue;
798 }
799 /* B */
800 if (input.indexOf("/./",start) == start)
801 {
802 start += 2;
803 continue;
804 }
805 if (input.indexOf("/.",start) == start
806 && input.charAt(start + 2) != '.')
807 {
808 start += 1;
809 input.setCharAt(start,'/');
810 continue;
811 }
812 /* C */
813 if (input.indexOf("/../",start) == start)
814 {
815 start += 3;
816 removeLastSegment(output);
817 continue;
818 }
819 if (input.indexOf("/..",start) == start)
820 {
821 start += 2;
822 input.setCharAt(start,'/');
823 removeLastSegment(output);
824 continue;
825 }
826 /* D */
827 if (start == input.length() - 1 && input.indexOf(".",start) == start)
828 {
829 input.delete(0,1);
830 continue;
831 }
832 if (start == input.length() - 2 && input.indexOf("..",start) == start)
833 {
834 input.delete(0,2);
835 continue;
836 }
837 /* E */
838 int indexOfSlash = input.indexOf("/",start);
839 while (indexOfSlash == start)
840 {
841 output.append("/");
842 ++start;
843 indexOfSlash = input.indexOf("/",start);
844 }
845 if (indexOfSlash == -1)
846 indexOfSlash = input.length();
847 output.append(input.substring(start, indexOfSlash));
848 start = indexOfSlash;
849 }
850 return output.toString();
851 }
852
853 /**
854 * Removes the last segment of the path from the specified buffer.
855 *
856 * @param buffer the buffer containing the path.
857 */
858 private void removeLastSegment(CPStringBuilder buffer)
859 {
860 int lastSlash = buffer.lastIndexOf("/");
861 if (lastSlash == -1)
862 buffer.setLength(0);
863 else
864 buffer.setLength(lastSlash);
865 }
866
867 /**
868 * Resolves the given URI against this URI
869 *
870 * @param uri The URI to resolve against this URI
871 *
872 * @return The resulting URI, or null when it couldn't be resolved
873 * for some reason.
874 *
875 * @throws NullPointerException if uri is null
876 */
877 public URI resolve(URI uri)
878 {
879 if (uri.isAbsolute())
880 return uri;
881 if (uri.isOpaque())
882 return uri;
883
884 String scheme = uri.getScheme();
885 String schemeSpecificPart = uri.getSchemeSpecificPart();
886 String authority = uri.getAuthority();
887 String path = uri.getPath();
888 String query = uri.getQuery();
889 String fragment = uri.getFragment();
890
891 try
892 {
893 if (fragment != null && path != null && path.equals("")
894 && scheme == null && authority == null && query == null)
895 return new URI(this.scheme, this.schemeSpecificPart, fragment);
896
897 if (authority == null)
898 {
899 authority = this.authority;
900 if (path == null)
901 path = "";
902 if (! (path.startsWith("/")))
903 {
904 CPStringBuilder basepath = new CPStringBuilder(this.path);
905 int i = this.path.lastIndexOf('/');
906
907 if (i >= 0)
908 basepath.delete(i + 1, basepath.length());
909
910 basepath.append(path);
911 path = normalizePath(basepath.toString());
912 }
913 }
914 return new URI(this.scheme, authority, path, query, fragment);
915 }
916 catch (URISyntaxException e)
917 {
918 throw (Error) new InternalError("Resolved URI variant could not "+
919 "be constructed").initCause(e);
920 }
921 }
922
/**
 * Resolves the given URI string against this URI.  The string is
 * turned into a URI with {@link #create(String)} and then passed
 * to {@link #resolve(URI)}.
 *
 * @param str The URI as string to resolve against this URI
 *
 * @return The resulting URI
 *
 * @throws IllegalArgumentException If the given string cannot be
 *         parsed as a URI
 * @throws NullPointerException If str is null
 */
public URI resolve(String str) throws IllegalArgumentException
{
  return resolve(create(str));
}
938
939 /**
940 * <p>
941 * Relativizes the given URI against this URI. The following
942 * algorithm is used:
943 * </p>
944 * <ul>
945 * <li>If either URI is opaque, the given URI is returned.</li>
946 * <li>If the schemes of the URIs differ, the given URI is returned.</li>
947 * <li>If the authority components of the URIs differ, then the given
948 * URI is returned.</li>
949 * <li>If the path of this URI is not a prefix of the supplied URI,
950 * then the given URI is returned.</li>
951 * <li>If all the above conditions hold, a new URI is created using the
952 * query and fragment components of the given URI, along with a path
953 * computed by removing the path of this URI from the start of the path
954 * of the supplied URI.</li>
955 * </ul>
956 *
957 * @param uri the URI to relativize agsint this URI
958 * @return the resulting URI
959 * @throws NullPointerException if the uri is null
960 */
961 public URI relativize(URI uri)
962 {
963 if (isOpaque() || uri.isOpaque())
964 return uri;
965 if (scheme == null && uri.getScheme() != null)
966 return uri;
967 if (scheme != null && !(scheme.equals(uri.getScheme())))
968 return uri;
969 if (rawAuthority == null && uri.getRawAuthority() != null)
970 return uri;
971 if (rawAuthority != null && !(rawAuthority.equals(uri.getRawAuthority())))
972 return uri;
973 String basePath = rawPath;
974 if (!(uri.getRawPath().equals(rawPath)))
975 {
976 if (!(basePath.endsWith("/")))
977 basePath = basePath.concat("/");
978 if (!(uri.getRawPath().startsWith(basePath)))
979 return uri;
980 }
981 try
982 {
983 return new URI(null, null,
984 uri.getRawPath().substring(basePath.length()),
985 uri.getRawQuery(), uri.getRawFragment());
986 }
987 catch (URISyntaxException e)
988 {
989 throw (Error) new InternalError("Relativized URI variant could not "+
990 "be constructed").initCause(e);
991 }
992 }
993
994 /**
995 * Creates an URL from an URI
996 *
997 * @throws MalformedURLException If a protocol handler for the URL could
998 * not be found, or if some other error occurred while constructing the URL
999 * @throws IllegalArgumentException If the URI is not absolute
1000 */
1001 public URL toURL() throws IllegalArgumentException, MalformedURLException
1002 {
1003 if (isAbsolute())
1004 return new URL(this.toString());
1005
1006 throw new IllegalArgumentException("not absolute");
1007 }
1008
1009 /**
1010 * Returns the scheme of the URI
1011 */
1012 public String getScheme()
1013 {
1014 return scheme;
1015 }
1016
1017 /**
1018 * Tells whether this URI is absolute or not
1019 */
1020 public boolean isAbsolute()
1021 {
1022 return scheme != null;
1023 }
1024
1025 /**
1026 * Tell whether this URI is opaque or not
1027 */
1028 public boolean isOpaque()
1029 {
1030 return ((scheme != null) && ! (schemeSpecificPart.startsWith("/")));
1031 }
1032
1033 /**
1034 * Returns the raw scheme specific part of this URI.
1035 * The scheme-specific part is never undefined, though it may be empty
1036 */
1037 public String getRawSchemeSpecificPart()
1038 {
1039 return rawSchemeSpecificPart;
1040 }
1041
1042 /**
1043 * Returns the decoded scheme specific part of this URI.
1044 */
1045 public String getSchemeSpecificPart()
1046 {
1047 return schemeSpecificPart;
1048 }
1049
1050 /**
1051 * Returns the raw authority part of this URI
1052 */
1053 public String getRawAuthority()
1054 {
1055 return rawAuthority;
1056 }
1057
1058 /**
1059 * Returns the decoded authority part of this URI
1060 */
1061 public String getAuthority()
1062 {
1063 return authority;
1064 }
1065
1066 /**
1067 * Returns the raw user info part of this URI
1068 */
1069 public String getRawUserInfo()
1070 {
1071 return rawUserInfo;
1072 }
1073
1074 /**
1075 * Returns the decoded user info part of this URI
1076 */
1077 public String getUserInfo()
1078 {
1079 return userInfo;
1080 }
1081
1082 /**
1083 * Returns the hostname of the URI
1084 */
1085 public String getHost()
1086 {
1087 return host;
1088 }
1089
1090 /**
1091 * Returns the port number of the URI
1092 */
1093 public int getPort()
1094 {
1095 return port;
1096 }
1097
1098 /**
1099 * Returns the raw path part of this URI
1100 */
1101 public String getRawPath()
1102 {
1103 return rawPath;
1104 }
1105
1106 /**
1107 * Returns the path of the URI
1108 */
1109 public String getPath()
1110 {
1111 return path;
1112 }
1113
1114 /**
1115 * Returns the raw query part of this URI
1116 */
1117 public String getRawQuery()
1118 {
1119 return rawQuery;
1120 }
1121
1122 /**
1123 * Returns the query of the URI
1124 */
1125 public String getQuery()
1126 {
1127 return query;
1128 }
1129
1130 /**
1131 * Return the raw fragment part of this URI
1132 */
1133 public String getRawFragment()
1134 {
1135 return rawFragment;
1136 }
1137
1138 /**
1139 * Returns the fragment of the URI
1140 */
1141 public String getFragment()
1142 {
1143 return fragment;
1144 }
1145
1146 /**
1147 * <p>
1148 * Compares the URI with the given object for equality. If the
1149 * object is not a <code>URI</code>, then the method returns false.
1150 * Otherwise, the following criteria are observed:
1151 * </p>
1152 * <ul>
1153 * <li>The scheme of the URIs must either be null (undefined) in both cases,
1154 * or equal, ignorant of case.</li>
1155 * <li>The raw fragment of the URIs must either be null (undefined) in both
1156 * cases, or equal, ignorant of case.</li>
1157 * <li>Both URIs must be of the same type (opaque or hierarchial)</li>
1158 * <li><strong>For opaque URIs:</strong></li>
1159 * <ul>
1160 * <li>The raw scheme-specific parts must be equal.</li>
1161 * </ul>
1162 * <li>For hierarchical URIs:</li>
1163 * <ul>
1164 * <li>The raw paths must be equal, ignorant of case.</li>
1165 * <li>The raw queries are either both undefined or both equal, ignorant
1166 * of case.</li>
1167 * <li>The raw authority sections are either both undefined or:</li>
1168 * <li><strong>For registry-based authorities:</strong></li>
1169 * <ul><li>they are equal.</li></ul>
1170 * <li><strong>For server-based authorities:</strong></li>
1171 * <ul>
1172 * <li>the hosts are equal, ignoring case</li>
1173 * <li>the ports are equal</li>
1174 * <li>the user information components are equal</li>
1175 * </ul>
1176 * </ul>
1177 * </ul>
1178 *
1179 * @param obj the obj to compare the URI with.
1180 * @return <code>true</code> if the objects are equal, according to
1181 * the specification above.
1182 */
1183 public boolean equals(Object obj)
1184 {
1185 if (!(obj instanceof URI))
1186 return false;
1187 URI uriObj = (URI) obj;
1188 if (scheme == null)
1189 {
1190 if (uriObj.getScheme() != null)
1191 return false;
1192 }
1193 else
1194 if (!(scheme.equalsIgnoreCase(uriObj.getScheme())))
1195 return false;
1196 if (rawFragment == null)
1197 {
1198 if (uriObj.getRawFragment() != null)
1199 return false;
1200 }
1201 else
1202 if (!(rawFragment.equalsIgnoreCase(uriObj.getRawFragment())))
1203 return false;
1204 boolean opaqueThis = isOpaque();
1205 boolean opaqueObj = uriObj.isOpaque();
1206 if (opaqueThis && opaqueObj)
1207 return rawSchemeSpecificPart.equals(uriObj.getRawSchemeSpecificPart());
1208 else if (!opaqueThis && !opaqueObj)
1209 {
1210 boolean common = rawPath.equalsIgnoreCase(uriObj.getRawPath())
1211 && ((rawQuery == null && uriObj.getRawQuery() == null)
1212 || rawQuery.equalsIgnoreCase(uriObj.getRawQuery()));
1213 if (rawAuthority == null && uriObj.getRawAuthority() == null)
1214 return common;
1215 if (host == null)
1216 return common
1217 && rawAuthority.equalsIgnoreCase(uriObj.getRawAuthority());
1218 return common
1219 && host.equalsIgnoreCase(uriObj.getHost())
1220 && port == uriObj.getPort()
1221 && (rawUserInfo == null ?
1222 uriObj.getRawUserInfo() == null :
1223 rawUserInfo.equalsIgnoreCase(uriObj.getRawUserInfo()));
1224 }
1225 else
1226 return false;
1227 }
1228
1229 /**
1230 * Computes the hashcode of the URI
1231 */
1232 public int hashCode()
1233 {
1234 return (getScheme() == null ? 0 : 13 * getScheme().hashCode())
1235 + 17 * getRawSchemeSpecificPart().hashCode()
1236 + (getRawFragment() == null ? 0 : 21 + getRawFragment().hashCode());
1237 }
1238
1239 /**
1240 * Compare the URI with another URI.
1241 * Undefined components are taken to be less than any other component.
1242 * The following criteria are observed:
1243 * </p>
1244 * <ul>
1245 * <li>Two URIs with different schemes are compared according to their
1246 * scheme, regardless of case.</li>
1247 * <li>A hierarchical URI is less than an opaque URI with the same
1248 * scheme.</li>
1249 * <li><strong>For opaque URIs:</strong></li>
1250 * <ul>
1251 * <li>URIs with differing scheme-specific parts are ordered according
1252 * to the ordering of the scheme-specific part.</li>
1253 * <li>URIs with the same scheme-specific part are ordered by the
1254 * raw fragment.</li>
1255 * </ul>
1256 * <li>For hierarchical URIs:</li>
1257 * <ul>
1258 * <li>URIs are ordered according to their raw authority sections,
1259 * if they are unequal.</li>
1260 * <li><strong>For registry-based authorities:</strong></li>
1261 * <ul><li>they are ordered according to the ordering of the authority
1262 * component.</li></ul>
1263 * <li><strong>For server-based authorities:</strong></li>
1264 * <ul>
1265 * <li>URIs are ordered according to the raw user information.</li>
1266 * <li>URIs with the same user information are ordered by the host,
1267 * ignoring case.</li>
1268 * <lI>URIs with the same host are ordered by the port.</li>
1269 * </ul>
1270 * <li>URIs with the same authority section are ordered by the raw path.</li>
1271 * <li>URIs with the same path are ordered by their raw query.</li>
1272 * <li>URIs with the same query are ordered by their raw fragments.</li>
1273 * </ul>
1274 * </ul>
1275 *
1276 * @param uri The other URI to compare this URI with
1277 * @return a negative integer, zero or a positive integer depending
1278 * on whether this URI is less than, equal to or greater
1279 * than that supplied, respectively.
1280 */
1281 public int compareTo(URI uri)
1282 throws ClassCastException
1283 {
1284 if (scheme == null && uri.getScheme() != null)
1285 return -1;
1286 if (scheme != null)
1287 {
1288 int sCompare = scheme.compareToIgnoreCase(uri.getScheme());
1289 if (sCompare != 0)
1290 return sCompare;
1291 }
1292 boolean opaqueThis = isOpaque();
1293 boolean opaqueObj = uri.isOpaque();
1294 if (opaqueThis && !opaqueObj)
1295 return 1;
1296 if (!opaqueThis && opaqueObj)
1297 return -1;
1298 if (opaqueThis)
1299 {
1300 int ssCompare =
1301 rawSchemeSpecificPart.compareTo(uri.getRawSchemeSpecificPart());
1302 if (ssCompare == 0)
1303 return compareFragments(uri);
1304 else
1305 return ssCompare;
1306 }
1307 if (rawAuthority == null && uri.getRawAuthority() != null)
1308 return -1;
1309 if (rawAuthority != null)
1310 {
1311 int aCompare = rawAuthority.compareTo(uri.getRawAuthority());
1312 if (aCompare != 0)
1313 {
1314 if (host == null)
1315 return aCompare;
1316 if (rawUserInfo == null && uri.getRawUserInfo() != null)
1317 return -1;
1318 int uCompare = rawUserInfo.compareTo(uri.getRawUserInfo());
1319 if (uCompare != 0)
1320 return uCompare;
1321 if (host == null && uri.getHost() != null)
1322 return -1;
1323 int hCompare = host.compareTo(uri.getHost());
1324 if (hCompare != 0)
1325 return hCompare;
1326 int uriPort = uri.getPort();
1327 return (uriPort == port) ? 0 : (uriPort > port) ? -1 : 1;
1328 }
1329 }
1330 if (rawPath == null && uri.getRawPath() != null)
1331 return -1;
1332 if (rawPath != null)
1333 {
1334 int pCompare = rawPath.compareTo(uri.getRawPath());
1335 if (pCompare != 0)
1336 return pCompare;
1337 }
1338 if (rawQuery == null && uri.getRawQuery() != null)
1339 return -1;
1340 if (rawQuery != null)
1341 {
1342 int qCompare = rawQuery.compareTo(uri.getRawQuery());
1343 if (qCompare != 0)
1344 return qCompare;
1345 }
1346 return compareFragments(uri);
1347 }
1348
1349 /**
1350 * Compares the fragment of this URI with that of the supplied URI.
1351 *
1352 * @param uri the URI to compare with this one.
1353 * @return a negative integer, zero or a positive integer depending
1354 * on whether this uri's fragment is less than, equal to
1355 * or greater than the fragment of the uri supplied, respectively.
1356 */
1357 private int compareFragments(URI uri)
1358 {
1359 if (rawFragment == null && uri.getRawFragment() != null)
1360 return -1;
1361 else if (rawFragment == null)
1362 return 0;
1363 else
1364 return rawFragment.compareTo(uri.getRawFragment());
1365 }
1366
1367 /**
1368 * Returns the URI as a String. If the URI was created using a constructor,
1369 * then this will be the same as the original input string.
1370 *
1371 * @return a string representation of the URI.
1372 */
1373 public String toString()
1374 {
1375 return (scheme == null ? "" : scheme + ":")
1376 + rawSchemeSpecificPart
1377 + (rawFragment == null ? "" : "#" + rawFragment);
1378 }
1379
1380 /**
1381 * Returns the URI as US-ASCII string. This is the same as the result
1382 * from <code>toString()</code> for URIs that don't contain any non-US-ASCII
1383 * characters. Otherwise, the non-US-ASCII characters are replaced
1384 * by their percent-encoded representations.
1385 *
1386 * @return a string representation of the URI, containing only US-ASCII
1387 * characters.
1388 */
1389 public String toASCIIString()
1390 {
1391 String strRep = toString();
1392 boolean inNonAsciiBlock = false;
1393 CPStringBuilder buffer = new CPStringBuilder();
1394 CPStringBuilder encBuffer = null;
1395 for (int i = 0; i < strRep.length(); i++)
1396 {
1397 char c = strRep.charAt(i);
1398 if (c <= 127)
1399 {
1400 if (inNonAsciiBlock)
1401 {
1402 buffer.append(escapeCharacters(encBuffer.toString()));
1403 inNonAsciiBlock = false;
1404 }
1405 buffer.append(c);
1406 }
1407 else
1408 {
1409 if (!inNonAsciiBlock)
1410 {
1411 encBuffer = new CPStringBuilder();
1412 inNonAsciiBlock = true;
1413 }
1414 encBuffer.append(c);
1415 }
1416 }
1417 return buffer.toString();
1418 }
1419
1420 /**
1421 * Converts the non-ASCII characters in the supplied string
1422 * to their equivalent percent-encoded representations.
1423 * That is, they are replaced by "%" followed by their hexadecimal value.
1424 *
1425 * @param str a string including non-ASCII characters.
1426 * @return the string with the non-ASCII characters converted to their
1427 * percent-encoded representations.
1428 */
1429 private static String escapeCharacters(String str)
1430 {
1431 try
1432 {
1433 CPStringBuilder sb = new CPStringBuilder();
1434 // this is far from optimal, but it works
1435 byte[] utf8 = str.getBytes("utf-8");
1436 for (int j = 0; j < utf8.length; j++)
1437 {
1438 sb.append('%');
1439 sb.append(HEX.charAt((utf8[j] & 0xff) / 16));
1440 sb.append(HEX.charAt((utf8[j] & 0xff) % 16));
1441 }
1442 return sb.toString();
1443 }
1444 catch (java.io.UnsupportedEncodingException x)
1445 {
1446 throw (Error) new InternalError("Escaping error").initCause(x);
1447 }
1448 }
1449
1450 }