Some URLs cannot be accessed through Java's URL class, HttpClient, etc. because they contain special characters such as spaces.
Percent-encode values according to RFC 3986. The built-in Java
URLEncoder does not encode according to the RFC, so we make the extra replacements.
In plain terms, the quote above means that Java's built-in URLEncoder was not designed in accordance with RFC 3986; that is, it is not standards-compliant.
For the standard behaviour, see http://en.wikipedia.org/wiki/URL_normalization for reference.
http://stackoverflow.com/a/4057470/405418
Standard URL Handling
public class Urlcanonicalizer {public static string Getcanonicalurl (string url) {return getcanonicalurl (URL, null);} public static string Getcanonicalurl (string href, string context) {try {URL canonicalurl = new URL (urlresolver.resolveurl ( context = = null? "": Context, href)); String path = Canonicalurl.getpath ();/* Normalize:no empty segments (i.e., "//"), no segments equal to * ".", and no SE Gments equal to ":" That is preceded by a segment * Not equal to "...". * No standardization,no notification,no communication */path = new URI (path). Normalize (). toString ();/* * Convert '//'-> ; '/' */int idx = Path.indexof ("//"), while (idx >= 0) {path = Path.replace ("//", "/"); idx = Path.indexof ("//");} /* * Drop starting '/.. /' */while (Path.startswith ("/. /")) {path = path.substring (3);} /* Trim */path = Path.trim (); final sortedmap<string, string> params = Createparametermap (Canonicalurl.getquery () ); final String querystring;if (params! = null && params.size () > 0) {string canonicalparams = canonicalize (params); queryString = (Canonicalparams.isempty ()? "": "?" + Canonicalparams);} else {queryString = "";} /* * ADD starting slash if needed */if (path.length () = = 0) {path = "/" + Path;} /* * Drop default port:example.com:80, example.com */int port = Canonicalurl.getport (); if (port = = canonicalurl.getd Efaultport ()) {port =-1;} /* * lowercasing Protocol and host */string protocol = Canonicalurl.getprotocol (). toLowerCase (); String host = Canonicalurl.gethost (). toLowerCase (); String pathandquerystring = Normalizepath (path) + queryString; URL result = new URL (protocol, host, Port, pathandquerystring); return Result.toexternalform ();} catch (Malformedurlexception ex) {return null;} catch (URISyntaxException ex) {return null;}} /** * Takes A query string, separates the constituent name-value pairs, and * stores them in a sortedmap ordered by Lexico Graphical order. * * @return Null If there is no query string. 
*/private Static sortedmap<string, string>Createparametermap (Final String queryString) {if (queryString = = NULL | | querystring.isempty ()) {return null;} Final string[] pairs = Querystring.split ("&"), Final map<string, string> params = new hashmap<string, string& gt; (pairs.length); for (final String pair:pairs) {if (pair.length () = = 0) {continue;} string[] tokens = pair.split ("=", 2), switch (tokens.length) {case 1:if (pair.charat (0) = = ' = ') {params.put ("", Tokens[0]); } else {params.put (tokens[0], "");} Break;case 2:params.put (Tokens[0], tokens[1]); break;}} return new treemap<string, string> (params);} /** * canonicalize the query string. * * @param sortedparammap * Parameter name-value pairs in lexicographical order. * @return Canonical form of query string. */private Static String canonicalize (final sortedmap<string, string> sortedparammap) {if (Sortedparammap = = NULL | | Sortedparammap.isempty ()) {return "";} Final StringBuffer sb = new StringBuffer; for (map.entry<string, string> pair: Sortedparammap.entryset ()) {final String key = Pair.getkey (). toLowerCase (); if (Key.equals ("Jsessionid") | | key.equals ( "Phpsessid") | | Key.equals ("ASPSessionID")) {continue;} if (sb.length () > 0) {sb.append (' & ');} Sb.append (percentEncodeRfc3986 (Pair.getkey ())), if (!pair.getvalue (). IsEmpty ()) {sb.append (' = '); Sb.append ( percentEncodeRfc3986 (Pair.getvalue ()));}} return sb.tostring ();} /** * Percent-encode Values according the RFC 3986. The built-in Java * Urlencoder does not encode according to the RFCs, so we make the extra * replacements. * * @param String * decoded string. * @return encoded string per RFC 3986. */private static string percentEncodeRfc3986 (String string) {try {string = String.Replace ("+", "%2b"); string = Urldecoder . Decode (String, "UTF-8"), String = Urlencoder.encode (String, "UTF-8"), return String.Replace ("+", "%20"). Replace ("*", " %2A "). 
Replace ("%7e "," ~ ");} catch (Exception e) {return string;}} private static string Normalizepath (final String path) {return PATh.replace ("%7e", "~"). Replace ("", "%20");}}
The key part of the URL handling:
private static String percentEncodeRfc3986 (string string) {try {string = String.Replace ("+", "%2b"); string = URLDECODER.D Ecode (String, "UTF-8"), String = Urlencoder.encode (String, "UTF-8"), return String.Replace ("+", "%20"). Replace ("*", "% 2 a "). Replace ("%7e "," ~ ");} catch (Exception e) {return string;}} private static string Normalizepath (final String path) {return path.replace ("%7e", "~"). Replace ("", "%20");
HttpClient URL Problem