1
2
3 package ch.odi.justblog.discovery;
4
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.net.MalformedURLException;
8 import java.net.URL;
9
10 import org.apache.commons.httpclient.Header;
11 import org.apache.commons.httpclient.HttpClient;
12 import org.apache.commons.httpclient.HttpMethod;
13 import org.apache.commons.httpclient.methods.GetMethod;
14 import org.apache.commons.logging.Log;
15 import org.apache.commons.logging.LogFactory;
16 import org.cyberneko.html.parsers.DOMParser;
17 import org.w3c.dom.Document;
18 import org.w3c.dom.NamedNodeMap;
19 import org.w3c.dom.Node;
20 import org.w3c.dom.NodeList;
21 import org.xml.sax.InputSource;
22 import org.xml.sax.SAXException;
23
24 import ch.odi.justblog.api.ApiRegistry;
25 import ch.odi.util.xml.NodeEnumeration;
26
27 /***
28 * Discovers the weblog API from the meta information on the Blog.
29 *
30 * @author oglueck
31 */
32 public class Discovery {
33 public static final String RSD_MIME_TYPE = "application/rsd+xml";
34
35 private URL url;
36 private HttpClient client;
37 private Log log = LogFactory.getLog(Discovery.class);
38 private RsdApi[] apis;
39
40 /***
41 * Creates a new discovery object for the specified URL.
42 * The URL must point to a HTML document that contains
43 * a link element of type application/rsd+xml.
44 *
45 * @param The URL of the weblog.
46 * @throws DiscoveryException
47 */
48 public Discovery(URL url) throws DiscoveryException {
49 this.url = url;
50 client = new HttpClient();
51 log.debug("Discovery for URL "+ url);
52 fetchRemoteInfo();
53 }
54
55 private void fetchRemoteInfo() throws DiscoveryException {
56 Document page = downloadPage(url);
57 NodeList links = page.getElementsByTagName("link");
58 String href = findRsdUrl(links);
59 if (href == null) {
60 apis = findApisByLinks(links);
61 } else {
62 URL rsdUrl;
63 try {
64 rsdUrl = new URL(url, href);
65 apis = findApisByRSD(rsdUrl);
66 } catch (MalformedURLException e) {
67 throw new DiscoveryException("Invalid RSD link found: "+ href, e);
68 }
69 }
70 if (apis == null) throw new DiscoveryException("Could not discover Blog APIs");
71 }
72
73 private RsdApi[] findApisByRSD(URL rsdUrl) throws DiscoveryException {
74 GetMethod get = new GetMethod(rsdUrl.toString());
75 try {
76 if (log.isDebugEnabled()) log.debug("Downloading RSD file at "+ rsdUrl.toString());
77 int status = client.executeMethod(get);
78 if (log.isDebugEnabled()) log.debug("HTTP status "+ status);
79 InputStream body = get.getResponseBodyAsStream();
80 Rsd rsd = new Rsd(body, rsdUrl);
81 return rsd.getApis();
82 } catch (IOException e) {
83 throw new DiscoveryException("Could not read RSD", e);
84 } finally {
85 get.releaseConnection();
86 }
87 }
88
89 private RsdApi[] findApisByLinks(NodeList links) {
90
91 return null;
92 }
93
94 /***
95 * Automatically selects an API that is supported. Will try the preferred ones first.
96 *
97 * @return null if no supported API is found
98 */
99 public RsdApi autoSelect() {
100 for (int i = 0; i < apis.length; i++) {
101 RsdApi api = apis[i];
102 if (api.isPreferred() && ApiRegistry.isSupported(api.getName())) return api;
103 }
104 for (int i = 0; i < apis.length; i++) {
105 RsdApi api = apis[i];
106 if (ApiRegistry.isSupported(api.getName())) return api;
107 }
108 return null;
109 }
110
111 /***
112 * Look for: <link rel="EditURI" type="application/rsd+xml" title="RSD" href="something">
113 * @param links A list of link elements
114 * @return the URL to the RSD document or null
115 */
116 private String findRsdUrl(NodeList links) {
117 String url = null;
118 NodeEnumeration it = new NodeEnumeration(links);
119 while (it.hasMoreElements()) {
120 Node link = (Node) it.nextElement();
121 NamedNodeMap attributes = link.getAttributes();
122 if (attributes == null) continue;
123 Node type = attributes.getNamedItem("type");
124 if (type == null) continue;
125 if (type.getNodeValue().equals(RSD_MIME_TYPE)) {
126 Node hrefnode = attributes.getNamedItem("href");
127 if (hrefnode == null) continue;
128 url = hrefnode.getNodeValue();
129 break;
130 }
131 }
132 return url;
133 }
134
135 /***
136 * Retrieves a HTML page from the server and parses it.
137 *
138 * @param url The URL of the page.
139 * @return the parsed HTML document
140 * @throws DiscoveryException if the document could not be downloaded.
141 * @throws InvalidDocumentException if the document could not be parsed.
142 */
143 private Document downloadPage(URL url) throws DiscoveryException {
144 Document page = null;
145 GetMethod get = new GetMethod(url.toString());
146 try {
147 int status = client.executeMethod(get);
148 log.debug("HTTP status "+ status);
149 if (!isHtml(get)) throw new InvalidDocumentException("Server says document is not HTML");
150 String encoding = get.getResponseCharSet();
151 InputStream body = get.getResponseBodyAsStream();
152 DOMParser htmlParser = new DOMParser();
153 htmlParser.parse(new InputSource(body));
154 page = htmlParser.getDocument();
155 } catch (SAXException e) {
156 throw new InvalidDocumentException(e);
157 } catch (IOException e) {
158 throw new DiscoveryException(e);
159 } finally {
160 get.releaseConnection();
161 }
162 return page;
163 }
164
165 /***
166 * Checks if the response is HTML.
167 * @param method
168 * @return
169 */
170 private boolean isHtml(HttpMethod method) {
171 Header contentTypeHeader = method.getResponseHeader("Content-Type");
172 if (contentTypeHeader == null) return false;
173 String contentType = contentTypeHeader.getValue().toLowerCase();
174 log.debug("Content-Type: "+ contentType);
175 if (contentType.startsWith("text/html")) return true;
176 if (contentType.startsWith("text/xhtml")) return true;
177 return false;
178 }
179 }