View Javadoc

1   
2   
3   package ch.odi.justblog.discovery;
4   
5   import java.io.IOException;
6   import java.io.InputStream;
7   import java.net.MalformedURLException;
8   import java.net.URL;
9   
10  import org.apache.commons.httpclient.Header;
11  import org.apache.commons.httpclient.HttpClient;
12  import org.apache.commons.httpclient.HttpMethod;
13  import org.apache.commons.httpclient.methods.GetMethod;
14  import org.apache.commons.logging.Log;
15  import org.apache.commons.logging.LogFactory;
16  import org.cyberneko.html.parsers.DOMParser;
17  import org.w3c.dom.Document;
18  import org.w3c.dom.NamedNodeMap;
19  import org.w3c.dom.Node;
20  import org.w3c.dom.NodeList;
21  import org.xml.sax.InputSource;
22  import org.xml.sax.SAXException;
23  
24  import ch.odi.justblog.api.ApiRegistry;
25  import ch.odi.util.xml.NodeEnumeration;
26  
27  /***
28   * Discovers the weblog API from the meta information on the Blog.
29   *
30   * @author oglueck
31   */
32  public class Discovery {
33      public static final String RSD_MIME_TYPE = "application/rsd+xml";
34      
35      private URL url;
36      private HttpClient client;
37      private Log log = LogFactory.getLog(Discovery.class);
38      private RsdApi[] apis;
39      
40      /***
41       * Creates a new discovery object for the specified URL.
42       * The URL must point to a HTML document that contains
43       * a link element of type application/rsd+xml.
44       * 
45       * @param The URL of the weblog.
46       * @throws DiscoveryException
47       */
48      public Discovery(URL url) throws DiscoveryException {
49          this.url = url;
50          client = new HttpClient();
51          log.debug("Discovery for URL "+ url);
52          fetchRemoteInfo();
53      }
54  
55      private void fetchRemoteInfo() throws DiscoveryException {
56          Document page = downloadPage(url);
57          NodeList links = page.getElementsByTagName("link");
58          String href = findRsdUrl(links);
59          if (href == null) {
60              apis = findApisByLinks(links);
61          } else {
62                URL rsdUrl;
63              try {
64                  rsdUrl = new URL(url, href);
65                  apis = findApisByRSD(rsdUrl);
66              } catch (MalformedURLException e) {
67                  throw new DiscoveryException("Invalid RSD link found: "+ href, e);
68              }
69          }
70          if (apis == null) throw new DiscoveryException("Could not discover Blog APIs");
71      }
72      
73      private RsdApi[] findApisByRSD(URL rsdUrl) throws DiscoveryException {
74          GetMethod get = new GetMethod(rsdUrl.toString());
75          try {
76              if (log.isDebugEnabled()) log.debug("Downloading RSD file at "+ rsdUrl.toString());
77              int status = client.executeMethod(get);
78              if (log.isDebugEnabled()) log.debug("HTTP status "+ status);
79              InputStream body = get.getResponseBodyAsStream();
80              Rsd rsd = new Rsd(body, rsdUrl);
81              return rsd.getApis();
82          } catch (IOException e) {
83              throw new DiscoveryException("Could not read RSD", e);
84          } finally {
85              get.releaseConnection();
86          }        
87      }
88      
89      private RsdApi[] findApisByLinks(NodeList links) {
90          //TODO implement
91          return null;
92      }
93      
94      /***
95       * Automatically selects an API that is supported. Will try the preferred ones first.
96       * 
97       * @return null if no supported API is found
98       */
99      public RsdApi autoSelect() {
100         for (int i = 0; i < apis.length; i++) {
101             RsdApi api = apis[i];
102             if (api.isPreferred() && ApiRegistry.isSupported(api.getName())) return api;
103         }
104         for (int i = 0; i < apis.length; i++) {
105             RsdApi api = apis[i];
106             if (ApiRegistry.isSupported(api.getName())) return api;
107         }
108         return null;
109     }
110     
111     /***
112      * Look for: &lt;link rel="EditURI" type="application/rsd+xml" title="RSD" href="something"&gt;
113      * @param links A list of link elements
114      * @return the URL to the RSD document or null
115      */
116     private String findRsdUrl(NodeList links) {
117         String url = null;
118         NodeEnumeration it = new NodeEnumeration(links);
119         while (it.hasMoreElements()) {
120             Node link = (Node) it.nextElement();
121             NamedNodeMap attributes = link.getAttributes();
122             if (attributes == null) continue;
123             Node type = attributes.getNamedItem("type");
124             if (type == null) continue;
125             if (type.getNodeValue().equals(RSD_MIME_TYPE)) {
126                 Node hrefnode = attributes.getNamedItem("href");
127                 if (hrefnode == null) continue;
128                 url = hrefnode.getNodeValue();
129                 break;
130             }
131         }
132         return url;
133     }
134 
135     /***
136      * Retrieves a HTML page from the server and parses it.
137      * 
138      * @param url The URL of the page.
139      * @return the parsed HTML document
140      * @throws DiscoveryException if the document could not be downloaded.
141      * @throws InvalidDocumentException if the document could not be parsed.
142      */
143     private Document downloadPage(URL url) throws DiscoveryException {
144         Document page = null;
145         GetMethod get = new GetMethod(url.toString());
146         try {
147             int status = client.executeMethod(get);
148             log.debug("HTTP status "+ status);
149             if (!isHtml(get)) throw new InvalidDocumentException("Server says document is not HTML");
150             String encoding = get.getResponseCharSet();
151             InputStream body = get.getResponseBodyAsStream();
152             DOMParser htmlParser = new DOMParser();
153             htmlParser.parse(new InputSource(body));
154             page = htmlParser.getDocument();
155         } catch (SAXException e) {
156             throw new InvalidDocumentException(e);
157         } catch (IOException e) {
158             throw new DiscoveryException(e);
159         } finally {
160             get.releaseConnection();
161         }        
162         return page;
163     }
164     
165     /***
166      * Checks if the response is HTML.
167      * @param method
168      * @return
169      */
170     private boolean isHtml(HttpMethod method) {
171         Header contentTypeHeader = method.getResponseHeader("Content-Type");
172         if (contentTypeHeader == null) return false;
173         String contentType = contentTypeHeader.getValue().toLowerCase();
174         log.debug("Content-Type: "+ contentType);
175         if (contentType.startsWith("text/html")) return true;
176         if (contentType.startsWith("text/xhtml")) return true;
177         return false;
178     }
179 }