# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1374163427 -7200
# Node ID 1734d98d6e2c9016ec1edc1242e3708e63830056
# Parent  d01e634420e5e5aa5e9c88afb10f59bb2446c84e
Changed the parsing of HTML documents retrieved using parseURI to use libxml2's
own network retrieval support instead of Python's urllib2 module.

diff -r d01e634420e5 -r 1734d98d6e2c README.txt
--- a/README.txt	Thu Jul 18 17:54:37 2013 +0200
+++ b/README.txt	Thu Jul 18 18:03:47 2013 +0200
@@ -91,7 +91,8 @@
 New in libxml2dom 0.5.1 (Changes since libxml2dom 0.5)
 ------------------------------------------------------
 
-  * Fixed the document encoding for HTML documents retrieved using parseURI.
+  * Changed the parsing of HTML documents retrieved using parseURI to use the
+    libxml2 network retrieval support.
 
 New in libxml2dom 0.5 (Changes since libxml2dom 0.4.7)
 ------------------------------------------------------
diff -r d01e634420e5 -r 1734d98d6e2c libxml2dom/__init__.py
--- a/libxml2dom/__init__.py	Thu Jul 18 17:54:37 2013 +0200
+++ b/libxml2dom/__init__.py	Thu Jul 18 18:03:47 2013 +0200
@@ -28,7 +28,6 @@
     parseFile as Node_parseFile, \
     toString as Node_toString, toStream as Node_toStream, \
     toFile as Node_toFile
-import urllib2 # for parseURI in HTML mode
 import libxml2dom.errors
 
 # Standard namespaces.
@@ -813,11 +812,9 @@
     remote documents (such as DTDs) will be followed in order to obtain such
     documents.
 
-    XML documents are retrieved using libxml2's own network capabilities; HTML
-    documents are retrieved using the urllib2 module provided by Python. To
-    retrieve either kind of document using Python's own modules for this purpose
-    (such as urllib or urllib2), open a stream and pass it to the parse
-    function:
+    Documents are retrieved using libxml2's own network capabilities. To
+    retrieve documents using Python's own modules for this purpose (such as
+    urllib or urllib2), open a stream and pass it to the parse function:
 
     f = urllib.urlopen(uri)
     try:
@@ -829,13 +826,8 @@
     """
 
     if html:
-        f = urllib2.urlopen(uri)
-        try:
-            htmlencoding = f.headers.get("content-type", htmlencoding)
-            return parse(f, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
-                validate=validate, remote=remote, impl=impl)
-        finally:
-            f.close()
+        return parseFile(uri, html=html, htmlencoding=htmlencoding, unfinished=unfinished,
+            validate=validate, remote=remote, impl=impl)
     else:
         impl = impl or default_impl
         return impl.adoptDocument(Node_parseURI(uri, unfinished=unfinished,