#!/usr/bin/env python

"""
HTML document fragment parser.

Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat.parsers.common import ParserBase
from moinformat.tree.html import Element, Fragment
from moinformat.utils.htmlparse import Parser

class HTMLParser(ParserBase):

    "An HTML document and document fragment parser."

    # Format names for which this parser is offered.

    formats = ["html"]

    def __init__(self, metadata):

        """
        Initialise the parser with the given 'metadata', an object providing a
        dictionary-style 'get' method. The "fragment" and "theme_name" entries
        are consulted when parsing.
        """

        self.metadata = metadata

    def parse(self, s):

        """
        Parse the HTML text in 's'.

        If the metadata has a true "fragment" or "theme_name" entry, return a
        fragment containing the child nodes of the document's body element, or
        None if no body element is found. Otherwise, return the top-level node
        of the parsed document.
        """

        doc = Parser(s).parse()

        # Without a request for a fragment, return the top-level node.

        if not (self.metadata.get("fragment") or self.metadata.get("theme_name")):
            return doc

        # If only a fragment is involved, find the body node and return its
        # children in a fragment.

        body = self._find_body(doc)

        # Only None indicates a missing body: test for it explicitly so that
        # the truth value of the located element cannot affect the outcome.

        if body is None:
            return None

        return Fragment(body.nodes)

    def _find_body(self, node):

        """
        Find the body element from 'node', returning the element if found or
        None otherwise. The search proceeds depth-first through the children
        of 'node' and yields the first body element encountered.
        """

        # Search only nodes with children.

        if not isinstance(node, Fragment):
            return None

        # Return the node if it is a body element.

        if isinstance(node, Element) and node.name == "body":
            return node

        # Otherwise, search each child in turn, stopping at the first match.

        for n in node.nodes:
            body = self._find_body(n)

            # Test against None explicitly, this being the only value
            # indicating that no body was found beneath the child.

            if body is not None:
                return body

        return None

parser = HTMLParser

# vim: tabstop=4 expandtab shiftwidth=4