paul@0 | 1 | #!/usr/bin/env python |
paul@0 | 2 | |
paul@0 | 3 | import xml.sax |
paul@0 | 4 | |
class Parser(xml.sax.handler.ContentHandler):

    """
    A basic parser, tracking elements and attributes.

    While a document is being parsed, three parallel stacks are maintained,
    each holding one entry per currently open element (outermost first):

      * elements   - the element names
      * attributes - the attribute collections passed to startElement
      * text       - one list per element of the text fragments received
                     while that element was the innermost open element

    Subclasses override handleElement, which is invoked for each element as it
    is closed, before its entries are removed from the stacks.
    """

    def __init__(self):
        # Let the base handler initialise its own state before adding ours.
        xml.sax.handler.ContentHandler.__init__(self)
        self.elements = []
        self.attributes = []
        self.text = []

    def startElement(self, name, attrs):

        "Push a new entry for the element 'name' with 'attrs' on each stack."

        self.elements.append(name)
        self.attributes.append(attrs)
        self.text.append([])

    def characters(self, content):

        "Record the text 'content' against the innermost open element."

        self.text[-1].append(content)

    def endElement(self, name):

        """
        Invoke handleElement for the closing element 'name' and then discard
        that element's entries from the stacks.
        """

        # The handler runs first so that it can still inspect the element's
        # own name, attributes and text at the top of each stack.
        self.handleElement(name)
        self.elements.pop()
        self.attributes.pop()
        self.text.pop()

    def handleElement(self, name):

        "Hook called when the element 'name' is closed; does nothing here."

        pass

    def parse(self, f):

        """
        Parse the document given by 'f' using this object as content handler.

        'f' is handed unchanged to the SAX parser's parse method. Loading of
        external general entities is switched off, and a plain
        xml.sax.handler.ErrorHandler is installed as the error handler.

        Whether parsing succeeds or fails, 'f' is closed if it provides a
        close method. Sources without one (such as a filename string) are left
        alone instead of causing an AttributeError that would also hide any
        parsing error.
        """

        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(self)
            parser.setErrorHandler(xml.sax.handler.ErrorHandler())
            parser.setFeature(xml.sax.handler.feature_external_ges, False)
            parser.parse(f)
        finally:
            close = getattr(f, "close", None)
            if close is not None:
                close()
paul@0 | 40 | |
class ConfigurableParser(Parser):

    """
    A parser which can be configured to handle elements individually.

    Handlers are kept in a mapping from element name to callable. A handler
    stored under the key None is used for elements having no usable handler of
    their own.
    """

    def __init__(self, handlers=None):

        """
        Initialise the parser with the optional 'handlers' mapping. When it is
        absent or empty, a new empty dictionary is used instead.
        """

        Parser.__init__(self)
        if not handlers:
            handlers = {}
        self.handlers = handlers

    def __setitem__(self, name, handler):

        "Register 'handler' for elements called 'name'."

        self.handlers[name] = handler

    def update(self, handlers):

        "Merge the given 'handlers' mapping into the registered handlers."

        self.handlers.update(handlers)

    def handleElement(self, name):

        """
        Call the handler registered for 'name' or, failing that, the one
        registered under None. If neither lookup yields a true value, nothing
        is called.

        The handler receives the element name, the element, attribute and text
        stacks, and the current element's text fragments joined into a single
        string.
        """

        # The fallback lookup is only made when the specific one yields
        # nothing usable.
        chosen = self.handlers.get(name) or self.handlers.get(None)
        if chosen:
            chosen(name, self.elements, self.attributes, self.text, "".join(self.text[-1]))
paul@0 | 61 | |
paul@0 | 62 | # vim: tabstop=4 expandtab shiftwidth=4 |