#!/usr/bin/env python

import xml.sax


class Parser(xml.sax.handler.ContentHandler):
    """A basic parser, tracking elements and attributes.

    While a document is being parsed, three parallel stacks describe the
    currently open elements, outermost first:

    * ``elements``   -- the element names
    * ``attributes`` -- the attributes object passed to ``startElement``
    * ``text``       -- for each element, the list of character chunks
                        received while it was the innermost open element

    Subclasses override ``handleElement`` to act on each element as it closes.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) keeps this usable on
        # old interpreters while still initialising ContentHandler's state.
        xml.sax.handler.ContentHandler.__init__(self)
        self.elements = []
        self.attributes = []
        self.text = []

    def startElement(self, name, attrs):
        """Push a new entry onto every stack for the element being opened."""
        self.elements.append(name)
        self.attributes.append(attrs)
        self.text.append([])

    def characters(self, content):
        """Record a chunk of character data against the innermost element."""
        self.text[-1].append(content)

    def endElement(self, name):
        """Dispatch the closing element, then pop its entry from every stack."""
        # handleElement runs first so it still sees the element's own entries
        # at the top of each stack.
        self.handleElement(name)
        self.elements.pop()
        self.attributes.pop()
        self.text.pop()

    def handleElement(self, name):
        """Hook called when the element ``name`` closes; does nothing here."""
        pass

    def parse(self, f):
        """Parse the XML source ``f``, feeding SAX events to this object.

        ``f`` is handed to the SAX reader's ``parse`` method unchanged. If it
        exposes a ``close`` method (for example an open file object), that
        method is called once parsing finishes, whether or not parsing
        succeeded. Sources without ``close`` -- such as a filename string --
        are left alone, so a parse error is never masked by an
        ``AttributeError`` raised during cleanup.

        Exceptions raised while parsing propagate to the caller.
        """
        try:
            parser = xml.sax.make_parser()
            parser.setContentHandler(self)
            parser.setErrorHandler(xml.sax.handler.ErrorHandler())
            # Do not let the reader pull in external general entities.
            parser.setFeature(xml.sax.handler.feature_external_ges, False)
            parser.parse(f)
        finally:
            close = getattr(f, "close", None)
            if close is not None:
                close()


class ConfigurableParser(Parser):
    """A parser which can be configured to handle elements individually.

    ``handlers`` maps an element name to a callable. The ``None`` key, if
    present, acts as a fallback for elements that have no usable handler of
    their own. Each handler is called as::

        handler(name, elements, attributes, text, joined_text)

    where ``elements``, ``attributes`` and ``text`` are the parser's live
    stacks and ``joined_text`` is the closing element's character chunks
    joined into one string.
    """

    def __init__(self, handlers=None):
        Parser.__init__(self)
        self.handlers = handlers or {}

    def __setitem__(self, name, handler):
        """Register ``handler`` for elements called ``name``."""
        self.handlers[name] = handler

    def update(self, handlers):
        """Merge the ``handlers`` mapping into the registered handlers."""
        self.handlers.update(handlers)

    def handleElement(self, name):
        """Invoke the handler for ``name``, else the ``None`` fallback."""
        # A missing or falsy specific handler falls through to the fallback;
        # at most one handler runs per element.
        handler = self.handlers.get(name) or self.handlers.get(None)
        if handler:
            handler(name, self.elements, self.attributes, self.text,
                    "".join(self.text[-1]))

# vim: tabstop=4 expandtab shiftwidth=4