# imip-agent (file vContent.py at 9807df0a5e95)

     1 #!/usr/bin/env python     2      3 """     4 Parsing of vCard, vCalendar and iCalendar files.     5      6 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013,     7               2014, 2015 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21     22 --------    23     24 References:    25     26 RFC 5545: Internet Calendaring and Scheduling Core Object Specification    27           (iCalendar)    28           http://tools.ietf.org/html/rfc5545    29     30 RFC 2445: Internet Calendaring and Scheduling Core Object Specification    31           (iCalendar)    32           http://tools.ietf.org/html/rfc2445    33     34 RFC 2425: A MIME Content-Type for Directory Information    35           http://tools.ietf.org/html/rfc2425    36     37 RFC 2426: vCard MIME Directory Profile    38           http://tools.ietf.org/html/rfc2426    39 """    40     41 try:    42     set    43 except NameError:    44     from sets import Set as set    45     46 # Encoding-related imports.    47     48 import base64, quopri    49 import codecs    50     51 # Tokenisation help.    52     53 import re    54     55 # Configuration.    56     57 default_encoding = "utf-8"    58     59 class ParseError(Exception):    60     61     "General parsing errors."    62     63     pass    64     65 # Reader and parser classes.  
  66     67 class Reader:    68     69     "A simple class wrapping a file, providing simple pushback capabilities."    70     71     def __init__(self, f, non_standard_newline=0):    72     73         """    74         Initialise the object with the file 'f'. If 'non_standard_newline' is    75         set to a true value (unlike the default), lines ending with CR will be    76         treated as complete lines.    77         """    78     79         self.f = f    80         self.non_standard_newline = non_standard_newline    81         self.lines = []    82         self.line_number = 1 # about to read line 1    83     84     def close(self):    85     86         "Close the reader."    87     88         self.f.close()    89     90     def pushback(self, line):    91     92         """    93         Push the given 'line' back so that the next line read is actually the    94         given 'line' and not the next line from the underlying file.    95         """    96     97         self.lines.append(line)    98         self.line_number -= 1    99    100     def readline(self):   101    102         """   103         If no pushed-back lines exist, read a line directly from the file.   104         Otherwise, read from the list of pushed-back lines.   105         """   106    107         self.line_number += 1   108         if self.lines:   109             return self.lines.pop()   110         else:   111             # Sanity check for broken lines (\r instead of \r\n or \n).   
112             line = self.f.readline()   113             while line.endswith("\r") and not self.non_standard_newline:   114                 s = self.f.readline()   115                 if not s:   116                     break   117                 line += s   118             if line.endswith("\r") and self.non_standard_newline:   119                 return line + "\n"   120             else:   121                 return line   122    123     def read_content_line(self):   124    125         """   126         Read an entire content line, itself potentially consisting of many   127         physical lines of text, returning a string.   128         """   129    130         # Skip blank lines.   131    132         line = self.readline()   133         while line:   134             line_stripped = line.rstrip("\r\n")   135             if not line_stripped:   136                 line = self.readline()   137             else:   138                 break   139         else:   140             return ""   141    142         # Strip all appropriate whitespace from the right end of each line.   143         # For subsequent lines, remove the first whitespace character.   144         # See section 4.1 of the iCalendar specification.   145    146         lines = [line_stripped]   147    148         line = self.readline()   149         while line.startswith(" ") or line.startswith("\t"):   150             lines.append(line[1:].rstrip("\r\n"))   151             line = self.readline()   152    153         # Since one line too many will have been read, push the line back into   154         # the file.   155    156         if line:   157             self.pushback(line)   158    159         return "".join(lines)   160    161     def get_content_line(self):   162    163         "Return a content line object for the current line."   164    165         return ContentLine(self.read_content_line())   166    167 class ContentLine:   168    169     "A content line which can be searched."   
170    171     SEPARATORS = re.compile('[;:"]')   172     SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')   173    174     def __init__(self, text):   175         self.text = text   176         self.start = 0   177    178     def __repr__(self):   179         return "ContentLine(%r)" % self.text   180    181     def get_remaining(self):   182    183         "Get the remaining text from the content line."   184    185         return self.text[self.start:]   186    187     def search(self, targets):   188    189         """   190         Find one of the 'targets' in the text, returning the string from the   191         current position up to the target found, along with the target string,   192         using a tuple of the form (string, target). If no target was found,   193         return the entire string together with a target of None.   194    195         The 'targets' parameter must be a regular expression object or an object   196         compatible with the API of such objects.   197         """   198    199         text = self.text   200         start = pos = self.start   201         length = len(text)   202    203         # Remember the first target.   204    205         first = None   206         first_pos = None   207         in_quoted_region = 0   208    209         # Process the text, looking for the targets.   210    211         while pos < length:   212             match = targets.search(text, pos)   213    214             # Where nothing matches, end the search.   215    216             if match is None:   217                 pos = length   218    219             # Where a double quote matches, toggle the region state.   220    221             elif match.group() == '"':   222                 in_quoted_region = not in_quoted_region   223                 pos = match.end()   224    225             # Where something else matches outside a region, stop searching.   
226    227             elif not in_quoted_region:   228                 first = match.group()   229                 first_pos = match.start()   230                 break   231    232             # Otherwise, keep looking for the end of the region.   233    234             else:   235                 pos = match.end()   236    237         # Where no more input can provide the targets, return a special result.   238    239         else:   240             self.start = length   241             return text[start:], None   242    243         self.start = match.end()   244         return text[start:first_pos], first   245    246 class StreamParser:   247    248     "A stream parser for content in vCard/vCalendar/iCalendar-like formats."   249    250     def __init__(self, f):   251    252         "Initialise the parser for the given file 'f'."   253    254         self.f = f   255    256     def close(self):   257    258         "Close the reader."   259    260         self.f.close()   261    262     def __iter__(self):   263    264         "Return self as the iterator."   265    266         return self   267    268     def next(self):   269    270         """   271         Return the next content item in the file as a tuple of the form   272         (name, parameters, values).   273         """   274    275         return self.parse_content_line()   276    277     def decode_content(self, value):   278    279         "Decode the given 'value', replacing quoted characters."   280    281         return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")   282    283     # Internal methods.   284    285     def parse_content_line(self):   286    287         """   288         Return the name, parameters and value information for the current   289         content line in the file being parsed.   
290         """   291    292         f = self.f   293         line_number = f.line_number   294         line = f.get_content_line()   295    296         # Read the property name.   297    298         name, sep = line.search(line.SEPARATORS)   299         name = name.strip()   300    301         if not name and sep is None:   302             raise StopIteration   303    304         # Read the parameters.   305    306         parameters = {}   307    308         while sep == ";":   309    310             # Find the actual modifier.   311    312             parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)   313             parameter_name = parameter_name.strip()   314    315             if sep == "=":   316                 parameter_value, sep = line.search(line.SEPARATORS)   317                 parameter_value = parameter_value.strip()   318             else:   319                 parameter_value = None   320    321             # Append a key, value tuple to the parameters list.   322    323             parameters[parameter_name] = parameter_value   324    325         # Get the value content.   326    327         if sep != ":":   328             raise ValueError, (line_number, line)   329    330         # Obtain and decode the value.   331    332         value = self.decode(name, parameters, line.get_remaining())   333    334         return name, parameters, value   335    336     def decode(self, name, parameters, value):   337    338         "Decode using 'name' and 'parameters' the given 'value'."   
339    340         encoding = parameters.get("ENCODING")   341         charset = parameters.get("CHARSET")   342    343         value = self.decode_content(value)   344    345         if encoding == "QUOTED-PRINTABLE":   346             return unicode(quopri.decodestring(value), charset or "iso-8859-1")   347         elif encoding == "BASE64":   348             return base64.decodestring(value)   349         else:   350             return value   351    352 class ParserBase:   353    354     "An abstract parser for content in vCard/vCalendar/iCalendar-like formats."   355    356     def __init__(self):   357    358         "Initialise the parser."   359    360         self.names = []   361    362     def parse(self, f, parser_cls=None):   363    364         "Parse the contents of the file 'f'."   365    366         parser = (parser_cls or StreamParser)(f)   367    368         for name, parameters, value in parser:   369    370             if name == "BEGIN":   371                 self.names.append(value)   372                 self.startComponent(value, parameters)   373    374             elif name == "END":   375                 start_name = self.names.pop()   376                 if start_name != value:   377                     raise ParseError, "Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (   378                         start_name, value, f.line_number)   379    380                 self.endComponent(value)   381    382             else:   383                 self.handleProperty(name, parameters, value)   384    385 class Parser(ParserBase):   386    387     "A SAX-like parser for vCard/vCalendar/iCalendar-like formats."   
388    389     def __init__(self):   390         ParserBase.__init__(self)   391         self.components = []   392    393     def startComponent(self, name, parameters):   394    395         """   396         Add the component with the given 'name' and 'parameters', recording an   397         empty list of children as part of the component's content.   398         """   399    400         component = self.handleProperty(name, parameters)   401         self.components.append(component)   402         return component   403    404     def endComponent(self, name):   405    406         """   407         End the component with the given 'name' by removing it from the active   408         component stack. If only one component exists on the stack, retain it   409         for later inspection.   410         """   411    412         if len(self.components) > 1:   413             return self.components.pop()   414    415         # Or return the only element.   416    417         elif self.components:   418             return self.components[0]   419    420     def handleProperty(self, name, parameters, value=None):   421    422         """   423         Record the property with the given 'name', 'parameters' and optional   424         'value' as part of the current component's children.   425         """   426    427         component = self.makeComponent(name, parameters, value)   428         self.attachComponent(component)   429         return component   430    431     # Component object construction/manipulation methods.   432    433     def attachComponent(self, component):   434    435         "Attach the given 'component' to its parent."   
436    437         if self.components:   438             component_name, component_parameters, component_children = self.components[-1]   439             component_children.append(component)   440    441     def makeComponent(self, name, parameters, value=None):   442    443         """   444         Make a component object from the given 'name', 'parameters' and optional   445         'value'.   446         """   447    448         return (name, parameters, value or [])   449    450     # Public methods.   451    452     def parse(self, f, parser_cls=None):   453    454         "Parse the contents of the file 'f'."   455    456         ParserBase.parse(self, f, parser_cls)   457         try:   458             return self.components[0]   459         except IndexError:   460             raise ParseError, "No vContent component found in file."   461    462 # Writer classes.   463    464 class Writer:   465    466     "A simple class wrapping a file, providing simple output capabilities."   467    468     default_line_length = 76   469    470     def __init__(self, write, line_length=None):   471    472         """   473         Initialise the object with the given 'write' operation. If 'line_length'   474         is set, the length of written lines will conform to the specified value   475         instead of the default value.    476         """   477    478         self._write = write   479         self.line_length = line_length or self.default_line_length   480         self.char_offset = 0   481    482     def write(self, text):   483    484         "Write the 'text' to the file."   
485    486         write = self._write   487         line_length = self.line_length   488    489         i = 0   490         remaining = len(text)   491    492         while remaining:   493             space = line_length - self.char_offset   494             if remaining > space:   495                 write(text[i:i + space])   496                 write("\r\n ")   497                 self.char_offset = 1   498                 i += space   499                 remaining -= space   500             else:   501                 write(text[i:])   502                 self.char_offset += remaining   503                 i += remaining   504                 remaining = 0   505    506     def end_line(self):   507    508         "End the current content line."   509    510         if self.char_offset > 0:   511             self.char_offset = 0   512             self._write("\r\n")   513    514 class StreamWriter:   515    516     "A stream writer for content in vCard/vCalendar/iCalendar-like formats."   517    518     def __init__(self, f):   519    520         "Initialise the stream writer with the given 'f' stream object."   521    522         self.f = f   523    524     def append(self, record):   525         self.write(*record)   526    527     def write(self, name, parameters, value):   528    529         """   530         Write a content line, serialising the given 'name', 'parameters' and   531         'value' information.   532         """   533    534         self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))   535    536     # Internal methods.   537    538     def write_content_line(self, name, encoded_parameters, encoded_value):   539    540         """   541         Write a content line for the given 'name', 'encoded_parameters' and   542         'encoded_value' information.   
543         """   544    545         f = self.f   546    547         f.write(name)   548         for param_name, param_value in encoded_parameters.items():   549             f.write(";")   550             f.write(param_name)   551             f.write("=")   552             f.write(param_value)   553         f.write(":")   554         f.write(encoded_value)   555         f.end_line()   556    557     def encode_quoted_parameter_value(self, value):   558    559         "Encode the given 'value'."   560    561         return '"%s"' % value   562    563     def encode_value(self, name, parameters, value):   564    565         """   566         Encode using 'name' and 'parameters' the given 'value' so that the   567         resulting encoded form employs any specified character encodings.   568         """   569    570         encoding = parameters.get("ENCODING")   571         charset = parameters.get("CHARSET")   572    573         if encoding == "QUOTED-PRINTABLE":   574             value = quopri.encodestring(value.encode(charset or "iso-8859-1"))   575         elif encoding == "BASE64":   576             value = base64.encodestring(value)   577    578         return self.encode_content(value)   579    580     # Overrideable methods.   581    582     def encode_parameters(self, parameters):   583    584         """   585         Encode the given 'parameters' according to the vCalendar specification.   586         """   587    588         encoded_parameters = {}   589    590         for param_name, param_value in parameters.items():   591    592             # Basic format support merely involves quoting values which seem to   593             # need it. Other more specific formats may define exactly which   594             # parameters should be quoted.   
595    596             if ContentLine.SEPARATORS.search(param_value):   597                 param_value = self.encode_quoted_parameter_value(param_value)   598    599             encoded_parameters[param_name] = param_value   600    601         return encoded_parameters   602    603     def encode_content(self, value):   604    605         "Encode the given 'value', quoting characters."   606    607         return value.replace("\n", "\\n")   608    609 # Utility functions.   610    611 def is_input_stream(stream_or_string):   612     return hasattr(stream_or_string, "read")   613    614 def get_input_stream(stream_or_string, encoding=None):   615     if is_input_stream(stream_or_string):   616         if isinstance(stream_or_string, codecs.StreamReader):   617             return stream_or_string   618         else:   619             return codecs.getreader(encoding or default_encoding)(stream_or_string)   620     else:   621         return codecs.open(stream_or_string, encoding=(encoding or default_encoding))   622    623 def get_output_stream(stream_or_string, encoding=None):   624     if hasattr(stream_or_string, "write"):   625         if isinstance(stream_or_string, codecs.StreamWriter):   626             return stream_or_string   627         else:   628             return codecs.getwriter(encoding or default_encoding)(stream_or_string)   629     else:   630         return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))   631    632 def items_to_dict(items, sections=None):   633    634     """   635     Return the given 'items' as a dictionary mapping names to tuples of the form   636     (value, attributes). Where 'sections' is provided, only items whose names   637     occur in the given 'sections' collection will be treated as groups or   638     sections of definitions.   
639     """   640    641     d = {}   642     for name, attr, value in items:   643         if not d.has_key(name):   644             d[name] = []   645         if isinstance(value, list) and (not sections or name in sections):   646             d[name].append((items_to_dict(value, sections), attr))   647         else:   648             d[name].append((value, attr))   649     return d   650    651 def dict_to_items(d):   652    653     """   654     Return 'd' converted to a list of items suitable for serialisation using   655     iterwrite.   656     """   657    658     items = []   659     for name, value in d.items():   660         if isinstance(value, list):   661             for v, a in value:   662                 if isinstance(v, dict):   663                     items.append((name, a, dict_to_items(v)))   664                 else:   665                     items.append((name, a, v))   666         else:   667             v, a = value   668             items.append((name, a, dict_to_items(v)))   669     return items   670    671 # Public functions.   672    673 def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):   674    675     """   676     Parse the resource data found through the use of the 'stream_or_string',   677     which is either a stream providing Unicode data (the codecs module can be   678     used to open files or to wrap streams in order to provide Unicode data) or a   679     filename identifying a file to be parsed.   680    681     The optional 'encoding' can be used to specify the character encoding used   682     by the file to be parsed.   683    684     The optional 'non_standard_newline' can be set to a true value (unlike the   685     default) in order to attempt to process files with CR as the end of line   686     character.   687    688     As a result of parsing the resource, the root node of the imported resource   689     is returned.   
690     """   691    692     stream = get_input_stream(stream_or_string, encoding)   693     reader = Reader(stream, non_standard_newline)   694    695     # Parse using the reader.   696    697     try:   698         parser = (parser_cls or Parser)()   699         return parser.parse(reader)   700    701     # Close any opened streams.   702    703     finally:   704         if not is_input_stream(stream_or_string):   705             reader.close()   706    707 def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):   708    709     """   710     Parse the resource data found through the use of the 'stream_or_string',   711     which is either a stream providing Unicode data (the codecs module can be   712     used to open files or to wrap streams in order to provide Unicode data) or a   713     filename identifying a file to be parsed.   714    715     The optional 'encoding' can be used to specify the character encoding used   716     by the file to be parsed.   717    718     The optional 'non_standard_newline' can be set to a true value (unlike the   719     default) in order to attempt to process files with CR as the end of line   720     character.   721    722     An iterator is returned which provides event tuples describing parsing   723     events of the form (name, parameters, value).   724     """   725    726     stream = get_input_stream(stream_or_string, encoding)   727     reader = Reader(stream, non_standard_newline)   728     parser = (parser_cls or StreamParser)(reader)   729     return parser   730    731 def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):   732    733     """   734     Return a writer which will either send data to the resource found through   735     the use of 'stream_or_string' or using the given 'write' operation.   
736    737     The 'stream_or_string' parameter may be either a stream accepting Unicode   738     data (the codecs module can be used to open files or to wrap streams in   739     order to accept Unicode data) or a filename identifying a file to be   740     written.   741    742     The optional 'encoding' can be used to specify the character encoding used   743     by the file to be written.   744    745     The optional 'line_length' can be used to specify how long lines should be   746     in the resulting data.   747     """   748    749     if stream_or_string:   750         stream = get_output_stream(stream_or_string, encoding)   751         _writer = Writer(stream.write, line_length)   752     elif write:   753         _writer = Writer(write, line_length)   754     else:   755         raise IOError, "No stream, filename or write operation specified."   756    757     return (writer_cls or StreamWriter)(_writer)   758    759 def to_dict(node, sections=None):   760    761     "Return the 'node' converted to a dictionary representation."   762    763     name, attr, items = node   764     return {name : (isinstance(items, list) and items_to_dict(items, sections) or items, attr)}   765    766 def to_node(d):   767    768     "Return 'd' converted to a items-based representation."   769    770     return dict_to_items(d)[0]   771    772 # vim: tabstop=4 expandtab shiftwidth=4