imip-agent (file vContent.py at 64357f489755)

     1 #!/usr/bin/env python     2      3 """     4 Parsing of vCard, vCalendar and iCalendar files.     5      6 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013,     7               2014 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21     22 --------    23     24 References:    25     26 RFC 5545: Internet Calendaring and Scheduling Core Object Specification    27           (iCalendar)    28           http://tools.ietf.org/html/rfc5545    29     30 RFC 2445: Internet Calendaring and Scheduling Core Object Specification    31           (iCalendar)    32           http://tools.ietf.org/html/rfc2445    33     34 RFC 2425: A MIME Content-Type for Directory Information    35           http://tools.ietf.org/html/rfc2425    36     37 RFC 2426: vCard MIME Directory Profile    38           http://tools.ietf.org/html/rfc2426    39 """    40     41 try:    42     set    43 except NameError:    44     from sets import Set as set    45     46 # Encoding-related imports.    47     48 import base64, quopri    49 import codecs    50     51 # Tokenisation help.    52     53 import re    54     55 # Configuration.    56     57 default_encoding = "utf-8"    58     59 class ParseError(Exception):    60     61     "General parsing errors."    62     63     pass    64     65 # Reader and parser classes.    
# Helpers bridging the byte/text string differences between Python 2 and 3.
# NOTE: the 'bytes' name requires Python 2.6 or later.

def _as_bytes(value, charset="ascii"):

    "Return 'value' as a byte string, encoding text using 'charset'."

    if isinstance(value, bytes):
        return value
    return value.encode(charset)

def _as_text(value, charset="ascii"):

    """
    Return 'value' as the native string type, decoding byte strings using
    'charset' where the native string type is not itself a byte string.
    """

    if isinstance(value, str):
        return value
    return value.decode(charset)

# base64.decodestring/encodestring are absent from Python 3.9 and later; the
# *bytes functions are their direct replacements where available.

_b64decode = getattr(base64, "decodebytes", None) or base64.decodestring
_b64encode = getattr(base64, "encodebytes", None) or base64.encodestring

class Reader:

    "A simple class wrapping a file, providing simple pushback capabilities."

    def __init__(self, f, non_standard_newline=0):

        """
        Initialise the object with the file 'f'. If 'non_standard_newline' is
        set to a true value (unlike the default), lines ending with CR will be
        treated as complete lines.
        """

        self.f = f
        self.non_standard_newline = non_standard_newline
        self.lines = []             # stack of pushed-back lines
        self.line_number = 1 # about to read line 1

    def close(self):

        "Close the underlying file."

        self.f.close()

    def pushback(self, line):

        """
        Push the given 'line' back so that the next line read is actually the
        given 'line' and not the next line from the underlying file.
        """

        self.lines.append(line)
        self.line_number -= 1

    def readline(self):

        """
        If no pushed-back lines exist, read a line directly from the file.
        Otherwise, read from the list of pushed-back lines. An empty string is
        returned when the underlying file is exhausted.
        """

        self.line_number += 1
        if self.lines:
            return self.lines.pop()

        # Sanity check for broken lines (\r instead of \r\n or \n): unless CR
        # is accepted as a line ending, keep appending physical lines until
        # the text no longer ends with CR or the file runs out.

        line = self.f.readline()
        while line.endswith("\r") and not self.non_standard_newline:
            s = self.f.readline()
            if not s:
                break
            line += s

        # Normalise accepted CR-terminated lines so that they end with LF.

        if line.endswith("\r") and self.non_standard_newline:
            return line + "\n"
        return line

    def read_content_line(self):

        """
        Read an entire content line, itself potentially consisting of many
        physical lines of text, returning a string. An empty string is
        returned when no more content is available.
        """

        # Skip blank lines.

        line = self.readline()
        while line:
            line_stripped = line.rstrip("\r\n")
            if not line_stripped:
                line = self.readline()
            else:
                break
        else:
            return ""

        # Strip line endings from the right end of each line.
        # For subsequent lines, remove the first whitespace character.
        # See section 4.1 of the iCalendar specification.

        lines = [line_stripped]

        line = self.readline()
        while line.startswith(" ") or line.startswith("\t"):
            lines.append(line[1:].rstrip("\r\n"))
            line = self.readline()

        # Since one line too many will have been read, push the line back into
        # the file.

        if line:
            self.pushback(line)

        return "".join(lines)

    def get_content_line(self):

        "Return a content line object for the current line."

        return ContentLine(self.read_content_line())

class ContentLine:

    "A content line which can be searched."

    SEPARATORS = re.compile('[;:"]')
    SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')

    def __init__(self, text):

        "Initialise the content line with the given 'text'."

        self.text = text
        self.start = 0              # position from which searching resumes

    def __repr__(self):
        return "ContentLine(%r)" % self.text

    def get_remaining(self):

        "Get the remaining text from the content line."

        return self.text[self.start:]

    def search(self, targets):

        """
        Find one of the 'targets' in the text, returning the string from the
        current position up to the target found, along with the target string,
        using a tuple of the form (string, target). If no target was found,
        return the entire remaining string together with a target of None.

        Targets occurring between double quotes are ignored. The quotes
        themselves remain part of the returned string.

        The 'targets' parameter must be a regular expression object or an object
        compatible with the API of such objects.
        """

        text = self.text
        start = pos = self.start
        length = len(text)
        in_quoted_region = 0

        while pos < length:
            match = targets.search(text, pos)

            # Where nothing matches, end the search.

            if match is None:
                break

            found = match.group()

            # Where a double quote matches, toggle the region state.

            if found == '"':
                in_quoted_region = not in_quoted_region

            # Where something else matches outside a region, stop searching
            # and resume after the target next time.

            elif not in_quoted_region:
                self.start = match.end()
                return text[start:match.start()], found

            # Otherwise, keep looking beyond this match.

            pos = match.end()

        # Where no more input can provide the targets, return a special result.

        self.start = length
        return text[start:], None

class StreamParser:

    "A stream parser for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the parser for the given file 'f', which must provide the
        'get_content_line', 'line_number' and 'close' features of Reader.
        """

        self.f = f

    def close(self):

        "Close the reader."

        self.f.close()

    def __iter__(self):

        "Return self as the iterator."

        return self

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, values). StopIteration is raised at the end of the
        content.
        """

        return self.parse_content_line()

    # Support the Python 3 iterator protocol as well as the Python 2 one.

    __next__ = next

    def decode_content(self, value):

        "Decode the given 'value', replacing quoted characters."

        return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")

    # Internal methods.

    def parse_content_line(self):

        """
        Return the name, parameters and value information for the current
        content line in the file being parsed.

        Parameters given without a value are recorded with a value of None.
        StopIteration is raised when no more content lines are available, and
        ValueError is raised with (line number, content line) arguments where
        a line does not provide a value introduced by a colon.
        """

        f = self.f
        line_number = f.line_number
        line = f.get_content_line()

        # Read the property name.

        name, sep = line.search(line.SEPARATORS)
        name = name.strip()

        if not name and sep is None:
            raise StopIteration

        # Read the parameters.

        parameters = {}

        while sep == ";":

            # Find the actual modifier.

            parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)
            parameter_name = parameter_name.strip()

            if sep == "=":
                parameter_value, sep = line.search(line.SEPARATORS)
                parameter_value = parameter_value.strip()
            else:
                parameter_value = None

            parameters[parameter_name] = parameter_value

        # Get the value content.

        if sep != ":":
            raise ValueError(line_number, line)

        # Obtain and decode the value.

        value = self.decode(name, parameters, line.get_remaining())

        return name, parameters, value

    def decode(self, name, parameters, value):

        """
        Decode using 'name' and 'parameters' the given 'value'.

        Quoted-printable values are returned as text decoded using the CHARSET
        parameter (or iso-8859-1 in its absence), base64 values are returned
        as byte strings, and other values are returned as given apart from the
        replacement of quoted characters.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        value = self.decode_content(value)

        if encoding == "QUOTED-PRINTABLE":
            return quopri.decodestring(_as_bytes(value)).decode(charset or "iso-8859-1")
        elif encoding == "BASE64":
            return _b64decode(_as_bytes(value))
        else:
            return value

class ParserBase:

    """
    An abstract parser for content in vCard/vCalendar/iCalendar-like formats.
    Subclasses must provide the 'startComponent', 'endComponent' and
    'handleProperty' methods invoked by 'parse'.
    """

    def __init__(self):

        "Initialise the parser."

        self.names = []             # stack of names of open components

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', using 'parser_cls' (or StreamParser
        if omitted) to obtain the content items.

        ParseError is raised where an END declaration has no BEGIN declaration
        or does not match the most recent BEGIN declaration.
        """

        parser = (parser_cls or StreamParser)(f)

        for name, parameters, value in parser:

            if name == "BEGIN":
                self.names.append(value)
                self.startComponent(value, parameters)

            elif name == "END":

                # Report stray END declarations as parsing errors instead of
                # letting the empty stack cause an IndexError.

                if not self.names:
                    raise ParseError("END declaration (%r) without a BEGIN declaration at line %d." % (
                        value, f.line_number))

                start_name = self.names.pop()
                if start_name != value:
                    raise ParseError("Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (
                        start_name, value, f.line_number))

                self.endComponent(value)

            else:
                self.handleProperty(name, parameters, value)

class Parser(ParserBase):

    "A SAX-like parser for vCard/vCalendar/iCalendar-like formats."

    def __init__(self):

        "Initialise the parser with an empty stack of active components."

        ParserBase.__init__(self)
        self.components = []

    def startComponent(self, name, parameters):

        """
        Add the component with the given 'name' and 'parameters', recording an
        empty list of children as part of the component's content. The new
        component is returned.
        """

        component = self.handleProperty(name, parameters)
        self.components.append(component)
        return component

    def endComponent(self, name):

        """
        End the component with the given 'name' by removing it from the active
        component stack. If only one component exists on the stack, retain it
        for later inspection. The ended component is returned, or None if no
        component is active.
        """

        if len(self.components) > 1:
            return self.components.pop()

        # Or return the only element.

        elif self.components:
            return self.components[0]

    def handleProperty(self, name, parameters, value=None):

        """
        Record the property with the given 'name', 'parameters' and optional
        'value' as part of the current component's children, returning the
        object made for the property.
        """

        component = self.makeComponent(name, parameters, value)
        self.attachComponent(component)
        return component

    # Component object construction/manipulation methods.

    def attachComponent(self, component):

        """
        Attach the given 'component' to its parent, this being the component at
        the top of the active component stack. Nothing is done where no
        component is active.
        """

        if self.components:
            component_name, component_parameters, component_children = self.components[-1]
            component_children.append(component)

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value', this being a tuple of the form (name, parameters, value).
        """

        # NOTE(review): any false value, including an empty string from a
        # NOTE(review): property with no value text, is replaced by a new list
        # NOTE(review): and is thus indistinguishable from a component without
        # NOTE(review): children; confirm that callers expect this.

        return (name, parameters, value or [])

    # Public methods.

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', returning the component at the
        bottom of the component stack. IndexError is raised where no component
        was found.
        """

        ParserBase.parse(self, f, parser_cls)
        return self.components[0]

# Writer classes.

class Writer:

    "A simple class wrapping a file, providing simple output capabilities."

    default_line_length = 76

    def __init__(self, write, line_length=None):

        """
        Initialise the object with the given 'write' operation. If 'line_length'
        is set, the length of written lines will conform to the specified value
        instead of the default value.

        ValueError is raised for line lengths below 2 since continuation lines
        need room for a leading space plus at least one character, and writing
        could otherwise never make progress.
        """

        self._write = write
        self.line_length = line_length or self.default_line_length

        if self.line_length < 2:
            raise ValueError("Line length must be at least 2: %r" % (self.line_length,))

        self.char_offset = 0        # characters already on the physical line

    def write(self, text):

        """
        Write the 'text' to the file, starting a continuation line (CRLF plus
        a space) whenever the current physical line is full.
        """

        write = self._write
        line_length = self.line_length

        i = 0
        remaining = len(text)

        while remaining:
            space = line_length - self.char_offset
            if remaining > space:
                write(text[i:i + space])
                write("\r\n ")

                # The leading space occupies the first column of the new line.

                self.char_offset = 1
                i += space
                remaining -= space
            else:
                write(text[i:])
                self.char_offset += remaining
                i += remaining
                remaining = 0

    def end_line(self):

        "End the current content line if anything has been written to it."

        if self.char_offset > 0:
            self.char_offset = 0
            self._write("\r\n")

class StreamWriter:

    "A stream writer for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the stream writer with the given 'f' stream object, which
        must provide the 'write' and 'end_line' methods of Writer.
        """

        self.f = f

    def append(self, record):

        "Write the given 'record', a (name, parameters, value) tuple."

        self.write(*record)

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.
        """

        self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))

    # Internal methods.

    def write_content_line(self, name, encoded_parameters, encoded_value):

        """
        Write a content line for the given 'name', 'encoded_parameters' and
        'encoded_value' information. Parameters having a value of None are
        written as bare names, matching the representation the parser produces
        for parameters without values.
        """

        f = self.f

        f.write(name)
        for param_name, param_value in encoded_parameters.items():
            f.write(";")
            f.write(param_name)
            if param_value is not None:
                f.write("=")
                f.write(param_value)
        f.write(":")
        f.write(encoded_value)
        f.end_line()

    def encode_quoted_parameter_value(self, value):

        "Encode the given 'value' by enclosing it in double quotes."

        return '"%s"' % value

    def encode_value(self, name, parameters, value):

        """
        Encode using 'name' and 'parameters' the given 'value' so that the
        resulting encoded form employs any specified character encodings.

        Quoted-printable values are encoded from text using the CHARSET
        parameter (or iso-8859-1 in its absence), and base64 values are encoded
        from byte strings.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        if encoding == "QUOTED-PRINTABLE":
            value = _as_text(quopri.encodestring(value.encode(charset or "iso-8859-1")))
        elif encoding == "BASE64":
            value = _as_text(_b64encode(_as_bytes(value)))

        return self.encode_content(value)

    # Overrideable methods.

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification,
        returning a new dictionary and leaving 'parameters' unchanged.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():

            # Basic format support merely involves quoting values which seem to
            # need it. Other more specific formats may define exactly which
            # parameters should be quoted. Parameters without values (None)
            # have nothing to quote.

            if param_value is not None and ContentLine.SEPARATORS.search(param_value):
                param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, value):

        "Encode the given 'value', quoting newline characters."

        return value.replace("\n", "\\n")

# Utility functions.

def is_input_stream(stream_or_string):

    "Return whether 'stream_or_string' provides a 'read' attribute."

    return hasattr(stream_or_string, "read")

def get_input_stream(stream_or_string, encoding=None):

    """
    Return 'stream_or_string' if it is an input stream. Otherwise, open it as
    a filename for reading using 'encoding' or the default encoding.
    """

    if is_input_stream(stream_or_string):
        return stream_or_string
    else:
        return codecs.open(stream_or_string, encoding=(encoding or default_encoding))

def get_output_stream(stream_or_string, encoding=None):

    """
    Return 'stream_or_string' if it provides a 'write' attribute. Otherwise,
    open it as a filename for writing using 'encoding' or the default encoding.
    """

    if hasattr(stream_or_string, "write"):
        return stream_or_string
    else:
        return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))

def items_to_dict(items):

    """
    Return the given 'items' as a dictionary mapping names to lists of tuples
    of the form (value, attributes). Values which are lists are themselves
    converted to dictionaries.
    """

    d = {}
    for name, attr, value in items:
        if name not in d:
            d[name] = []
        if isinstance(value, list):
            d[name].append((items_to_dict(value), attr))
        else:
            d[name].append((value, attr))
    return d

def dict_to_items(d):

    """
    Return 'd' converted to a list of items suitable for serialisation using
    iterwrite. Each value in 'd' is either a list of (value, attributes) tuples
    or a single (dictionary, attributes) tuple.
    """

    items = []
    for name, value in d.items():
        if isinstance(value, list):
            for v, a in value:
                if isinstance(v, dict):
                    items.append((name, a, dict_to_items(v)))
                else:
                    items.append((name, a, v))
        else:
            v, a = value
            items.append((name, a, dict_to_items(v)))
    return items

# Public functions.

def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' can be used to specify the class instantiated to
    do the parsing instead of Parser.

    As a result of parsing the resource, the root node of the imported resource
    is returned.
    """

    stream = get_input_stream(stream_or_string, encoding)
    reader = Reader(stream, non_standard_newline)

    # Parse using the reader.

    try:
        parser = (parser_cls or Parser)()
        return parser.parse(reader)

    # Close any opened streams, leaving streams supplied by the caller open.

    finally:
        if not is_input_stream(stream_or_string):
            reader.close()

def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' can be used to specify the class instantiated to
    provide the events instead of StreamParser.

    An iterator is returned which provides event tuples describing parsing
    events of the form (name, parameters, value). The stream is not closed by
    this function: the 'close' method of the returned object is available for
    that purpose.
    """

    stream = get_input_stream(stream_or_string, encoding)
    reader = Reader(stream, non_standard_newline)
    parser = (parser_cls or StreamParser)(reader)
    return parser

def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.

    The optional 'writer_cls' can be used to specify the class instantiated to
    do the writing instead of StreamWriter.

    IOError is raised where neither 'stream_or_string' nor 'write' is given.
    """

    if stream_or_string:
        stream = get_output_stream(stream_or_string, encoding)
        _writer = Writer(stream.write, line_length)
    elif write:
        _writer = Writer(write, line_length)
    else:
        raise IOError("No stream, filename or write operation specified.")

    return (writer_cls or StreamWriter)(_writer)

def to_dict(node):

    """
    Return the 'node', a (name, attributes, items) tuple, converted to a
    dictionary representation mapping the name to a (content, attributes)
    tuple. Where the items are a list, the content is a dictionary.
    """

    name, attr, items = node

    # An explicit test is needed since an empty list of items yields an empty
    # (and thus false) dictionary which must still replace the list.

    if isinstance(items, list):
        items = items_to_dict(items)

    return {name : (items, attr)}

def to_node(d):

    "Return 'd' converted to a items-based representation."

    return dict_to_items(d)[0]

# vim: tabstop=4 expandtab shiftwidth=4