#!/usr/bin/env python

"""
Parsing of vCalendar and iCalendar files.

Copyright (C) 2008, 2009, 2011, 2013, 2014, 2015,
              2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.

--------

References:

RFC 5545: Internet Calendaring and Scheduling Core Object Specification
          (iCalendar)
          http://tools.ietf.org/html/rfc5545

RFC 2445: Internet Calendaring and Scheduling Core Object Specification
          (iCalendar)
          http://tools.ietf.org/html/rfc2445
"""

import vContent
import re

# Fall back to the 'sets' module on interpreters without a built-in set type.

try:
    set
except NameError:
    from sets import Set as set

ParseError = vContent.ParseError

# Format details.

# Names that open a nested section (written as BEGIN:name ... END:name).

SECTION_TYPES = set([
    "VALARM", "VCALENDAR", "VEVENT", "VFREEBUSY", "VJOURNAL", "VTIMEZONE", "VTODO",
    "DAYLIGHT", "STANDARD"
    ])

# Parameters whose values are enclosed in double quotes.

QUOTED_PARAMETERS = set([
    "ALTREP", "DELEGATED-FROM", "DELEGATED-TO", "DIR", "MEMBER", "SENT-BY"
    ])

# Parameters whose values are split into (and joined from) a list of values.

MULTIVALUED_PARAMETERS = set([
    "DELEGATED-FROM", "DELEGATED-TO", "MEMBER"
    ])

# Properties whose values are only ever split on semicolons, never on commas.

NON_MULTIVALUED_PROPERTIES = set([
    "RRULE"
    ])
QUOTED_TYPES = set(["URI"])

# Separators not preceded by a backslash. The capturing group makes split()
# return the separators themselves at the odd positions of its result.

unquoted_separator_regexp = re.compile(r"(?<!\\)([,;])")
unquoted_semicolon_regexp = re.compile(r"(?<!\\)([;])")

# Parser classes.

class vCalendarStreamParser(vContent.StreamParser):

    "A stream parser specifically for vCalendar/iCalendar."

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, value), with the parameters decoded using the
        'decode_parameters' method.
        """

        name, parameters, value = vContent.StreamParser.next(self)
        return name, self.decode_parameters(parameters), value

    def decode_content(self, name, value):

        """
        Decode for property 'name' the given 'value' (which may represent a
        collection of distinct values), replacing quoted separator characters.

        The first unquoted separator found in 'value' decides the result type:
        a list for comma-separated values, a tuple for semicolon-separated
        values. Without any unquoted separator, the single decoded value is
        returned directly. Properties in NON_MULTIVALUED_PROPERTIES are only
        split on semicolons.
        """

        sep = None
        values = []

        if name in NON_MULTIVALUED_PROPERTIES:
            split = unquoted_semicolon_regexp.split
        else:
            split = unquoted_separator_regexp.split

        # Odd positions hold the captured separators; even positions hold the
        # values found between them.

        for i, s in enumerate(split(value)):
            if i % 2 != 0:
                if not sep:
                    sep = s
                continue
            values.append(self.decode_content_value(name, s))

        if sep == ",":
            return values
        elif sep == ";":
            return tuple(values)
        else:
            return values[0]

    def decode_content_value(self, name, value):

        """
        Decode for property 'name' the given 'value', replacing quoted separator
        characters.
        """

        # Replace quoted characters (see 4.3.11 in RFC 2445).

        value = vContent.StreamParser.decode_content(self, name, value)
        return value.replace(r"\,", ",").replace(r"\;", ";")

    # Internal methods.

    def decode_quoted_value(self, value):

        """
        Return the given 'value' with one pair of enclosing double quotes
        removed, or return it unchanged if it is not enclosed in double quotes.

        A value shorter than two characters (including the empty string) cannot
        be a quoted string and is returned unchanged.
        """

        # The length test avoids indexing an empty value and prevents a lone
        # quote character from being treated as both opening and closing quote.

        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            return value[1:-1]
        else:
            return value

    def decode_parameters(self, parameters):

        """
        Decode the given 'parameters' according to the vCalendar specification,
        returning a new dictionary mapping each parameter name to its decoded
        value.

        Values of QUOTED_PARAMETERS have their enclosing double quotes removed.
        Values of MULTIVALUED_PARAMETERS are split into a list of values.
        """

        decoded_parameters = {}

        for param_name, param_value in parameters.items():
            if param_name in QUOTED_PARAMETERS:
                param_value = self.decode_quoted_value(param_value)

                # With the outermost quotes gone, the individual values of
                # "a","b" remain separated by the sequence ",".

                separator = '","'
            else:
                separator = ","
            if param_name in MULTIVALUED_PARAMETERS:
                param_value = param_value.split(separator)
            decoded_parameters[param_name] = param_value

        return decoded_parameters

class vCalendarParser(vContent.Parser):

    "A parser specifically for vCalendar/iCalendar."

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of 'f' using the given 'parser_cls' or, if it is
        omitted or None, the vCalendarStreamParser class.
        """

        return vContent.Parser.parse(self, f, (parser_cls or vCalendarStreamParser))

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value'.

        The component is a (name, parameters, value) tuple. For names in
        SECTION_TYPES a false 'value' is replaced by an empty list; for other
        names a false 'value' is replaced by None.
        """

        if name in SECTION_TYPES:
            return (name, parameters, value or [])
        else:
            return (name, parameters, value or None)

# Writer classes.

class vCalendarStreamWriter(vContent.StreamWriter):

    "A stream writer specifically for vCalendar/iCalendar."

    # Overridden methods.

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.

        For names in SECTION_TYPES, 'value' is a collection of
        (name, parameters, value) tuples which are written recursively between
        BEGIN and END lines; the section's own 'parameters' are not written.
        """

        if name in SECTION_TYPES:
            self.write_content_line("BEGIN", {}, name)
            for n, p, v in value:
                self.write(n, p, v)
            self.write_content_line("END", {}, name)
        else:
            vContent.StreamWriter.write(self, name, parameters, value)

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification,
        returning a new dictionary mapping each parameter name to its encoded
        value.

        Values of MULTIVALUED_PARAMETERS are joined into a single string.
        Values of QUOTED_PARAMETERS are passed through the
        'encode_quoted_parameter_value' method, which is not defined in this
        class (presumably it is inherited from vContent.StreamWriter).
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():

            # Quoted values are joined with "," so that the enclosing quotes
            # applied afterwards leave each value individually quoted.

            if param_name in QUOTED_PARAMETERS:
                separator = '","'
            else:
                separator = ","
            if param_name in MULTIVALUED_PARAMETERS:
                param_value = separator.join(param_value)
            if param_name in QUOTED_PARAMETERS:
                param_value = self.encode_quoted_parameter_value(param_value)
            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, name, value):

        """
        Encode for property 'name' the given 'value' (which may be a list or
        tuple of separate values), quoting characters and separating collections
        of values.

        List items are separated by commas and tuple items by semicolons. Any
        other kind of value is encoded as a single value.
        """

        if isinstance(value, list):
            sep = ","
        elif isinstance(value, tuple):
            sep = ";"
        else:
            value = [value]
            sep = ""

        return sep.join([self.encode_content_value(name, v) for v in value])

    def encode_content_value(self, name, value):

        "Encode for property 'name' the given 'value', quoting characters."

        # Replace quoted characters (see 4.3.11 in RFC 2445).

        value = vContent.StreamWriter.encode_content(self, name, value)

        # Commas are left unquoted in properties which are only ever split on
        # semicolons.

        if name in NON_MULTIVALUED_PROPERTIES:
            quote = self.quote_semicolons
        else:
            quote = self.quote_separators

        return quote(value)

    def quote_separators(self, value):

        "Return 'value' with semicolons and commas preceded by backslashes."

        return value.replace(";", r"\;").replace(",", r"\,")

    def quote_semicolons(self, value):

        "Return 'value' with semicolons preceded by backslashes."

        return value.replace(";", r"\;")

# Public functions.

def parse(stream_or_string, encoding=None, non_standard_newline=0):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    As a result of parsing the resource, the root node of the imported resource
    is returned.
    """

    return vContent.parse(stream_or_string, encoding, non_standard_newline, vCalendarParser)

def iterparse(stream_or_string, encoding=None, non_standard_newline=0):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    An iterator is returned which provides event tuples describing parsing
    events of the form (name, parameters, value).
    """

    return vContent.iterparse(stream_or_string, encoding, non_standard_newline, vCalendarStreamParser)

def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.
    """

    return vContent.iterwrite(stream_or_string, write, encoding, line_length, vCalendarStreamWriter)

def to_dict(node):

    "Return the 'node' converted to a dictionary representation."

    return vContent.to_dict(node, SECTION_TYPES)

to_node = vContent.to_node

# vim: tabstop=4 expandtab shiftwidth=4