paul@0 | 1 | #!/usr/bin/env python |
paul@0 | 2 | |
paul@0 | 3 | """ |
paul@0 | 4 | Parsing of vCard, vCalendar and iCalendar files. |
paul@0 | 5 | |
paul@39 | 6 | Copyright (C) 2005, 2006, 2007, 2008, 2009, 2011, 2013, |
paul@76 | 7 | 2014, 2015, 2017 Paul Boddie <paul@boddie.org.uk> |
paul@0 | 8 | |
paul@0 | 9 | This program is free software; you can redistribute it and/or modify it under |
paul@14 | 10 | the terms of the GNU General Public License as published by the Free Software |
paul@14 | 11 | Foundation; either version 3 of the License, or (at your option) any later |
paul@14 | 12 | version. |
paul@0 | 13 | |
paul@0 | 14 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@0 | 15 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@14 | 16 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@0 | 17 | details. |
paul@0 | 18 | |
paul@14 | 19 | You should have received a copy of the GNU General Public License along with |
paul@14 | 20 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@0 | 21 | |
paul@0 | 22 | -------- |
paul@0 | 23 | |
paul@0 | 24 | References: |
paul@0 | 25 | |
paul@16 | 26 | RFC 5545: Internet Calendaring and Scheduling Core Object Specification |
paul@16 | 27 | (iCalendar) |
paul@18 | 28 | http://tools.ietf.org/html/rfc5545 |
paul@16 | 29 | |
paul@0 | 30 | RFC 2445: Internet Calendaring and Scheduling Core Object Specification |
paul@0 | 31 | (iCalendar) |
paul@18 | 32 | http://tools.ietf.org/html/rfc2445 |
paul@0 | 33 | |
paul@0 | 34 | RFC 2425: A MIME Content-Type for Directory Information |
paul@18 | 35 | http://tools.ietf.org/html/rfc2425 |
paul@0 | 36 | |
paul@0 | 37 | RFC 2426: vCard MIME Directory Profile |
paul@18 | 38 | http://tools.ietf.org/html/rfc2426 |
paul@0 | 39 | """ |
paul@0 | 40 | |
paul@4 | 41 | try: |
paul@4 | 42 | set |
paul@4 | 43 | except NameError: |
paul@4 | 44 | from sets import Set as set |
paul@4 | 45 | |
paul@0 | 46 | # Encoding-related imports. |
paul@0 | 47 | |
paul@0 | 48 | import base64, quopri |
paul@9 | 49 | import codecs |
paul@0 | 50 | |
paul@4 | 51 | # Tokenisation help. |
paul@4 | 52 | |
paul@4 | 53 | import re |
paul@4 | 54 | |
# Configuration.

# Character encoding used by the stream helper functions in this module when
# the caller does not supply an explicit encoding.

default_encoding = "utf-8"
paul@9 | 58 | |
class ParseError(Exception):

    "General parsing errors."

    pass
paul@39 | 64 | |
class WriteError(Exception):

    "General writing errors."

    pass
paul@66 | 70 | |
paul@7 | 71 | # Reader and parser classes. |
paul@0 | 72 | |
class Reader:

    "A simple class wrapping a file, providing simple pushback capabilities."

    def __init__(self, f, non_standard_newline=0):

        """
        Initialise the object with the file 'f'. If 'non_standard_newline' is
        set to a true value (unlike the default), lines ending with CR will be
        treated as complete lines.
        """

        self.f = f
        self.non_standard_newline = non_standard_newline

        # Pushed-back lines, used as a stack: the most recently pushed line is
        # the next one returned by readline.

        self.lines = []
        self.line_number = 1 # about to read line 1

    def close(self):

        "Close the reader by closing the underlying file."

        self.f.close()

    def pushback(self, line):

        """
        Push the given 'line' back so that the next line read is actually the
        given 'line' and not the next line from the underlying file. The line
        counter is moved back by one to compensate.
        """

        self.lines.append(line)
        self.line_number -= 1

    def readline(self):

        """
        If no pushed-back lines exist, read a line directly from the file.
        Otherwise, read from the list of pushed-back lines. An empty string is
        returned when the underlying file is exhausted. The line counter is
        advanced on every call.
        """

        self.line_number += 1

        if self.lines:
            return self.lines.pop()

        line = self.f.readline()

        # With non-standard newlines enabled, a trailing CR ends the line and
        # is completed with LF so that callers see a conventional ending.

        if self.non_standard_newline:
            if line.endswith("\r"):
                return line + "\n"
            return line

        # Sanity check for broken lines (\r instead of \r\n or \n): keep
        # appending input until the line no longer ends with a bare CR or the
        # file is exhausted.

        while line.endswith("\r"):
            s = self.f.readline()
            if not s:
                break
            line += s

        return line

    def read_content_line(self):

        """
        Read an entire content line, itself potentially consisting of many
        physical lines of text, returning a string. An empty string is returned
        when no more content lines are available.
        """

        # Skip blank lines. The else clause is only reached when readline
        # returns an empty string, signalling the end of the file.

        line = self.readline()
        while line:
            line_stripped = line.rstrip("\r\n")
            if line_stripped:
                break
            line = self.readline()
        else:
            return ""

        # Strip the line ending from the right end of each line.
        # For subsequent lines, remove the first whitespace character.
        # See section 4.1 of the iCalendar specification.

        lines = [line_stripped]

        line = self.readline()
        while line.startswith(" ") or line.startswith("\t"):
            lines.append(line[1:].rstrip("\r\n"))
            line = self.readline()

        # Since one line too many will have been read, push the line back into
        # the file. Nothing is pushed back at the end of the file.

        if line:
            self.pushback(line)

        return "".join(lines)

    def get_content_line(self):

        "Return a content line object for the current line."

        return ContentLine(self.read_content_line())
paul@8 | 172 | |
class ContentLine:

    "A content line which can be searched."

    # Characters ending a name or parameter value. The double quote is included
    # so that quoted regions can be tracked while searching.

    SEPARATORS = re.compile('[;:"]')
    SEPARATORS_PLUS_EQUALS = re.compile('[=;:"]')

    def __init__(self, text):

        "Initialise the content line with the given 'text'."

        self.text = text
        self.start = 0 # position from which the next search proceeds

    def __repr__(self):
        return "ContentLine(%r)" % self.text

    def get_remaining(self):

        "Get the remaining text from the content line."

        return self.text[self.start:]

    def search(self, targets):

        """
        Find one of the 'targets' in the text, returning the string from the
        current position up to the target found, along with the target string,
        using a tuple of the form (string, target). If no target was found,
        return the entire string together with a target of None.

        Targets occurring between double quotes are ignored. The current
        position is moved past the target found, or to the end of the text
        where no target was found.

        The 'targets' parameter must be a regular expression object or an object
        compatible with the API of such objects.
        """

        text = self.text
        start = pos = self.start
        length = len(text)
        in_quoted_region = 0

        # Process the text, looking for the targets.

        while pos < length:
            match = targets.search(text, pos)

            # Where nothing matches, end the search.

            if match is None:
                break

            # Where a double quote matches, toggle the region state.

            if match.group() == '"':
                in_quoted_region = not in_quoted_region

            # Where something else matches outside a region, stop searching.

            elif not in_quoted_region:
                self.start = match.end()
                return text[start:match.start()], match.group()

            # Otherwise, keep looking beyond the match just examined.

            pos = match.end()

        # Where no more input can provide the targets, return a special result.

        self.start = length
        return text[start:], None
paul@0 | 251 | |
class StreamParser:

    "A stream parser for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the parser for the given file 'f', which must provide a
        'line_number' attribute together with 'get_content_line' and 'close'
        methods.
        """

        self.f = f

    def close(self):

        "Close the reader."

        self.f.close()

    def __iter__(self):

        "Return self as the iterator."

        return self

    def next(self):

        """
        Return the next content item in the file as a tuple of the form
        (name, parameters, values). StopIteration is raised when no more
        content is available.
        """

        return self.parse_content_line()

    def __next__(self):

        "Support the Python 3 iterator protocol by delegating to 'next'."

        return self.next()

    def decode_content(self, name, value):

        """
        Decode for property 'name' the given 'value', replacing quoted
        characters: carriage returns are removed and the sequences \\N and \\n
        become newline characters.
        """

        return value.replace("\r", "").replace("\\N", "\n").replace("\\n", "\n")

    # Internal methods.

    def parse_content_line(self):

        """
        Return the name, parameters and value information for the current
        content line in the file being parsed.

        StopIteration is raised where no content line remains. ValueError is
        raised, with the line number and content line as arguments, where the
        line provides no ":" separator introducing a value.
        """

        f = self.f
        line_number = f.line_number
        line = f.get_content_line()

        # Read the property name.

        name, sep = line.search(line.SEPARATORS)
        name = name.strip()

        # An empty line without any separator indicates the end of the input.

        if not name and sep is None:
            raise StopIteration

        # Read the parameters.

        parameters = {}

        while sep == ";":

            # Find the actual modifier.

            parameter_name, sep = line.search(line.SEPARATORS_PLUS_EQUALS)
            parameter_name = parameter_name.strip()

            # Parameters given without "=" are recorded with a value of None.

            if sep == "=":
                parameter_value, sep = line.search(line.SEPARATORS)
                parameter_value = parameter_value.strip()
            else:
                parameter_value = None

            # Record the parameter, replacing any earlier parameter having the
            # same name.

            parameters[parameter_name] = parameter_value

        # Get the value content.

        if sep != ":":
            raise ValueError(line_number, line)

        # Obtain and decode the value.

        value = self.decode(name, parameters, line.get_remaining())

        return name, parameters, value

    def decode(self, name, parameters, value):

        """
        Decode using 'name' and 'parameters' the given 'value'.

        Quoted-printable values are returned as text decoded using the CHARSET
        parameter (or iso-8859-1 if absent), base64 values are returned as a
        byte string, and all other values are returned with only quoted
        characters replaced.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        value = self.decode_content(name, value)

        if encoding == "QUOTED-PRINTABLE":
            return quopri.decodestring(value).decode(charset or "iso-8859-1")
        elif encoding == "BASE64":

            # b64decode is available in Python 2 and 3, unlike decodestring
            # which has been removed from recent Python versions.

            return base64.b64decode(value)
        else:
            return value
paul@0 | 360 | |
class ParserBase:

    """
    An abstract parser for content in vCard/vCalendar/iCalendar-like formats.

    Subclasses must provide 'startComponent', 'endComponent' and
    'handleProperty' methods which are called as content is parsed.
    """

    def __init__(self):

        "Initialise the parser."

        # Names of the components opened using BEGIN and not yet closed.

        self.names = []

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', using 'parser_cls' instead of
        StreamParser to obtain content items if specified.

        ParseError is raised where an END declaration does not match the most
        recent BEGIN declaration or where no component is open.
        """

        parser = (parser_cls or StreamParser)(f)

        for name, parameters, value in parser:

            if name == "BEGIN":
                self.names.append(value)
                self.startComponent(value, parameters)

            elif name == "END":

                # Report a stray END as a parsing error rather than letting an
                # IndexError escape from the empty stack.

                if not self.names:
                    raise ParseError("END declaration (%r) without BEGIN declaration at line %d." % (
                        value, f.line_number))

                start_name = self.names.pop()
                if start_name != value:
                    raise ParseError("Mismatch in BEGIN and END declarations (%r and %r) at line %d." % (
                        start_name, value, f.line_number))

                self.endComponent(value)

            else:
                self.handleProperty(name, parameters, value)
paul@2 | 393 | |
class Parser(ParserBase):

    """
    A SAX-like parser for vCard/vCalendar/iCalendar-like formats, building
    nodes of the form (name, parameters, value) where the value of a component
    is a list of child nodes.
    """

    def __init__(self):
        ParserBase.__init__(self)

        # The stack of components currently being populated.

        self.components = []

    def startComponent(self, name, parameters):

        """
        Add the component with the given 'name' and 'parameters', recording an
        empty list of children as part of the component's content. The new
        component is attached to any current component, becomes the current
        component, and is returned.
        """

        component = self.handleProperty(name, parameters)
        self.components.append(component)
        return component

    def endComponent(self, name):

        """
        End the component with the given 'name' by removing it from the active
        component stack. If only one component exists on the stack, retain it
        for later inspection. The ended component is returned, or None if no
        component is active.
        """

        if len(self.components) > 1:
            return self.components.pop()

        # Or return the only element.

        elif self.components:
            return self.components[0]

    def handleProperty(self, name, parameters, value=None):

        """
        Record the property with the given 'name', 'parameters' and optional
        'value' as part of the current component's children, returning the
        node created for the property.
        """

        component = self.makeComponent(name, parameters, value)
        self.attachComponent(component)
        return component

    # Component object construction/manipulation methods.

    def attachComponent(self, component):

        """
        Attach the given 'component' to its parent, this being the component
        at the top of the stack. Without an active component, nothing is done.
        """

        if self.components:
            component_name, component_parameters, component_children = self.components[-1]
            component_children.append(component)

    def makeComponent(self, name, parameters, value=None):

        """
        Make a component object from the given 'name', 'parameters' and optional
        'value'.
        """

        # NOTE(review): any false value, including an empty string, is replaced
        # by a new list, making an empty property resemble an empty component.

        return (name, parameters, value or [])

    # Public methods.

    def parse(self, f, parser_cls=None):

        """
        Parse the contents of the file 'f', returning the first component
        found. ParseError is raised if no component was found.
        """

        ParserBase.parse(self, f, parser_cls)
        try:
            return self.components[0]
        except IndexError:
            raise ParseError("No vContent component found in file.")
paul@0 | 470 | |
paul@7 | 471 | # Writer classes. |
paul@7 | 472 | |
class Writer:

    "A simple class wrapping a file, providing simple output capabilities."

    default_line_length = 76

    def __init__(self, write, line_length=None):

        """
        Initialise the object with the given 'write' operation. If 'line_length'
        is set, the length of written lines will conform to the specified value
        instead of the default value.

        ValueError is raised for line lengths below 2 since a continuation line
        starts with a space and must still have room for one character.
        """

        self._write = write
        self.line_length = line_length or self.default_line_length

        # A shorter line would leave no space after the leading space of a
        # continuation line, and folding would never make progress.

        if self.line_length < 2:
            raise ValueError("Line length must be at least 2: %r" % (self.line_length,))

        # The number of characters already written on the current line.

        self.char_offset = 0

    def write(self, text):

        """
        Write the 'text' to the file, folding the output by starting a new line
        with a leading space wherever the line length would be exceeded.
        """

        write = self._write
        line_length = self.line_length

        i = 0
        remaining = len(text)

        while remaining:
            space = line_length - self.char_offset

            # Fill the current line and continue on a new line whose leading
            # space already occupies one character position.

            if remaining > space:
                write(text[i:i + space])
                write("\r\n ")
                self.char_offset = 1
                i += space
                remaining -= space

            # Otherwise, the rest of the text fits on the current line.

            else:
                write(text[i:])
                self.char_offset += remaining
                i += remaining
                remaining = 0

    def end_line(self):

        "End the current content line, doing nothing if the line is empty."

        if self.char_offset > 0:
            self.char_offset = 0
            self._write("\r\n")
paul@8 | 522 | |
class StreamWriter:

    "A stream writer for content in vCard/vCalendar/iCalendar-like formats."

    def __init__(self, f):

        """
        Initialise the stream writer with the given 'f' stream object, which
        must provide 'write' and 'end_line' methods.
        """

        self.f = f

    def append(self, record):

        "Write the given 'record', a (name, parameters, value) tuple."

        self.write(*record)

    def write(self, name, parameters, value):

        """
        Write a content line, serialising the given 'name', 'parameters' and
        'value' information.
        """

        self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))

    # Internal methods.

    def write_content_line(self, name, encoded_parameters, encoded_value):

        """
        Write a content line for the given 'name', 'encoded_parameters' and
        'encoded_value' information. Parameters having a value of None are
        written without "=" and without any value.
        """

        f = self.f

        f.write(name)
        for param_name, param_value in encoded_parameters.items():
            f.write(";")
            f.write(param_name)

            # Valueless parameters are represented using None.

            if param_value is not None:
                f.write("=")
                f.write(param_value)
        f.write(":")
        f.write(encoded_value)
        f.end_line()

    def encode_quoted_parameter_value(self, value):

        "Encode the given 'value' by enclosing it in double quotes."

        return '"%s"' % value

    def encode_value(self, name, parameters, value):

        """
        Encode using 'name' and 'parameters' the given 'value' so that the
        resulting encoded form employs any specified character encodings.

        WriteError is raised where the value cannot be encoded.
        """

        encoding = parameters.get("ENCODING")
        charset = parameters.get("CHARSET")

        try:
            if encoding == "QUOTED-PRINTABLE":
                value = quopri.encodestring(value.encode(charset or "iso-8859-1"))

                # The encoded form only employs ASCII characters and is
                # converted to text for writing.

                value = value.decode("ascii")

            elif encoding == "BASE64":

                # encodestring has been removed from recent Python versions
                # whereas encodebytes is absent from Python 2.

                encode = getattr(base64, "encodebytes", None) or base64.encodestring
                value = encode(value).decode("ascii")

            return self.encode_content(name, value)

        # Values of unsuitable types cause either kind of error.

        except (TypeError, AttributeError):
            raise WriteError("Property %r value with parameters %r cannot be encoded: %r" % (name, parameters, value))

    # Overrideable methods.

    def encode_parameters(self, parameters):

        """
        Encode the given 'parameters' according to the vCalendar specification,
        returning a new dictionary. Values of None are preserved.
        """

        encoded_parameters = {}

        for param_name, param_value in parameters.items():

            # Basic format support merely involves quoting values which seem to
            # need it. Other more specific formats may define exactly which
            # parameters should be quoted.

            if param_value is not None and ContentLine.SEPARATORS.search(param_value):
                param_value = self.encode_quoted_parameter_value(param_value)

            encoded_parameters[param_name] = param_value

        return encoded_parameters

    def encode_content(self, name, value):

        """
        Encode for property 'name' the given 'value', quoting characters.
        A false value such as None is encoded as an empty string.
        """

        return (value or "").replace("\n", "\\n")
paul@11 | 620 | |
paul@9 | 621 | # Utility functions. |
paul@9 | 622 | |
def is_input_stream(stream_or_string):

    """
    Return whether 'stream_or_string' appears to be a readable stream, this
    being indicated by the presence of a 'read' attribute.
    """

    return hasattr(stream_or_string, "read")
paul@9 | 625 | |
def get_input_stream(stream_or_string, encoding=None):

    """
    Return a stream providing Unicode data for 'stream_or_string', which is
    either a stream or a filename. Streams already wrapped by the codecs module
    are returned unchanged, other streams are wrapped in a reader for the
    given 'encoding' (or the default encoding), and files are opened using
    that encoding.
    """

    if is_input_stream(stream_or_string):

        # Objects returned by codecs.open are StreamReaderWriter instances
        # which do not inherit from StreamReader but already decode their
        # input: wrapping them again would decode the data twice.

        if isinstance(stream_or_string, (codecs.StreamReader, codecs.StreamReaderWriter)):
            return stream_or_string
        else:
            return codecs.getreader(encoding or default_encoding)(stream_or_string)
    else:
        return codecs.open(stream_or_string, encoding=(encoding or default_encoding))
paul@9 | 634 | |
def get_output_stream(stream_or_string, encoding=None):

    """
    Return a stream accepting Unicode data for 'stream_or_string', which is
    either a stream or a filename. Streams already wrapped by the codecs module
    are returned unchanged, other streams are wrapped in a writer for the
    given 'encoding' (or the default encoding), and files are opened for
    writing using that encoding.
    """

    if hasattr(stream_or_string, "write"):

        # Objects returned by codecs.open are StreamReaderWriter instances
        # which do not inherit from StreamWriter but already encode their
        # output: wrapping them again would encode the data twice.

        if isinstance(stream_or_string, (codecs.StreamWriter, codecs.StreamReaderWriter)):
            return stream_or_string
        else:
            return codecs.getwriter(encoding or default_encoding)(stream_or_string)
    else:
        return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))
paul@9 | 643 | |
def items_to_dict(items, sections=None):

    """
    Return the given 'items' as a dictionary mapping names to lists of tuples of
    the form (value, attributes). Where 'sections' is provided, only items whose
    names occur in the given 'sections' collection will be treated as groups or
    sections of definitions.
    """

    d = {}
    for name, attr, value in items:

        # Collect all items sharing a name in a single list.

        entries = d.setdefault(name, [])

        # List values are converted recursively unless excluded by 'sections'.

        if isinstance(value, list) and (not sections or name in sections):
            entries.append((items_to_dict(value, sections), attr))
        else:
            entries.append((value, attr))
    return d
paul@40 | 662 | |
def dict_to_items(d):

    """
    Return 'd' converted to a list of items suitable for serialisation using
    iterwrite. Each name in 'd' maps either to a list of (value, attributes)
    tuples or to a single such tuple, with dictionary values being converted
    recursively.
    """

    items = []
    for name, value in d.items():

        # A single (value, attributes) tuple is handled like a list with one
        # element, so that plain values are kept and not treated as
        # dictionaries.

        if not isinstance(value, list):
            value = [value]

        for v, a in value:
            if isinstance(v, dict):
                items.append((name, a, dict_to_items(v)))
            else:
                items.append((name, a, v))
    return items
paul@40 | 682 | |
paul@0 | 683 | # Public functions. |
paul@0 | 684 | |
def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' can be used to specify a parser class to be
    instantiated instead of Parser.

    As a result of parsing the resource, the root node of the imported resource
    is returned.
    """

    stream = get_input_stream(stream_or_string, encoding)
    reader = Reader(stream, non_standard_newline)

    # Parse using the reader.

    try:
        parser = (parser_cls or Parser)()
        return parser.parse(reader)

    # Close any streams opened here: streams supplied by the caller remain
    # open.

    finally:
        if not is_input_stream(stream_or_string):
            reader.close()
paul@9 | 718 | |
def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):

    """
    Parse the resource data found through the use of the 'stream_or_string',
    which is either a stream providing Unicode data (the codecs module can be
    used to open files or to wrap streams in order to provide Unicode data) or a
    filename identifying a file to be parsed.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be parsed.

    The optional 'non_standard_newline' can be set to a true value (unlike the
    default) in order to attempt to process files with CR as the end of line
    character.

    The optional 'parser_cls' can be used to specify a parser class to be
    instantiated with the reader instead of StreamParser.

    An iterator is returned which provides event tuples describing parsing
    events of the form (name, parameters, value). No stream is closed by this
    function: the returned parser should be closed by the caller when it is no
    longer needed.
    """

    stream = get_input_stream(stream_or_string, encoding)
    reader = Reader(stream, non_standard_newline)
    return (parser_cls or StreamParser)(reader)
paul@5 | 742 | |
def iterwrite(stream_or_string=None, write=None, encoding=None, line_length=None, writer_cls=None):

    """
    Return a writer which will either send data to the resource found through
    the use of 'stream_or_string' or using the given 'write' operation. Where
    both are given, 'stream_or_string' takes precedence.

    The 'stream_or_string' parameter may be either a stream accepting Unicode
    data (the codecs module can be used to open files or to wrap streams in
    order to accept Unicode data) or a filename identifying a file to be
    written.

    The optional 'encoding' can be used to specify the character encoding used
    by the file to be written.

    The optional 'line_length' can be used to specify how long lines should be
    in the resulting data.

    The optional 'writer_cls' can be used to specify a writer class to be
    instantiated instead of StreamWriter.

    IOError is raised if neither 'stream_or_string' nor 'write' is given.
    """

    if stream_or_string:
        stream = get_output_stream(stream_or_string, encoding)
        _writer = Writer(stream.write, line_length)
    elif write:
        _writer = Writer(write, line_length)
    else:
        raise IOError("No stream, filename or write operation specified.")

    return (writer_cls or StreamWriter)(_writer)
paul@8 | 770 | |
def to_dict(node, sections=None):

    """
    Return the 'node', a tuple of the form (name, attributes, items), converted
    to a dictionary mapping the name to a tuple of the form (value, attributes).
    Where the items are provided as a list, the value is the dictionary
    produced for them by items_to_dict using any given 'sections'; otherwise,
    the items are retained as the value.
    """

    name, attr, items = node

    # An explicit test is used so that an empty list yields an empty dictionary
    # instead of being passed through unchanged.

    if isinstance(items, list):
        items = items_to_dict(items, sections)

    return {name : (items, attr)}
paul@40 | 777 | |
def to_node(d):

    """
    Return 'd' converted to an items-based representation, this being the first
    item produced for the dictionary by dict_to_items.
    """

    return dict_to_items(d)[0]
paul@40 | 783 | |
paul@0 | 784 | # vim: tabstop=4 expandtab shiftwidth=4 |