1.1 --- a/tests/test_calendar_stream.py Thu Jan 15 23:39:48 2009 +0100
1.2 +++ b/tests/test_calendar_stream.py Sat Mar 14 22:13:22 2009 +0100
1.3 @@ -10,9 +10,26 @@
1.4 w = vCalendar.iterwrite(out)
1.5 for name, parameters, value in doc:
1.6 print "%r, %r, %r" % (name, parameters, value)
1.7 - w.write_content_line(name, parameters, value)
1.8 + w.write(name, parameters, value)
1.9 finally:
1.10 out.close()
1.11 f.close()
1.12
1.13 +print "--------"
1.14 +
1.15 +f = codecs.open(os.path.join(this_dir, "test.ics"), encoding="utf-8")
1.16 +f2 = codecs.open("tmp.ics", encoding="utf-8")
1.17 +try:
1.18 + doc = vCalendar.iterparse(f)
1.19 + doc2 = vCalendar.iterparse(f2)
1.20 + for (name, parameters, value), (name2, parameters2, value2) in zip(doc, doc2):
1.21 + print "%r, %r, %r" % (name, parameters, value)
1.22 + print "%r, %r, %r" % (name2, parameters2, value2)
1.23 + assert name == name2
1.24 + assert parameters == parameters2
1.25 + assert value == value2
1.26 +finally:
1.27 + f2.close()
1.28 + f.close()
1.29 +
1.30 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/tests/test_card_stream.py Thu Jan 15 23:39:48 2009 +0100
2.2 +++ b/tests/test_card_stream.py Sat Mar 14 22:13:22 2009 +0100
2.3 @@ -10,9 +10,26 @@
2.4 w = vContent.iterwrite(out)
2.5 for name, parameters, value in doc:
2.6 print "%r, %r, %r" % (name, parameters, value)
2.7 - w.write_content_line(name, parameters, value)
2.8 + w.write(name, parameters, value)
2.9 finally:
2.10 out.close()
2.11 f.close()
2.12
2.13 +print "--------"
2.14 +
2.15 +f = codecs.open(os.path.join(this_dir, "test.vcf"), encoding="utf-8")
2.16 +f2 = codecs.open("tmp.vcf", encoding="utf-8")
2.17 +try:
2.18 + doc = vContent.iterparse(f)
2.19 + doc2 = vContent.iterparse(f2)
2.20 + for (name, parameters, value), (name2, parameters2, value2) in zip(doc, doc2):
2.21 + print "%r, %r, %r" % (name, parameters, value)
2.22 + print "%r, %r, %r" % (name2, parameters2, value2)
2.23 + assert name == name2
2.24 + assert parameters == parameters2
2.25 + assert value == value2
2.26 +finally:
2.27 + f2.close()
2.28 + f.close()
2.29 +
2.30 # vim: tabstop=4 expandtab shiftwidth=4
3.1 --- a/vCalendar.py Thu Jan 15 23:39:48 2009 +0100
3.2 +++ b/vCalendar.py Sat Mar 14 22:13:22 2009 +0100
3.3 @@ -3,7 +3,7 @@
3.4 """
3.5 Parsing of vCalendar and iCalendar files.
3.6
3.7 -Copyright (C) 2008 Paul Boddie <paul@boddie.org.uk>
3.8 +Copyright (C) 2008, 2009 Paul Boddie <paul@boddie.org.uk>
3.9
3.10 This program is free software; you can redistribute it and/or modify it under
3.11 the terms of the GNU Lesser General Public License as published by the Free
3.12 @@ -113,22 +113,7 @@
3.13
3.14 "A stream writer specifically for vCard."
3.15
3.16 - def write(self, name, parameters, value):
3.17 -
3.18 - """
3.19 - Write a content line for the given 'name', 'parameters' and 'value'
3.20 - information.
3.21 - """
3.22 -
3.23 - vContent.StreamWriter.write(self, name, self.encode_parameters(parameters), value)
3.24 -
3.25 - # Internal methods.
3.26 -
3.27 - def encode_quoted_value(self, value):
3.28 -
3.29 - "Encode the given 'value'."
3.30 -
3.31 - return '"%s"' % value
3.32 + # Overridden methods.
3.33
3.34 def encode_parameters(self, parameters):
3.35
3.36 @@ -140,7 +125,7 @@
3.37
3.38 for param_name, param_value in parameters.items():
3.39 if param_name in QUOTED_PARAMETERS:
3.40 - param_value = self.encode_quoted_value(param_value)
3.41 + param_value = self.encode_quoted_parameter_value(param_value)
3.42 separator = '","'
3.43 else:
3.44 separator = ","
3.45 @@ -152,7 +137,7 @@
3.46
3.47 def encode_content(self, value):
3.48
3.49 - "Encode the given 'value', replacing separator characters."
3.50 + "Encode the given 'value', quoting characters."
3.51
3.52 # Replace quoted characters (see 4.3.11 in RFC 2445).
3.53
3.54 @@ -161,7 +146,7 @@
3.55
3.56 # Public functions.
3.57
3.58 -def parse(stream_or_string, non_standard_newline=0):
3.59 +def parse(stream_or_string, encoding=None, non_standard_newline=0):
3.60
3.61 """
3.62 Parse the resource data found through the use of the 'stream_or_string',
3.63 @@ -169,6 +154,9 @@
3.64 used to open files or to wrap streams in order to provide Unicode data) or a
3.65 filename identifying a file to be parsed.
3.66
3.67 + The optional 'encoding' can be used to specify the character encoding used
3.68 + by the file to be parsed.
3.69 +
3.70 The optional 'non_standard_newline' can be set to a true value (unlike the
3.71 default) in order to attempt to process files with CR as the end of line
3.72 character.
3.73 @@ -177,9 +165,9 @@
3.74 is returned.
3.75 """
3.76
3.77 - return vContent.parse(stream_or_string, non_standard_newline, vCalendarParser)
3.78 + return vContent.parse(stream_or_string, encoding, non_standard_newline, vCalendarParser)
3.79
3.80 -def iterparse(stream_or_string, non_standard_newline=0):
3.81 +def iterparse(stream_or_string, encoding=None, non_standard_newline=0):
3.82
3.83 """
3.84 Parse the resource data found through the use of the 'stream_or_string',
3.85 @@ -187,6 +175,9 @@
3.86 used to open files or to wrap streams in order to provide Unicode data) or a
3.87 filename identifying a file to be parsed.
3.88
3.89 + The optional 'encoding' can be used to specify the character encoding used
3.90 + by the file to be parsed.
3.91 +
3.92 The optional 'non_standard_newline' can be set to a true value (unlike the
3.93 default) in order to attempt to process files with CR as the end of line
3.94 character.
3.95 @@ -195,9 +186,23 @@
3.96 events of the form (name, parameters, value).
3.97 """
3.98
3.99 - return vContent.iterparse(stream_or_string, non_standard_newline, vCalendarStreamParser)
3.100 + return vContent.iterparse(stream_or_string, encoding, non_standard_newline, vCalendarStreamParser)
3.101 +
3.102 +def iterwrite(stream_or_string, encoding=None, line_length=None):
3.103
3.104 -def iterwrite(stream_or_string, line_length=None):
3.105 - return vContent.iterwrite(stream_or_string, line_length, vCalendarStreamWriter)
3.106 + """
3.107 + Return a writer which will send data to the resource found through the use
3.108 + of 'stream_or_string', which is either a stream accepting Unicode data (the
3.109 + codecs module can be used to open files or to wrap streams in order to
3.110 + accept Unicode data) or a filename identifying a file to be written.
3.111 +
3.112 + The optional 'encoding' can be used to specify the character encoding used
3.113 + by the file to be written.
3.114 +
3.115 + The optional 'line_length' can be used to specify how long lines should be
3.116 + in the resulting data.
3.117 + """
3.118 +
3.119 + return vContent.iterwrite(stream_or_string, encoding, line_length, vCalendarStreamWriter)
3.120
3.121 # vim: tabstop=4 expandtab shiftwidth=4
4.1 --- a/vContent.py Thu Jan 15 23:39:48 2009 +0100
4.2 +++ b/vContent.py Sat Mar 14 22:13:22 2009 +0100
4.3 @@ -3,7 +3,7 @@
4.4 """
4.5 Parsing of vCard, vCalendar and iCalendar files.
4.6
4.7 -Copyright (C) 2005, 2006, 2007, 2008 Paul Boddie <paul@boddie.org.uk>
4.8 +Copyright (C) 2005, 2006, 2007, 2008, 2009 Paul Boddie <paul@boddie.org.uk>
4.9
4.10 This program is free software; you can redistribute it and/or modify it under
4.11 the terms of the GNU Lesser General Public License as published by the Free
4.12 @@ -97,7 +97,7 @@
4.13 if self.lines:
4.14 return self.lines.pop()
4.15 else:
4.16 - # NOTE: Sanity check for broken lines (\r instead of \r\n or \n).
4.17 + # Sanity check for broken lines (\r instead of \r\n or \n).
4.18 line = self.f.readline()
4.19 while line.endswith("\r") and not self.non_standard_newline:
4.20 line += self.f.readline()
4.21 @@ -110,7 +110,7 @@
4.22
4.23 """
4.24 Read an entire content line, itself potentially consisting of many
4.25 - physical lines of text.
4.26 + physical lines of text, returning a string.
4.27 """
4.28
4.29 # Skip blank lines.
4.30 @@ -174,6 +174,9 @@
4.31 current position up to the target found, along with the target string,
4.32 using a tuple of the form (string, target). If no target was found,
4.33 return the entire string together with a target of None.
4.34 +
4.35 + The 'targets' parameter must be a regular expression object or an object
4.36 + compatible with the API of such objects.
4.37 """
4.38
4.39 text = self.text
4.40 @@ -505,36 +508,48 @@
4.41
4.42 self.f.close()
4.43
4.44 - def write_content_line(self, name, parameters, value):
4.45 + def write(self, name, parameters, value):
4.46
4.47 """
4.48 - Write a content line for the given 'name', 'parameters' and 'value'
4.49 - information.
4.50 + Write a content line, serialising the given 'name', 'parameters' and
4.51 + 'value' information.
4.52 + """
4.53 +
4.54 + self.write_content_line(name, self.encode_parameters(parameters), self.encode_value(name, parameters, value))
4.55 +
4.56 + # Internal methods.
4.57 +
4.58 + def write_content_line(self, name, encoded_parameters, encoded_value):
4.59 +
4.60 + """
4.61 + Write a content line for the given 'name', 'encoded_parameters' and
4.62 + 'encoded_value' information.
4.63 """
4.64
4.65 f = self.f
4.66
4.67 f.write(name)
4.68 - for parameter_name, parameter_value in parameters.items():
4.69 + for param_name, param_value in encoded_parameters.items():
4.70 f.write(";")
4.71 - f.write(parameter_name)
4.72 + f.write(param_name)
4.73 f.write("=")
4.74 - f.write(parameter_value)
4.75 + f.write(param_value)
4.76 f.write(":")
4.77 - f.write(self.encode(name, parameters, value))
4.78 + f.write(encoded_value)
4.79 f.end_line()
4.80
4.81 - def encode_content(self, value):
4.82 + def encode_quoted_parameter_value(self, value):
4.83
4.84 - "Encode the given 'value', quoting characters."
4.85 + "Encode the given 'value' by enclosing it in double quotes."
4.86
4.87 - return value.replace("\n", "\\n")
4.88 + return '"%s"' % value
4.89
4.90 - # Internal methods.
4.91 + def encode_value(self, name, parameters, value):
4.92
4.93 - def encode(self, name, parameters, value):
4.94 -
4.95 - "Encode using 'name' and 'parameters' the given 'value'."
4.96 + """
4.97 + Encode using 'name' and 'parameters' the given 'value' so that the
4.98 + resulting encoded form employs any specified character encodings.
4.99 + """
4.100
4.101 encoding = parameters.get("ENCODING")
4.102 charset = parameters.get("CHARSET")
4.103 @@ -546,26 +561,55 @@
4.104
4.105 return self.encode_content(value)
4.106
4.107 + # Overrideable methods.
4.108 +
4.109 + def encode_parameters(self, parameters):
4.110 +
4.111 + """
4.112 + Encode the given 'parameters', quoting any values containing separators.
4.113 + """
4.114 +
4.115 + encoded_parameters = {}
4.116 +
4.117 + for param_name, param_value in parameters.items():
4.118 +
4.119 + # Basic format support merely involves quoting values which seem to
4.120 + # need it. Other more specific formats may define exactly which
4.121 + # parameters should be quoted.
4.122 +
4.123 + if ContentLine.SEPARATORS.search(param_value):
4.124 + param_value = self.encode_quoted_parameter_value(param_value)
4.125 +
4.126 + encoded_parameters[param_name] = param_value
4.127 +
4.128 + return encoded_parameters
4.129 +
4.130 + def encode_content(self, value):
4.131 +
4.132 + "Encode the given 'value', quoting characters."
4.133 +
4.134 + return value.replace("\n", "\\n")
4.135 +
4.136 # Utility functions.
4.137
4.138 def is_input_stream(stream_or_string):
4.139 return hasattr(stream_or_string, "read")
4.140
4.141 -def get_input_stream(stream_or_string):
4.142 +def get_input_stream(stream_or_string, encoding=None):
4.143 if is_input_stream(stream_or_string):
4.144 return stream_or_string
4.145 else:
4.146 - return codecs.open(stream_or_string, encoding=default_encoding)
4.147 + return codecs.open(stream_or_string, encoding=(encoding or default_encoding))
4.148
4.149 -def get_output_stream(stream_or_string):
4.150 +def get_output_stream(stream_or_string, encoding=None):
4.151 if hasattr(stream_or_string, "write"):
4.152 return stream_or_string
4.153 else:
4.154 - return codecs.open(stream_or_string, "w", encoding=default_encoding)
4.155 + return codecs.open(stream_or_string, "w", encoding=(encoding or default_encoding))
4.156
4.157 # Public functions.
4.158
4.159 -def parse(stream_or_string, non_standard_newline=0, parser_cls=None):
4.160 +def parse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):
4.161
4.162 """
4.163 Parse the resource data found through the use of the 'stream_or_string',
4.164 @@ -573,6 +617,9 @@
4.165 used to open files or to wrap streams in order to provide Unicode data) or a
4.166 filename identifying a file to be parsed.
4.167
4.168 + The optional 'encoding' can be used to specify the character encoding used
4.169 + by the file to be parsed.
4.170 +
4.171 The optional 'non_standard_newline' can be set to a true value (unlike the
4.172 default) in order to attempt to process files with CR as the end of line
4.173 character.
4.174 @@ -581,7 +628,7 @@
4.175 is returned.
4.176 """
4.177
4.178 - stream = get_input_stream(stream_or_string)
4.179 + stream = get_input_stream(stream_or_string, encoding)
4.180 reader = Reader(stream, non_standard_newline)
4.181
4.182 # Parse using the reader.
4.183 @@ -596,7 +643,7 @@
4.184 if not is_input_stream(stream_or_string):
4.185 reader.close()
4.186
4.187 -def iterparse(stream_or_string, non_standard_newline=0, parser_cls=None):
4.188 +def iterparse(stream_or_string, encoding=None, non_standard_newline=0, parser_cls=None):
4.189
4.190 """
4.191 Parse the resource data found through the use of the 'stream_or_string',
4.192 @@ -604,6 +651,9 @@
4.193 used to open files or to wrap streams in order to provide Unicode data) or a
4.194 filename identifying a file to be parsed.
4.195
4.196 + The optional 'encoding' can be used to specify the character encoding used
4.197 + by the file to be parsed.
4.198 +
4.199 The optional 'non_standard_newline' can be set to a true value (unlike the
4.200 default) in order to attempt to process files with CR as the end of line
4.201 character.
4.202 @@ -612,13 +662,27 @@
4.203 events of the form (name, parameters, value).
4.204 """
4.205
4.206 - stream = get_input_stream(stream_or_string)
4.207 + stream = get_input_stream(stream_or_string, encoding)
4.208 reader = Reader(stream, non_standard_newline)
4.209 parser = (parser_cls or StreamParser)(reader)
4.210 return parser
4.211
4.212 -def iterwrite(stream_or_string, line_length=None, writer_cls=None):
4.213 - stream = get_output_stream(stream_or_string)
4.214 +def iterwrite(stream_or_string, encoding=None, line_length=None, writer_cls=None):
4.215 +
4.216 + """
4.217 + Return a writer which will send data to the resource found through the use
4.218 + of 'stream_or_string', which is either a stream accepting Unicode data (the
4.219 + codecs module can be used to open files or to wrap streams in order to
4.220 + accept Unicode data) or a filename identifying a file to be written.
4.221 +
4.222 + The optional 'encoding' can be used to specify the character encoding used
4.223 + by the file to be written.
4.224 +
4.225 + The optional 'line_length' can be used to specify how long lines should be
4.226 + in the resulting data.
4.227 + """
4.228 +
4.229 + stream = get_output_stream(stream_or_string, encoding)
4.230 _writer = Writer(stream, line_length)
4.231 writer = (writer_cls or StreamWriter)(_writer)
4.232 return writer