# HG changeset patch # User paulb # Date 1094079100 0 # Node ID 4084122d73cc8f9cab7832543ef4148d7f188bed # Parent 629909b81c9fe6ed9e843c7f720ecf825d934f46 [project @ 2004-09-01 22:51:40 by paulb] Changed the ContentType class so that objects have a media_type attribute (previously content_type) and can be converted to an appropriate header value using the str function. This removes the need for a format_content_type method in the Transaction class and changes the implementation of the parse_content_type method. Added a default character set (default_charset) attribute to the Transaction class to cover cases where no character set has been specified in the content type. This permits naive usage of the stream, at least. Fixed the get_response_stream method in Transaction so that only the response content type (and the default character set) controls the encoding of the output stream. If the request content type's character set is secretly involved, this can cause inconsistencies with the stated response content type (where no character set may have been stated). Therefore, it is left to applications to set the response content type correctly. diff -r 629909b81c9f -r 4084122d73cc WebStack/BaseHTTPRequestHandler.py --- a/WebStack/BaseHTTPRequestHandler.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/BaseHTTPRequestHandler.py Wed Sep 01 22:51:40 2004 +0000 @@ -53,7 +53,7 @@ self.trans.send_response(self.response_code) if self.content_type is not None: - self.trans.send_header("Content-Type", self.format_content_type(self.content_type)) + self.trans.send_header("Content-Type", str(self.content_type)) for header, value in self.headers_out.items(): self.trans.send_header(self.format_header_value(header), self.format_header_value(value)) @@ -212,7 +212,7 @@ or a plain string (representing a file upload form field, for example). """ - encoding = encoding or self.get_content_type().charset or "iso-8859-1" + encoding = encoding or self.get_content_type().charset or self.default_charset if self.storage_body is None: self.storage_body = FieldStorage(fp=self.get_request_stream(), headers=self.get_headers(), @@ -277,12 +277,12 @@ """ # Return a stream which is later emptied into the real stream. - # Unicode can upset this operation. Using either the specified charset, - # the same charset as that used in the request, or a default encoding. + # Unicode can upset this operation. Using either the specified charset + # or a default encoding. - encoding = self.get_content_type().charset or "utf-8" if self.content_type: - encoding = self.content_type.charset or encoding + encoding = self.content_type.charset + encoding = encoding or self.default_charset return ConvertingStream(self.content, encoding) def get_response_code(self): diff -r 629909b81c9f -r 4084122d73cc WebStack/CGI.py --- a/WebStack/CGI.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/CGI.py Wed Sep 01 22:51:40 2004 +0000 @@ -62,7 +62,7 @@ self.output.write("Status: %s %s\n" % (self.response_code, "WebStack status")) if self.content_type is not None: - self.output.write("Content-type: %s\n" % self.format_content_type(self.content_type)) + self.output.write("Content-type: %s\n" % str(self.content_type)) for header, value in self.headers_out.items(): self.output.write("%s: %s\n" % (self.format_header_value(header), self.format_header_value(value)) @@ -206,7 +206,7 @@ or a plain string (representing a file upload form field, for example). """ - encoding = encoding or self.get_content_type().charset or "iso-8859-1" + encoding = encoding or self.get_content_type().charset or self.default_charset if self.storage_body is None: self.storage_body = FieldStorage(fp=self.get_request_stream(), keep_blank_values=1) @@ -266,12 +266,12 @@ """ # Return a stream which is later emptied into the real stream. - # Unicode can upset this operation. Using either the specified charset, - # the same charset as that used in the request, or a default encoding. + # Unicode can upset this operation. Using either the specified charset + # or a default encoding. - encoding = self.get_content_type().charset or "utf-8" if self.content_type: - encoding = self.content_type.charset or encoding + encoding = self.content_type.charset + encoding = encoding or self.default_charset return ConvertingStream(self.content, encoding) def get_response_code(self): diff -r 629909b81c9f -r 4084122d73cc WebStack/Generic.py --- a/WebStack/Generic.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/Generic.py Wed Sep 01 22:51:40 2004 +0000 @@ -19,17 +19,38 @@ "A container for content type information." - def __init__(self, content_type, charset=None, attributes=None): + def __init__(self, media_type, charset=None, **attributes): + + """ + Initialise the container with the given 'media_type', an optional + 'charset', and optional keyword attributes representing the key=value + pairs which qualify content types. + """ + + self.media_type = media_type + self.charset = charset + self.attributes = attributes + + def __str__(self): """ - Initialise the container with the given principal 'content_type', an - optional 'charset', and optional 'attributes' (a list of 2-tuples each - representing the key=value pairs which qualify content types). + Format the content type object, producing a string suitable for the + response header field. """ - self.content_type = content_type - self.charset = charset - self.attributes = attributes + l = [] + if self.media_type: + l.append(self.media_type) + if self.charset: + l.append("; ") + l.append("charset=%s" % self.charset) + for name, value in self.attributes.items(): + l.append("; ") + l.append("%s=%s" % (name, value)) + + # Make sure that only ASCII is used. + + return "".join(l).encode("US-ASCII") class Transaction: @@ -38,6 +59,10 @@ overridden. """ + # The default charset ties output together with body field interpretation. + + default_charset = "iso-8859-1" + def commit(self): """ @@ -57,40 +82,26 @@ """ if content_type_field is None: - return ContentType(None, "iso-8859-1") + return ContentType(None) l = content_type_field.split(";") - content_type_attributes = [] - content_type_charset = "iso-8859-1" + attributes = {} + charset = None # Find the charset and remember all other attributes. - content_type_str, attributes = l[0].strip(), l[1:] - - for attribute in attributes: - t = attribute.strip().split("=") - if t[0] == "charset" and len(t) > 1: - content_type_charset = t[1] - else: - content_type_attributes.append(t) - - return ContentType(content_type_str, content_type_charset, content_type_attributes) - - def format_content_type(self, content_type): + media_type, attributes_str = l[0].strip(), l[1:] - """ - Format the given 'content_type' object, producing a string suitable for - the response header field. - """ + for attribute_str in attributes_str: + t = attribute_str.split("=") + if len(t) > 1: + name, value = t[0].strip(), t[1].strip() + if name == "charset": + charset = value + else: + attributes[name] = value - if content_type.charset: - field = "%s; charset=%s" % (content_type.content_type, content_type.charset) - else: - field = content_type.content_type - - # Make sure that only ASCII is used in the header. - - return field.encode("US-ASCII") + return ContentType(media_type, charset, **attributes) def format_header_value(self, value): diff -r 629909b81c9f -r 4084122d73cc WebStack/JavaServlet.py --- a/WebStack/JavaServlet.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/JavaServlet.py Wed Sep 01 22:51:40 2004 +0000 @@ -343,7 +343,7 @@ Sets the 'content_type' for the response. """ - return self.response.setHeader("Content-Type", self.format_content_type(content_type)) + return self.response.setHeader("Content-Type", str(content_type)) # Higher level response-related methods. diff -r 629909b81c9f -r 4084122d73cc WebStack/ModPython.py --- a/WebStack/ModPython.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/ModPython.py Wed Sep 01 22:51:40 2004 +0000 @@ -170,7 +170,7 @@ body with fields found in the path. """ - encoding = encoding or self.get_content_type().charset or "iso-8859-1" + encoding = encoding or self.get_content_type().charset or self.default_charset if self.storage_body is None: self.storage_body = FieldStorage(self.trans, keep_blank_values=1) @@ -232,12 +232,12 @@ Returns the response stream for the transaction. """ - # Unicode can upset this operation. Using either the specified charset, - # the same charset as that used in the request, or a default encoding. + # Unicode can upset this operation. Using either the specified charset + # or a default encoding. - encoding = self.get_content_type().charset or "utf-8" if self.content_type: - encoding = self.content_type.charset or encoding + encoding = self.content_type.charset + encoding = encoding or self.default_charset return ConvertingStream(self.trans, encoding) def get_response_code(self): @@ -275,7 +275,7 @@ # Remember the content type for encoding purposes later. self.content_type = content_type - self.trans.content_type = self.format_content_type(content_type) + self.trans.content_type = str(content_type) # Higher level response-related methods. diff -r 629909b81c9f -r 4084122d73cc WebStack/Twisted.py --- a/WebStack/Twisted.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/Twisted.py Wed Sep 01 22:51:40 2004 +0000 @@ -161,7 +161,9 @@ or a plain string (representing a file upload form field, for example). """ - encoding = encoding or self.get_content_type().charset or "iso-8859-1" + # NOTE: Fix the inclusion of path fields since this prevents Unicode conversion. + + encoding = encoding or self.get_content_type().charset or self.default_charset fields = {} for field_name, field_values in self.trans.args.items(): if type(field_values) == type([]): @@ -232,12 +234,12 @@ Returns the response stream for the transaction. """ - # Unicode can upset this operation. Using either the specified charset, - # the same charset as that used in the request, or a default encoding. + # Unicode can upset this operation. Using either the specified charset + # or a default encoding. - encoding = self.get_content_type().charset or "utf-8" if self.content_type: - encoding = self.content_type.charset or encoding + encoding = self.content_type.charset + encoding = encoding or self.default_charset return ConvertingStream(self.trans, encoding) def get_response_code(self): @@ -277,7 +279,7 @@ # Remember the content type for encoding purposes later. self.content_type = content_type - self.trans.setHeader("Content-Type", self.format_content_type(content_type)) + self.trans.setHeader("Content-Type", str(content_type)) # Higher level response-related methods. diff -r 629909b81c9f -r 4084122d73cc WebStack/Webware.py --- a/WebStack/Webware.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/Webware.py Wed Sep 01 22:51:40 2004 +0000 @@ -182,7 +182,7 @@ or a plain string (representing a file upload form field, for example). """ - encoding = encoding or self.get_content_type().charset or "iso-8859-1" + encoding = encoding or self.get_content_type().charset or self.default_charset fields = {} for field_name, field_values in self.trans.request().fields().items(): if type(field_values) == type([]): @@ -248,12 +248,12 @@ Returns the response stream for the transaction. """ - # Unicode can upset this operation. Using either the specified charset, - # the same charset as that used in the request, or a default encoding. + # Unicode can upset this operation. Using either the specified charset + # or a default encoding. - encoding = self.get_content_type().charset or "utf-8" if self.content_type: - encoding = self.content_type.charset or encoding + encoding = self.content_type.charset + encoding = encoding or self.default_charset return ConvertingStream(self.trans.response(), encoding) def get_response_code(self): @@ -300,7 +300,7 @@ # Remember the content type for encoding purposes later. self.content_type = content_type - return self.trans.response().setHeader("Content-Type", self.format_content_type(content_type)) + return self.trans.response().setHeader("Content-Type", str(content_type)) # Higher level response-related methods. diff -r 629909b81c9f -r 4084122d73cc WebStack/Zope.py --- a/WebStack/Zope.py Tue Aug 31 18:28:41 2004 +0000 +++ b/WebStack/Zope.py Wed Sep 01 22:51:40 2004 +0000 @@ -184,7 +184,7 @@ # NOTE: Conversion to Unicode may be inappropriate. - encoding = encoding or self.get_content_type().charset or "iso-8859-1" + encoding = encoding or self.get_content_type().charset or self.default_charset fields = {} for field_name, field_values in self.get_fields_from_path().items(): if type(field_values) == type([]): @@ -240,12 +240,12 @@ Returns the response stream for the transaction. """ - # Unicode can upset this operation. Using either the specified charset, - # the same charset as that used in the request, or a default encoding. + # Unicode can upset this operation. Using either the specified charset + # or a default encoding. - encoding = self.get_content_type().charset or "utf-8" if self.content_type: - encoding = self.content_type.charset or encoding + encoding = self.content_type.charset + encoding = encoding or self.default_charset return ConvertingStream(self.response, encoding) def get_response_code(self): @@ -281,7 +281,7 @@ """ self.content_type = content_type - self.response.setHeader("Content-Type", self.format_content_type(content_type)) + self.response.setHeader("Content-Type", str(content_type)) # Higher level response-related methods.