# HG changeset patch # User paulb # Date 1086534819 0 # Node ID c44168f0246c0e1f2072ff73375e6868b5703cd3 # Parent 1abb649370aae4cb80e3ca6bd52db90c1575bb96 [project @ 2004-06-06 15:13:39 by paulb] Introduced Unicode conversion in message body fields acquisition (to Unicode from the original charset) and in the production of response output (to an appropriate charset from Unicode where used). diff -r 1abb649370aa -r c44168f0246c WebStack/BaseHTTPRequestHandler.py --- a/WebStack/BaseHTTPRequestHandler.py Fri Jun 20 22:23:58 2008 +0200 +++ b/WebStack/BaseHTTPRequestHandler.py Sun Jun 06 15:13:39 2004 +0000 @@ -6,6 +6,7 @@ import Generic from Helpers.Request import MessageBodyStream +from Helpers.Response import ConvertingStream from Helpers.Auth import UserInfo from cgi import parse_qs, FieldStorage import Cookie @@ -192,16 +193,21 @@ return parse_qs(self.get_query_string(), keep_blank_values=1) - def get_fields_from_body(self): + def get_fields_from_body(self, encoding=None): """ A framework-specific method which extracts the form fields from the - message body in the transaction. + message body in the transaction. The optional 'encoding' parameter + specifies the character encoding of the message body for cases where no + such information is available, but where the default encoding is to be + overridden. Returns a dictionary mapping field names to lists of values (even if a single value is associated with any given field name). """ + encoding = self.get_content_type().charset or encoding or "iso-8859-1" + if self.storage_body is None: self.storage_body = FieldStorage(fp=self.get_request_stream(), headers=self.get_headers(), environ={"REQUEST_METHOD" : self.get_request_method()}, keep_blank_values=1) @@ -215,7 +221,9 @@ # Traverse the storage, finding each field value. for field_name in self.storage_body.keys(): - fields[field_name] = self.storage_body.getlist(field_name) + fields[field_name] = [] + for field_value in self.storage_body.getlist(field_name): + fields[field_name].append(unicode(field_value, encoding)) return fields def get_user(self): @@ -269,8 +277,13 @@ """ # Return a stream which is later emptied into the real stream. + # Unicode can upset this operation. Using either the specified charset, + # the same charset as that used in the request, or a default encoding. - return self.content + encoding = self.get_content_type().charset or "utf-8" + if self.content_type: + encoding = self.content_type.charset or encoding + return ConvertingStream(self.content, encoding) def get_response_code(self): diff -r 1abb649370aa -r c44168f0246c WebStack/CGI.py --- a/WebStack/CGI.py Fri Jun 20 22:23:58 2008 +0200 +++ b/WebStack/CGI.py Sun Jun 06 15:13:39 2004 +0000 @@ -7,6 +7,7 @@ import Generic import os, sys from Helpers.Request import MessageBodyStream +from Helpers.Response import ConvertingStream from Helpers.Auth import UserInfo from Helpers import Environment from cgi import parse_qs, FieldStorage @@ -196,16 +197,21 @@ return parse_qs(self.get_query_string(), keep_blank_values=1) - def get_fields_from_body(self): + def get_fields_from_body(self, encoding=None): """ A framework-specific method which extracts the form fields from the - message body in the transaction. + message body in the transaction. The optional 'encoding' parameter + specifies the character encoding of the message body for cases where no + such information is available, but where the default encoding is to be + overridden. Returns a dictionary mapping field names to lists of values (even if a single value is associated with any given field name). """ + encoding = self.get_content_type().charset or encoding or "iso-8859-1" + if self.storage_body is None: self.storage_body = FieldStorage(fp=self.get_request_stream(), keep_blank_values=1) @@ -218,7 +224,9 @@ # Traverse the storage, finding each field value. for field_name in self.storage_body.keys(): - fields[field_name] = self.storage_body.getlist(field_name) + fields[field_name] = [] + for field_value in self.storage_body.getlist(field_name): + fields[field_name].append(unicode(field_value, encoding)) return fields def get_user(self): @@ -268,8 +276,13 @@ """ # Return a stream which is later emptied into the real stream. + # Unicode can upset this operation. Using either the specified charset, + # the same charset as that used in the request, or a default encoding. - return self.content + encoding = self.get_content_type().charset or "utf-8" + if self.content_type: + encoding = self.content_type.charset or encoding + return ConvertingStream(self.content, encoding) def get_response_code(self): diff -r 1abb649370aa -r c44168f0246c WebStack/Generic.py --- a/WebStack/Generic.py Fri Jun 20 22:23:58 2008 +0200 +++ b/WebStack/Generic.py Sun Jun 06 15:13:39 2004 +0000 @@ -238,11 +238,14 @@ raise NotImplementedError, "get_fields_from_path" - def get_fields_from_body(self): + def get_fields_from_body(self, encoding=None): """ A framework-specific method which extracts the form fields from the - message body in the transaction. + message body in the transaction. The optional 'encoding' parameter + specifies the character encoding of the message body for cases where no + such information is available, but where the default encoding is to be + overridden. Returns a dictionary mapping field names to lists of values (even if a single value is associated with any given field name). diff -r 1abb649370aa -r c44168f0246c WebStack/JavaServlet.py --- a/WebStack/JavaServlet.py Fri Jun 20 22:23:58 2008 +0200 +++ b/WebStack/JavaServlet.py Sun Jun 06 15:13:39 2004 +0000 @@ -222,17 +222,21 @@ return self.get_fields_from_body() - def get_fields_from_body(self): + def get_fields_from_body(self, encoding=None): """ A framework-specific method which extracts the form fields from the - message body in the transaction. + message body in the transaction. The optional 'encoding' parameter + specifies the character encoding of the message body for cases where no + such information is available, but where the default encoding is to be + overridden. Returns a dictionary mapping field names to lists of values (even if a single value is associated with any given field name). NOTE: There may not be a reliable means of extracting only the fields - NOTE: from the message body. + NOTE: from the message body. Moreover, the encoding of the fields may + NOTE: not be pertinent. """ parameter_map = self.request.getParameterMap() diff -r 1abb649370aa -r c44168f0246c WebStack/ModPython.py --- a/WebStack/ModPython.py Fri Jun 20 22:23:58 2008 +0200 +++ b/WebStack/ModPython.py Sun Jun 06 15:13:39 2004 +0000 @@ -5,6 +5,7 @@ """ import Generic +from Helpers.Response import ConvertingStream from mod_python.util import parse_qs, FieldStorage from mod_python import apache try: @@ -26,6 +27,7 @@ self.trans = trans self.response_code = apache.OK self.user = None + self.content_type = None # Cached information. @@ -152,11 +154,14 @@ return parse_qs(self.get_query_string(), 1) # keep_blank_values=1 - def get_fields_from_body(self): + def get_fields_from_body(self, encoding=None): """ A framework-specific method which extracts the form fields from the - message body in the transaction. + message body in the transaction. The optional 'encoding' parameter + specifies the character encoding of the message body for cases where no + such information is available, but where the default encoding is to be + overridden. Returns a dictionary mapping field names to lists of values (even if a single value is associated with any given field name). @@ -165,6 +170,8 @@ body with fields found in the path. """ + encoding = self.get_content_type().charset or encoding or "iso-8859-1" + if self.storage_body is None: self.storage_body = FieldStorage(self.trans, keep_blank_values=1) @@ -174,7 +181,7 @@ for field in self.storage_body.list: if not fields.has_key(field.name): fields[field.name] = [] - fields[field.name].append(field.value) + fields[field.name].append(unicode(field.value, encoding)) return fields def get_user(self): @@ -229,7 +236,13 @@ the transaction. """ - return self.trans + # Unicode can upset this operation. Using either the specified charset, + # the same charset as that used in the request, or a default encoding. + + encoding = self.get_content_type().charset or "utf-8" + if self.content_type: + encoding = self.content_type.charset or encoding + return ConvertingStream(self.trans, encoding) def get_response_code(self): @@ -264,6 +277,9 @@ response. """ + # Remember the content type for encoding purposes later. + + self.content_type = content_type self.trans.content_type = self.format_content_type(content_type) def set_cookie(self, cookie): diff -r 1abb649370aa -r c44168f0246c WebStack/Twisted.py --- a/WebStack/Twisted.py Fri Jun 20 22:23:58 2008 +0200 +++ b/WebStack/Twisted.py Sun Jun 06 15:13:39 2004 +0000 @@ -7,6 +7,7 @@ import Generic from Helpers.Auth import UserInfo from Helpers.Request import Cookie +from Helpers.Response import ConvertingStream from cgi import parse_qs class Transaction(Generic.Transaction): @@ -21,6 +22,7 @@ self.trans = trans self.user = None + self.content_type = None # Request-related methods. @@ -149,17 +151,26 @@ return parse_qs(self.get_query_string(), keep_blank_values=1) - def get_fields_from_body(self): + def get_fields_from_body(self, encoding=None): """ A framework-specific method which extracts the form fields from the - message body in the transaction. + message body in the transaction. The optional 'encoding' parameter + specifies the character encoding of the message body for cases where no + such information is available, but where the default encoding is to be + overridden. Returns a dictionary mapping field names to lists of values (even if a single value is associated with any given field name). """ - return self.trans.args + encoding = self.get_content_type().charset or encoding or "iso-8859-1" + fields = {} + for field_name, field_values in self.trans.args.items(): + fields[field_name] = [] + for field_value in field_values: + fields[field_name].append(unicode(field_value, encoding)) + return fields def get_user(self): @@ -222,7 +233,13 @@ the transaction. """ - return self.trans + # Unicode can upset this operation. Using either the specified charset, + # the same charset as that used in the request, or a default encoding. + + encoding = self.get_content_type().charset or "utf-8" + if self.content_type: + encoding = self.content_type.charset or encoding + return ConvertingStream(self.trans, encoding) def get_response_code(self): @@ -259,6 +276,9 @@ response. """ + # Remember the content type for encoding purposes later. + + self.content_type = content_type self.trans.setHeader("Content-Type", self.format_content_type(content_type)) # Higher level response-related methods. diff -r 1abb649370aa -r c44168f0246c WebStack/Webware.py --- a/WebStack/Webware.py Fri Jun 20 22:23:58 2008 +0200 +++ b/WebStack/Webware.py Sun Jun 06 15:13:39 2004 +0000 @@ -9,6 +9,7 @@ import StringIO from Helpers import Environment from Helpers.Request import Cookie +from Helpers.Response import ConvertingStream class Transaction(Generic.Transaction): @@ -22,6 +23,7 @@ self.trans = trans self.user = None + self.content_type = None # Request-related methods. @@ -165,24 +167,31 @@ return parse_qs(self.get_query_string(), keep_blank_values=1) - def get_fields_from_body(self): + def get_fields_from_body(self, encoding=None): """ A framework-specific method which extracts the form fields from the - message body in the transaction. + message body in the transaction. The optional 'encoding' parameter + specifies the character encoding of the message body for cases where no + such information is available, but where the default encoding is to be + overridden. Returns a dictionary mapping field names to lists of values (even if a single value is associated with any given field name). """ + encoding = self.get_content_type().charset or encoding or "iso-8859-1" + # Fix the non-list results. fields = {} - for field_name, field_value in self.trans.request().fields().items(): - if type(field_value) == type([]): - fields[field_name] = field_value + for field_name, field_values in self.trans.request().fields().items(): + if type(field_values) == type([]): + fields[field_name] = [] + for field_value in field_values: + fields[field_name].append(unicode(field_value, encoding)) else: - fields[field_name] = [field_value] + fields[field_name] = [unicode(field_values, encoding)] return fields def get_user(self): @@ -244,7 +253,13 @@ the transaction. """ - return self.trans.response() + # Unicode can upset this operation. Using either the specified charset, + # the same charset as that used in the request, or a default encoding. + + encoding = self.get_content_type().charset or "utf-8" + if self.content_type: + encoding = self.content_type.charset or encoding + return ConvertingStream(self.trans.response(), encoding) def get_response_code(self): @@ -288,6 +303,9 @@ response. """ + # Remember the content type for encoding purposes later. + + self.content_type = content_type return self.trans.response().setHeader("Content-Type", self.format_content_type(content_type)) # Higher level response-related methods.