# HG changeset patch # User paulb # Date 1124909410 0 # Node ID 47a4fa8e0104a3e107edd96b74545921adfd33c1 # Parent a01b466df1c168e4623642f8e7228085a1a4ea4c [project @ 2005-08-24 18:50:07 by paulb] Introduced decoding of path information with an optional explicit encoding. diff -r a01b466df1c1 -r 47a4fa8e0104 README.txt --- a/README.txt Wed Aug 24 16:07:04 2005 +0000 +++ b/README.txt Wed Aug 24 18:50:10 2005 +0000 @@ -55,8 +55,12 @@ New in WebStack 1.0 (Changes since WebStack 0.10) ------------------------------------------------- -Changed the behaviour of get_fields_from_path to return Unicode data decoded -using the optional encoding parameter or a common default encoding. +Changed the behaviour of get_path, get_path_without_query, get_path_info and +get_fields_from_path to return Unicode data decoded using the optional +encoding parameter or a common default encoding. +Added convenience methods to Transaction for the decoding and encoding of path +values (to and from Unicode objects) - see the decode_path and encode_path +methods. Added Debian package support. Added automatic session directory creation for the WebStack sessions implementation. @@ -69,9 +73,6 @@ from Scott Robinson). Fixed ResourceMap.MapResource to handle non-existent resources properly (where the virtual path info is only one component in length). -Added convenience methods to Transaction for the decoding and encoding of path -values (to and from Unicode objects) - see the decode_path and encode_path -methods. 
New in WebStack 0.10 (Changes since WebStack 0.9) ------------------------------------------------- diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/BaseHTTPRequestHandler.py --- a/WebStack/BaseHTTPRequestHandler.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/BaseHTTPRequestHandler.py Wed Aug 24 18:50:10 2005 +0000 @@ -189,32 +189,56 @@ return self.parse_content_preferences(self.trans.headers.get("accept-language")) - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.trans.path + path = self.get_path_without_query(encoding) + qs = self.get_query_string() + if qs: + return path + "?" + qs + else: + return path - def get_path_without_query(self): + def get_path_without_query(self, encoding=None): """ - Returns the entire path from the request minus the query string. + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ # Remove the query string from the end of the path. - return self.trans.path.split("?")[0] + return self.decode_path(self.trans.path.split("?")[0], encoding) - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. 
+ handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.get_path_without_query() + return self.get_path_without_query(encoding) def get_query_string(self): diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/CGI.py --- a/WebStack/CGI.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/CGI.py Wed Aug 24 18:50:10 2005 +0000 @@ -176,38 +176,56 @@ return self.parse_content_preferences(None) - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - path = self.get_path_without_query() + path = self.get_path_without_query(encoding) qs = self.get_query_string() if qs: - path += "?" - path += qs + return path + "?" + qs + else: + return path + + def get_path_without_query(self, encoding=None): + + """ + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. 
+ """ + + path = self.decode_path(self.env.get("SCRIPT_NAME") or "", encoding) + path += self.get_path_info(encoding) return path - def get_path_without_query(self): - - """ - Returns the entire path from the request minus the query string. - """ - - path = self.env.get("SCRIPT_NAME") or "" - if self.env.has_key("PATH_INFO"): - path += self.env["PATH_INFO"] - return path - - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. + handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.env.get("PATH_INFO") or "" + return self.decode_path(self.env.get("PATH_INFO") or "", encoding) def get_query_string(self): diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/Generic.py --- a/WebStack/Generic.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/Generic.py Wed Aug 24 18:50:10 2005 +0000 @@ -365,27 +365,46 @@ raise NotImplementedError, "get_content_languages" - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. 
""" raise NotImplementedError, "get_path" - def get_path_without_query(self): + def get_path_without_query(self, encoding=None): """ - Returns the entire path from the request minus the query string. + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ raise NotImplementedError, "get_path_without_query" - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. + handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ raise NotImplementedError, "get_path_info" diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/JavaServlet.py --- a/WebStack/JavaServlet.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/JavaServlet.py Wed Aug 24 18:50:10 2005 +0000 @@ -211,34 +211,53 @@ else: return None - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ # NOTE: To be verified. 
- path = self.get_path_without_query() + path = self.get_path_without_query(encoding) qs = self.get_query_string() if qs: - path += "?" - path += qs - return path + return path + "?" + qs + else: + return path - def get_path_without_query(self): + def get_path_without_query(self, encoding=None): """ - Returns the entire path from the request minus the query string. + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.request.getContextPath() + self.request.getServletPath() + self.get_path_info() + return self.request.getContextPath() + self.request.getServletPath() + self.get_path_info(encoding) - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. + handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ return self.request.getPathInfo() or "" diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/ModPython.py --- a/WebStack/ModPython.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/ModPython.py Wed Aug 24 18:50:10 2005 +0000 @@ -171,34 +171,53 @@ return self.parse_content_preferences(self.trans.headers_in.get("Accept-Language")) - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object.
Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ query_string = self.get_query_string() if query_string: - return self.trans.uri + "?" + query_string + return self.decode_path(self.trans.uri, encoding) + "?" + query_string else: - return self.trans.uri + return self.decode_path(self.trans.uri, encoding) - def get_path_without_query(self): + def get_path_without_query(self, encoding=None): """ - Returns the entire path from the request minus the query string. + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.trans.uri + return self.decode_path(self.trans.uri, encoding) - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. + handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. 
""" - return self.trans.path_info + return self.decode_path(self.trans.path_info, encoding) def get_query_string(self): @@ -263,9 +282,10 @@ fields = {} for field in self.storage_body.list: - if not fields.has_key(field.name): - fields[field.name] = [] - fields[field.name].append(get_body_field(field.value, encoding)) + field_name = self.decode_path(field.name, encoding) + if not fields.has_key(field_name): + fields[field_name] = [] + fields[field_name].append(get_body_field(field.value, encoding)) return fields def get_fields(self, encoding=None): diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/Twisted.py --- a/WebStack/Twisted.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/Twisted.py Wed Aug 24 18:50:10 2005 +0000 @@ -140,30 +140,54 @@ return self.parse_content_preferences(self.trans.getHeader("Accept-Charset")) - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.trans.uri + path = self.get_path_without_query(encoding) + qs = self.get_query_string() + if qs: + return path + "?" + qs + else: + return path - def get_path_without_query(self): + def get_path_without_query(self, encoding=None): """ - Returns the entire path from the request minus the query string. + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). 
+ + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.get_path().split("?")[0] + return self.decode_path(self.trans.uri.split("?")[0], encoding) - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. + handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return "/%s" % "/".join(self.trans.postpath) + return self.decode_path("/%s" % "/".join(self.trans.postpath), encoding) def get_query_string(self): @@ -171,7 +195,7 @@ Returns the query string from the path in the request. """ - t = self.get_path().split("?") + t = self.trans.uri.split("?") if len(t) == 1: return "" else: @@ -231,6 +255,11 @@ fields = {} for field_name, field_values in self.trans.args.items(): + # NOTE: Need to use the special path decoding method due to the + # NOTE: mixing of path and body fields within Twisted. + + field_name = self.decode_path(field_name, encoding) + # Find the body values. if type(field_values) == type([]): diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/WSGI.py --- a/WebStack/WSGI.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/WSGI.py Wed Aug 24 18:50:10 2005 +0000 @@ -180,38 +180,56 @@ return self.parse_content_preferences(None) - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. 
Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - path = self.get_path_without_query() + path = self.get_path_without_query(encoding) qs = self.get_query_string() if qs: - path += "?" - path += qs + return path + "?" + qs + else: + return path + + def get_path_without_query(self, encoding=None): + + """ + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. + """ + + path = self.decode_path(self.env.get("SCRIPT_NAME") or "", encoding) + path += self.get_path_info(encoding) return path - def get_path_without_query(self): - - """ - Returns the entire path from the request minus the query string. - """ - - path = self.env.get("SCRIPT_NAME") or "" - if self.env.has_key("PATH_INFO"): - path += self.env["PATH_INFO"] - return path - - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. + handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. 
""" - return self.env.get("PATH_INFO") or "" + return self.decode_path(self.env.get("PATH_INFO") or "", encoding) def get_query_string(self): diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/Webware.py --- a/WebStack/Webware.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/Webware.py Wed Aug 24 18:50:10 2005 +0000 @@ -150,35 +150,60 @@ return self.trans.request().contentLanguages() - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.trans.request().uri() + path = self.get_path_without_query(encoding) + qs = self.get_query_string() + if qs: + return path + "?" + qs + else: + return path - def get_path_without_query(self): + def get_path_without_query(self, encoding=None): """ - Returns the entire path from the request minus the query string. + Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ - return self.get_path().split("?")[0] + return self.decode_path(self.trans.request().uri().split("?")[0], encoding) - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. 
+ handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ path_info = self.trans.request().pathInfo() context_name = self.trans.request().contextName() if path_info.startswith(context_name): - return path_info[len(context_name):] + real_path_info = path_info[len(context_name):] else: - return path_info + real_path_info = path_info + return self.decode_path(real_path_info, encoding) def get_query_string(self): diff -r a01b466df1c1 -r 47a4fa8e0104 WebStack/Zope.py --- a/WebStack/Zope.py Wed Aug 24 16:07:04 2005 +0000 +++ b/WebStack/Zope.py Wed Aug 24 18:50:10 2005 +0000 @@ -146,44 +146,63 @@ return self.parse_content_preferences(None) - def get_path(self): + def get_path(self, encoding=None): """ - Returns the entire path from the request. + Returns the entire path from the request as a Unicode object. Any "URL + encoded" character values in the part of the path before the query + string will be decoded and presented as genuine characters; the query + string will remain "URL encoded", however. + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ # NOTE: Based on WebStack.CGI.get_path. - path = self.get_path_without_query() + path = self.get_path_without_query(encoding) qs = self.get_query_string() if qs: - path += "?" - path += qs - return path + return path + "?" + qs + else: + return path - def get_path_without_query(self): + def get_path_without_query(self, encoding=None): """ - Returns the entire path from the request minus the query string. 
+ Returns the entire path from the request minus the query string as a + Unicode object containing genuine characters (as opposed to "URL + encoded" character values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ # NOTE: Based on WebStack.CGI.get_path. - path = self.request.environ.get("SCRIPT_NAME") or "" - if self.request.environ.has_key("PATH_INFO"): - path += self.request.environ["PATH_INFO"] + path = self.decode_path(self.request.environ.get("SCRIPT_NAME") or "", encoding) + path += self.get_path_info(encoding) return path - def get_path_info(self): + def get_path_info(self, encoding=None): """ Returns the "path info" (the part of the URL after the resource name - handling the current request) from the request. + handling the current request) from the request as a Unicode object + containing genuine characters (as opposed to "URL encoded" character + values). + + If the optional 'encoding' is set, use that in preference to the default + encoding to convert the path into a form not containing "URL encoded" + character values. """ product_path = "/".join(self.adapter.getPhysicalPath()) path_info = self.request.environ.get("PATH_INFO") or "" - return path_info[len(product_path):] + real_path_info = path_info[len(product_path):] + return self.decode_path(real_path_info, encoding) def get_query_string(self):