paul@0 | 1 | #!/usr/bin/env python |
paul@0 | 2 | |
paul@0 | 3 | """ |
paul@0 | 4 | Search Python abstract syntax trees for nodes of a particular type having a |
paul@0 | 5 | particular textual value. |
paul@0 | 6 | |
paul@0 | 7 | Copyright (C) 2008 Paul Boddie <paul@boddie.org.uk> |
paul@0 | 8 | |
paul@0 | 9 | This program is free software; you can redistribute it and/or modify it under |
paul@0 | 10 | the terms of the GNU General Public License as published by the Free Software |
paul@0 | 11 | Foundation; either version 3 of the License, or (at your option) any later |
paul@0 | 12 | version. |
paul@0 | 13 | |
paul@0 | 14 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@0 | 15 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@0 | 16 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@0 | 17 | details. |
paul@0 | 18 | |
paul@0 | 19 | You should have received a copy of the GNU General Public License along with |
paul@0 | 20 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@0 | 21 | """ |
paul@0 | 22 | |
paul@0 | 23 | import compiler |
paul@0 | 24 | import os |
paul@14 | 25 | import linecache |
paul@17 | 26 | import types |
paul@0 | 27 | |
__version__ = "0.1.1"

# Excluded AST nodes and their names.
#
# Nodes of these types are never reported as matches by search_tree, although
# the nodes beneath them are still searched. The names are also omitted from
# the list of term types returned by get_term_types.
#
# The classes are held in a tuple so that they can be passed directly to
# isinstance.

excluded_term_types = ["Module", "Stmt"]
excluded_term_cls = tuple([getattr(compiler.ast, name) for name in excluded_term_types])
paul@17 | 34 | |
paul@17 | 35 | # Search functions. |
paul@3 | 36 | |
def search_recursive(directory, term_type, op=None):

    """
    Walk the filesystem below 'directory' and search each file whose name
    carries the "py" extension for terms having the given 'term_type', using
    'op' (if specified) to match a search term.

    Return a single list combining the results obtained from every file
    searched.
    """

    python_extension = os.path.extsep + "py"
    found = []

    for dirpath, _dirnames, names in os.walk(directory):
        for name in names:

            # Skip anything which is not named as a Python source file.

            if os.path.splitext(name)[-1] != python_extension:
                continue

            found.extend(search_file(os.path.join(dirpath, name), term_type, op))

    return found
paul@0 | 50 | |
def search_file(filename, term_type, op=None):

    """
    Parse the file with the given 'filename' and search the resulting tree for
    terms having the given 'term_type', using 'op' (if specified) to match a
    search term. A 'term_type' of "*" requests matching against any term type;
    otherwise, 'term_type' names a class in compiler.ast.

    Return the list of results from search_tree, or an empty list if the file
    cannot be parsed due to a syntax error.
    """

    # Files which are not valid Python yield no results.

    try:
        tree = compiler.parseFile(filename)
    except SyntaxError:
        return []

    # Only look up a node class where a specific term type was requested.

    cls = None
    if term_type != "*":
        cls = getattr(compiler.ast, term_type)

    return search_tree(tree, cls, op, filename)
paul@0 | 70 | |
def search_tree(node, cls, op=None, filename=None):

    """
    Search the tree rooted at the given 'node' for nodes of the given class
    'cls', using 'op' (if specified) to match a search term. Where 'cls' is
    None, nodes of all types other than the excluded types are considered.

    Without 'op', each node of a suitable type is itself a match and is
    recorded with a value of None. With 'op', the non-node children of each
    suitable node are tested, and every child satisfying 'op' is recorded as a
    match.

    Return a list of results of the form (node, value, filename).
    """

    matches = []

    # Excluded nodes are never tested themselves, but their descendants are
    # still visited below.

    excluded = isinstance(node, excluded_term_cls)

    if not excluded and (cls is None or isinstance(node, cls)):

        if op is None:
            matches.append((node, None, filename))

        else:
            string_seen = False

            for item in node.getChildren():

                # Test literals using their string representation.

                if isinstance(item, (int, float, long, bool)):
                    if op(str(item)):
                        matches.append((node, item, filename))

                # Only check a single string child value since subsequent
                # values are typically docstrings.

                elif isinstance(item, str):
                    if not string_seen:
                        string_seen = True
                        if op(item):
                            matches.append((node, item, filename))

                # Argument lists, globals and imports.

                elif isinstance(item, list):
                    matches += search_list(item, node, op, filename)

    # Search within nodes, even if matches have already been found.

    for subnode in node.getChildNodes():
        matches += search_tree(subnode, cls, op, filename)

    return matches
paul@18 | 123 | |
def search_list(values, node, op=None, filename=None):

    """
    Search the given 'values' from the given 'node', using 'op' to match a
    search term. The 'values' are the members of a list found amongst the
    children of 'node' (argument lists, globals and imports).

    Each member which is a string is tested directly. Each member which is a
    tuple (as found in imports) has its own string members tested
    individually, with any non-string members being skipped.

    The 'op' callable is invoked with each candidate string, and a true result
    indicates a match. Although 'op' is optional in the signature, it is
    called unconditionally for every string encountered; search_tree only
    calls this function when a search operation has been given.

    Return a list of results of the form (node, value, filename).
    """

    results = []

    for value in values:

        # Test strings.

        if isinstance(value, str):
            if op(value):
                results.append((node, value, filename))

        # Test import tuples. Each member must be checked itself (and not the
        # enclosing tuple), since only the string members can be matched.

        elif isinstance(value, tuple):
            for subvalue in value:
                if isinstance(subvalue, str) and op(subvalue):
                    results.append((node, subvalue, filename))

    return results
paul@0 | 150 | |
def expand_results(results):

    """
    Expand the given 'results', each of the form (node, value, filename), into
    a list containing tuples of the form (node, filename, line number, line,
    value).

    The line number is taken from the node. The line is the text found at that
    line number in the named file, with trailing whitespace removed, or None
    where either the filename or the line number is not known.
    """

    def fetch_line(filename, lineno):

        "Return the stripped source line, or None if it cannot be located."

        if filename is None or lineno is None:
            return None
        return linecache.getline(filename, lineno).rstrip()

    expanded = []

    for node, value, filename in results:
        lineno = node.lineno
        expanded.append((node, filename, lineno, fetch_line(filename, lineno), value))

    return expanded
paul@0 | 171 | |
def get_term_types():

    "Return the term types supported by the module."

    def is_term_type(name):

        """
        Return whether 'name' refers to a class in compiler.ast which derives
        from the Node class, has a name starting with an upper case letter,
        and is not one of the excluded term types.
        """

        if name in excluded_term_types:
            return False

        candidate = getattr(compiler.ast, name)

        return isinstance(candidate, types.ClassType) and \
            issubclass(candidate, compiler.ast.Node) and \
            name[0].isupper()

    # The names are produced in the order given by dir.

    return [name for name in dir(compiler.ast) if is_term_type(name)]
paul@17 | 191 | |
# Command syntax.
#
# This description is given to cmdsyntax.Syntax by run_command in order to
# match the command arguments. It is also printed as part of the usage message
# when no match for the arguments is obtained.

syntax_description = """
[ -n | --line-number ]
[ -p | --print-token ]
[ ( -t TERM_TYPE ) | ( --type=TERM_TYPE ) ]
[ ( -e PATTERN ) | ( --regexp=PATTERN ) ]
[ -r | -R | --recursive ] ( FILENAME ... )
"""
paul@0 | 201 | |
paul@0 | 202 | # Main program. |
paul@0 | 203 | |
def run_command():

    """
    The functionality of the main program.

    Match the command arguments against the syntax description, search each
    given file (and, if requested, each given directory hierarchy) for the
    chosen term type and pattern, and print one line of output per result.

    If the arguments cannot be matched, or if the requested term type is not
    one of those returned by get_term_types, the syntax and the supported term
    types are printed and the program exits with a status of 1.
    """

    import sys
    import cmdsyntax
    import re
    import textwrap

    # Match command arguments.

    syntax = cmdsyntax.Syntax(syntax_description)
    syntax_matches = syntax.get_args(sys.argv[1:])
    show_syntax = 0

    try:
        args = syntax_matches[0]
    except IndexError:
        show_syntax = 1

    # Reject unknown term types here, since search_file would otherwise fail
    # with an exception when looking up the corresponding node class.

    if not show_syntax:
        term_type = args.get("TERM_TYPE", "*")

        if term_type != "*" and term_type not in get_term_types():
            print("Unknown term type: %s" % term_type)
            show_syntax = 1

    if show_syntax:
        print("Syntax:")
        print(syntax_description)
        print("Term types:")
        print("\n".join(textwrap.wrap(", ".join(get_term_types()))))
        sys.exit(1)

    # Get the search details. Each option is recognised in all of the forms
    # given in the syntax description.

    term = args.get("PATTERN")
    recursive = args.has_key("r") or args.has_key("R") or args.has_key("recursive")
    show_lineno = args.has_key("n") or args.has_key("line-number")
    show_token = args.has_key("p") or args.has_key("print-token")

    if term is None:
        op = None
    else:
        op = re.compile(term).search

    # Perform the search in files and directory hierarchies. Directories are
    # only searched if recursive searching was requested.

    results = []

    for filename in args["FILENAME"]:
        if os.path.isfile(filename):
            results += search_file(filename, term_type, op)
        elif recursive and os.path.isdir(filename):
            results += search_recursive(filename, term_type, op)

    # Present the results, building a format string and the corresponding
    # values together for each result.

    for node, filename, lineno, line, value in expand_results(results):
        template = "%s:"
        output = [filename]

        # Handle line numbers and missing details.

        if show_lineno and lineno is not None:
            template += "%d:"
            output.append(lineno)

        # Show matching tokens, if requested. Where no value was matched, the
        # term type is shown instead.

        if show_token:
            if value is not None:
                template += "%r:"
                output.append(value)
            else:
                template += "%s:"
                output.append("<%s>" % (term_type or "*"))

        # Show lines, if defined.

        if line is not None:
            template += " %s"
            output.append(line)

        print(template % tuple(output))
paul@0 | 282 | |
# Run the main program only when this file is executed as a script, and not
# when it is imported as a module.

if __name__ == "__main__":
    run_command()
paul@14 | 285 | |
paul@0 | 286 | # vim: tabstop=4 expandtab shiftwidth=4 |