paul@0 | 1 | #!/usr/bin/env python |
paul@0 | 2 | |
paul@0 | 3 | """ |
paul@0 | 4 | Search Python abstract syntax trees for nodes of a particular type having a |
paul@0 | 5 | particular textual value. |
paul@0 | 6 | |
paul@0 | 7 | Copyright (C) 2008 Paul Boddie <paul@boddie.org.uk> |
paul@0 | 8 | |
paul@0 | 9 | This program is free software; you can redistribute it and/or modify it under |
paul@0 | 10 | the terms of the GNU General Public License as published by the Free Software |
paul@0 | 11 | Foundation; either version 3 of the License, or (at your option) any later |
paul@0 | 12 | version. |
paul@0 | 13 | |
paul@0 | 14 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@0 | 15 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@0 | 16 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@0 | 17 | details. |
paul@0 | 18 | |
paul@0 | 19 | You should have received a copy of the GNU General Public License along with |
paul@0 | 20 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@0 | 21 | """ |
paul@0 | 22 | |
paul@0 | 23 | import compiler |
paul@0 | 24 | import os |
paul@14 | 25 | import linecache |
paul@17 | 26 | import types |
paul@0 | 27 | |
__version__ = "0.1.1"

# Excluded AST nodes and their names.
#
# Nodes of these types are never reported as matches by search_tree and their
# names are omitted from the list produced by get_term_types. The names are
# given first, followed by the corresponding compiler.ast classes collected in
# a tuple so that they can be used directly with isinstance.

excluded_term_types = ["Module", "Stmt"]
excluded_term_cls = tuple([getattr(compiler.ast, name) for name in excluded_term_types])
paul@17 | 34 | |
paul@17 | 35 | # Search functions. |
paul@3 | 36 | |
def search_recursive(directory, term_type, term, op=None):

    """
    Walk the filesystem below 'directory' and search each file whose name
    carries the "py" extension for terms having the given 'term_type' whose
    value matches the specified 'term'. The optional 'op' is handed on
    unchanged, together with the other search details, to 'search_file'.

    Return the results obtained from all searched files as a single list.
    """

    wanted_ext = os.path.extsep + "py"
    found = []

    for dirpath, _subdirs, names in os.walk(directory):
        for name in names:

            # Only consider files with the expected extension.

            _stem, ext = os.path.splitext(name)
            if ext != wanted_ext:
                continue

            found.extend(search_file(os.path.join(dirpath, name), term_type, term, op))

    return found
paul@0 | 50 | |
def search_file(filename, term_type, term, op=None):

    """
    Parse the file with the given 'filename' and search the resulting tree for
    terms having the given 'term_type' whose value matches the specified
    'term'. If 'term_type' is given as "*", attempt to match any term type;
    otherwise, 'term_type' is looked up as an attribute of compiler.ast.

    Return the list of results produced by 'search_tree', or an empty list if
    parsing the file raises a SyntaxError.
    """

    try:
        tree = compiler.parseFile(filename)
    except SyntaxError:

        # Files which cannot be parsed provide no results.

        return []

    # No class at all means that every node type is eligible.

    cls = None
    if term_type != "*":
        cls = getattr(compiler.ast, term_type)

    return search_tree(tree, cls, term, op, filename)
paul@0 | 70 | |
def search_tree(node, cls, term, op=None, filename=None):

    """
    Search the tree rooted at the given 'node' for nodes of the given class
    'cls' for content matching the specified 'term'. If 'cls' is None, all node
    types will be considered for matches, apart from the excluded node types
    which are never reported.

    If 'op' is None, every eligible node is reported with a value of None.
    Otherwise, 'op' is called with the string form of each literal child of an
    eligible node, and the node is reported with that child as its value
    wherever 'op' returns a true value. The 'term' itself is only passed on to
    the recursive calls made for child nodes.

    Return a list of results of the form (node, value, filename).
    """

    matches = []

    # Excluded nodes are never tested; other nodes must satisfy the class
    # constraint, if one was given.

    eligible = not isinstance(node, excluded_term_cls) and \
        (cls is None or isinstance(node, cls))

    if eligible:
        if op is None:
            matches.append((node, None, filename))
        else:
            for value in node.getChildren():

                # Only literals are tested.

                if not isinstance(value, (str, int, float, long, bool)):
                    continue

                if op(str(value)):
                    matches.append((node, value, filename))

                # Stop after the first string value since subsequent values
                # are typically docstrings.

                if isinstance(value, str):
                    break

    # Descend into the child nodes regardless of what was found above.

    for child in node.getChildNodes():
        matches.extend(search_tree(child, cls, term, op, filename))

    return matches
paul@0 | 114 | |
def expand_results(results):

    """
    Expand the given 'results', each of the form (node, value, filename),
    making a list containing tuples of the form
    (node, filename, line number, line, value).

    The line number is taken from the node. The line is the text found at that
    line number in the file, stripped of trailing whitespace, or None if
    either the filename or the line number is None.
    """

    rows = []

    for node, value, filename in results:
        lineno = node.lineno

        # The source text can only be fetched when the location is known.

        line = None
        if not (filename is None or lineno is None):
            text = linecache.getline(filename, lineno)
            line = text.rstrip()

        rows.append((node, filename, lineno, line, value))

    return rows
paul@0 | 135 | |
def get_term_types():

    """
    Return the term types supported by the module: a list of the names found
    in compiler.ast which are not excluded, which start with an upper case
    letter, and which refer to classes derived from compiler.ast.Node.
    """

    def is_term_class(name):

        "Return whether 'name' refers to a suitably named node class."

        obj = getattr(compiler.ast, name)
        return isinstance(obj, types.ClassType) and \
            issubclass(obj, compiler.ast.Node) and \
            name[0].isupper()

    return [name for name in dir(compiler.ast)
            if name not in excluded_term_types and is_term_class(name)]
paul@17 | 155 | |
# Command syntax.
#
# This description is given to cmdsyntax.Syntax in run_command in order to
# match the command line arguments, and it is printed as part of the usage
# message when the arguments do not match it.

syntax_description = """
[ -n | --line-number ]
[ -p | --print-token ]
[ ( -t TERM_TYPE ) | ( --type=TERM_TYPE ) ]
[ ( -e PATTERN ) | ( --regexp=PATTERN ) ]
[ -r | -R | --recursive ] ( FILENAME ... )
"""
paul@0 | 165 | |
paul@0 | 166 | # Main program. |
paul@0 | 167 | |
def run_command():

    """
    The functionality of the main program.

    Match the command line arguments against 'syntax_description', search the
    named files (and, if a recursive search was requested, the named
    directories) and print one line of output for each result.

    If the arguments do not match the syntax, print the syntax together with
    the supported term types and exit with a status of 1.
    """

    import sys
    import cmdsyntax
    import re
    import textwrap

    # Match command arguments.

    syntax = cmdsyntax.Syntax(syntax_description)
    syntax_matches = syntax.get_args(sys.argv[1:])

    try:
        args = syntax_matches[0]
    except IndexError:
        print("Syntax:")
        print(syntax_description)
        print("Term types:")
        print("\n".join(textwrap.wrap(", ".join(get_term_types()))))
        sys.exit(1)

    # Get the search details.

    term_type = args.get("TERM_TYPE", "*")
    term = args.get("PATTERN")
    recursive = args.has_key("r") or args.has_key("R") or args.has_key("recursive")

    # Get the output details once, accepting both the short and the long form
    # of each option. Previously, only -p was tested, causing --print-token to
    # be silently ignored.

    show_lineno = args.has_key("n") or args.has_key("line-number")
    show_token = args.has_key("p") or args.has_key("print-token")

    # Without a pattern, every node of the requested type is reported.

    if term is None:
        op = None
    else:
        op = re.compile(term).search

    # Perform the search in files and directory hierarchies.

    results = []

    for filename in args["FILENAME"]:
        if os.path.isfile(filename):
            results += search_file(filename, term_type, term, op)
        elif recursive and os.path.isdir(filename):
            results += search_recursive(filename, term_type, term, op)

    # Present the results.

    for node, filename, lineno, line, value in expand_results(results):
        template = "%s:"
        fields = [filename]

        # Handle line numbers and missing details.

        if show_lineno and lineno is not None:
            template += "%d:"
            fields.append(lineno)

        # Show matching tokens, if requested, or the term type where a result
        # has no token value.

        if show_token:
            if value is not None:
                template += "%r:"
                fields.append(value)
            else:
                template += "%s:"
                fields.append("<%s>" % (term_type or "*"))

        # Show lines, if defined.

        if line is not None:
            template += " %s"
            fields.append(line)

        print(template % tuple(fields))
paul@0 | 246 | |
# Run the command only when this file is executed as a program, not when it is
# imported as a module.

if __name__ == "__main__":
    run_command()
paul@14 | 249 | |
paul@0 | 250 | # vim: tabstop=4 expandtab shiftwidth=4 |