# HG changeset patch # User Paul Boddie # Date 1225243039 -3600 # Node ID 4a0acdfbb1a5331700d4abfa3daabfbd112c4a59 # Parent e141e023ac2b1fa22a0b263ed3affa82d7643550 Relaxed term type and pattern requirements. Permitted searching of files and directories at the same time. Removed docstring searching since it didn't give helpful results, especially where line numbers were required. Added nodes to the expand_results output. Added term type names to the syntax help text. Introduced lists of excluded nodes/types and their names. Updated the manual and release information. diff -r e141e023ac2b -r 4a0acdfbb1a5 PKG-INFO --- a/PKG-INFO Mon Oct 27 22:49:46 2008 +0100 +++ b/PKG-INFO Wed Oct 29 02:17:19 2008 +0100 @@ -1,12 +1,12 @@ Metadata-Version: 1.1 Name: astgrep -Version: 0.1 +Version: 0.1.1 Author: Paul Boddie Author-email: paul at boddie org uk Maintainer: Paul Boddie Maintainer-email: paul at boddie org uk Home-page: http://www.boddie.org.uk/python/astgrep.html -Download-url: http://www.boddie.org.uk/python/downloads/astgrep-0.1.tar.gz +Download-url: http://www.boddie.org.uk/python/downloads/astgrep-0.1.1.tar.gz Summary: Search Python abstract syntax trees for nodes of a particular type having a particular textual value. License: GPL (version 3 or later) Description: Search through Python source files for textual information diff -r e141e023ac2b -r 4a0acdfbb1a5 README.txt --- a/README.txt Mon Oct 27 22:49:46 2008 +0100 +++ b/README.txt Wed Oct 29 02:17:19 2008 +0100 @@ -55,6 +55,16 @@ Copyright and licence information can be found in the docs directory - see docs/COPYING.txt and docs/gpl-3.0.txt for more information. +New in astgrep 0.1.1 (Changes since astgrep 0.1) +------------------------------------------------ + + * Supported searching of mixtures of files and directories. + * Prevented searching of docstrings, since the information about them is + deficient in the abstract syntax trees. + * Removed the term type requirement, permitting searching in all types. + * Removed the pattern requirement, permitting searching for all tokens of a + particular type (or, indeed, all tokens). + Release Procedures ------------------ diff -r e141e023ac2b -r 4a0acdfbb1a5 astgrep.py --- a/astgrep.py Mon Oct 27 22:49:46 2008 +0100 +++ b/astgrep.py Wed Oct 29 02:17:19 2008 +0100 @@ -23,8 +23,16 @@ import compiler import os import linecache +import types -__version__ = "0.1" +__version__ = "0.1.1" + +# Excluded AST nodes and their names. + +excluded_term_types = ["Module", "Stmt"] +excluded_term_cls = tuple([getattr(compiler.ast, name) for name in excluded_term_types]) + +# Search functions. def search_recursive(directory, term_type, term, op=None): @@ -44,32 +52,58 @@ """ Search the file with the given 'filename' for terms having the given - 'term_type' whose value matches the specified 'term'. + 'term_type' whose value matches the specified 'term'. If 'term_type' is + given as "*", attempt to match any term type. """ - node = compiler.parseFile(filename) - cls = getattr(compiler.ast, term_type) + try: + node = compiler.parseFile(filename) + except SyntaxError: + return [] + + if term_type != "*": + cls = getattr(compiler.ast, term_type) + else: + cls = None + return search_tree(node, cls, term, op, filename) def search_tree(node, cls, term, op=None, filename=None): """ Search the tree rooted at the given 'node' for nodes of the given class - 'cls' for content matching the specified 'term'. + 'cls' for content matching the specified 'term'. If 'cls' is None, all node + types will be considered for matches. Return a list of results of the form (node, value, filename). """ results = [] - if isinstance(node, cls): + # Ignore excluded nodes. + + if isinstance(node, excluded_term_cls): + pass + + # Test permitted nodes. + + elif cls is None or isinstance(node, cls): if op is None: results.append((node, None, filename)) else: for child in node.getChildren(): - if isinstance(child, (str, unicode, int, float, long, bool)) and op(unicode(child)): - results.append((node, child, filename)) - break + + # Test literals. + + if isinstance(child, (str, int, float, long, bool)): + if op(str(child)): + results.append((node, child, filename)) + + # Only check a single string child value since subsequent + # values are typically docstrings. + + if isinstance(child, str): + break # Search within nodes, even if matches have already been found. @@ -82,29 +116,51 @@ """ Expand the given 'results', making a list containing tuples of the form - (filename, line number, line, value). + (node, filename, line number, line, value). """ expanded = [] for node, value, filename in results: - if filename is not None: - line = linecache.getline(filename, node.lineno).rstrip() + lineno = node.lineno + + if filename is not None and lineno is not None: + line = linecache.getline(filename, lineno).rstrip() else: line = None - expanded.append((filename, node.lineno, line, value)) + expanded.append((node, filename, lineno, line, value)) return expanded +def get_term_types(): + + "Return the term types supported by the module." + + term_types = [] + + for name in dir(compiler.ast): + if name in excluded_term_types: + continue + + obj = getattr(compiler.ast, name) + + if isinstance(obj, types.ClassType) and \ + issubclass(obj, compiler.ast.Node) and \ + name[0].isupper(): + + term_types.append(name) + + return term_types + # Command syntax. syntax_description = """ [ -n | --line-number ] [ -p | --print-token ] - ( ( -t TERM_TYPE ) | ( --type=TERM_TYPE ) ) + [ ( -t TERM_TYPE ) | ( --type=TERM_TYPE ) ] [ ( -e PATTERN ) | ( --regexp=PATTERN ) ] - ( ( ( -r | -R | --recursive ) DIRECTORY ) | FILENAME ) + [ -r | -R | --recursive ] ( FILENAME ... ) """ # Main program. @@ -116,45 +172,61 @@ import sys import cmdsyntax import re + import textwrap # Match command arguments. syntax = cmdsyntax.Syntax(syntax_description) syntax_matches = syntax.get_args(sys.argv[1:]) + show_syntax = 0 try: args = syntax_matches[0] except IndexError: + show_syntax = 1 + + if show_syntax: print "Syntax:" print syntax_description + print "Term types:" + print "\n".join(textwrap.wrap(", ".join(get_term_types()))) sys.exit(1) # Get the search details. - term_type = args["TERM_TYPE"] + term_type = args.get("TERM_TYPE", "*") term = args.get("PATTERN") + recursive = args.has_key("r") or args.has_key("R") or args.has_key("recursive") if term is None: op = None else: op = re.compile(term).search - # Perform the search either in a single file or in a directory hierarchy. + # Perform the search in files and directory hierarchies. + + results = [] - if args.has_key("FILENAME"): - results = search_file(args["FILENAME"], term_type, term, op) - else: - results = search_recursive(args["DIRECTORY"], term_type, term, op) + for filename in args["FILENAME"]: + if os.path.isfile(filename): + results += search_file(filename, term_type, term, op) + elif recursive and os.path.isdir(filename): + results += search_recursive(filename, term_type, term, op) # Present the results. - for filename, lineno, line, value in expand_results(results): + for node, filename, lineno, line, value in expand_results(results): format = "%s:" output = [filename] + # Handle line numbers and missing details. + if args.has_key("n") or args.has_key("line-number"): - format += "%d:" - output.append(lineno) + if lineno is not None: + format += "%d:" + output.append(lineno) + + # Show matching tokens, if requested. if args.has_key("p"): if value is not None: @@ -162,10 +234,13 @@ output.append(value) else: format += "%s:" - output.append("<%s>" % term_type) + output.append("<%s>" % (term_type or "*")) + + # Show lines, if defined. - format += " %s" - output.append(line) + if line is not None: + format += " %s" + output.append(line) print format % tuple(output) diff -r e141e023ac2b -r 4a0acdfbb1a5 packages/ubuntu-gutsy/python-astgrep/debian/astgrep.1 --- a/packages/ubuntu-gutsy/python-astgrep/debian/astgrep.1 Mon Oct 27 22:49:46 2008 +0100 +++ b/packages/ubuntu-gutsy/python-astgrep/debian/astgrep.1 Wed Oct 29 02:17:19 2008 +0100 @@ -11,7 +11,7 @@ astgrep \- grep/search through Python abstract syntax trees .SH SYNOPSIS .B astgrep -[options] \-t TERM_TYPE [ \-e PATTERN ] ( \-r DIRECTORY | FILE ) +[options] [ \-t TERM_TYPE ] [ \-e PATTERN ] [ \-r ] FILE... .SH DESCRIPTION \fBastgrep\fR is a program which searches through Python source files for textual information of a specific type. Instead of matching a search term or @@ -19,9 +19,10 @@ \fBastgrep\fR matches only tokens in the program having a particular type, specified using \fITERM_TYPE\fR, such as names or constants. -Like \fBgrep\fR, a single \fIFILE\fR or a number of files within a directory -hierarchy, \fIDIRECTORY\fR, can be searched, with the occurrences listed from -each file. +Like \fBgrep\fR, a collection of \fIFILE\fRs can be searched, and if the +\fB\-r\fR option is specified, directory hierarchies can also be searched +recursively, with the occurrences listed from each file successfully found +and parsed. .SH COMMAND LINE OPTIONS .TP .BR \-n , " \-\-line-number" @@ -31,13 +32,14 @@ Show the matching token for each match. .TP \fB\-t\fR, \fB\-\-type\fR=\fITERM_TYPE\fR -Indicate the type of token to be matched. +Indicate the type of token to be matched. If \fB*\fR is given, all term types +are tested. .TP \fB\-e\fR, \fB\-\-regexp\fR=\fIPATTERN\fR Use \fIPATTERN\fR as the term to search for. .TP -\fB\-r\fR, \fB\-R\fR, \fB\-\-recursive\fR \fIDIRECTORY\fR -Search Python files within \fIDIRECTORY\fR, recursively. +\fB\-r\fR, \fB\-R\fR, \fB\-\-recursive\fR +Search Python files found within directories. .SH TERM TYPES Details of term types can be found in the "AST Nodes" section of the Python Library Reference or by using \fBpydoc\fR to inspect the node classes in the @@ -76,8 +78,18 @@ .B astgrep -n -t Getattr -e '^_node$' -r libxml2dom .RE .PD +.SH LIMITATIONS +\fBastgrep\fR does not attempt to search docstrings (since line number +information is inaccurate for docstrings in abstract syntax trees) or comments +(since the \fBcompiler\fR package only considers significant syntax when parsing +programs). +.PP +\fBastgrep\fR cannot search syntactically incorrect programs (since the +\fBcompiler\fR package will only return an abstract syntax tree for valid +programs). .SH SEE ALSO .BR python (1), +.BR pydoc (1), .BR grep (1) .PP astgrep diff -r e141e023ac2b -r 4a0acdfbb1a5 packages/ubuntu-gutsy/python-astgrep/debian/changelog --- a/packages/ubuntu-gutsy/python-astgrep/debian/changelog Mon Oct 27 22:49:46 2008 +0100 +++ b/packages/ubuntu-gutsy/python-astgrep/debian/changelog Wed Oct 29 02:17:19 2008 +0100 @@ -1,3 +1,17 @@ +python-astgrep (0.1.1-0ubuntu1) gutsy; urgency=low + + * Supported searching of mixtures of files and + directories. + * Prevented searching of docstrings, since the information + about them is deficient in the abstract syntax trees. + * Removed the term type requirement, permitting searching + in all types. + * Removed the pattern requirement, permitting searching + for all tokens of a particular type (or, indeed, all + tokens). + + -- Paul Boddie Wed, 29 Oct 2008 02:12:19 +0100 + python-astgrep (0.1-0ubuntu1) gutsy; urgency=low * Packaging of upstream sources.