Lichen (annotate common.py in 5e4ac6b12b8f)

Lichen

Annotated common.py

774:6b26d7bf00d6

737:5e4ac6b12b8f

732:fcdcc00b3f66

2017-03-16

Paul Boddie

Merged changes from the default branch.

normal-function-parameters

paul@0	1	#!/usr/bin/env python
paul@0	2
paul@0	3	"""
paul@0	4	Common functions.
paul@0	5
paul@0	6	Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,
paul@508	7	2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>
paul@0	8
paul@0	9	This program is free software; you can redistribute it and/or modify it under
paul@0	10	the terms of the GNU General Public License as published by the Free Software
paul@0	11	Foundation; either version 3 of the License, or (at your option) any later
paul@0	12	version.
paul@0	13
paul@0	14	This program is distributed in the hope that it will be useful, but WITHOUT
paul@0	15	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0	16	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@0	17	details.
paul@0	18
paul@0	19	You should have received a copy of the GNU General Public License along with
paul@0	20	this program. If not, see <http://www.gnu.org/licenses/>.
paul@0	21	"""
paul@0	22
paul@512	23	from compiler.transformer import Transformer
paul@430	24	from errors import InspectError
paul@0	25	from os import listdir, makedirs, remove
paul@609	26	from os.path import exists, getmtime, isdir, join, split
paul@11	27	from results import ConstantValueRef, LiteralSequenceRef, NameRef
paul@405	28	import compiler.ast
paul@0	29
class CommonOutput:

    """
    Output directory management shared by the program's output producers.

    Users of this class are expected to provide 'output' (the output directory
    name) and 'importer' (an object offering 'get_cache_details') attributes.
    """

    def check_output(self, options=None):

        """
        Make sure that the output directory exists and discard its contents
        where they were produced for different cache details or 'options'.
        The details applying to the new output are then recorded.
        """

        if not exists(self.output):
            makedirs(self.output)

        cache_details = self.importer.get_cache_details()
        previous_details = self.get_output_details()

        # Any options are appended to the cache details, separated by a space.

        if options:
            current_details = cache_details + " " + options
        else:
            current_details = cache_details

        # Differing details indicate output that cannot be reused.

        if previous_details != current_details:
            self.remove_output()

        writefile(self.get_output_details_filename(), current_details)

    def get_output_details_filename(self):

        "Return the name of the file recording the output details."

        return join(self.output, "$details")

    def get_output_details(self):

        """
        Return the recorded details of any existing output, or None where no
        details have been recorded.
        """

        recorded_filename = self.get_output_details_filename()

        if exists(recorded_filename):
            return readfile(recorded_filename)

        return None

    def remove_output(self, dirname=None):

        """
        Remove the files found within 'dirname' or, if this is omitted or
        empty, within the output directory. Subdirectories are traversed and
        emptied of files, but the directories themselves are retained.
        """

        if not dirname:
            dirname = self.output

        for entry in listdir(dirname):
            entry_path = join(dirname, entry)

            if not isdir(entry_path):
                remove(entry_path)
            else:
                self.remove_output(entry_path)
paul@0	82
def copy(source, target, only_if_newer=True):

    """
    Copy a text file from 'source' to 'target'.

    If 'target' is an existing directory, the file is copied into it using the
    final path component of 'source' as its name.

    If 'only_if_newer' is a true value (the default), nothing is copied unless
    'source' is reported as newer than the eventual target by 'is_newer'.
    """

    if isdir(target):
        target = join(target, split(source)[-1])

    if only_if_newer and not is_newer(source, target):
        return

    infile = open(source)

    # Open the target inside the protected region so that the source file is
    # closed even if the target cannot be opened for writing.

    try:
        outfile = open(target, "w")
        try:
            outfile.write(infile.read())
        finally:
            outfile.close()
    finally:
        infile.close()
paul@609	101
def is_newer(source, target):

    """
    Return whether 'source' has a later modification time than 'target'. A
    'target' that does not exist is always considered out of date, causing a
    true value to be returned.
    """

    if not exists(target):
        return True

    target_time = getmtime(target)
    source_time = getmtime(source)
    return source_time > target_time
paul@609	112
class CommonModule:

    """
    A common module representation.

    This class provides naming, constant/literal recording and node rewriting
    facilities. It relies on 'process_structure_node' and
    'process_assignment_node' methods which are not defined here and must be
    supplied by classes building upon this one.
    """

    def __init__(self, name, importer):

        """
        Initialise this module with the given 'name' and an 'importer' which is
        used to provide access to other modules when required.
        """

        self.name = name
        self.importer = importer
        self.filename = None

        # Inspection-related attributes.

        self.astnode = None
        self.encoding = None

        # Counters for temporary and lambda names, keyed by namespace path.

        self.temp = {}
        self.lambdas = {}

        # Constants, literals and values.

        # Namespace path -> {(value, type, encoding) -> insertion index}.

        self.constants = {}

        # Object path -> (value, origin, encoding).

        self.constant_values = {}

        # Namespace path -> literal counter.

        self.literals = {}

        # Object path -> origin of the literal's type.

        self.literal_types = {}

        # Nested namespaces.

        self.namespace_path = []
        self.in_function = False

        # Retain the assignment value expression and track invocations.

        self.in_assignment = None
        self.in_invocation = None

        # Attribute chain state management.

        self.attrs = []
        self.chain_assignment = []
        self.chain_invocation = []

    def __repr__(self):
        return "CommonModule(%r, %r)" % (self.name, self.importer)

    def parse_file(self, filename):

        """
        Parse the file with the given 'filename', initialising the 'filename',
        'astnode' and 'encoding' attributes.
        """

        self.filename = filename

        # Use the Transformer directly to obtain encoding information.

        t = Transformer()
        f = open(filename)

        try:
            self.astnode = t.parsesuite(f.read() + "\n")
            self.encoding = t.encoding
        finally:
            f.close()

    # Module-relative naming.

    def get_global_path(self, name):

        "Return the path of 'name' as a global of this module."

        return "%s.%s" % (self.name, name)

    def get_namespace_path(self):

        "Return the dotted path of the current namespace within this module."

        return ".".join([self.name] + self.namespace_path)

    def get_object_path(self, name):

        "Return the dotted path of 'name' within the current namespace."

        return ".".join([self.name] + self.namespace_path + [name])

    def get_parent_path(self):

        "Return the dotted path of the namespace enclosing the current one."

        return ".".join([self.name] + self.namespace_path[:-1])

    # Namespace management.

    def enter_namespace(self, name):

        "Enter the namespace having the given 'name'."

        self.namespace_path.append(name)

    def exit_namespace(self):

        "Exit the current namespace."

        self.namespace_path.pop()

    # Constant reference naming.

    def get_constant_name(self, value, value_type, encoding=None):

        """
        Add a new constant to the current namespace for 'value' with
        'value_type' and the optional 'encoding', returning its name. The same
        combination of details always yields the same name within a namespace.
        """

        path = self.get_namespace_path()
        init_item(self.constants, path, dict)
        return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))

    # Literal reference naming.

    def get_literal_name(self):

        """
        Return the name of the next literal in the current namespace,
        initialising the namespace's literal counter if necessary. The counter
        is only advanced by 'next_literal'.
        """

        path = self.get_namespace_path()
        init_item(self.literals, path, lambda: 0)
        return "$C%d" % self.literals[path]

    def next_literal(self):

        """
        Advance the literal counter for the current namespace. The counter
        must already exist, as arranged by 'get_literal_name'.
        """

        self.literals[self.get_namespace_path()] += 1

    # Temporary variable naming.

    def get_temporary_name(self):

        """
        Return the name of the next temporary variable in the current
        namespace, initialising the namespace's counter if necessary. The
        counter is only advanced by 'next_temporary'.
        """

        path = self.get_namespace_path()
        init_item(self.temp, path, lambda: 0)
        return "$t%d" % self.temp[path]

    def next_temporary(self):

        """
        Advance the temporary variable counter for the current namespace. The
        counter must already exist, as arranged by 'get_temporary_name'.
        """

        self.temp[self.get_namespace_path()] += 1

    # Arbitrary function naming.

    def get_lambda_name(self):

        """
        Return a new lambda function name for the current namespace, advancing
        the namespace's lambda counter.
        """

        path = self.get_namespace_path()
        init_item(self.lambdas, path, lambda: 0)
        name = "$l%d" % self.lambdas[path]
        self.lambdas[path] += 1
        return name

    def reset_lambdas(self):

        "Discard the lambda counters for all namespaces."

        self.lambdas = {}

    # Constant and literal recording.

    def get_constant_value(self, value, literals=None):

        """
        Encode the 'value' if appropriate, returning a value, a typename and any
        encoding. Where 'value' is a plain string and the module has an
        encoding, the given 'literals' are passed to 'get_string_details' to
        obtain the result.
        """

        if isinstance(value, unicode):
            return value.encode("utf-8"), "unicode", self.encoding

        # Attempt to convert plain strings to text.

        elif isinstance(value, str) and self.encoding:
            try:
                return get_string_details(literals, self.encoding)

            # Strings that cannot be decoded are handled as plain values below.

            except UnicodeDecodeError:
                pass

        return value, value.__class__.__name__, None

    def get_constant_reference(self, ref, value, encoding=None):

        """
        Return a constant reference for the given 'ref' type and 'value', with
        the optional 'encoding' applying to text values.
        """

        constant_name = self.get_constant_name(value, ref.get_origin(), encoding)

        # Return a reference for the constant.

        objpath = self.get_object_path(constant_name)
        name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)

        # Record the value and type for the constant.

        self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)
        return name_ref

    def reserve_constant(self, objpath, value, origin, encoding=None):

        """
        Reserve a constant with the given 'value' and having a type with the
        given 'origin', with the optional 'encoding' applying to text values.

        Note that the supplied 'objpath' is not consulted: the constant is
        named and stored using the current namespace.
        """

        # Include the encoding when naming the constant so that the name agrees
        # with that produced by get_constant_reference for the same details.

        constant_name = self.get_constant_name(value, origin, encoding)
        objpath = self.get_object_path(constant_name)
        self._reserve_constant(objpath, value, origin, encoding)

    def _reserve_constant(self, objpath, value, origin, encoding):

        """
        Store a constant for 'objpath' with the given 'value' and 'origin', with
        the optional 'encoding' applying to text values.
        """

        self.constant_values[objpath] = value, origin, encoding

    def get_literal_reference(self, name, ref, items, cls):

        """
        Return a literal reference for the given type 'name', literal 'ref',
        node 'items' and employing the given 'cls' as the class of the returned
        reference object.
        """

        # Construct an invocation using the items as arguments.

        typename = "$L%s" % name

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name(typename),
            items
            )

        # Get a name for the actual literal.

        instname = self.get_literal_name()
        self.next_literal()

        # Record the type for the literal.

        objpath = self.get_object_path(instname)
        self.literal_types[objpath] = ref.get_origin()

        # Return a wrapper for the invocation exposing the items.

        return cls(
            instname,
            ref.instance_of(),
            self.process_structure_node(invocation),
            invocation.args
            )

    # Node handling.

    def process_structure(self, node):

        """
        Within the given 'node', process the program structure, returning a
        list of the results obtained for each child node.

        During inspection, this will process global declarations, adjusting the
        module namespace, and import statements, building a module dependency
        hierarchy.

        During translation, this will consult deduced program information and
        output translated code.
        """

        l = []
        for n in node.getChildNodes():
            l.append(self.process_structure_node(n))
        return l

    def process_augassign_node(self, n):

        """
        Process the given augmented assignment node 'n', rewriting it as a
        plain assignment of an operator function invocation to the target.
        """

        op = operator_functions[n.op]

        # Convert access nodes into their assignment counterparts.

        if isinstance(n.node, compiler.ast.Getattr):
            target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")
        elif isinstance(n.node, compiler.ast.Name):
            target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")
        else:
            target = n.node

        assignment = compiler.ast.Assign(
            [target],
            compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [n.node, n.expr]))

        return self.process_structure_node(assignment)

    def process_assignment_for_object(self, original_name, source):

        """
        Return an assignment operation making 'original_name' refer to the given
        'source'.
        """

        assignment = compiler.ast.Assign(
            [compiler.ast.AssName(original_name, "OP_ASSIGN")],
            source
            )

        return self.process_structure_node(assignment)

    def process_assignment_node_items(self, n, expr):

        """
        Process the given assignment node 'n' whose children are to be assigned
        items of 'expr'. No result is returned.
        """

        name_ref = self.process_structure_node(expr)

        # Either unpack the items and present them directly to each assignment
        # node.

        if isinstance(name_ref, LiteralSequenceRef) and \
           self.process_literal_sequence_items(n, name_ref):

            pass

        # Or have the assignment nodes access each item via the sequence API.

        else:
            self.process_assignment_node_items_by_position(n, expr, name_ref)

    def process_assignment_node_items_by_position(self, n, expr, name_ref):

        """
        Process the given sequence assignment node 'n', converting the node to
        the separate assignment of each target using positional access on a
        temporary variable representing the sequence. Use 'expr' as the assigned
        value and 'name_ref' as the reference providing any existing temporary
        variable.
        """

        assignments = []

        # Employ existing names to access the sequence.
        # Literal sequences do not provide names of accessible objects.

        if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):
            temp = name_ref.name

        # For other expressions, create a temporary name to reference the items.

        else:
            temp = self.get_temporary_name()
            self.next_temporary()

            assignments.append(
                compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)
                )

        # Assign the items to the target nodes.

        for i, node in enumerate(n.nodes):
            assignments.append(
                compiler.ast.Assign([node], compiler.ast.Subscript(
                    compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))
                )

        return self.process_structure_node(compiler.ast.Stmt(assignments))

    def process_literal_sequence_items(self, n, name_ref):

        """
        Process the given assignment node 'n', obtaining from the given
        'name_ref' the items to be assigned to the assignment targets.

        Return whether this method was able to process the assignment node as
        a sequence of direct assignments.

        An InspectError is raised if the number of targets differs from the
        number of items.
        """

        if len(n.nodes) == len(name_ref.items):
            assigned_names, count = get_names_from_nodes(n.nodes)
            accessed_names, _count = get_names_from_nodes(name_ref.items)

            # Only assign directly between items if all assigned names are
            # plain names (not attribute assignments), and if the assigned names
            # do not appear in the accessed names.

            if len(assigned_names) == count and \
               not assigned_names.intersection(accessed_names):

                for node, item in zip(n.nodes, name_ref.items):
                    self.process_assignment_node(node, item)

                return True

            # Otherwise, use the position-based mechanism to obtain values.

            else:
                return False
        else:
            raise InspectError("In %s, item assignment needing %d items is given %d items." % (
                self.get_namespace_path(), len(n.nodes), len(name_ref.items)))

    def process_compare_node(self, n):

        """
        Process the given comparison node 'n', converting an operator sequence
        from...

        <expr1> <op1> <expr2> <op2> <expr3>

        ...to...

        <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)
        """

        invocations = []
        last = n.expr

        for op, op_node in n.ops:

            # NOTE(review): unlike other operator lookups in this class, an
            # unknown operator is not rejected here and would yield "$opNone".

            op = operator_functions.get(op)

            invocations.append(compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [last, op_node]))

            last = op_node

        if len(invocations) > 1:
            result = compiler.ast.And(invocations)
        else:
            result = invocations[0]

        return self.process_structure_node(result)

    def process_dict_node(self, node):

        """
        Process the given dictionary 'node', returning a list of (key, value)
        tuples.
        """

        l = []
        for key, value in node.items:
            l.append((
                self.process_structure_node(key),
                self.process_structure_node(value)))
        return l

    def process_for_node(self, n):

        """
        Generate attribute accesses for {n.list}.__iter__ and the next method on
        the iterator, producing a replacement node for the original. The
        replacement is processed but no result is returned.
        """

        t0 = self.get_temporary_name()
        self.next_temporary()
        t1 = self.get_temporary_name()
        self.next_temporary()

        # NOTE: this third temporary is not referenced by the generated nodes,
        # but obtaining it advances the namespace's temporary counter.

        i0 = self.get_temporary_name()
        self.next_temporary()

        node = compiler.ast.Stmt([

            # <t0> = {n.list}
            # <t1> = <t0>.__iter__()

            compiler.ast.Assign(
                [compiler.ast.AssName(t0, "OP_ASSIGN")],
                n.list),

            compiler.ast.Assign(
                [compiler.ast.AssName(t1, "OP_ASSIGN")],
                compiler.ast.CallFunc(
                    compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),
                    [])),

            # try:
            #     while True:
            #         <var>... = <t1>.next()
            #         ...
            # except StopIteration:
            #     pass

            compiler.ast.TryExcept(
                compiler.ast.While(
                    compiler.ast.Name("True"),
                    compiler.ast.Stmt([
                        compiler.ast.Assign(
                            [n.assign],
                            compiler.ast.CallFunc(
                                compiler.ast.Getattr(compiler.ast.Name(t1), "next"),
                                []
                                )),
                        n.body]),
                    None),
                [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],
                None)
            ])

        self.process_structure_node(node)

    def process_literal_sequence_node(self, n, name, ref, cls):

        """
        Process the given literal sequence node 'n' as a function invocation,
        with 'name' indicating the type of the sequence, and 'ref' being a
        reference to the type. The 'cls' is used to instantiate a suitable name
        reference.
        """

        # Dictionary items are presented as (key, value) tuple nodes.

        if name == "dict":
            items = []
            for key, value in n.items:
                items.append(compiler.ast.Tuple([key, value]))
        else: # name in ("list", "tuple"):
            items = n.nodes

        return self.get_literal_reference(name, ref, items, cls)

    def process_operator_node(self, n):

        """
        Process the given operator node 'n' as an operator function invocation.
        The operator function is selected using the name of the node's class.
        """

        op = operator_functions[n.__class__.__name__]
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            list(n.getChildNodes())
            )
        return self.process_structure_node(invocation)

    def process_print_node(self, n):

        """
        Process the given print node 'n' as an invocation on a stream of the
        form...

        $print(dest, args, nl)

        The special function name will be translated elsewhere.
        """

        nl = isinstance(n, compiler.ast.Printnl)
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$print"),
            [n.dest or compiler.ast.Name("None"),
             compiler.ast.List(list(n.nodes)),
             nl and compiler.ast.Name("True") or compiler.ast.Name("False")]
            )
        return self.process_structure_node(invocation)

    def process_slice_node(self, n, expr=None):

        """
        Process the given slice node 'n' as an operator function invocation,
        with any given 'expr' being supplied as the final argument. Absent
        bounds are represented using the name None.
        """

        if n.flags == "OP_ASSIGN": op = "setslice"
        elif n.flags == "OP_DELETE": op = "delslice"
        else: op = "getslice"

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +
                (expr and [expr] or [])
            )

        # Fix parse tree structure.

        if op == "delslice":
            invocation = compiler.ast.Discard(invocation)

        return self.process_structure_node(invocation)

    def process_sliceobj_node(self, n):

        """
        Process the given slice object node 'n' as a slice constructor.
        """

        op = "slice"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            n.nodes
            )
        return self.process_structure_node(invocation)

    def process_subscript_node(self, n, expr=None):

        """
        Process the given subscript node 'n' as an operator function invocation,
        with any given 'expr' being supplied as the final argument.
        """

        if n.flags == "OP_ASSIGN": op = "setitem"
        elif n.flags == "OP_DELETE": op = "delitem"
        else: op = "getitem"

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr] + list(n.subs) + (expr and [expr] or [])
            )

        # Fix parse tree structure.

        if op == "delitem":
            invocation = compiler.ast.Discard(invocation)

        return self.process_structure_node(invocation)

    def process_attribute_chain(self, n):

        """
        Process the given attribute access node 'n'. Return a reference
        describing the expression.
        """

        # AssAttr/Getattr are nested with the outermost access being the last
        # access in any chain.

        self.attrs.insert(0, n.attrname)
        attrs = self.attrs

        # Break attribute chains where non-access nodes are found.

        if not self.have_access_expression(n):
            self.reset_attribute_chain()

        # Descend into the expression, extending backwards any existing chain,
        # or building another for the expression.

        name_ref = self.process_structure_node(n.expr)

        # Restore chain information applying to this node.

        if not self.have_access_expression(n):
            self.restore_attribute_chain(attrs)

        # Return immediately if the expression was another access and thus a
        # continuation backwards along the chain. The above processing will
        # have followed the chain all the way to its conclusion.

        if self.have_access_expression(n):
            del self.attrs[0]

        return name_ref

    # Attribute chain handling.

    def reset_attribute_chain(self):

        """
        Reset the attribute chain for a subexpression of an attribute access,
        saving the current assignment and invocation state for later
        restoration.
        """

        self.attrs = []
        self.chain_assignment.append(self.in_assignment)
        self.chain_invocation.append(self.in_invocation)
        self.in_assignment = None
        self.in_invocation = None

    def restore_attribute_chain(self, attrs):

        """
        Restore the attribute chain for an attribute access using the given
        'attrs', reinstating the most recently saved assignment and invocation
        state.
        """

        self.attrs = attrs
        self.in_assignment = self.chain_assignment.pop()
        self.in_invocation = self.chain_invocation.pop()

    def have_access_expression(self, node):

        "Return whether the expression associated with 'node' is Getattr."

        return isinstance(node.expr, compiler.ast.Getattr)

    def get_name_for_tracking(self, name, name_ref=None, is_global=False):

        """
        Return the name to be used for attribute usage observations involving
        the given 'name' in the current namespace.

        If the name is being used outside a function, and if 'name_ref' is
        given and indicates a global or if 'is_global' is specified as a true
        value, a path featuring the name in the global namespace is returned.
        Otherwise, a path computed using the current namespace and the given
        name is returned.

        The intention of this method is to provide a suitably-qualified name
        that can be tracked across namespaces. Where globals are being
        referenced in class namespaces, they should be referenced using their
        path within the module, not using a path within each class.

        It may not be possible to identify a global within a function at the
        time of inspection (since a global may appear later in a file).
        Consequently, globals are identified by their local name rather than
        their module-qualified path.
        """

        # For functions, use the appropriate local names.

        if self.in_function:
            return name

        # For global names outside functions, use a global name.

        elif is_global or name_ref and name_ref.is_global_name():
            return self.get_global_path(name)

        # Otherwise, establish a name in the current namespace.

        else:
            return self.get_object_path(name)

    def get_path_for_access(self):

        """
        Return the path at which accesses are registered: outside functions,
        this is the module name; inside functions, the current namespace path.
        """

        if not self.in_function:
            return self.name
        else:
            return self.get_namespace_path()

    def get_module_name(self, node):

        """
        Using the given From 'node' in this module, calculate any relative import
        information, returning a tuple containing a module to import along with any
        names to import based on the node's name information.

        Where the returned module is given as None, whole module imports should
        be performed for the returned modules using the returned names.

        An InspectError is raised if the relative import involves more levels
        than this module's name provides.
        """

        # Absolute import.

        if node.level == 0:
            return node.modname, node.names

        # Relative to an ancestor of this module.

        else:
            path = self.name.split(".")
            level = node.level

            # Relative imports treat package roots as submodules.

            if split(self.filename)[-1] == "__init__.py":
                level -= 1

            if level > len(path):
                raise InspectError("Relative import %r involves too many levels up from module %r" % (
                    ("%s%s" % ("." * node.level, node.modname or "")), self.name))

            # NOTE(review): where level equals the number of path components,
            # the basename is empty and any module name gains a leading dot.

            basename = ".".join(path[:len(path)-level])

        # Name imports from a module.

        if node.modname:
            return "%s.%s" % (basename, node.modname), node.names

        # Relative whole module imports.

        else:
            return basename, node.names
paul@0	862
def get_argnames(args):

    """
    Flatten 'args' into a single list of names. Any tuple encountered is
    expanded in place, at any depth of nesting, so that the names appear in
    depth-first order.
    """

    names = []

    # Maintain a stack of partially consumed iterators instead of recursing.

    pending = [iter(args)]

    while pending:
        for arg in pending[-1]:
            if isinstance(arg, tuple):

                # Descend into the tuple, resuming the enclosing sequence
                # once the tuple has been exhausted.

                pending.append(iter(arg))
                break

            names.append(arg)
        else:
            pending.pop()

    return names
paul@0	877
def get_names_from_nodes(nodes):

    """
    Return a tuple containing the set of names found in the given 'nodes' and
    a count of the nodes encountered. Sequence nodes are not counted
    themselves: their elements are examined instead, contributing their own
    names and counts.
    """

    found = set()
    total = 0

    for node in nodes:

        # Sequences contribute the names and counts of their elements.

        if isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,
                             compiler.ast.List, compiler.ast.Set,
                             compiler.ast.Tuple)):
            nested_found, nested_total = get_names_from_nodes(node.nodes)
            found.update(nested_found)
            total += nested_total

        # All other nodes are counted, with only name nodes providing names.

        else:
            total += 1
            if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):
                found.add(node.name)

    return found, total
paul@509	911
paul@491	912	# Result classes.
paul@491	913
class InstructionSequence:

    "A generic sequence of instructions."

    def __init__(self, instructions):

        "Initialise the sequence with the given 'instructions'."

        self.instructions = instructions

    def get_value_instruction(self):

        "Return the last of the instructions."

        instructions = self.instructions
        return instructions[-1]

    def get_init_instructions(self):

        "Return all of the instructions except the last."

        instructions = self.instructions
        return instructions[:-1]
paul@491	926
paul@0	927	# Dictionary utilities.
paul@0	928
def init_item(d, key, fn):

    """
    Ensure that 'd' has an entry for 'key', calling 'fn' without arguments to
    make the initial value only where no entry already exists.

    Return the value held in 'd' for 'key'.
    """

    # Membership is tested using "in" instead of the deprecated has_key method.

    if key not in d:
        d[key] = fn()
    return d[key]
paul@0	939
def dict_for_keys(d, keys):

    """
    Return a new dictionary containing entries from 'd' for the given 'keys'.
    Keys without an entry in 'd' are omitted from the result.
    """

    nd = {}
    for key in keys:

        # Membership is tested using "in" instead of the deprecated has_key
        # method.

        if key in d:
            nd[key] = d[key]
    return nd
paul@0	949
def make_key(s):

    "Return a tuple of the items in sequence 's', sorted, for use as a key."

    return tuple(sorted(s))
paul@0	957
def add_counter_item(d, key):

    """
    Make a mapping in 'd' for 'key' to the number of keys added before it, thus
    maintaining a mapping of keys to their order of insertion. Existing entries
    are left unchanged.

    Return the number recorded in 'd' for 'key'.
    """

    # The size of the mapping is obtained directly instead of through a
    # temporary list of keys, and "in" replaces the deprecated has_key method.

    if key not in d:
        d[key] = len(d)
    return d[key]
paul@0	968
def remove_items(d1, d2):

    """
    Remove from 'd1' all entries whose keys are present in 'd2'. Keys of 'd2'
    without an entry in 'd1' are ignored. Nothing is returned.
    """

    # Iterate over a snapshot of the keys so that the operation remains safe
    # even if 'd1' and 'd2' are the same mapping.

    for key in list(d2.keys()):
        if key in d1:
            del d1[key]
paul@0	976
paul@0	977	# Set utilities.
paul@0	978
def first(s):

    "Return the first item obtained from 's' when converted to a list."

    items = list(s)
    return items[0]
paul@0	981
def same(s1, s2):

    "Return whether 's1' and 's2' provide the same set of items."

    left = set(s1)
    right = set(s2)
    return left == right
paul@0	984
def order_dependencies(all_depends):

    """
    Produce a dependency ordering for the 'all_depends' mapping. This mapping
    has the form "A depends on B, C...". The result is a list ordering A, B, C,
    and so on, built by repeatedly inserting items needed by no remaining item
    at the start of the list.

    Raise ValueError, providing the remaining reverse dependencies, where no
    remaining item is free of users, as occurs with cyclic dependencies.
    """

    usage = init_reverse_dependencies(all_depends)

    # Produce an ordering by obtaining exposed items (required by items already
    # processed) and putting them at the start of the list.

    ordered = []

    while usage:
        have_next = False

        # Iterate over a snapshot of the records since removing a dependency
        # deletes entries from the mapping. The sets in the snapshot are still
        # the live records, so items exposed during a pass are also handled.

        for key, n in list(usage.items()):

            # Add items needed by no other items to the ordering.

            if not n:
                remove_dependency(key, all_depends, usage, ordered)
                have_next = True

        # Without progress, the remaining items all need each other.

        if not have_next:
            raise ValueError(usage)

    return ordered
paul@726	1015
def order_dependencies_partial(all_depends):

    """
    Produce a dependency ordering for the 'all_depends' mapping. This mapping
    has the form "A depends on B, C...". The result will order A, B, C, and so
    on. Where cycles exist, they will be broken and a partial ordering returned.

    The given mapping is not modified: a copy is made whose values are sets.
    """

    usage = init_reverse_dependencies(all_depends)

    # Duplicate the dependencies for subsequent modification.

    new_depends = {}
    for dependent, requirements in all_depends.items():
        new_depends[dependent] = set(requirements)

    all_depends = new_depends

    # Produce an ordering by obtaining exposed items (required by items already
    # processed) and putting them at the start of the list.

    ordered = []

    while usage:
        least = None
        least_key = None

        # Iterate over a snapshot of the records since removing a dependency
        # deletes entries from the mapping. The sets in the snapshot are still
        # the live records.

        for key, n in list(usage.items()):

            # Add items needed by no other items to the ordering. Setting the
            # least usage to zero suppresses cycle breaking for this pass.

            if not n:
                remove_dependency(key, all_depends, usage, ordered)
                least = 0

            # When breaking cycles, note the least used items.

            elif least is None or len(n) < least:
                least_key = key
                least = len(n)

        # Only where no item could be removed is a cycle broken, by
        # transferring the dependencies of the least used item.

        if least:
            transfer_dependencies(least_key, all_depends, usage, ordered)

    return ordered
paul@726	1061
def init_reverse_dependencies(all_depends):

    """
    Invert 'all_depends', a mapping of the form "A depends on B, C...", to
    produce and return a mapping of the form "B is needed by A", "C is needed
    by A", and so on, where each value is a set.

    Every key of 'all_depends' and every item depended upon has an entry in the
    result, with an empty set being recorded for items needed by nothing.
    """

    # Give every item having dependencies an initially empty record.

    usage = dict([(key, set()) for key in all_depends.keys()])

    # Note each item against the record of everything it depends on, creating
    # records for items only appearing as dependencies.

    for key, depends in all_depends.items():
        for depend in depends:
            init_item(usage, depend, set).add(key)

    return usage
paul@726	1083
def transfer_dependencies(key, all_depends, usage, ordered):

    """
    Transfer items needed by 'key' to those items needing 'key', found using
    'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'
    collection of dependencies.

    If "A is needed by X" and "B is needed by A", then transferring items needed
    by A will cause "B is needed by X" to be recorded as a consequence.

    Transferring items also needs to occur in the reverse mapping, so that
    "A needs B" and "X needs A", then the consequence must be recorded as
    "X needs B".

    Both 'all_depends' and 'usage' are modified in place: the entry for 'key'
    is deleted from 'usage', and also from 'all_depends' where 'key' has
    dependencies. The values of both mappings are modified using set methods.
    Nothing is returned.
    """

    # The key is placed at the start of the ordering.

    ordered.insert(0, key)

    needing = usage[key] # A is needed by X
    needed = all_depends.get(key) # A needs B

    # Replace the key in the dependencies of each item needing it.

    if needing:
        for depend in needing:
            l = all_depends.get(depend)

            # Ignore items without any recorded dependencies.

            if not l:
                continue

            l.remove(key) # X needs (A)

            if needed:
                l.update(needed) # X needs B...

                # Prevent self references.

                if depend in needed:
                    l.remove(depend)

    # Replace the key in the usage records of each item it needs.

    if needed:
        for depend in needed:
            l = usage.get(depend)

            # Ignore items without any recorded users.

            if not l:
                continue

            l.remove(key) # B is needed by (A)
            l.update(needing) # B is needed by X...

            # Prevent self references.

            if depend in needing:
                l.remove(depend)

    # Discard the records for the key itself. An entry in 'all_depends' is only
    # deleted where dependencies were found for the key.

    if needed:
        del all_depends[key]
    del usage[key]
paul@726	1137
def remove_dependency(key, all_depends, usage, ordered):

    """
    Insert 'key' at the start of the 'ordered' collection of dependencies and
    delete its record from 'usage'.

    Each item that 'key' depends on, according to 'all_depends', has 'key'
    removed from its own 'usage' record. So, if "A needs B" and A is removed,
    "B is needed by A" will no longer be recorded.
    """

    ordered.insert(0, key)

    # Reduce usage of the referenced items, if there are any.

    for depend in all_depends.get(key) or ():
        usage[depend].remove(key)

    del usage[key]
paul@724	1163
paul@0	1164	# General input/output.
paul@0	1165
def readfile(filename):

    """
    Return the entire contents of the file having the given 'filename', with
    the file being closed even if reading fails.
    """

    infile = open(filename)
    try:
        contents = infile.read()
    finally:
        infile.close()
    return contents
paul@0	1175
def writefile(filename, s):

    """
    Write the string 's' to the file having the given 'filename', replacing
    any existing contents. The file is closed even if writing fails.
    """

    outfile = open(filename, "w")
    try:
        outfile.write(s)
    finally:
        outfile.close()
paul@0	1185
paul@0	1186	# General encoding.
paul@0	1187
def sorted_output(x):

    """
    Convert the values in sequence 'x' to strings, sort the strings, and return
    a single string with commas and spaces separating them.
    """

    # The sorted function accepts any iterable, unlike the sort method which is
    # only available where map returns a list.

    return ", ".join(sorted(map(str, x)))
paul@0	1195
def get_string_details(literals, encoding):

    """
    Determine whether 'literals' represent Unicode strings or byte strings,
    using 'encoding' to reproduce byte sequences.

    Each literal is the full program representation including prefix and quotes
    recoded by the parser to UTF-8. Thus, any literal found to represent a byte
    string needs to be translated back to its original encoding.

    The literals are combined in order, with the combination being regarded as
    a byte string if any single literal is regarded as one.

    Return a single encoded literal value, a type name, and the original
    encoding as a tuple.
    """

    parts = []
    typename = "unicode"

    for literal in literals:
        text, literal_typename = get_literal_details(literal)
        parts.append(text)

        # A single byte string literal makes the whole value a byte string.

        if literal_typename == "str":
            typename = "str"

    combined = "".join(parts)

    # For Unicode values, convert to the UTF-8 program representation.
    # For byte string values, convert back to the original encoding.

    if typename == "unicode":
        target_encoding = "utf-8"
    else:
        target_encoding = encoding

    return combined.encode(target_encoding), typename, encoding
paul@537	1231
def get_literal_details(s):

    """
    Determine whether 's' represents a Unicode string or a byte string, where
    's' contains the full program representation of a literal including prefix
    and quotes, recoded by the parser to UTF-8.

    Find and convert Unicode values starting with <backslash>u or <backslash>U,
    and byte or Unicode values starting with <backslash><octal digit> or
    <backslash>x.

    Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x
    to be considered as Unicode values. Otherwise, they produce byte values and
    cause unprefixed strings to be considered as byte strings.

    Literals prefixed with "r" do not have their backslash-encoded values
    converted unless also prefixed with "u", in which case only the above value
    formats are converted, not any of the other special sequences for things
    like newlines.

    NOTE(review): <backslash>u and <backslash>U values are converted for any
    literal still regarded as Unicode, which appears to include literals having
    only an "r" prefix, contrary to the above -- confirm the intended behaviour.

    Return the literal value as a Unicode object together with the appropriate
    type name in a tuple.

    Raise ValueError where the literal has an unsupported prefix.
    """

    l = []

    # Identify the quote character and use it to identify the prefix.

    quote_type = s[-1]
    prefix_end = s.find(quote_type)
    prefix = s[:prefix_end].lower()

    if prefix not in ("", "b", "br", "r", "u", "ur"):
        raise ValueError("String literal does not have a supported prefix: %s" % s)

    if "b" in prefix:
        typename = "str"
    else:
        typename = "unicode"

    # Identify triple quotes or single quotes, locating the literal's content.

    if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:
        current = prefix_end + 3
        end = len(s) - 3
    else:
        current = prefix_end + 1
        end = len(s) - 1

    # Conversions of some quoted values, giving the number of characters
    # occupied including the backslash, together with the numerical base.

    searches = {
        "u" : (6, 16),
        "U" : (10, 16),
        "x" : (4, 16),
        }

    # Octal digits are held in a tuple permitting repeated membership tests.
    # Octal values are assumed to occupy a backslash plus three digits.

    octal_digits = tuple("01234567")

    # Translations of some quoted values.

    escaped = {
        "\\" : "\\", "'" : "'", '"' : '"',
        "a" : "\a", "b" : "\b", "f" : "\f",
        "n" : "\n", "r" : "\r", "t" : "\t",
        }

    while current < end:

        # Look for quoted values. A backslash at the very end of the content is
        # retained as ordinary text.

        index = s.find("\\", current)
        if index == -1 or index + 1 == end:
            l.append(s[current:end])
            break

        # Add the preceding text.

        l.append(s[current:index])

        # Handle quoted text.

        term = s[index+1]

        # Add Unicode values. Where a string is u-prefixed, even \o and \x
        # produce Unicode values.

        if typename == "unicode" and (
            term in ("u", "U") or
            "u" in prefix and (term == "x" or term in octal_digits)):

            needed, base = searches.get(term, (4, 8))
            value = convert_quoted_value(s, index, needed, end, base, unichr)
            l.append(value)
            current = index + needed

        # Add raw byte values, changing the string type.

        elif "r" not in prefix and (
            term == "x" or term in octal_digits):

            needed, base = searches.get(term, (4, 8))
            value = convert_quoted_value(s, index, needed, end, base, chr)
            l.append(value)
            typename = "str"
            current = index + needed

        # Add other escaped values.

        elif "r" not in prefix and term in escaped:
            l.append(escaped[term])
            current = index + 2

        # Add other text as found.

        else:
            l.append(s[index:index+2])
            current = index + 2

    # Collect the components into a single Unicode object. Since the literal
    # text was already in UTF-8 form, interpret plain strings as UTF-8
    # sequences.

    # NOTE(review): values produced above using chr are also plain strings, and
    # those of 0x80 and above are presumably not valid UTF-8 by themselves, so
    # the decoding below looks likely to fail for them -- confirm.

    out = []

    for value in l:
        if isinstance(value, unicode):
            out.append(value)
        else:
            out.append(unicode(value, "utf-8"))

    return "".join(out), typename
paul@512	1366
def convert_quoted_value(s, index, needed, end, base, fn):

    """
    Interpret a quoted value in 's' starting at 'index', this being the
    position of the backslash, and occupying the given 'needed' number of
    positions, with 'end' indicating the first position after the end of the
    actual string content.

    Use 'base' as the numerical base when interpreting the value, and use 'fn'
    to convert the resulting number to an appropriate type.

    Return the result of 'fn' for a well-formed value. Otherwise, return the
    text found, which will be shorter than 'needed' if cut short by 'end'.
    """

    text = s[index:min(index+needed, end)]

    # Not a complete occurrence.

    if len(text) < needed:
        return text

    # Skip the backslash and, except for octal values, the following letter.

    if base == 8:
        start = 1
    else:
        start = 2

    # Test for a well-formed value.

    try:
        value = int(text[start:needed], base)
    except ValueError:
        return text

    return fn(value)
paul@512	1394
paul@0	1395	# Attribute chain decoding.
paul@0	1396
def get_attrnames(attrnames):

    """
    Return a list of the components of the qualified attribute chain
    'attrnames'. Special attributes starting with "#", indicating type
    conformance, are not split and are returned as the only component.
    """

    if not attrnames.startswith("#"):
        return attrnames.split(".")

    return [attrnames]
paul@0	1409
def get_attrname_from_location(location):

    """
    Return the first attribute name from the attribute names employed in the
    given 'location', this being a tuple of path, name, attribute names and
    access details. Where no attribute names are given, they are returned
    unchanged.
    """

    _path, _name, attrnames, _access = location

    if attrnames:
        return get_attrnames(attrnames)[0]

    return attrnames
paul@0	1421
def get_name_path(path, name):

    """
    Return a qualified name for 'name'. Where 'name' already contains a dot, it
    is returned unchanged. Otherwise, it is qualified using 'path'.
    """

    if "." not in name:
        return "%s.%s" % (path, name)

    return name
paul@85	1430
paul@90	1431	# Usage-related functions.
paul@89	1432
def get_types_for_usage(attrnames, objects):

    """
    Return a list of the names in 'objects', a mapping from names to attribute
    names, whose attribute names include all of the given 'attrnames'.
    """

    return [name for name, supported in objects.items()
            if set(attrnames).issubset(supported)]
paul@89	1445
def get_invoked_attributes(usage):

    """
    Return a list of the attribute names in the given 'usage' that are marked
    as invoked, where 'usage' provides tuples of attribute name, invocation
    status and assignment status. Without any 'usage', the list is empty.
    """

    if not usage:
        return []

    return [attrname for attrname, invocation, assignment in usage
            if invocation]
paul@90	1456
def get_assigned_attributes(usage):

    """
    Return a list of the attribute names in the given 'usage' that are marked
    as assigned, where 'usage' provides tuples of attribute name, invocation
    status and assignment status. Without any 'usage', the list is empty.
    """

    if not usage:
        return []

    return [attrname for attrname, invocation, assignment in usage
            if assignment]
paul@107	1467
paul@366	1468	# Type and module functions.
paul@538	1469	# NOTE: This makes assumptions about the __builtins__ structure.
paul@366	1470
def get_builtin_module(name):

    """
    Return the name of the module within __builtins__ containing the given type
    'name'. Apart from a few specially named cases, the module is named after
    the type.
    """

    special_modules = {
        "string" : "str",
        "utf8string" : "unicode",
        "NoneType" : "none",
        }

    return "__builtins__.%s" % special_modules.get(name, name)
paul@366	1485
def get_builtin_type(name):

    """
    Return the type name provided by the given Python value 'name', with "str"
    giving "string", "unicode" giving "utf8string", and any other name being
    returned unchanged.
    """

    special_types = {
        "str" : "string",
        "unicode" : "utf8string",
        }

    return special_types.get(name, name)
paul@366	1496
def get_builtin_class(name):

    """
    Return the full name of the built-in class having the given 'name', this
    combining the module name for the class with its type name.
    """

    typename = get_builtin_type(name)
    return "%s.%s" % (get_builtin_module(typename), typename)
paul@538	1504
paul@0	1505	# Useful data.
paul@0	1506
# The names of the predefined constants, held in a tuple.

predefined_constants = ("False", "None", "NotImplemented", "True")
paul@0	1508
# A mapping from operator symbols and operator node names to the names of the
# corresponding operator functions.

operator_functions = {

    # Fundamental operations.

    "is" : "is_",
    "is not" : "is_not",

    # Binary operations.

    "in" : "in_",
    "not in" : "not_in",
    "Add" : "add",
    "Bitand" : "and_",
    "Bitor" : "or_",
    "Bitxor" : "xor",
    "Div" : "div",
    "FloorDiv" : "floordiv",
    "LeftShift" : "lshift",
    "Mod" : "mod",
    "Mul" : "mul",
    "Power" : "pow",
    "RightShift" : "rshift",
    "Sub" : "sub",

    # Unary operations.

    "Invert" : "invert",
    "UnaryAdd" : "pos",
    "UnarySub" : "neg",

    # Augmented assignment. The keys are the plain operator symbols, without
    # any escaping.

    "+=" : "iadd",
    "-=" : "isub",
    "*=" : "imul",
    "/=" : "idiv",
    "//=" : "ifloordiv",
    "%=" : "imod",
    "**=" : "ipow",
    "<<=" : "ilshift",
    ">>=" : "irshift",
    "&=" : "iand",
    "^=" : "ixor",
    "|=" : "ior",

    # Comparisons.

    "==" : "eq",
    "!=" : "ne",
    "<" : "lt",
    "<=" : "le",
    ">=" : "ge",
    ">" : "gt",
    }
paul@0	1563
paul@0	1564	# vim: tabstop=4 expandtab shiftwidth=4