ConfluenceConverter (annotate wikiparser.py in fa3c16972921)

ConfluenceConverter

Annotated wikiparser.py

75:fa3c16972921

2013-06-09

Paul Boddie

Added nested section handling, although the "color" macro should not permit macro usage in the text enclosed by it, or some kind of alternative Moin facility should be used instead of the "Color2" macro.

paul@6	1	#!/usr/bin/env python
paul@6	2
paul@7	3	"""
paul@7	4	Confluence Wiki syntax parsing.
paul@7	5
paul@34	6	Copyright (C) 2012, 2013 Paul Boddie <paul@boddie.org.uk>
paul@8	7
paul@8	8	This software is free software; you can redistribute it and/or
paul@8	9	modify it under the terms of the GNU General Public License as
paul@8	10	published by the Free Software Foundation; either version 2 of
paul@8	11	the License, or (at your option) any later version.
paul@8	12
paul@8	13	This software is distributed in the hope that it will be useful,
paul@8	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
paul@8	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
paul@8	16	GNU General Public License for more details.
paul@8	17
paul@8	18	You should have received a copy of the GNU General Public
paul@8	19	License along with this library; see the file LICENCE.txt
paul@8	20	If not, write to the Free Software Foundation, Inc.,
paul@8	21	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
paul@8	22
paul@8	23	--------
paul@8	24
paul@8	25	The basic procedure is as follows:
paul@8	26
paul@7	27	1. Wiki pages are first split up into regions.
paul@7	28	2. Then, within these regions, the text is split into blocks.
paul@7	29	   1. First, lists are identified.
paul@7	30	   2. Additionally, other block-like elements are identified.
paul@7	31	3. Each block is then parsed.
paul@7	32	"""
paul@7	33
paul@35	34	from common import *
paul@6	35	import re
paul@25	36	import sys
paul@41	37	import codecs
paul@19	38
paul@6	39	# Section extraction.
paul@6	40
paul@75	41	sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(?P<options>:[^}\n]+)?}"
paul@6	42	sections_regexp = re.compile(sections_regexp_str, re.DOTALL \| re.MULTILINE)
paul@6	43
paul@6	44	def get_regions(s):
paul@6	45
paul@6	46	"""
paul@6	47	Return a list of regions from 's'. Each region is specified using a tuple of
paul@6	48	the form (type, text).
paul@6	49	"""
paul@6	50
paul@6	51	last = 0
paul@6	52	regions = []
paul@75	53	depth = 0
paul@75	54
paul@6	55	for match in sections_regexp.finditer(s):
paul@6	56	start, end = match.span()
paul@75	57	is_start = match.group("options")
paul@75	58
paul@75	59	# The start of a region is either indicated by a marker with options or
paul@75	60	# by a marker where no region is currently active.
paul@75	61
paul@75	62	if is_start or not depth:
paul@75	63
paul@75	64	# Where no region is active, add the text since the last match as a
paul@75	65	# "null" region.
paul@75	66
paul@75	67	if not depth:
paul@75	68	regions.append((None, s[last:start]))
paul@75	69
paul@75	70	# A new region is maintained as a string.
paul@75	71
paul@75	72	regions.append(s[start:end])
paul@75	73
paul@75	74	# Where a region is active, add the text since the last match as
paul@75	75	# well as the text in this match to the region.
paul@75	76
paul@75	77	else:
paul@75	78	regions[-1] += s[last:end]
paul@75	79
paul@75	80	# Any start marker will cause an increase in the depth of the region
paul@75	81	# nesting.
paul@75	82
paul@75	83	depth += 1
paul@75	84
paul@75	85	# The end of a region is indicated by a marker with no options.
paul@75	86
paul@75	87	else:
paul@75	88	# Where no region is active, the text since the last match plus the
paul@75	89	# marker are added to the current "null" region.
paul@75	90
paul@75	91	if not depth:
paul@75	92
paul@75	93	# Add to the string portion of the "null" region.
paul@75	94
paul@75	95	regions[-1][1] += s[last:end]
paul@75	96
paul@75	97	# Where a region is active, the end marker and preceding text are
paul@75	98	# either incorporated into the current region if more than one
paul@75	99	# region is active, or the preceding text is incorporated into the
paul@75	100	# current region and the details of the region are then obtained.
paul@75	101
paul@75	102	else:
paul@75	103	if depth > 1:
paul@75	104	regions[-1] += s[last:end]
paul@75	105
paul@75	106	# Terminate the active region, interpreting its contents.
paul@75	107
paul@75	108	else:
paul@75	109	regions[-1] = get_section_details(regions[-1] + s[last:end])
paul@75	110	depth -= 1
paul@75	111
paul@6	112	last = end
paul@75	113
paul@75	114	# Where a region is still active, terminate it.
paul@75	115
paul@75	116	if depth:
paul@75	117	regions[-1] = get_section_details(regions[-1] + s[last:])
paul@75	118
paul@75	119	# Otherwise, add a "null" region.
paul@75	120
paul@75	121	else:
paul@75	122	regions.append((None, s[last:]))
paul@75	123
paul@6	124	return regions
paul@6	125
paul@7	126	# Section inspection.
paul@7	127
paul@15	128	section_regexp_str = r"{(?P<sectiontype>[^\n:]?)(?::(?P<options>.?))?}(?P<section>.*){(?P=sectiontype)}"
paul@7	129	section_regexp = re.compile(section_regexp_str, re.DOTALL \| re.MULTILINE)
paul@7	130
paul@6	131	def get_section_details(s):
paul@6	132
paul@7	133	"Return the details of a section 's' in the form ((type, options), text), or (None, s) if 's' does not match the section pattern."
paul@6	134
paul@6	135	match = section_regexp.match(s)
paul@6	136	if match:
paul@15	137	return (match.group("sectiontype"), match.group("options")), match.group("section")
paul@6	138	else:
paul@6	139	return None, s
paul@6	140
paul@14	141	# Heading, table and list extraction.
paul@7	142
paul@41	143	list_regexp_str = r"^\s(?P<listtype>[#-])[#-]\s+.(\n\s(?P=listtype).?)(?:\n\|$)"
paul@39	144	table_regexp_str = r"^((?P<celltype>[\|]{1,2})((.\|\n(?!\n))+?(?P=celltype))+(\n\|$))+"
paul@14	145	blocktext_regexp_str = r"^(?P<type>h\d\|bq)\.\s+(?P<text>.*)$"
paul@7	146
paul@14	147	blockelement_regexp = re.compile(
paul@14	148	"(" + list_regexp_str + ")"
paul@14	149	"\|"
paul@14	150	"(" + table_regexp_str + ")"
paul@14	151	"\|"
paul@14	152	"(" + blocktext_regexp_str + ")",
paul@14	153	re.MULTILINE
paul@14	154	)
paul@14	155
paul@14	156	def get_block_elements(s):
paul@7	157
paul@7	158	"""
paul@14	159	Extract headings, tables and lists from the given string 's'.
paul@7	160	"""
paul@7	161
paul@7	162	last = 0
paul@7	163	blocks = []
paul@14	164	for match in blockelement_regexp.finditer(s):
paul@7	165	start, end = match.span()
paul@14	166	matchtype = match.group("listtype") and "list" or match.group("celltype") and "table" or match.group("type")
paul@7	167	blocks.append((None, s[last:start]))
paul@14	168	blocks.append((matchtype, match.group("text") or s[start:end]))
paul@7	169	last = end
paul@7	170	blocks.append((None, s[last:]))
paul@7	171	return blocks
paul@7	172
paul@7	173	# Block extraction.
paul@7	174
paul@7	175	block_regexp_str = r"^(?:\s*\n)+"
paul@7	176	block_regexp = re.compile(block_regexp_str, re.MULTILINE)
paul@7	177
paul@7	178	def get_basic_blocks(s):
paul@7	179
paul@7	180	"""
paul@7	181	Return blocks from the given string 's' by splitting the text on blank lines
paul@7	182	and eliminating those lines.
paul@7	183	"""
paul@7	184
paul@7	185	return [b for b in block_regexp.split(s) if b.strip()]
paul@7	186
paul@7	187	# Block inspection.
paul@7	188
paul@7	189	def get_blocks(s):
paul@7	190
paul@7	191	"""
paul@7	192	Return blocks from the given string 's', inspecting the basic blocks and
paul@7	193	generating additional block-level text where appropriate.
paul@7	194	"""
paul@7	195
paul@7	196	blocks = []
paul@7	197
paul@14	198	for blocktype, blocktext in get_block_elements(s):
paul@7	199
paul@14	200	# Collect heading, list and table blocks.
paul@7	201
paul@7	202	if blocktype is not None:
paul@7	203	blocks.append((blocktype, blocktext))
paul@7	204
paul@7	205	# Attempt to find new subblocks in other regions.
paul@7	206
paul@7	207	else:
paul@7	208	for block in get_basic_blocks(blocktext):
paul@14	209	blocks.append((None, block))
paul@7	210
paul@7	211	return blocks
paul@7	212
paul@14	213	# List item inspection.
paul@14	214
paul@41	215	listitem_regexp_str = r"^(?P<marker> [-#]+)\s+(?P<text>.*)$"
paul@7	216	listitem_regexp = re.compile(listitem_regexp_str, re.MULTILINE)
paul@7	217
paul@14	218	def get_list_items(text):
paul@14	219
paul@14	220	"Return a list of (marker, text) tuples for the given list 'text'."
paul@14	221
paul@14	222	items = []
paul@14	223
paul@14	224	for match in listitem_regexp.finditer(text):
paul@14	225	items.append((match.group("marker"), match.group("text")))
paul@14	226
paul@14	227	return items
paul@14	228
paul@36	229	# Content inspection.
paul@14	230
paul@19	231	monospace_regexp_str = r"{{(?P<monotext>.*?)}}"
paul@36	232	link_regexp_str = r"[[](?P<linktext>.*?)]"
paul@38	233	image_regexp_str = r"!(?P<imagetext>\w.*?)!"
paul@71	234	macro_regexp_str = r"{(?P<macro>.?):(?P<options>.?)}"
paul@36	235
paul@36	236	# Word-dependent patterns.
paul@36	237	# Here, the unbracketed markers must test for the absence of surrounding word
paul@36	238	# characters.
paul@36	239
paul@36	240	italic_regexp_str = r"(?:(?<!\w)_\|\{_\})(?P<italictext>.*?)(?:_(?!\w)\|\{_\})"
paul@36	241	bold_regexp_str = r"(?:(?<!\w)\\|\{\\})(?P<boldtext>.?)(?:\(?!\w)\|\{\*\})"
paul@36	242	del_regexp_str = r"(?:(?<!\w)-\|\{-\})(?P<deltext>.*?)(?:-(?!\w)\|\{-\})"
paul@36	243	underline_regexp_str = r"(?:(?<!\w)\+\|\{\+\})(?P<underlinetext>.*?)(?:\+(?!\w)\|\{\+\})"
paul@36	244	sub_regexp_str = r"(?:(?<!\w)~\|\{~\})(?P<subtext>.*?)(?:~(?!\w)\|\{~\})"
paul@16	245
paul@16	246	content_regexp_str = (
paul@19	247	"(" + monospace_regexp_str + ")"
paul@19	248	"\|"
paul@14	249	"(" + link_regexp_str + ")"
paul@14	250	"\|"
paul@14	251	"(" + image_regexp_str + ")"
paul@36	252	"\|"
paul@71	253	"(" + macro_regexp_str + ")"
paul@71	254	"\|"
paul@36	255	"(" + italic_regexp_str + ")"
paul@36	256	"\|"
paul@36	257	"(" + bold_regexp_str + ")"
paul@36	258	"\|"
paul@36	259	"(" + del_regexp_str + ")"
paul@36	260	"\|"
paul@36	261	"(" + underline_regexp_str + ")"
paul@36	262	"\|"
paul@36	263	"(" + sub_regexp_str + ")"
paul@16	264	)
paul@16	265
paul@36	266	# Table row inspection.
paul@36	267
paul@36	268	cellsep_regexp_str = r"(?P<celltype>[\|]{1,2})"
paul@36	269
paul@16	270	table_content_regexp_str = (
paul@16	271	content_regexp_str +
paul@14	272	"\|"
paul@14	273	"(" + cellsep_regexp_str + ")"
paul@14	274	)
paul@14	275
paul@16	276	content_regexp = re.compile(content_regexp_str)
paul@16	277	table_content_regexp = re.compile(table_content_regexp_str)
paul@16	278
paul@14	279	def get_table_rows(text):
paul@14	280
paul@14	281	"Return a list of (cellsep, columns) tuples for the given table 'text'."
paul@14	282
paul@14	283	rows = []
paul@14	284
paul@39	285	for row in text.split("\|\n"):
paul@39	286	if not row:
paul@39	287	break
paul@39	288
paul@39	289	row += "\|"
paul@14	290	cellsep = None
paul@14	291	columns = [""]
paul@14	292	last = 0
paul@39	293	for match in table_content_regexp.finditer(row):
paul@14	294	start, end = match.span()
paul@39	295	columns[-1] += row[last:start]
paul@14	296
paul@14	297	if match.group("celltype"):
paul@14	298	if cellsep is None:
paul@14	299	cellsep = match.group("celltype")
paul@14	300	columns.append("")
paul@14	301	else:
paul@16	302	columns[-1] += match.group()
paul@14	303
paul@14	304	last = end
paul@14	305
paul@39	306	columns[-1] += row[last:]
paul@14	307
paul@14	308	if cellsep:
paul@14	309	rows.append((cellsep, columns[1:-1]))
paul@14	310
paul@14	311	return rows
paul@14	312
paul@70	313	# Notation conversion.
paul@70	314
paul@70	315	notation_mapping = [
paul@70	316	(r"\!", "!"),
paul@70	317	(r"\-", "-"),
paul@70	318	(r"\\""\n", "<<BR>>"),
paul@70	319	(r"\\ ", "<<BR>>"),
paul@70	320	(r"\~", "~"),
paul@70	321	]
paul@70	322
paul@70	323	preformatted_notation_mapping = [
paul@70	324	(r"\!", "!"),
paul@70	325	(r"\-", "-"),
paul@70	326	(r"\\""\n", "\n"),
paul@70	327	(r"\\ ", "\n"),
paul@70	328	(r"\~", "~"),
paul@70	329	]
paul@70	330
paul@70	331	# Translation helpers.
paul@70	332
paul@70	333	markers = {
paul@70	334	"" : "",
paul@70	335	"#" : "1.",
paul@70	336	"-" : "*",
paul@70	337	}
paul@70	338
paul@70	339	cellseps = {
paul@70	340	"\|" : "\n\|\| ",
paul@70	341	"\|\|" : "\n\|\| ",
paul@70	342	}
paul@70	343
paul@70	344	cellextra = {
paul@70	345	"\|" : "",
paul@70	346	"\|\|" : "'''",
paul@70	347	}
paul@70	348
paul@15	349	sectiontypes = {
paul@42	350	"code" : "",
paul@42	351	"noformat" : "",
paul@42	352	"quote" : "",
paul@68	353	"info" : "#!wiki important",
paul@68	354	"note" : "#!wiki caution",
paul@68	355	"tip" : "#!wiki tip",
paul@68	356	"warning" : "#!wiki warning",
paul@42	357	}
paul@42	358
paul@66	359	preformatted_sectiontypes = (None, "noformat")
paul@66	360
paul@71	361	macroargs = {
paul@71	362	"color" : "col",
paul@71	363	}
paul@71	364
paul@42	365	macrotypes = {
paul@71	366	"anchor" : "<<Anchor(%(args)s)>>",
paul@71	367	"color" : "<<Color2(%(content)s, %(args)s)>>",
paul@15	368	}
paul@15	369
paul@70	370	class ConfluenceParser:
paul@70	371
paul@70	372	"A parser for Confluence markup."
paul@70	373
paul@70	374	def __init__(self):
paul@70	375	self.max_level = self.level = 0
paul@71	376	self.in_heading = False
paul@72	377	self.held_anchors = []
paul@70	378
paul@70	379	def translate_marker(self, marker):
paul@70	380
paul@70	381	"Translate the given 'marker' to a suitable Moin representation."
paul@70	382
paul@70	383	return " " * len(marker) + markers[marker[-1]]
paul@70	384
paul@70	385	def translate_cellsep(self, cellsep):
paul@70	386
paul@70	387	"Translate the given 'cellsep' to a suitable Moin representation."
paul@70	388
paul@70	389	return cellseps[cellsep]
paul@70	390
paul@70	391	def translate_cell(self, cellsep, text):
paul@15	392
paul@70	393	"Using 'cellsep', translate the cell 'text'."
paul@70	394
paul@70	395	return cellextra[cellsep] + self.parse_text(text).strip() + cellextra[cellsep]
paul@70	396
paul@70	397	def translate_content_match(self, match):
paul@70	398
paul@70	399	"Translate the content described by the given 'match', returning a string."
paul@70	400
paul@70	401	if match.group("monotext"):
paul@70	402	self.enter_section(); self.leave_section()
paul@70	403	return "{{{%s}}}" % match.group("monotext")
paul@11	404
paul@70	405	elif match.group("linktext"):
paul@70	406	parts = match.group("linktext").split("\|")
paul@70	407
paul@70	408	# NOTE: Proper detection of external links required.
paul@70	409
paul@70	410	if len(parts) == 1:
paul@70	411	label, target, title = None, parts[0], None
paul@70	412	elif len(parts) == 2:
paul@70	413	(label, target), title = parts, None
paul@70	414	else:
paul@70	415	label, target, title = parts
paul@39	416
paul@70	417	target = target.strip()
paul@70	418
paul@70	419	# Look for namespace links and rewrite them.
paul@70	420
paul@70	421	if target.find(":") != -1:
paul@70	422	prefix = ""
paul@70	423	space, rest = target.split(":", 1)
paul@70	424	if space not in URL_SCHEMES:
paul@70	425	target = "%s/%s" % (space, rest)
paul@70	426
paul@70	427	# Detect anchors.
paul@70	428
paul@70	429	elif target.startswith("#"):
paul@70	430	prefix = ""
paul@70	431
paul@70	432	# Detect attachments.
paul@70	433
paul@70	434	elif target.startswith("^"):
paul@70	435	prefix = "attachment:"
paul@70	436
paul@70	437	# Link to other pages within a space.
paul@11	438
paul@70	439	else:
paul@70	440	prefix = "../"
paul@70	441
paul@70	442	# Make the link tidier by using the target as the label if none was given.
paul@70	443
paul@70	444	if not label:
paul@70	445	label = target
paul@42	446
paul@70	447	if not label and not title:
paul@70	448	return "[[%s%s]]" % (prefix, target)
paul@70	449	elif not title:
paul@70	450	return "[[%s%s\|%s]]" % (prefix, target, label)
paul@70	451	else:
paul@70	452	return "[[%s%s\|%s\|title=%s]]" % (prefix, target, label, title)
paul@70	453
paul@70	454	elif match.group("imagetext"):
paul@70	455	parts = match.group("imagetext").split("\|")
paul@70	456
paul@70	457	# NOTE: Proper detection of external links required.
paul@70	458
paul@70	459	if parts[0].startswith("http"):
paul@70	460	prefix = ""
paul@70	461	else:
paul@70	462	prefix = "attachment:"
paul@42	463
paul@70	464	# NOTE: Proper options conversion required.
paul@70	465
paul@70	466	if len(parts) == 1:
paul@70	467	return "{{%s%s}}" % (prefix, parts[0])
paul@70	468	else:
paul@70	469	return "{{%s%s\|%s}}" % (prefix, parts[0], parts[1])
paul@70	470
paul@71	471	elif match.group("macro"):
paul@71	472	macro_name = match.group("macro")
paul@72	473	if macrotypes.has_key(macro_name):
paul@71	474	argname = macroargs.get(macro_name)
paul@72	475	result = macrotypes[macro_name] % {
paul@71	476	"args" : quote_macro_argument((argname and ("%s=" % argname) or "") + match.group("options"))
paul@71	477	}
paul@72	478	if not self.forbids_macros():
paul@72	479	return result
paul@72	480	if macro_name == "anchor":
paul@72	481	self.held_anchors.append(result)
paul@72	482	return ""
paul@71	483
paul@70	484	elif match.group("italictext"):
paul@70	485	return "''%s''" % self.translate_content(match.group("italictext"))
paul@70	486
paul@70	487	elif match.group("boldtext"):
paul@70	488	return "'''%s'''" % self.translate_content(match.group("boldtext"))
paul@70	489
paul@70	490	elif match.group("deltext"):
paul@70	491	return "--(%s)--" % self.translate_content(match.group("deltext"))
paul@70	492
paul@70	493	elif match.group("underlinetext"):
paul@70	494	return "__%s__" % self.translate_content(match.group("underlinetext"))
paul@70	495
paul@70	496	elif match.group("subtext"):
paul@70	497	return ",,%s,," % self.translate_content(match.group("subtext"))
paul@11	498
paul@70	499	else:
paul@70	500	return self.translate_text(match.group())
paul@70	501
paul@70	502	def translate_text(self, s, preformatted=False):
paul@70	503
paul@70	504	"Translate the plain text string 's', converting notation."
paul@70	505
paul@70	506	for before, after in preformatted and preformatted_notation_mapping or notation_mapping:
paul@70	507	s = s.replace(before, after)
paul@70	508	return s
paul@70	509
paul@70	510	def translate_content(self, text, sectiontype=None):
paul@70	511
paul@70	512	"""
paul@70	513	Return a translation of the given 'text'. If the optional 'sectiontype' is
paul@70	514	specified, the translation may be modified to a form appropriate to the
paul@70	515	section being translated.
paul@70	516	"""
paul@70	517
paul@70	518	parts = []
paul@70	519	preformatted = sectiontype in preformatted_sectiontypes
paul@11	520
paul@70	521	last = 0
paul@70	522	for match in content_regexp.finditer(text):
paul@70	523	start, end = match.span()
paul@70	524	parts.append(self.translate_text(text[last:start], preformatted))
paul@70	525
paul@70	526	# Handle unformatted sections.
paul@70	527
paul@70	528	if sectiontype in ("code", "noformat"):
paul@70	529	parts.append(match.group())
paul@70	530	else:
paul@70	531	parts.append(self.translate_content_match(match))
paul@70	532
paul@70	533	last = end
paul@70	534
paul@70	535	parts.append(self.translate_text(text[last:], preformatted))
paul@70	536	return "".join(parts)
paul@70	537
paul@70	538	def translate_block(self, blocktype, blocktext):
paul@70	539
paul@70	540	"Translate the block with the given 'blocktype' and 'blocktext'."
paul@70	541
paul@71	542	if blocktype in headings:
paul@71	543	self.in_heading = True
paul@72	544	self.held_anchors = []
paul@71	545
paul@70	546	parts = []
paul@42	547
paul@70	548	# Translate headings and blockquotes.
paul@70	549
paul@70	550	if blocktypes.has_key(blocktype):
paul@72	551	text = self.translate_content(blocktext)
paul@72	552	for anchor in self.held_anchors:
paul@72	553	parts.append(anchor)
paul@72	554	parts.append(blocktypes[blocktype] % text)
paul@70	555
paul@70	556	# Translate list items.
paul@70	557
paul@70	558	elif blocktype == "list":
paul@70	559	for listmarker, listitem in get_list_items(blocktext):
paul@70	560	parts.append("%s %s" % (self.translate_marker(listmarker), self.translate_content(listitem)))
paul@70	561
paul@70	562	# Translate table items.
paul@70	563
paul@70	564	elif blocktype == "table":
paul@70	565
paul@70	566	# Enter the table.
paul@70	567
paul@70	568	self.enter_section()
paul@70	569
paul@70	570	table_parts = []
paul@42	571	first = True
paul@70	572
paul@70	573	for cellsep, columns in get_table_rows(blocktext):
paul@42	574	if not first:
paul@70	575	table_parts.append("==")
paul@42	576	else:
paul@42	577	first = False
paul@70	578	moinsep = self.translate_cellsep(cellsep)
paul@70	579	table_parts.append(moinsep.join([self.translate_cell(cellsep, column) for column in columns]))
paul@70	580
paul@70	581	# Nest the section appropriately.
paul@70	582
paul@70	583	opening, closing = self.nest_section()
paul@42	584
paul@70	585	parts.append("%s#!table" % opening)
paul@70	586	parts += table_parts
paul@70	587	parts.append(closing)
paul@11	588
paul@70	589	# Leave the table.
paul@70	590
paul@70	591	self.leave_section()
paul@70	592
paul@70	593	# Handle anonymous blocks.
paul@11	594
paul@11	595	else:
paul@70	596	parts.append(self.translate_content(blocktext))
paul@70	597
paul@71	598	if blocktype in headings:
paul@71	599	self.in_heading = False
paul@71	600
paul@70	601	return "\n".join(parts)
paul@70	602
paul@70	603	def translate_section(self, sectiontype, options, text):
paul@70	604
paul@70	605	"""
paul@70	606	Translate the section with the given 'sectiontype', 'options' and
paul@70	607	'text'.
paul@70	608	"""
paul@70	609
paul@70	610	parts = []
paul@70	611
paul@70	612	# Enter the section.
paul@70	613
paul@70	614	self.enter_section()
paul@70	615
paul@70	616	mointype = sectiontypes.get(sectiontype)
paul@70	617	section_content = self.translate_content(text.strip(), sectiontype)
paul@70	618
paul@70	619	# Nest the section appropriately.
paul@70	620
paul@70	621	opening, closing = self.nest_section()
paul@70	622
paul@70	623	parts.append("%s%s\n" % (opening, mointype or ""))
paul@70	624	if options:
paul@70	625	parts.append("## %s\n" % options)
paul@70	626	parts.append(section_content)
paul@70	627	parts.append("\n%s\n" % closing)
paul@70	628
paul@70	629	# Leave the section.
paul@70	630
paul@70	631	self.leave_section()
paul@15	632
paul@70	633	return parts
paul@70	634
paul@70	635	def enter_section(self):
paul@70	636	self.level += 1
paul@70	637	self.max_level = max(self.level, self.max_level)
paul@70	638
paul@70	639	def leave_section(self):
paul@70	640	self.level -= 1
paul@70	641	if not self.level:
paul@70	642	self.max_level = 0
paul@70	643
paul@70	644	def nest_section(self):
paul@70	645	level = 3 + self.max_level - self.level
paul@70	646	opening = "{" * level
paul@70	647	closing = "}" * level
paul@70	648	return opening, closing
paul@15	649
paul@70	650	# General parsing.
paul@70	651
paul@70	652	def parse_text(self, s):
paul@70	653
paul@70	654	"Parse the content in the string 's', returning the translation."
paul@70	655
paul@70	656	parts = []
paul@70	657
paul@70	658	# Control spacing between blocks and other blocks or sections.
paul@70	659
paul@70	660	preceded_by_block = False
paul@70	661
paul@70	662	for type, text in get_regions(s):
paul@70	663
paul@70	664	# Handle list, heading, blockquote or anonymous blocks.
paul@70	665
paul@70	666	if type is None:
paul@42	667	if preceded_by_block:
paul@42	668	parts.append("\n")
paul@42	669
paul@70	670	first = True
paul@70	671	for blocktype, blocktext in get_blocks(text):
paul@70	672	if not first:
paul@70	673	parts.append("\n")
paul@70	674	else:
paul@70	675	first = False
paul@70	676	parts.append("%s" % self.translate_block(blocktype, blocktext))
paul@42	677
paul@70	678	if not first:
paul@70	679	preceded_by_block = True
paul@42	680
paul@70	681	# Handle sections.
paul@42	682
paul@15	683	else:
paul@70	684	sectiontype, options = type
paul@70	685
paul@70	686	# Direct translations of sections.
paul@70	687
paul@70	688	if sectiontypes.has_key(sectiontype):
paul@70	689	if preceded_by_block:
paul@70	690	parts.append("\n")
paul@70	691
paul@70	692	parts += self.translate_section(sectiontype, options, text)
paul@70	693	preceded_by_block = True
paul@39	694
paul@70	695	# Translations of macros (which can look like sections).
paul@70	696
paul@71	697	elif macrotypes.has_key(sectiontype) and not self.forbids_macros():
paul@71	698	argname = macroargs.get(sectiontype)
paul@71	699	parts.append(macrotypes[sectiontype] % {
paul@75	700	"content" : quote_macro_argument(self.parse_text(text)),
paul@71	701	"args" : quote_macro_argument((argname and ("%s=" % argname) or "") + options)
paul@71	702	})
paul@70	703	preceded_by_block = False
paul@70	704
paul@70	705	# Unrecognised sections.
paul@70	706
paul@70	707	else:
paul@70	708	parts += self.translate_section(sectiontype, None, text)
paul@70	709	preceded_by_block = False
paul@70	710
paul@70	711	return "".join(parts)
paul@39	712
paul@71	713	def forbids_macros(self):
paul@71	714	return self.in_heading
paul@71	715
paul@39	716	def parse(s, out):
paul@39	717
paul@39	718	"Parse the content in the string 's', writing a translation to 'out'."
paul@39	719
paul@70	720	parser = ConfluenceParser()
paul@70	721	out.write(parser.parse_text(s))
paul@11	722
paul@6	723	if __name__ == "__main__":
paul@62	724	s = codecs.getreader("utf-8")(sys.stdin).read()
paul@41	725	out = codecs.getwriter("utf-8")(sys.stdout)
paul@41	726	parse(s, out)
paul@6	727
paul@6	728	# vim: tabstop=4 expandtab shiftwidth=4