ConfluenceConverter (annotate wikiparser.py in dac27f1544a5)

ConfluenceConverter

Annotated wikiparser.py

71:dac27f1544a5

2013-06-08

Paul Boddie

Added proper support for non-section macros in the Confluence markup parser. Introduced macro suppression within certain regions. Added support for the translation of Confluence "color" macros to Moin "Color2" macros.

paul@6	1	#!/usr/bin/env python
paul@6	2
paul@7	3	"""
paul@7	4	Confluence Wiki syntax parsing.
paul@7	5
paul@34	6	Copyright (C) 2012, 2013 Paul Boddie <paul@boddie.org.uk>
paul@8	7
paul@8	8	This software is free software; you can redistribute it and/or
paul@8	9	modify it under the terms of the GNU General Public License as
paul@8	10	published by the Free Software Foundation; either version 2 of
paul@8	11	the License, or (at your option) any later version.
paul@8	12
paul@8	13	This software is distributed in the hope that it will be useful,
paul@8	14	but WITHOUT ANY WARRANTY; without even the implied warranty of
paul@8	15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
paul@8	16	GNU General Public License for more details.
paul@8	17
paul@8	18	You should have received a copy of the GNU General Public
paul@8	19	License along with this library; see the file LICENCE.txt
paul@8	20	If not, write to the Free Software Foundation, Inc.,
paul@8	21	51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
paul@8	22
paul@8	23	--------
paul@8	24
paul@8	25	The basic procedure is as follows:
paul@8	26
paul@7	27	1. Wiki pages are first split up into regions.
paul@7	28	2. Then, within these regions, the text is split into blocks.
paul@7	29	1. First, lists are identified.
paul@7	30	2. Additionally, other block-like elements are identified.
paul@7	31	3. Each block is then parsed.
paul@7	32	"""
paul@7	33
paul@35	34	from common import *
paul@6	35	import re
paul@25	36	import sys
paul@41	37	import codecs
paul@19	38
# Section extraction.

# A section is an opening tag of the form {type} or {type:options}, any text
# (including newlines, hence DOTALL), and the nearest closing {type} tag.
# The type may not contain the inline markup characters - * _ +, so that the
# bracketed markers {-}, {*}, {_} and {+} are never mistaken for sections, and
# the opening brace may not follow another brace, which excludes {{monospace}}.

sections_regexp_str = r"(?<!{){(?P<type>[^-*_+{}\n:]+)(:[^}\n]+)?}.*?{(?P=type)}"
sections_regexp = re.compile(sections_regexp_str, re.DOTALL | re.MULTILINE)
paul@6	43
def get_regions(s):

    """
    Split the string 's' into a list of regions.

    Text outside any section is reported as (None, text). Each section found
    by 'sections_regexp' is reported using whatever get_section_details
    returns for the matched text. A (None, text) entry is always emitted
    before each section and after the last one, even if its text is empty.
    """

    regions = []
    pos = 0

    for found in sections_regexp.finditer(s):
        begin, finish = found.span()

        # Plain text preceding the section, then the section itself.

        regions.append((None, s[pos:begin]))
        regions.append(get_section_details(s[begin:finish]))
        pos = finish

    # Whatever remains after the final section.

    regions.append((None, s[pos:]))
    return regions
paul@6	60
# Section inspection.

# Applied to text already isolated by the section extraction pattern: the
# opening tag is split into a type and optional options (after the first
# colon), and everything up to the final closing tag is the section body.

section_regexp_str = r"{(?P<sectiontype>[^\n:]*?)(?::(?P<options>.*?))?}(?P<section>.*){(?P=sectiontype)}"
section_regexp = re.compile(section_regexp_str, re.DOTALL | re.MULTILINE)
paul@7	65
def get_section_details(s):

    """
    Return the details of a section 's' in the form (type, text).

    For text matching 'section_regexp', the type is itself a tuple of the
    form (sectiontype, options), where options is None if the opening tag had
    none. Text that does not match is returned as (None, s).
    """

    found = section_regexp.match(s)
    if not found:
        return None, s

    header = (found.group("sectiontype"), found.group("options"))
    return header, found.group("section")
paul@6	75
# Heading, table and list extraction.

# Lists: a line starting with one or more of the markers * # -, followed by
# any further lines that start with the same first marker.
# Tables: one or more consecutive rows, each starting with | or || and made up
# of cells terminated by that same separator; a blank line ends the table.
# Block text: "hN. text" headings and "bq. text" blockquotes on a single line.

list_regexp_str = r"^\s*(?P<listtype>[*#-])[*#-]*\s+.*(\n\s*(?P=listtype).*?)*(?:\n|$)"
table_regexp_str = r"^((?P<celltype>[|]{1,2})((.|\n(?!\n))+?(?P=celltype))+(\n|$))+"
blocktext_regexp_str = r"^(?P<type>h\d|bq)\.\s+(?P<text>.*)$"

blockelement_regexp = re.compile(
    "(" + list_regexp_str + ")"
    "|"
    "(" + table_regexp_str + ")"
    "|"
    "(" + blocktext_regexp_str + ")",
    re.MULTILINE
    )
paul@14	90
def get_block_elements(s):

    """
    Extract headings, blockquotes, tables and lists from the string 's'.

    The result is a list of (type, text) tuples. Text between recognised
    elements has the type None; lists have the type "list", tables the type
    "table", and headings and blockquotes the type matched by the block text
    pattern (such as "h1" or "bq"). For headings and blockquotes only the text
    after the prefix is kept; lists and tables keep their complete source.
    """

    blocks = []
    pos = 0

    for found in blockelement_regexp.finditer(s):
        begin, finish = found.span()

        # Work out which of the alternatives produced the match.

        if found.group("listtype"):
            kind = "list"
        elif found.group("celltype"):
            kind = "table"
        else:
            kind = found.group("type")

        blocks.append((None, s[pos:begin]))
        blocks.append((kind, found.group("text") or s[begin:finish]))
        pos = finish

    blocks.append((None, s[pos:]))
    return blocks
paul@7	107
# Block extraction.

# One or more consecutive lines containing nothing but whitespace act as the
# separator between basic blocks.

block_regexp_str = r"^(?:\s*\n)+"
block_regexp = re.compile(block_regexp_str, flags=re.MULTILINE)
paul@7	112
def get_basic_blocks(s):

    """
    Split the string 's' on blank lines, returning the pieces in between as a
    list. Pieces consisting only of whitespace are discarded.
    """

    pieces = block_regexp.split(s)
    return [piece for piece in pieces if piece.strip()]
paul@7	121
paul@7	122	# Block inspection.
paul@7	123
def get_blocks(s):

    """
    Return a list of (type, text) blocks for the string 's'.

    Headings, blockquotes, lists and tables found by get_block_elements are
    passed through unchanged. The text between them is divided further into
    basic blocks by get_basic_blocks, each reported with the type None.
    """

    blocks = []

    for blocktype, blocktext in get_block_elements(s):

        # Untyped text: look for paragraphs separated by blank lines.

        if blocktype is None:
            blocks.extend([(None, block) for block in get_basic_blocks(blocktext)])

        # Typed elements are collected as they are.

        else:
            blocks.append((blocktype, blocktext))

    return blocks
paul@7	147
# List item inspection.

# Each item is a single line: optional leading spaces and a run of list marker
# characters (captured together as the marker), whitespace, then the item text.

listitem_regexp_str = r"^(?P<marker> *[-*#]+)\s+(?P<text>.*)$"
listitem_regexp = re.compile(listitem_regexp_str, re.MULTILINE)
paul@7	152
def get_list_items(text):

    """
    Return a list of (marker, text) tuples for the given list 'text', one
    tuple for each line recognised as a list item.
    """

    return [
        (found.group("marker"), found.group("text"))
        for found in listitem_regexp.finditer(text)
        ]
paul@14	163
# Content inspection.

monospace_regexp_str = r"{{(?P<monotext>.*?)}}"

# The opening bracket is written as \[ rather than [[] because the latter
# triggers a "possible nested set" FutureWarning in newer versions of re.

link_regexp_str = r"\[(?P<linktext>.*?)]"
image_regexp_str = r"!(?P<imagetext>\w.*?)!"
macro_regexp_str = r"{(?P<macro>.*?):(?P<options>.*?)}"

# Word-dependent patterns.
# Here, the unbracketed markers must test for the absence of surrounding word
# characters. The bracketed forms ({_}, {*}, {-}, {+}, {~}) apply anywhere.

italic_regexp_str = r"(?:(?<!\w)_|\{_\})(?P<italictext>.*?)(?:_(?!\w)|\{_\})"
bold_regexp_str = r"(?:(?<!\w)\*|\{\*\})(?P<boldtext>.*?)(?:\*(?!\w)|\{\*\})"
del_regexp_str = r"(?:(?<!\w)-|\{-\})(?P<deltext>.*?)(?:-(?!\w)|\{-\})"
underline_regexp_str = r"(?:(?<!\w)\+|\{\+\})(?P<underlinetext>.*?)(?:\+(?!\w)|\{\+\})"
sub_regexp_str = r"(?:(?<!\w)~|\{~\})(?P<subtext>.*?)(?:~(?!\w)|\{~\})"

# The alternatives are tried in this order at each position in the text.

content_regexp_str = (
    "(" + monospace_regexp_str + ")"
    "|"
    "(" + link_regexp_str + ")"
    "|"
    "(" + image_regexp_str + ")"
    "|"
    "(" + macro_regexp_str + ")"
    "|"
    "(" + italic_regexp_str + ")"
    "|"
    "(" + bold_regexp_str + ")"
    "|"
    "(" + del_regexp_str + ")"
    "|"
    "(" + underline_regexp_str + ")"
    "|"
    "(" + sub_regexp_str + ")"
    )

# Table row inspection.

cellsep_regexp_str = r"(?P<celltype>[|]{1,2})"

# Within table rows, cell separators are recognised in addition to content
# markup, but only where no content pattern matches first, so that a "|"
# inside a link or image is not treated as a cell boundary.

table_content_regexp_str = (
    content_regexp_str +
    "|"
    "(" + cellsep_regexp_str + ")"
    )

content_regexp = re.compile(content_regexp_str)
table_content_regexp = re.compile(table_content_regexp_str)
paul@16	213
def get_table_rows(text):

    """
    Return a list of (cellsep, columns) tuples for the given table 'text'.

    Each row contributes one tuple: 'cellsep' is the first cell separator
    found in the row ("|" or "||") and 'columns' is the list of cell texts
    between the row's first and last separators. Rows in which no separator
    is found are omitted.
    """

    rows = []

    # Rows end with a separator followed by a newline; splitting removes that
    # final separator from each row.

    for row in text.split("|\n"):

        # An empty piece is treated as the end of the table.

        if not row:
            break

        # Put back the separator removed by the split.

        row += "|"
        cellsep = None
        columns = [""]
        last = 0

        for match in table_content_regexp.finditer(row):
            start, end = match.span()
            columns[-1] += row[last:start]

            # A separator starts a new column; the first one seen defines the
            # separator reported for the row.

            if match.group("celltype"):
                if cellsep is None:
                    cellsep = match.group("celltype")
                columns.append("")

            # Other markup stays part of the current column, so that any "|"
            # inside it (as in links) does not split the cell.

            else:
                columns[-1] += match.group()

            last = end

        columns[-1] += row[last:]

        # Discard the text before the first separator and after the last one.

        if cellsep:
            rows.append((cellsep, columns[1:-1]))

    return rows
paul@14	247
# Notation conversion.

# Pairs of (Confluence notation, replacement) applied in order to plain text.

notation_mapping = [
    (r"\!", "!"),
    (r"\-", "-"),
    (r"\\""\n", "<<BR>>"),
    (r"\\ ", "<<BR>>"),
    (r"\~", "~"),
    ]

# The same conversions where line breaks must stay as actual newlines.

preformatted_notation_mapping = [
    (r"\!", "!"),
    (r"\-", "-"),
    (r"\\""\n", "\n"),
    (r"\\ ", "\n"),
    (r"\~", "~"),
    ]

# Translation helpers.

# Confluence list marker characters mapped to Moin list markers.

markers = {
    "*" : "*",
    "#" : "1.",
    "-" : "*",
    }

# Text placed between the cells of a row, keyed by Confluence cell separator.

cellseps = {
    "|" : "\n|| ",
    "||" : "\n|| ",
    }

# Markup wrapped around each cell's content: header cells (||) become bold.

cellextra = {
    "|" : "",
    "||" : "'''",
    }

# Confluence section types mapped to the Moin text following the opening
# braces of the translated section.

sectiontypes = {
    "code" : "",
    "noformat" : "",
    "quote" : "",
    "info" : "#!wiki important",
    "note" : "#!wiki caution",
    "tip" : "#!wiki tip",
    "warning" : "#!wiki warning",
    }

preformatted_sectiontypes = (None, "noformat")

# Argument names to prefix to the options of particular macros.

macroargs = {
    "color" : "col",
    }

# Moin macro templates keyed by Confluence macro name. The templates are
# filled in using "args" and, for macros enclosing text, "content".

macrotypes = {
    "anchor" : "<<Anchor(%(args)s)>>",
    "color" : "<<Color2(%(content)s, %(args)s)>>",
    }
paul@15	304
class ConfluenceParser(object):

    """
    A parser for Confluence markup which produces Moin markup.

    Instances keep track of the following state while translating:

    level       the number of sections/tables currently being translated
    max_level   the deepest level reached since the outermost section was
                entered (reset when that section is left)
    in_heading  whether a heading block is currently being translated

    NOTE(review): 'headings', 'blocktypes', 'URL_SCHEMES' and
    'quote_macro_argument' are not defined in this module; presumably they
    are provided by "from common import *" - confirm.
    """

    def __init__(self):
        self.max_level = self.level = 0
        self.in_heading = False

    def translate_marker(self, marker):

        "Translate the given 'marker' to a suitable Moin representation."

        # Indent by the length of the marker and use the Moin equivalent of
        # its final character.

        return " " * len(marker) + markers[marker[-1]]

    def translate_cellsep(self, cellsep):

        "Translate the given 'cellsep' to a suitable Moin representation."

        return cellseps[cellsep]

    def translate_cell(self, cellsep, text):

        "Using 'cellsep', translate the cell 'text'."

        # Cells are parsed as complete documents, so they may contain blocks
        # and sections of their own.

        return cellextra[cellsep] + self.parse_text(text).strip() + cellextra[cellsep]

    def translate_content_match(self, match):

        "Translate the content described by the given 'match', returning a string."

        if match.group("monotext"):

            # Record a level of nesting for the braces emitted here.
            # NOTE(review): presumably this makes any enclosing section use
            # more braces than the {{{...}}} produced below - confirm.

            self.enter_section()
            self.leave_section()
            return "{{{%s}}}" % match.group("monotext")

        elif match.group("linktext"):
            return self._translate_link(match.group("linktext"))

        elif match.group("imagetext"):
            return self._translate_image(match.group("imagetext"))

        elif match.group("macro"):
            return self._translate_macro(match.group("macro"), match.group("options"))

        elif match.group("italictext"):
            return "''%s''" % self.translate_content(match.group("italictext"))

        elif match.group("boldtext"):
            return "'''%s'''" % self.translate_content(match.group("boldtext"))

        elif match.group("deltext"):
            return "--(%s)--" % self.translate_content(match.group("deltext"))

        elif match.group("underlinetext"):
            return "__%s__" % self.translate_content(match.group("underlinetext"))

        elif match.group("subtext"):
            return ",,%s,," % self.translate_content(match.group("subtext"))

        # Matches with empty content are treated as plain text.

        else:
            return self.translate_text(match.group())

    def _translate_link(self, linktext):

        "Return a Moin link for the Confluence 'linktext' (without brackets)."

        # Split into at most three parts so that any further "|" characters
        # remain in the title instead of causing an unpacking error.

        parts = linktext.split("|", 2)

        # NOTE: Proper detection of external links required.

        if len(parts) == 1:
            label, target, title = None, parts[0], None
        elif len(parts) == 2:
            (label, target), title = parts, None
        else:
            label, target, title = parts

        target = target.strip()

        # Look for namespace links and rewrite them.

        if ":" in target:
            prefix = ""
            space, rest = target.split(":", 1)
            if space not in URL_SCHEMES:
                target = "%s/%s" % (space, rest)

        # Detect anchors.

        elif target.startswith("#"):
            prefix = ""

        # Detect attachments. The caret is Confluence notation and not part
        # of the attachment name, so it is removed from the target.

        elif target.startswith("^"):
            prefix = "attachment:"
            target = target[1:]

        # Link to other pages within a space.

        else:
            prefix = "../"

        # Make the link tidier by making a label if none was given.

        if not label:
            label = target

        if not label and not title:
            return "[[%s%s]]" % (prefix, target)
        elif not title:
            return "[[%s%s|%s]]" % (prefix, target, label)
        else:
            return "[[%s%s|%s|title=%s]]" % (prefix, target, label, title)

    def _translate_image(self, imagetext):

        "Return Moin image markup for the Confluence 'imagetext'."

        parts = imagetext.split("|")

        # NOTE: Proper detection of external links required.

        if parts[0].startswith("http"):
            prefix = ""
        else:
            prefix = "attachment:"

        # NOTE: Proper options conversion required.

        if len(parts) == 1:
            return "{{%s%s}}" % (prefix, parts[0])
        else:
            return "{{%s%s|%s}}" % (prefix, parts[0], parts[1])

    def _translate_macro(self, macro_name, options):

        """
        Return a Moin macro for the inline macro tag having the given
        'macro_name' and 'options', or an empty string if the macro is not
        known, is forbidden at this point, or cannot be written without
        enclosed content.
        """

        if macro_name not in macrotypes or self.forbids_macros():
            return ""

        template = macrotypes[macro_name]

        # An inline tag has no enclosed content, so macros whose templates
        # need content (such as "color") cannot be produced here. Formatting
        # such a template without content would raise a KeyError.

        if "%(content)s" in template:
            return ""

        argname = macroargs.get(macro_name)
        args = ("%s=" % argname if argname else "") + (options or "")
        return template % {"args" : quote_macro_argument(args)}

    def translate_text(self, s, preformatted=False):

        """
        Translate the plain text string 's', converting notation. If
        'preformatted' is set to a true value, the conversions preserving
        newlines are used.
        """

        mapping = preformatted_notation_mapping if preformatted else notation_mapping
        for before, after in mapping:
            s = s.replace(before, after)
        return s

    def translate_content(self, text, sectiontype=None):

        """
        Return a translation of the given 'text'. If the optional 'sectiontype' is
        specified, the translation may be modified to a form appropriate to the
        section being translated.
        """

        parts = []

        # NOTE(review): preformatted_sectiontypes includes None, so text with
        # no section type also uses the preformatted notation mapping -
        # confirm that this is intended.

        preformatted = sectiontype in preformatted_sectiontypes

        last = 0
        for match in content_regexp.finditer(text):
            start, end = match.span()
            parts.append(self.translate_text(text[last:start], preformatted))

            # Handle unformatted sections, keeping markup as it is.

            if sectiontype in ("code", "noformat"):
                parts.append(match.group())
            else:
                parts.append(self.translate_content_match(match))

            last = end

        parts.append(self.translate_text(text[last:], preformatted))
        return "".join(parts)

    def translate_block(self, blocktype, blocktext):

        "Translate the block with the given 'blocktype' and 'blocktext'."

        is_heading = blocktype in headings
        if is_heading:
            self.in_heading = True

        # Make sure that the heading state is reset even if translation fails.

        try:
            parts = []

            # Translate headings and blockquotes.

            if blocktype in blocktypes:
                parts.append(blocktypes[blocktype] % self.translate_content(blocktext))

            # Translate list items.

            elif blocktype == "list":
                for listmarker, listitem in get_list_items(blocktext):
                    parts.append("%s %s" % (self.translate_marker(listmarker), self.translate_content(listitem)))

            # Translate table items.

            elif blocktype == "table":

                # Enter the table.

                self.enter_section()

                table_parts = []
                first = True

                # Rows after the first are preceded by a row separator.

                for cellsep, columns in get_table_rows(blocktext):
                    if not first:
                        table_parts.append("==")
                    else:
                        first = False
                    moinsep = self.translate_cellsep(cellsep)
                    table_parts.append(moinsep.join([self.translate_cell(cellsep, column) for column in columns]))

                # Nest the section appropriately. This is done after the cells
                # have been translated so that any nesting within them is
                # known.

                opening, closing = self.nest_section()

                parts.append("%s#!table" % opening)
                parts += table_parts
                parts.append(closing)

                # Leave the table.

                self.leave_section()

            # Handle anonymous blocks.

            else:
                parts.append(self.translate_content(blocktext))

        finally:
            if is_heading:
                self.in_heading = False

        return "\n".join(parts)

    def translate_section(self, sectiontype, options, text):

        """
        Translate the section with the given 'sectiontype', 'options' and
        'text', returning a list of strings making up the translation.
        """

        parts = []

        # Enter the section.

        self.enter_section()

        mointype = sectiontypes.get(sectiontype)
        section_content = self.translate_content(text.strip(), sectiontype)

        # Nest the section appropriately, now that nesting within the content
        # is known.

        opening, closing = self.nest_section()

        parts.append("%s%s\n" % (opening, mointype or ""))
        if options:
            parts.append("## %s\n" % options)
        parts.append(section_content)
        parts.append("\n%s\n" % closing)

        # Leave the section.

        self.leave_section()

        return parts

    def enter_section(self):

        "Record entry into a section, noting the deepest level reached."

        self.level += 1
        self.max_level = max(self.level, self.max_level)

    def leave_section(self):

        "Record exit from a section, resetting the deepest level at the top."

        self.level -= 1
        if not self.level:
            self.max_level = 0

    def nest_section(self):

        """
        Return an (opening, closing) tuple of brace strings for the current
        section: three braces plus one for each level of nesting recorded
        below the current level.
        """

        level = 3 + self.max_level - self.level
        opening = "{" * level
        closing = "}" * level
        return opening, closing

    # General parsing.

    def parse_text(self, s):

        "Parse the content in the string 's', returning the translation."

        parts = []

        # Control spacing between blocks and other blocks or sections.

        preceded_by_block = False

        for regiontype, text in get_regions(s):

            # Handle list, heading, blockquote or anonymous blocks.

            if regiontype is None:
                if preceded_by_block:
                    parts.append("\n")

                first = True
                for blocktype, blocktext in get_blocks(text):
                    if not first:
                        parts.append("\n")
                    else:
                        first = False
                    parts.append("%s" % self.translate_block(blocktype, blocktext))

                if not first:
                    preceded_by_block = True

            # Handle sections.

            else:
                sectiontype, options = regiontype

                # Direct translations of sections.

                if sectiontype in sectiontypes:
                    if preceded_by_block:
                        parts.append("\n")

                    parts += self.translate_section(sectiontype, options, text)
                    preceded_by_block = True

                # Translations of macros (which can look like sections).

                elif sectiontype in macrotypes and not self.forbids_macros():
                    argname = macroargs.get(sectiontype)

                    # Options are None where the opening tag provided none.

                    args = ("%s=" % argname if argname else "") + (options or "")
                    parts.append(macrotypes[sectiontype] % {
                        "content" : quote_macro_argument(self.translate_content(text, sectiontype)),
                        "args" : quote_macro_argument(args)
                        })
                    preceded_by_block = False

                # Unrecognised sections.

                else:
                    parts += self.translate_section(sectiontype, None, text)
                    preceded_by_block = False

        return "".join(parts)

    def forbids_macros(self):

        "Return whether macros are forbidden in the current context."

        return self.in_heading
paul@71	642
def parse(s, out):

    """
    Translate the Confluence markup in the string 's', writing the result to
    the stream 'out' in a single write.
    """

    translation = ConfluenceParser().parse_text(s)
    out.write(translation)
paul@11	649
if __name__ == "__main__":

    # Read UTF-8 markup from standard input and write the UTF-8 translation
    # to standard output. The codecs reader and writer need byte streams:
    # where the standard streams are text streams exposing a "buffer"
    # attribute, the underlying byte streams are used instead.

    s = codecs.getreader("utf-8")(getattr(sys.stdin, "buffer", sys.stdin)).read()
    out = codecs.getwriter("utf-8")(getattr(sys.stdout, "buffer", sys.stdout))
    parse(s, out)
paul@6	654
paul@6	655	# vim: tabstop=4 expandtab shiftwidth=4