MoinLight

Annotated moinformat/parsers/moin.py

176:47af441b48bf
2018-11-26 Paul Boddie Support linking to stylesheets based on the collection of available files.
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Moin wiki format parser.
paul@0 5
paul@45 6
Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>
paul@0 7
paul@0 8
This program is free software; you can redistribute it and/or modify it under
paul@0 9
the terms of the GNU General Public License as published by the Free Software
paul@0 10
Foundation; either version 3 of the License, or (at your option) any later
paul@0 11
version.
paul@0 12
paul@0 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 16
details.
paul@0 17
paul@0 18
You should have received a copy of the GNU General Public License along with
paul@0 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 20
"""
paul@0 21
paul@91 22
# Document transformations.
paul@91 23
paul@89 24
from moinformat.macros import get_macro
paul@91 25
paul@91 26
# Parser functionality and pattern definition.
paul@91 27
paul@122 28
from moinformat.parsers.common import ParserBase, get_patterns, choice, \
paul@83 29
                                      excl, expect, group, optional, recur, \
paul@83 30
                                      repeat
paul@91 31
paul@91 32
# Serialisation.
paul@91 33
paul@28 34
from moinformat.serialisers import serialise
paul@91 35
paul@91 36
# Document tree nodes.
paul@91 37
paul@151 38
from moinformat.tree.moin import Anchor, Break, Comment, DefItem, DefTerm, \
paul@151 39
                                 Directive, FontStyle, Heading, Larger, \
paul@167 40
                                 LineBreak, Link, LinkLabel, LinkParameter, \
paul@167 41
                                 List, ListItem, Macro, Monospace, Region, \
paul@167 42
                                 Rule, Smaller, Strikethrough, Subscript, \
paul@167 43
                                 Superscript, Table, TableAttr, TableAttrs, \
paul@167 44
                                 TableCell, TableRow, Text, Transclusion, \
paul@169 45
                                 Underline, Verbatim
paul@32 46
paul@55 47
join = "".join
paul@55 48
paul@42 49
class MoinParser(ParserBase):
paul@2 50
paul@32 51
    "A wiki region parser."
paul@2 52
paul@109 53
    format = "moin"
paul@109 54
paul@165 55
    def __init__(self, metadata, parsers=None, root=None):

        """
        Set up the parser. The 'metadata', 'parsers' and 'root' arguments are
        handed unchanged to the ParserBase initialiser; 'parsers' is optional
        and 'root', when given, indicates the document-level parser.
        """

        ParserBase.__init__(self, metadata, parsers, root)

        # Headings are collected here as they are parsed so that
        # identify_headings can assign identifiers afterwards.

        self.headings = []

        # Macro nodes are collected here as they are parsed so that
        # evaluate_macros can process them afterwards.

        self.macros = []
paul@128 71
paul@33 72
    # Principal parser methods.
paul@33 73
paul@32 74
    def parse(self, s):

        """
        Parse the page text 's' and return the resulting top-level region.
        The items obtained from 's' and the region are also stored on the
        parser as 'items' and 'region'.
        """

        self.items = self.get_items(s)
        self.region = Region([], type="moin")

        # The header and directives are read first.

        self.parse_region_header(self.region)
        self.parse_region_directives(self.region)

        # Where the region still has a type, that type is tested in order to
        # find an appropriate parser.

        if self.region.type:
            self.parse_region_type(self.region)

        # Otherwise, the page is handled directly by this parser. Pages do not
        # need to use an explicit format indicator.

        else:
            self.parse_region_content(self.items, self.region)

        # With all content parsed, give the recorded headings identifiers.

        self.identify_headings()

        return self.region
paul@36 104
paul@0 105
paul@36 106
paul@89 107
    # Macro evaluation.
paul@89 108
paul@89 109
    def evaluate_macros(self):

        """
        Evaluate each recorded macro node. Nodes whose name yields no macro
        class from get_macro are left alone.
        """

        for node in self.macros:
            macro_cls = get_macro(node.name)

            # Only names with an available macro class are instantiated (with
            # the node and the parsed region) and evaluated.

            if macro_cls:
                macro_cls(node, self.region).evaluate()
paul@89 125
paul@128 126
    # Heading disambiguation.
paul@128 127
paul@128 128
    def identify_headings(self):

        """
        Assign identifiers to the recorded headings based on their textual
        content. The first heading having a given text receives that text as
        its identifier; each later heading with the same text receives
        "<text>-<n>" where n counts the repeats starting from 1.
        """

        # Map each heading text to the number of repeats seen so far.

        repeats = {}

        for heading in self.headings:
            text = heading.text_content()

            # Test membership with 'in' rather than dict.has_key, which only
            # exists in Python 2.

            if text not in repeats:
                repeats[text] = 0
                heading.identifier = text
            else:
                repeats[text] += 1
                heading.identifier = "%s-%d" % (text, repeats[text])
paul@128 143
paul@89 144
paul@89 145
paul@165 146
    # Conversion back to text.
paul@165 147
paul@165 148
    def get_serialiser(self):

        """
        Return a serialiser obtained from a copy of the parser's metadata that
        has been configured to employ Moin as the output format. The parser's
        own metadata object is not modified here; only the copy is.
        """

        metadata = self.metadata.copy()

        # Apply the settings in a fixed order before requesting the serialiser.

        settings = (
            ("link_format", None),
            ("output_context", "standalone"),
            ("output_format", "moin"),
            )

        for key, value in settings:
            metadata.set(key, value)

        return metadata.get_serialiser()
paul@165 157
paul@165 158
paul@165 159
paul@31 160
    # Parser methods supporting different page features.
paul@31 161
paul@37 162
    def parse_attrname(self, attrs):

        """
        Handle an attribute name within 'attrs', appending a TableAttr for it.
        A quote and value are recorded on the attribute only when the text read
        up to the "attrvalue" pattern is empty.
        """

        attr = TableAttr(self.match_group("name"))

        # NOTE(review): read_until is defined elsewhere; an empty result is
        # taken here to mean that a value follows the name directly.

        if self.read_until(["attrvalue"], False) == "":
            attr.quote = self.match_group("quote")
            attr.value = self.match_group("value")

        attrs.append(attr)
paul@31 175
paul@37 176
    def parse_break(self, region):

        "Add a paragraph break node to 'region' and start a new block."

        node = Break()
        self.add_node(region, node)
        self.new_block(region)
paul@31 182
paul@151 183
    def parse_comment(self, region):

        """
        Add a comment node to 'region', built from the matched "comment" and
        "extra" groups, and start a new block.
        """

        node = Comment(self.match_group("comment"), self.match_group("extra"))
        self.add_node(region, node)
        self.new_block(region)
paul@151 191
paul@37 192
    def parse_defitem(self, region, extra=""):

        """
        Handle a definition item within 'region'. The item is created with the
        matched "pad" group and the given 'extra' text, populated using the
        list item patterns, added to 'region', and followed by a new block.
        """

        item = DefItem([], self.match_group("pad"), extra)
        self.parse_region_details(item, self.listitem_pattern_names)

        self.add_node(region, item)
        self.new_block(region)
paul@8 201
paul@37 202
    def parse_defterm(self, region):

        """
        Handle a definition term within 'region'. The term is populated until
        either the "deftermend" or "deftermsep" pattern matches and is then
        added to 'region'.
        """

        term = DefTerm([], self.match_group("pad"))
        self.parse_region_details(term, ["deftermend", "deftermsep"])
        self.add_node(region, term)

        # A separator introduces a definition item for the term.

        if self.matching_pattern() == "deftermsep":
            self.parse_defitem(region)
            return

        # Without an item, the padding from the final match is kept on the
        # term itself.

        term.extra = self.match_group("pad")
paul@122 218
paul@37 219
    def parse_defterm_empty(self, region):

        """
        Handle an empty definition term within 'region'. The padding matched
        before the separator is passed on to the definition item as its extra
        text.
        """

        leading = self.match_group("pad")

        # Continue in 'region' with only the separator pattern active, then
        # handle the item.

        self.parse_region_details(region, ["deftermsep"])
        self.parse_defitem(region, leading)
paul@16 226
paul@151 227
    def parse_directive(self, region):

        """
        Add a processing directive node to 'region', built from the matched
        "directive" and "extra" groups, and start a new block.
        """

        node = Directive(self.match_group("directive"), self.match_group("extra"))
        self.add_node(region, node)
        self.new_block(region)
paul@151 235
paul@37 236
    def parse_fontstyle(self, region):

        """
        Handle emphasis and strong styles within 'region'.

        The number of matched style characters selects the styles: 2, 4 or 5
        involve emphasis; 3, 5 or 6 involve strong. Within an existing
        FontStyle region the characters may first close open styles, and
        StopIteration is raised when the region reports that it is no longer
        active, after rewinding any characters left unconsumed.
        """

        count = len(self.match_group("style"))

        # Handle endings.

        if isinstance(region, FontStyle):

            # Both tests use the original count, before anything is consumed.

            ends_emphasis = count in (2, 4, 5)
            ends_strong = count in (3, 5, 6)
            still_active = True

            if region.emphasis and ends_emphasis:
                still_active = region.close_emphasis()
                count -= 2

            if region.strong and ends_strong:
                still_active = region.close_strong()
                count -= 3

            # The span has ended: give back any characters not consumed and
            # leave the span.

            if not still_active:
                if count:
                    self.items.rewind(count)
                raise StopIteration

            # Everything was consumed while the span remains open.

            if not count:
                return

        # Handle new styles using whatever characters remain.

        span = FontStyle([], count in (2, 4, 5), count in (3, 5, 6))

        # Counts of 4 and 6 produce a span without parsing any content for it.

        if count not in (4, 6):
            self.parse_region_details(span, self.inline_pattern_names)

        region.append_inline(span)
paul@31 274
paul@37 275
    def parse_halign(self, attrs):

        """
        Handle horizontal alignment within 'attrs'. A value of "(" means left,
        ")" means right, and anything else means center.
        """

        value = self.match_group("value")

        if value == "(":
            alignment = "left"
        elif value == ")":
            alignment = "right"
        else:
            alignment = "center"

        attrs.append(TableAttr("halign", alignment, True))
paul@31 282
paul@37 283
    def parse_heading(self, region):

        """
        Handle a heading within 'region'. The heading level is the length of
        the matched "level" group. The heading is added to 'region', followed
        by a new block, and recorded on the root parser.
        """

        extra = self.match_group("extra")
        level = len(self.match_group("level"))
        pad = self.match_group("pad")

        heading = Heading([], level, extra, pad)

        # Heading content may use the inline patterns as well as the heading
        # end pattern.

        patterns = ["headingend"] + self.inline_pattern_names
        self.parse_region_details(heading, patterns)

        self.add_node(region, heading)
        self.new_block(region)

        # Record the heading for later identifier assignment.

        self.root.headings.append(heading)
paul@128 298
paul@37 299
    def parse_heading_end(self, heading):

        """
        Handle a potential end of 'heading'. Only a matched level of the same
        length as the heading's level ends it: the trailing padding and extra
        text are then recorded on the heading and StopIteration is raised.
        Otherwise nothing happens.
        """

        if heading.level != len(self.match_group("level")):
            return

        heading.end_pad = self.match_group("pad")
        heading.end_extra = self.match_group("extra")
        raise StopIteration
paul@17 308
paul@43 309
    def parse_list(self, item):

        """
        Create and return a list starting with 'item', taking its indent,
        marker and number from that item, and populate it using the list
        patterns.
        """

        new_list = List([item], item.indent, item.marker, item.num)
        self.parse_region_details(new_list, self.list_pattern_names, True)
        return new_list
paul@43 316
paul@37 317
    def parse_listitem(self, region):

        """
        Handle a list item marker within 'region'.

        Depending on the last node in 'region', the new item starts a new
        list, starts a nested list inside the last node, or is added to
        'region' directly.
        """

        indent = len(self.match_group("indent"))
        marker = self.match_group("marker")
        num = self.match_group("num")
        space = self.match_group("pad")

        last = region.node(-1)

        # A new list is needed unless the preceding node is a list or item.

        new_list = not isinstance(last, (List, ListItem))

        # If the marker or number changes at the same indent, or if the indent
        # is smaller, queue the item and end the list.

        # Note that Moin format does not seek to support item renumbering,
        # instead starting new lists on number changes.

        if not new_list:
            same_indent = indent == last.indent
            new_marker = last.marker != marker and same_indent
            new_num = num is not None and last.num != num and same_indent

            # NOTE(review): end_region is defined elsewhere; it presumably
            # stops processing of this region so that the queued match can be
            # handled at an outer level - confirm.

            if new_marker or new_num or indent < last.indent:
                self.queue_match()
                self.end_region(region)

        # Obtain a list item and populate it.

        item = ListItem([], indent, marker, space, num)
        self.parse_region_details(item, self.listitem_pattern_names)

        # Start a new list if not preceded by a list item, adding a trailing
        # block for new elements.

        if new_list:
            self.add_node(region, self.parse_list(item))
            self.new_block(region)

        # Add a nested list to the last item.

        elif indent > last.indent:
            self.add_node(last, self.parse_list(item))

        # Add the item to the current list.

        else:
            self.add_node(region, item)
paul@31 366
paul@37 367
    def parse_rule(self, region):

        """
        Add a horizontal rule to 'region', with a length equal to the number
        of matched rule characters, and start a new block.
        """

        node = Rule(len(self.match_group("rule")))
        self.add_node(region, node)
        self.new_block(region)
paul@31 375
paul@37 376
    def parse_section(self, region):

        """
        Handle the start of a new section within 'region'. The section is
        parsed using the lengths of the matched "indent" and "level" groups.
        An inline section is appended as inline content; any other section is
        added as a node, followed by a new block if 'region' allows blocks.
        """

        indent = len(self.match_group("indent"))
        level = len(self.match_group("level"))

        section = self.parse_region(level, indent, "inline")

        # An inline section is treated like any other inline element.

        if section.type == "inline":
            region.append_inline(section)
            return

        # Anything else is a block element of its own.

        self.add_node(region, section)

        if region.allow_blocks:
            self.new_block(region)
paul@31 398
paul@37 399
    def parse_table_attrs(self, cell):

        """
        Handle the start of table attributes within 'cell'.

        The attributes are parsed into a TableAttrs collection and then
        checked. A valid collection is appended to 'cell' and recorded as its
        'attrs'; an invalid one is serialised and appended to 'cell' as text.
        """

        attrs = TableAttrs([])
        self.parse_region_details(attrs, self.table_attr_pattern_names)

        # Test the validity of the attributes, stopping at the first problem.

        valid = True
        previous = None

        for node in attrs.nodes:

            # Text separator nodes must be whitespace.

            if isinstance(node, Text):
                if node.s.strip():
                    valid = False
                    break

            # Named (non-concise) attributes must be preceded by space if not
            # the first.

            elif previous and not node.concise and not isinstance(previous, Text):
                valid = False
                break

            previous = node

        # Invalid nodes were found: serialise the attributes as text.

        if not valid:
            cell.append_inline(Text(serialise(attrs, self.get_serialiser())))
            return

        # All nodes were valid: add the attributes as a node, also recording
        # their presence.

        cell.append(attrs)
        cell.attrs = attrs
paul@25 437
paul@37 438
    def parse_table_row(self, region):

        """
        Handle the start of a table row within 'region'.

        Cells are parsed until the "tableend" pattern matches. A row that
        ends with unfinished cell content, or that has no cells at all, is
        converted back to text and appended inline to 'region'. Otherwise the
        row is added to the active table, or to a new table which is then
        added to 'region'. A new block is started in either case.
        """

        # Identify any active table: the node before the last one must be a
        # table and the last node (the current block) must be empty.

        table = region.node(-2)
        block = region.node(-1)

        if not (isinstance(table, Table) and block.empty()):
            new_table = table = Table([])
        else:
            new_table = None

        row = TableRow([])

        while True:
            cell = TableCell([])
            self.parse_region_details(cell, self.table_row_pattern_names)

            # Handle the end of the row.

            if self.matching_pattern() == "tableend":
                trailing = self.match_group("extra")

                # If the cell was started but not finished, or if the row has
                # no cells, convert the row into text.

                if not row.nodes or not cell.empty():

                    # Convert the nodes back to text.

                    serialiser = self.get_serialiser()

                    for node in row.nodes:
                        region.append_inline(Text(serialise(node, serialiser)))

                    region.append_inline(Text(serialise(cell, serialiser) + trailing))

                    self.new_block(region)
                    return

                # Otherwise the row has cells and the final cell is empty, so
                # that cell is discarded. (The previous test for a non-empty
                # final cell at this point could never succeed.)

                row.trailing = trailing
                break

            # A cell separator has been found.

            row.append(cell)

        # Add the row to the table and any new table to the region.

        table.add(row)
        if new_table:
            self.add_node(region, new_table)

        self.new_block(region)
paul@24 499
paul@37 500
    def parse_valign(self, attrs):

        """
        Handle vertical alignment within 'attrs'. A value of "^" means top and
        anything else means bottom.
        """

        value = self.match_group("value")

        if value == "^":
            alignment = "top"
        else:
            alignment = "bottom"

        attrs.append(TableAttr("valign", alignment, True))
paul@25 507
paul@30 508
paul@30 509
paul@98 510
    def inline_patterns_for(self, name):

        """
        Return a copy of the inline pattern names in which the given 'name' is
        replaced by the name of its end pattern (the name followed by "end").
        A ValueError is raised by the lookup if 'name' is not present.
        """

        names = list(self.inline_pattern_names)
        position = names.index(name)
        names[position] = "%send" % name
        return names
paul@98 517
paul@98 518
paul@98 519
paul@31 520
    # Inline formatting handlers.
paul@31 521
paul@37 522
    def parse_inline(self, region, cls, pattern_name):

        """
        Handle an inline region: create an empty span of the given 'cls',
        populate it using the inline patterns with 'pattern_name' replaced by
        its end pattern, and append it inline to 'region'.
        """

        span = cls([])
        active_patterns = self.inline_patterns_for(pattern_name)
        self.parse_region_details(span, active_patterns)
        region.append_inline(span)
paul@20 529
paul@37 530
    def parse_larger(self, region):

        "Handle a larger text span (~+ ... +~) within 'region'."

        self.parse_inline(region, Larger, pattern_name="larger")
paul@31 532
paul@37 533
    def parse_monospace(self, region):

        "Handle a monospace span (` ... `) within 'region'."

        mono = Monospace([])

        # Only the end pattern is active inside the span, unlike the other
        # inline spans, which keep the remaining inline patterns active.

        self.parse_region_details(mono, ["monospaceend"])
        region.append_inline(mono)
paul@20 537
paul@37 538
    def parse_smaller(self, region):

        "Handle a smaller text span (~- ... -~) within 'region'."

        self.parse_inline(region, Smaller, pattern_name="smaller")
paul@31 540
paul@48 541
    def parse_strike(self, region):

        "Handle a strikethrough span (--( ... )--) within 'region'."

        self.parse_inline(region, Strikethrough, pattern_name="strike")
paul@48 543
paul@37 544
    def parse_sub(self, region):

        "Handle a subscript span (,, ... ,,) within 'region'."

        self.parse_inline(region, Subscript, pattern_name="sub")
paul@31 546
paul@37 547
    def parse_super(self, region):

        "Handle a superscript span (^ ... ^) within 'region'."

        self.parse_inline(region, Superscript, pattern_name="super")
paul@20 549
paul@37 550
    def parse_underline(self, region):

        "Handle an underline span (__ ... __) within 'region'."

        self.parse_inline(region, Underline, pattern_name="underline")
paul@31 552
paul@167 553
    # Link formatting handlers.
paul@167 554
paul@167 555
    def _parse_link(self, region, cls, pattern_names):

        """
        Handle a link-like feature within 'region', creating a span of the
        given 'cls' for the matched target and appending it inline.

        Unless the match already included the end of the feature, the regions
        following the target are parsed using 'pattern_names': the first
        becomes a LinkLabel and each one after a "linksep" match becomes a
        LinkParameter.
        """

        target = self.match_group("target")
        end = self.match_group("end")

        span = cls([], target)

        # Obtain the extra details.

        if not end:
            part_cls = LinkLabel

            # Introduce a label or parameter for each separated region.

            while True:
                part = part_cls([])
                self.parse_region_details(part, pattern_names)
                span.append(part)

                # Anything other than a separator ends the feature.

                if self.matching_pattern() != "linksep":
                    break

                part_cls = LinkParameter

        region.append_inline(span)
paul@167 579
paul@167 580
    def parse_link(self, region):

        "Handle a link ([[ ... ]]) within 'region'."

        names = self.link_pattern_names
        self._parse_link(region, Link, names)
paul@167 582
paul@167 583
    def parse_transclusion(self, region):

        "Handle a transclusion within 'region' using the transclusion patterns."

        names = self.transclusion_pattern_names
        self._parse_link(region, Transclusion, names)
paul@167 585
paul@31 586
paul@19 587
paul@77 588
    # Complete inline pattern handlers.
paul@77 589
paul@116 590
    def parse_anchor(self, region):

        "Append an anchor for the matched target inline to 'region'."

        region.append_inline(Anchor(self.match_group("target")))
paul@116 594
paul@106 595
    def parse_linebreak(self, region):

        "Append a line break inline to 'region'."

        node = LineBreak()
        region.append_inline(node)
paul@106 597
paul@77 598
    def parse_macro(self, region):

        """
        Handle a macro within 'region'. A Macro node is created from the
        matched name and arguments together with the region's current append
        point, appended inline to 'region', and recorded on the root parser.
        """

        name = self.match_group("name")
        args = self.match_group("args")

        # Obtain the raw arguments by splitting on commas only. Moin usually
        # leaves it to the macro to interpret the individual arguments.
        # Missing or empty arguments yield an empty list.

        if args:
            arglist = args.split(",")
        else:
            arglist = []

        macro = Macro(name, arglist, region.append_point(), region)
        region.append_inline(macro)

        # Record the macro for later processing.

        self.root.macros.append(macro)
paul@87 612
paul@169 613
    def parse_verbatim(self, region):

        "Append the matched verbatim text inline to 'region'."

        region.append_inline(Verbatim(self.match_group("verbatim")))
paul@169 616
paul@77 617
paul@77 618
paul@31 619
    # Table attribute handlers.
paul@31 620
paul@37 621
    def parse_table_attr(self, attrs, pattern_name):

        """
        Handle a table attribute: append to 'attrs' a TableAttr named after
        'pattern_name' and holding the matched "value" group.
        """

        value = self.match_group("value")
        attrs.append(TableAttr(pattern_name, value, True))
paul@37 626
paul@37 627
    def parse_colour(self, cell):

        "Record a colour attribute using the matched value."

        # NOTE(review): 'cell' is passed on as the attribute collection.

        self.parse_table_attr(cell, pattern_name="colour")
paul@37 629
paul@37 630
    def parse_colspan(self, cell):

        "Record a column span attribute using the matched value."

        # NOTE(review): 'cell' is passed on as the attribute collection.

        self.parse_table_attr(cell, pattern_name="colspan")
paul@37 632
paul@37 633
    def parse_rowspan(self, cell):

        "Record a row span attribute using the matched value."

        # NOTE(review): 'cell' is passed on as the attribute collection.

        self.parse_table_attr(cell, pattern_name="rowspan")
paul@37 635
paul@37 636
    def parse_width(self, cell):

        "Record a width attribute using the matched value."

        # NOTE(review): 'cell' is passed on as the attribute collection.

        self.parse_table_attr(cell, pattern_name="width")
paul@37 638
paul@37 639
paul@37 640
paul@37 641
    # Regular expressions.
paul@37 642
paul@37 643
    syntax = {
paul@37 644
        # Page regions:
paul@55 645
paul@55 646
        "regionstart"   : join((group("indent", r"\N*"),                        # ws... (optional)
paul@55 647
                                group("level", repeat("[{]", 3)))),             # {{{...
paul@55 648
paul@55 649
        "regionend"     : join((r"\N*",                                         # ws... (optional)
paul@67 650
                                group("feature", join((
paul@67 651
                                    group("level", repeat("[}]", 3)),           # }}}...
paul@151 652
                                    optional(group("extra", r"\n"))))))),       # nl (optional)
paul@151 653
paul@151 654
        # Region header and directives:
paul@55 655
paul@55 656
        "header"        : join(("#!",                                           # #!
paul@55 657
                                group("args", ".*?"), "\n")),                   # text-excl-nl
paul@25 658
paul@151 659
        "directive"     : join((r"^#",                                          # #
paul@151 660
                                group("directive", r".*?$"),                    # rest of line
paul@151 661
                                optional(group("extra", r"\n")))),              # nl (optional)
paul@151 662
paul@37 663
        # Region contents:
paul@76 664
paul@76 665
        # Line-oriented patterns support features which require their own
paul@76 666
        # separate lines.
paul@55 667
paul@55 668
        "break"         : r"^(\s*?)\n",                                         # blank line
paul@55 669
paul@151 670
        "comment"       : join((r"^##",                                         # ##
paul@151 671
                                group("comment", r".*?$"),                      # rest of line
paul@151 672
                                optional(group("extra", r"\n")))),              # nl (optional)
paul@151 673
paul@55 674
        "defterm"       : join(("^",
paul@55 675
                                group("pad", r"\N+"),                           # ws...
paul@55 676
                                expect(".+?::"))),                              # text ::
paul@55 677
paul@55 678
        "defterm_empty" : join(("^",
paul@55 679
                                group("pad", r"\N+"),                           # ws...
paul@122 680
                                expect("::\s+"))),                              # :: ws...
paul@55 681
paul@55 682
        "heading"       : join(("^",
paul@55 683
                                group("extra", r"\N*"),                         # ws... (optional)
paul@55 684
                                group("level", "=+"),                           # =...
paul@55 685
                                group("pad", r"\s+"),                           # ws...
paul@55 686
                                expect(join((r".*?\N+",                         # text
paul@55 687
                                             recur("level"),                    # =...
paul@55 688
                                             r"\N*$"))))),                      # ws... (optional)
paul@55 689
paul@55 690
        "listitem"      : join(("^",
paul@55 691
                                group("indent", r"\N+"),                        # ws...
paul@55 692
                                group("marker", r"\*"),                         # list-marker
paul@55 693
                                group("pad", r"\s*"))),                         # ws... (optional)
paul@55 694
paul@55 695
        "listitem_num"  : join(("^",
paul@55 696
                                group("indent", r"\N+"),                        # ws...
paul@55 697
                                group("marker", r"\d+\."),                      # decimal-marker
paul@55 698
                                optional(join(("#", group("num", r"\d+")))),    # # num (optional)
paul@55 699
                                group("pad", r"\s+"))),                         # ws...
paul@55 700
paul@55 701
        "listitem_alpha": join(("^",
paul@55 702
                                group("indent", r"\N+"),                        # ws...
paul@55 703
                                group("marker", r"[aA]\."),                     # alpha-marker
paul@55 704
                                optional(join(("#", group("num", r"\d+")))),    # # num (optional)
paul@55 705
                                group("pad", r"\s+"))),                         # ws...
paul@55 706
paul@55 707
        "listitem_roman": join(("^",
paul@55 708
                                group("indent", r"\N+"),                        # ws...
paul@55 709
                                group("marker", r"[iI]\."),                     # roman-marker
paul@55 710
                                optional(join(("#", group("num", r"\d+")))),    # # num (optional)
paul@55 711
                                group("pad", r"\s+"))),                         # ws...
paul@55 712
paul@55 713
        "listitem_dot"  : join(("^",
paul@55 714
                                group("indent", r"\N+"),                        # ws...
paul@55 715
                                group("marker", r"\."),                         # dot-marker
paul@55 716
                                group("pad", r"\s*"))),                         # ws... (optional)
paul@55 717
paul@55 718
        "tablerow"      : r"^\|\|",                                             # ||
paul@37 719
paul@37 720
        # Region contents:
paul@76 721
paul@76 722
        # Inline patterns are for markup features that appear within blocks.
paul@76 723
        # The patterns below start inline spans that can contain other markup
paul@76 724
        # features.
paul@55 725
paul@55 726
        "fontstyle"     : group("style", repeat("'", 2, 6)),                    # ''...
paul@172 727
paul@172 728
        # Trivial markup balancing is done below using the end features.
paul@172 729
paul@172 730
        "larger"        : join((r"~\+",                                         # ~+
paul@172 731
                                expect(r"\P*?\+~"))),                           # ... +~
paul@172 732
paul@172 733
        "monospace"     : join((r"`",                                           # `
paul@172 734
                                expect(r"\P*?`"))),                             # ... `
paul@172 735
paul@172 736
        "smaller"       : join((r"~-",                                          # ~-
paul@172 737
                                expect(r"\P*?-~"))),                            # ... -~
paul@172 738
paul@172 739
        "strike"        : join((r"--\(",                                        # --(
paul@172 740
                                expect(r"\P*?\)--"))),                          # ... )--
paul@172 741
paul@172 742
        "sub"           : join((r",,",                                          # ,,
paul@172 743
                                expect(r"\P*?,,"))),                            # ... ,,
paul@172 744
paul@172 745
        "super"         : join((r"\^",                                          # ^
paul@172 746
                                expect(r"\P*?\^"))),                            # ... ^
paul@172 747
paul@172 748
        "underline"     : join((r"__",                                          # __
paul@172 749
                                expect(r"\P*?__"))),                            # ... __
paul@172 750
paul@172 751
        # Rules are treated as inline but, unlike the above, appear without
paul@172 752
        # contents.
paul@172 753
paul@55 754
        "rule"          : group("rule", "-----*"),                              # ----...
paul@25 755
paul@167 756
        # Links and transclusions may start inline spans.
paul@167 757
paul@167 758
        "link"          : join((r"\[\[",                                        # [[
paul@167 759
                                group("target", ".*?"),                         # ...
paul@167 760
                                choice((r"\|",                                  # |
paul@167 761
                                        group("end", r"]]"))))),                # ]]
paul@167 762
paul@167 763
        "transclusion"  : join((r"\{\{",                                        # {{
paul@167 764
                                excl(r"\{"),                                    # not-{
paul@167 765
                                group("target", ".*?"),                         # ...
paul@167 766
                                choice((r"\|",                                  # |
paul@167 767
                                        group("end", r"}}"))))),                # }}
paul@167 768
paul@76 769
        # Complete inline patterns are for markup features that do not support
paul@76 770
        # arbitrary content within them:
paul@55 771
paul@116 772
        "anchor"        : join((r"\(\(",                                        # ((
paul@116 773
                                group("target", ".*?"),                         # target
paul@116 774
                                r"\)\)")),                                      # ))
paul@116 775
paul@106 776
        "linebreak"     : r"\\\\",                                              # \\
paul@106 777
paul@77 778
        "macro"         : join(("<<",                                           # <<
paul@77 779
                                group("name", "\w+?"),                          # digit-letter...
paul@77 780
                                optional(join((r"\(",                           # ( (optional)
paul@77 781
                                               group("args", ".*?"),            # not-)...
paul@77 782
                                               r"\)"))),                        # ) (optional)
paul@77 783
                                ">>")),                                         # >>
paul@77 784
paul@169 785
        "verbatim"      : join(("<<<",                                          # <<<
paul@169 786
                                group("verbatim", ".*?"),                       # ...
paul@169 787
                                ">>>")),
paul@169 788
paul@76 789
        # Ending patterns for inline features:
paul@55 790
paul@55 791
        "largerend"     : r"\+~",                                               # +~
paul@167 792
        "linkend"       : r"]]",                                                # ]]
paul@55 793
        "monospaceend"  : r"`",                                                 # `
paul@55 794
        "smallerend"    : r"-~",                                                # -~
paul@55 795
        "strikeend"     : r"\)--",                                              # )--
paul@55 796
        "subend"        : r",,",                                                # ,,
paul@55 797
        "superend"      : r"\^",                                                # ^
paul@167 798
        "transclusionend": r"}}",                                               # }}
paul@55 799
        "underlineend"  : r"__",                                                # __
paul@37 800
paul@37 801
        # Heading contents:
paul@55 802
paul@55 803
        "headingend"    : join((group("pad", r"\N+"),                           # ws...
paul@55 804
                                group("level", "=+"),                           # =...
paul@63 805
                                group("extra", r"\N*\n"))),                     # ws (optional) nl
paul@37 806
paul@167 807
        # Link/transclusion contents:
paul@167 808
paul@167 809
        "linksep"       : r"\|",                                                # |
paul@167 810
paul@37 811
        # List contents:
paul@55 812
paul@55 813
        "deftermend"    : join(("::", group("pad", r"\s*?\n"))),                # ::
paul@55 814
                                                                                # ws... (optional)
paul@55 815
                                                                                # nl
paul@55 816
paul@55 817
        "deftermsep"    : join(("::", group("pad", r"\s+"))),                   # ::
paul@122 818
                                                                                # ws...
paul@55 819
paul@122 820
        "listitemend"   : join((r"^",                                           # next line
paul@148 821
                                choice((expect(r"[^\s]"),                       # without indent
paul@148 822
                                        expect(r"\Z"),                          # end of string
paul@122 823
                                        expect(r"\N+\*"),                       # or with ws... list-marker
paul@122 824
                                        expect(r"\N+\d\."),                     # or with ws... decimal-marker
paul@122 825
                                        expect(r"\N+[aA]\."),                   # or with ws... alpha-marker
paul@122 826
                                        expect(r"\N+[iI]\."),                   # or with ws... roman-marker
paul@122 827
                                        expect(r"\N+\."),                       # or with ws... dot-marker
paul@122 828
                                        expect(r"\N+.+?::\s"),                  # or with ws... text :: ws (next defterm)
paul@122 829
                                        expect(r"\N+::\s"))))),                 # or with ws... :: ws (next defitem)
paul@37 830
paul@37 831
        # Table contents:
paul@55 832
paul@107 833
        "tableattrs"    : join(("<",                                            # lt
paul@107 834
                                excl("<"))),                                    # not-lt
paul@107 835
paul@55 836
        "tablecell"     : r"\|\|",                                              # ||
paul@55 837
paul@55 838
        "tableend"      : join((group("extra", r"\s*?"),                        # ws... (optional)
paul@55 839
                                "^")),                                          # next line
paul@25 840
paul@37 841
        # Table attributes:
paul@55 842
paul@55 843
        "tableattrsend" : r">",                                                 # >
paul@55 844
        "halign"        : group("value", "[(:)]"),                              # halign-marker
paul@55 845
        "valign"        : group("value", "[v^]"),                               # valign-marker
paul@55 846
        "colour"        : group("value", join(("\#",                            # #
paul@55 847
                                               repeat("[0-9A-F]", 6, 6)))),     # nnnnnn
paul@55 848
paul@55 849
        "colspan"       : join(("-",                                            # -
paul@55 850
                                group("value", "\d+"))),                        # n...
paul@55 851
paul@55 852
        "rowspan"       : join((r"\|",                                          # |
paul@55 853
                                group("value", "\d+"))),                        # n...
paul@55 854
paul@55 855
        "width"         : group("value", "\d+%"),                               # n... %
paul@55 856
paul@55 857
        "attrname"      : join((excl(r"[-\d]"),                                 # not-dash-or-digit
paul@55 858
                                group("name", r"[-\w]+"))),                     # dash-digit-letter...
paul@55 859
paul@55 860
        "attrvalue"     : join(("=", group("quote", r"\Q"),                     # quote
paul@55 861
                                     group("value", ".*?"),                     # non-quote... (optional)
paul@55 862
                                     recur("quote"))),                          # quote
paul@37 863
        }
paul@37 864
paul@37 865
    # Derive the pattern table from the syntax definitions above.
    # NOTE(review): presumably get_patterns turns each syntax entry into a
    # compiled regular expression -- confirm in moinformat.parsers.common.
    patterns = get_patterns(syntax)
paul@37 866
paul@37 867
paul@37 868
paul@76 869
    # Names of the patterns that are available within particular markup
    # features. The order of the names in each list is significant enough to
    # be kept exactly as originally written.

    # Patterns available within table attributes.
    table_attr_pattern_names = [
        "attrname",
        "colour",
        "colspan",
        "halign",
        "rowspan",
        "tableattrsend",
        "valign",
        "width",
    ]

    # Inline patterns excluding those that start links and transclusions.
    inline_without_links_pattern_names = [
        "anchor",
        "fontstyle",
        "larger",
        "linebreak",
        "macro",
        "monospace",
        "regionstart",
        "smaller",
        "strike",
        "sub",
        "super",
        "underline",
        "verbatim",
    ]

    # All inline patterns, including the link and transclusion starters.
    inline_pattern_names = \
        inline_without_links_pattern_names + ["link", "transclusion"]

    # Patterns available within links: a link may be ended or separated, and
    # may contain a transclusion, but the "link" pattern itself is not listed.
    link_pattern_names = \
        inline_without_links_pattern_names + ["linkend", "linksep", "transclusion"]

    # Patterns that start the various kinds of list item.
    list_pattern_names = [
        "listitem",
        "listitem_alpha",
        "listitem_dot",
        "listitem_num",
        "listitem_roman",
    ]

    # Patterns available within list items.
    listitem_pattern_names = inline_pattern_names + ["listitemend"]

    # Patterns available within regions, excluding the table row pattern.
    region_without_table_pattern_names = \
        inline_pattern_names + list_pattern_names + [
            "break",
            "comment",
            "heading",
            "defterm",
            "defterm_empty",
            "regionend",
            "rule",
        ]

    # Patterns available within table rows.
    table_row_pattern_names = \
        inline_pattern_names + ["tableattrs", "tablecell", "tableend"]

    # Patterns available within transclusions.
    transclusion_pattern_names = \
        inline_without_links_pattern_names + ["linksep", "transclusionend"]

    # The region pattern names are specifically used by the common parser
    # functionality.
    region_pattern_names = region_without_table_pattern_names + ["tablerow"]
paul@25 911
paul@30 912
paul@30 913
paul@32 914
    # Pattern handlers.

    # Class-level aliases for functionality provided by the common parser
    # base class, so that they can be referenced by name in the table below.
    end_region = ParserBase.end_region
    parse_section_end = ParserBase.parse_region_end

    # Mapping from pattern name to the handler registered for that pattern.
    # All ending and separator patterns, together with "tablecell" and the
    # None key, share the end_region handler.
    # NOTE(review): presumably the None key covers the case where no named
    # pattern matched -- confirm in moinformat.parsers.common.
    handlers = {
        None: end_region,
        "anchor": parse_anchor,
        "attrname": parse_attrname,
        "break": parse_break,
        "colour": parse_colour,
        "colspan": parse_colspan,
        "comment": parse_comment,
        "defterm": parse_defterm,
        "defterm_empty": parse_defterm_empty,
        "deftermend": end_region,
        "deftermsep": end_region,
        "directive": parse_directive,
        "fontstyle": parse_fontstyle,
        "halign": parse_halign,
        "heading": parse_heading,
        "headingend": parse_heading_end,
        "larger": parse_larger,
        "largerend": end_region,
        "linebreak": parse_linebreak,
        "link": parse_link,
        "linkend": end_region,
        "linksep": end_region,
        "macro": parse_macro,
        "listitemend": end_region,
        "listitem": parse_listitem,
        "listitem_alpha": parse_listitem,
        "listitem_dot": parse_listitem,
        "listitem_num": parse_listitem,
        "listitem_roman": parse_listitem,
        "monospace": parse_monospace,
        "monospaceend": end_region,
        "regionstart": parse_section,
        "regionend": parse_section_end,
        "rowspan": parse_rowspan,
        "rule": parse_rule,
        "smaller": parse_smaller,
        "smallerend": end_region,
        "strike": parse_strike,
        "strikeend": end_region,
        "sub": parse_sub,
        "subend": end_region,
        "super": parse_super,
        "superend": end_region,
        "tableattrs": parse_table_attrs,
        "tableattrsend": end_region,
        "tablerow": parse_table_row,
        "tablecell": end_region,
        "tableend": end_region,
        "transclusion": parse_transclusion,
        "transclusionend": end_region,
        "underline": parse_underline,
        "underlineend": end_region,
        "valign": parse_valign,
        "verbatim": parse_verbatim,
        "width": parse_width,
    }
paul@2 976
paul@42 977
# Expose the parser class under the module-level name "parser".
# NOTE(review): presumably a conventional name looked up by code that loads
# parser modules -- confirm against the caller.
parser = MoinParser
paul@0 978
paul@0 979
# vim: tabstop=4 expandtab shiftwidth=4