#!/usr/bin/env python

"""
Moin wiki format parser.

Copyright (C) 2017, 2018, 2019, 2020 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# Document transformations.

from moinformat.macros import get_macro

# Parser functionality and pattern definition.

from moinformat.parsers.common import ParserBase, get_patterns, choice, \
                                      excl, expect, group, optional, recur, \
                                      repeat

# Serialisation.

from moinformat.serialisers import serialise

# Document tree nodes.

from moinformat.tree.moin import Anchor, Break, Comment, DefItem, DefTerm, \
                                 Directive, FontStyle, Heading, Larger, \
                                 LineBreak, Link, LinkLabel, LinkParameter, \
                                 List, ListItem, Macro, Monospace, Region, \
                                 Rule, Smaller, Strikethrough, Subscript, \
                                 Superscript, Table, TableAttr, TableAttrs, \
                                 TableCell, TableRow, Text, Transclusion, \
                                 Underline, Verbatim

# Link parsing.

from moinformat.utils.links import parse_link_target

join = "".join

class MoinParser(ParserBase):

    "A wiki region parser."

    formats = ["moin", "wiki"]

    def __init__(self, metadata, parsers=None, root=None):

        """
        Initialise the parser with the given 'metadata' and optional 'parsers'.
        An optional 'root' indicates the document-level parser.
        """

        ParserBase.__init__(self, metadata, parsers, root)

        # Record certain node occurrences for later evaluation.

        self.macros = []

        # Record headings for identifier disambiguation.

        self.headings = []

        # Record link targets for resource identification.

        self.link_targets = []

    # Principal parser methods.

    def parse(self, s):

        """
        Parse page text 's'. Pages consist of regions delimited by markers.
        Return the top-level region obtained from the text.
        """

        self.items = self.get_items(s)
        self.region = Region([], type="moin")

        # Parse page header and directives.

        self.parse_region_header(self.region)
        self.parse_region_directives(self.region)

        # Handle pages directly with this parser. Pages do not need to use an
        # explicit format indicator.

        if not self.region.type:
            self.parse_region_content(self.items, self.region)

        # Otherwise, test the type and find an appropriate parser.

        else:
            self.parse_region_type(self.region)

        # Assign heading identifiers.

        self.identify_headings()

        return self.region



    # Macro evaluation.

    def evaluate_macros(self):

        "Evaluate the macro nodes in the document."

        for node in self.macros:

            # Obtain a class for the named macro, ignoring unknown macros.

            macro_cls = get_macro(node.name)
            if not macro_cls:
                continue

            # Instantiate the class and evaluate the macro.

            macro = macro_cls(node, self.region, self.metadata)
            macro.evaluate()

    # Metadata extraction.

    def update_metadata(self, metadata):

        """
        Update 'metadata' for the document, setting the title to the text of
        the first recorded heading or, without any headings, to the page name
        held by this parser's own metadata.
        """

        if self.headings:
            metadata.set("title", self.headings[0].text_content())
        else:
            metadata.set("title", self.metadata.get("pagename"))

    # Heading disambiguation.

    def identify_headings(self):

        """
        Assign identifiers to headings based on their textual content. The
        first heading with given text uses the text itself; later headings
        with the same text receive a numeric suffix.
        """

        d = {}

        for heading in self.headings:
            text = heading.text_content()

            # Use a membership test instead of has_key, which is unavailable
            # on Python 3 dictionaries.

            if text not in d:
                d[text] = 0
                heading.identifier = text
            else:
                d[text] += 1
                heading.identifier = "%s-%d" % (text, d[text])



    # Conversion back to text.

    def get_serialiser(self):

        "Return a serialiser employing Moin as the output format."

        # Work on a copy so that the parser's own metadata is not changed.

        metadata = self.metadata.copy()
        metadata.set("link_format", None)
        metadata.set("output_context", "standalone")
        metadata.set("output_format", "moin")
        return metadata.get_serialiser()



    # Parser methods supporting different page features.

    def parse_attrname(self, attrs):

        "Handle an attribute name within 'attrs'."

        name = self.match_group("name")
        attr = TableAttr(name)

        # Only accept a value appearing immediately after the name.

        preceding = self.read_until(["attrvalue"], False)
        if preceding == "":
            attr.quote = self.match_group("quote")
            attr.value = self.match_group("value")

        attrs.append(attr)

    def parse_break(self, region):

        "Handle a paragraph break within 'region'."

        self.add_node(region, Break())
        self.new_block(region)

    def parse_comment(self, region):

        "Handle a comment within 'region'."

        comment = self.match_group("comment")
        extra = self.match_group("extra")
        self.add_node(region, Comment(comment, extra))
        self.new_block(region)

    def parse_defitem(self, region, extra=""):

        """
        Handle a definition item within 'region'. Any 'extra' text is passed
        to the item node along with the matched padding.
        """

        pad = self.match_group("pad")
        item = DefItem([], pad, extra)
        self.parse_region_details(item, self.listitem_pattern_names)
        self.add_node(region, item)
        self.new_block(region)

    def parse_defterm(self, region):

        "Handle a definition term within 'region'."

        pad = self.match_group("pad")
        term = DefTerm([], pad)
        self.parse_region_details(term, ["deftermend", "deftermsep"] + self.inline_pattern_names)
        self.add_node(region, term)

        # A separator indicates that an item follows the term.

        if self.matching_pattern() == "deftermsep":
            self.parse_defitem(region)

        # Add padding from the separator to the term, there being no item.

        else:
            term.extra = self.match_group("pad")

    def parse_defterm_empty(self, region):

        "Handle an empty definition term within 'region'."

        extra = self.match_group("pad")
        self.parse_region_details(region, ["deftermsep"])
        self.parse_defitem(region, extra)

    def parse_directive(self, region):

        "Handle a processing directive within 'region'."

        directive = self.match_group("directive")
        extra = self.match_group("extra")
        self.add_node(region, Directive(directive, extra))
        self.new_block(region)

    def parse_fontstyle(self, region):

        """
        Handle emphasis and strong styles within 'region', interpreting the
        number of quote characters matched either as the end of styles active
        in an enclosing font style region or as the start of new styles.
        """

        n = len(self.match_group("style"))

        # Handle endings.

        if isinstance(region, FontStyle):
            emphasis = n in (2, 4, 5)
            strong = n in (3, 5, 6)
            active = True

            # Consume the quotes corresponding to each style being closed.

            if region.emphasis and emphasis:
                active = region.close_emphasis()
                n -= 2
            if region.strong and strong:
                active = region.close_strong()
                n -= 3

            # With the region no longer active, give back any remaining
            # quotes and leave the region.

            if not active:
                if n:
                    self.items.rewind(n)
                raise StopIteration

            # With all quotes consumed, there are no new styles to start.

            elif not n:
                return

        # Handle new styles.

        emphasis = n in (2, 4, 5)
        strong = n in (3, 5, 6)
        double = n in (4, 6)

        # Doubled markers yield a span without any parsed content.

        span = FontStyle([], emphasis, strong)
        if not double:
            self.parse_region_details(span, self.inline_pattern_names)
        region.append_inline(span)

    def parse_halign(self, attrs):

        "Handle horizontal alignment within 'attrs'."

        value = self.match_group("value")

        if value == "(":
            align = "left"
        elif value == ")":
            align = "right"
        else:
            align = "center"

        attrs.append(TableAttr("align", align, True))

    def parse_heading(self, region):

        "Handle a heading within 'region'."

        start_extra = self.match_group("extra")
        level = len(self.match_group("level"))
        start_pad = self.match_group("pad")
        heading = Heading([], level, start_extra, start_pad)
        self.parse_region_details(heading, ["headingend"] + self.inline_pattern_names)
        self.add_node(region, heading)
        self.new_block(region)

        # Record the heading for later processing.

        self.root.headings.append(heading)

    def parse_heading_end(self, heading):

        """
        Handle the end of 'heading'. Only an end marker having the same level
        as the heading's start marker terminates the heading.
        """

        level = len(self.match_group("level"))
        if heading.level == level:
            heading.end_pad = self.match_group("pad")
            heading.end_extra = self.match_group("extra")
            raise StopIteration

    def parse_list(self, item):

        "Create a list, starting with 'item', returning the list node."

        # Avoid shadowing the list built-in with the node.

        list_node = List([item])
        self.parse_region_details(list_node, self.list_pattern_names, True)
        return list_node

    def parse_listitem(self, region):

        "Handle a list item marker within 'region'."

        indent = len(self.match_group("indent"))
        marker = self.match_group("marker")
        num = self.match_group("num")
        space = self.match_group("pad")

        last = region.node(-1)

        new_list = not isinstance(last, (List, ListItem))
        same_indent = not new_list and indent == last.indent
        new_marker = not new_list and last.marker != marker and same_indent
        new_num = not new_list and num is not None and last.num != num and same_indent

        # If the marker or number changes at the same indent, or if the indent
        # is smaller, queue the item and end the list.

        # Note that Moin format does not seek to support item renumbering,
        # instead starting new lists on number changes.

        if not new_list and (new_marker or new_num or indent < last.indent):
            self.queue_match()
            self.end_region(region)

        # Obtain a list item and populate it.

        item = ListItem([], indent, marker, space, num)
        self.parse_region_details(item, self.listitem_pattern_names)

        # Start a new list if not preceded by a list item, adding a trailing
        # block for new elements.

        if new_list:
            item = self.parse_list(item)
            self.add_node(region, item)
            self.new_block(region)

        # Add a nested list to the last item.

        elif indent > last.indent:
            item = self.parse_list(item)
            self.add_node(last, item)

        # Add the item to the current list.

        else:
            self.add_node(region, item)

    def parse_rule(self, region):

        "Handle a horizontal rule within 'region'."

        # The height is the number of dashes beyond the initial four.

        height = len(self.match_group("rule")) - 4
        rule = Rule(height)
        self.add_node(region, rule)
        self.new_block(region)

    def parse_section(self, region):

        "Handle the start of a new section within 'region'."

        # Parse the section and start a new block after the section.

        indent = len(self.match_group("indent"))
        level = len(self.match_group("level"))

        section = self.parse_region(level, indent, "inline")

        # If the section is inline, treat it like any other inline element.

        if section.type == "inline":
            region.append_inline(section)

        # Otherwise, add it as a new block element.

        else:
            self.add_node(region, section)
            if region.allow_blocks:
                self.new_block(region)

    def parse_table_attrs(self, cell):

        "Handle the start of table attributes within 'cell'."

        attrs = TableAttrs([])
        self.parse_region_details(attrs, self.table_attr_pattern_names)

        # If no end marker was found, consider that the text was not table
        # attributes at all.

        if attrs.incomplete:
            cell.append_inline(Text(serialise(attrs, self.get_serialiser())))
            if attrs.found_cell:
                self.end_region(cell)
            return

        # Test the validity of the attributes.

        last = None

        for node in attrs.nodes:

            # Text separator nodes must be whitespace.

            if isinstance(node, Text):
                if node.s.strip():
                    break

            # Named attributes must be preceded by space if not the first.

            elif last and not node.concise and not isinstance(last, Text):
                break

            last = node

        # All nodes were valid: preserve the collection.

        else:
            # Add the attributes as a node, also recording their presence.

            cell.attrs = attrs
            self.add_node(cell, attrs)
            return

        # Invalid nodes were found: serialise the attributes as text.

        cell.append_inline(Text(serialise(attrs, self.get_serialiser())))

    def parse_table_row(self, region):

        "Handle the start of a table row within 'region'."

        # Identify any active table.

        table = region.node(-2)
        block = region.node(-1)

        if not (isinstance(table, Table) and block.empty()):
            new_table = table = Table([])
        else:
            new_table = None

        row = TableRow([])

        while True:
            cell = TableCell([])
            self.parse_region_details(cell, self.table_row_pattern_names)

            # Handle the end of the row.

            if self.matching_pattern() == "tableend":
                trailing = self.match_group("extra")

                # If the cell was started but not finished, or if the row has
                # no cells at all, convert the row into text.

                if not row.nodes or not cell.empty():

                    # Convert the nodes back to text.

                    serialiser = self.get_serialiser()

                    for node in row.nodes:
                        region.append_inline(Text(serialise(node, serialiser)))

                    region.append_inline(Text(serialise(cell, serialiser) + trailing))

                    self.new_block(region)
                    return

                # Otherwise, the final cell is necessarily empty and is not
                # added to the row: only the trailing text is recorded.

                row.trailing = trailing
                break

            # A cell separator has been found.

            row.append(cell)

        # Add the row to the table and any new table to the region.

        self.add_node(table, row)
        if new_table:
            self.add_node(region, new_table)

        self.new_block(region)

    def parse_valign(self, attrs):

        "Handle vertical alignment within 'attrs'."

        value = self.match_group("value")

        if value == "^":
            valign = "top"
        else:
            valign = "bottom"

        attrs.append(TableAttr("valign", valign, True))



    def inline_patterns_for(self, name):

        """
        Return active patterns for the inline element having the given 'name':
        a copy of the inline pattern names with the element's own start
        pattern replaced by its end pattern.
        """

        names = self.inline_pattern_names[:]
        names[names.index(name)] = "%send" % name
        return names



    # Inline formatting handlers.

    def parse_inline(self, region, cls, pattern_name):

        """
        Handle an inline region within 'region', creating a span of the given
        'cls' and populating it using the patterns for 'pattern_name'.
        """

        span = cls([])
        self.parse_region_details(span, self.inline_patterns_for(pattern_name))
        region.append_inline(span)

    def parse_larger(self, region):

        "Handle larger text within 'region'."

        self.parse_inline(region, Larger, "larger")

    def parse_monospace(self, region):

        "Handle monospaced text within 'region'."

        # Only the end marker is recognised within monospaced text.

        span = Monospace([])
        self.parse_region_details(span, ["monospaceend"])
        region.append_inline(span)

    def parse_smaller(self, region):

        "Handle smaller text within 'region'."

        self.parse_inline(region, Smaller, "smaller")

    def parse_strike(self, region):

        "Handle struck-through text within 'region'."

        self.parse_inline(region, Strikethrough, "strike")

    def parse_sub(self, region):

        "Handle subscript text within 'region'."

        self.parse_inline(region, Subscript, "sub")

    def parse_super(self, region):

        "Handle superscript text within 'region'."

        self.parse_inline(region, Superscript, "super")

    def parse_underline(self, region):

        "Handle underlined text within 'region'."

        self.parse_inline(region, Underline, "underline")

    # Link formatting handlers.

    def _parse_link(self, region, cls, pattern_names):

        """
        Handle a link-like feature within 'region', creating a node of the
        given 'cls' and using 'pattern_names' to parse any label and
        parameters following the target.
        """

        target = self.match_group("target")
        end = self.match_group("end")

        # Obtain an object for the link target.

        link_target = parse_link_target(target, self.metadata)

        # Obtain an object for the node.

        span = cls([], link_target)

        # Obtain the extra details.

        if not end:
            cls = LinkLabel

            # Introduce a label or parameter for each separated region.

            while True:
                param = cls([])
                self.parse_region_details(param, pattern_names)
                span.append(param)

                if self.matching_pattern() != "linksep":
                    break

                # Regions after the first are parameters, not labels.

                cls = LinkParameter

        region.append_inline(span)

        # Record the link target for later processing.

        self.root.link_targets.append(link_target)

    def parse_link(self, region):

        "Handle a link within 'region'."

        self._parse_link(region, Link, self.link_pattern_names)

    def parse_transclusion(self, region):

        "Handle a transclusion within 'region'."

        self._parse_link(region, Transclusion, self.transclusion_pattern_names)



    # Complete inline pattern handlers.

    def parse_anchor(self, region):

        "Handle an anchor within 'region'."

        target = self.match_group("target")
        anchor = Anchor(target)
        region.append_inline(anchor)

    def parse_linebreak(self, region):

        "Handle a line break within 'region'."

        region.append_inline(LineBreak())

    def parse_macro(self, region):

        "Handle a macro within 'region'."

        name = self.match_group("name")
        args = self.match_group("args")

        # Obtain the raw arguments. Moin usually leaves it to the macro to
        # interpret the individual arguments.

        if args:
            arglist = args.split(",")
        else:
            arglist = []

        macro = Macro(name, arglist, region.append_point(), region)
        region.append_inline(macro)

        # Record the macro for later processing.

        self.root.macros.append(macro)

    def parse_verbatim(self, region):

        "Handle verbatim text within 'region'."

        text = self.match_group("verbatim")
        region.append_inline(Verbatim(text))



    # Table attribute handlers.

    def parse_table_attr(self, attrs, pattern_name):

        """
        Handle a table attribute within 'attrs', naming the attribute using
        'pattern_name' and taking its value from the current match.
        """

        attrs.append(TableAttr(pattern_name, self.match_group("value"), True))

    def parse_colour(self, attrs):

        "Handle a background colour within 'attrs'."

        self.parse_table_attr(attrs, "bgcolor")

    def parse_colspan(self, attrs):

        "Handle a column span within 'attrs'."

        self.parse_table_attr(attrs, "colspan")

    def parse_rowspan(self, attrs):

        "Handle a row span within 'attrs'."

        self.parse_table_attr(attrs, "rowspan")

    def parse_width(self, attrs):

        "Handle a width within 'attrs'."

        self.parse_table_attr(attrs, "width")

    def parse_table_attrs_end(self, attrs):

        "Handle the end of 'attrs', marking the attributes as complete."

        attrs.incomplete = False
        self.end_region(attrs)

    def parse_table_attrs_cell(self, attrs):

        "Handle a cell separator found within 'attrs' before any end marker."

        attrs.found_cell = True
        self.end_region(attrs)



    # Regular expressions.

    # All fragments containing backslash sequences are raw strings so that
    # the sequences reach the pattern compiler unchanged.

    syntax = {
        # Page regions:

        "regionstart"   : join((group("indent", r"\N*"),                    # ws... (optional)
                                group("level", repeat("[{]", 3)))),         # {{{...

        "regionend"     : join((r"\N*",                                     # ws... (optional)
                                group("feature", join((
                                    group("level", repeat("[}]", 3)),       # }}}...
                                    optional(group("extra", r"\n"))))))),   # nl (optional)

        # Region header and directives:

        "header"        : join(("#!",                                       # #!
                                group("args", ".*?"), "\n")),               # text-excl-nl

        "directive"     : join((r"^#",                                      # #
                                group("directive", r".*?$"),                # rest of line
                                optional(group("extra", r"\n")))),          # nl (optional)

        # Region contents:

        # Line-oriented patterns support features which require their own
        # separate lines.

        "break"         : r"^(\s*?)\n",                                     # blank line

        "comment"       : join((r"^##",                                     # ##
                                group("comment", r".*?$"),                  # rest of line
                                optional(group("extra", r"\n")))),          # nl (optional)

        "defterm"       : join(("^",
                                group("pad", r"\N+"),                       # ws...
                                expect(".+?::"))),                          # text ::

        "defterm_empty" : join(("^",
                                group("pad", r"\N+"),                       # ws...
                                expect(r"::\s+"))),                         # :: ws...

        "heading"       : join(("^",
                                group("extra", r"\N*"),                     # ws... (optional)
                                group("level", "=+"),                       # =...
                                group("pad", r"\s+"),                       # ws...
                                expect(join((r".*?\N+",                     # text
                                             recur("level"),                # =...
                                             r"\N*$"))))),                  # ws... (optional)

        "listitem"      : join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"\*"),                     # list-marker
                                group("pad", r"\s*"))),                     # ws... (optional)

        "listitem_num"  : join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"\d+\."),                  # decimal-marker
                                optional(join(("#", group("num", r"\d+")))), # # num (optional)
                                group("pad", r"\s+"))),                     # ws...

        "listitem_alpha": join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"[aA]\."),                 # alpha-marker
                                optional(join(("#", group("num", r"\d+")))), # # num (optional)
                                group("pad", r"\s+"))),                     # ws...

        "listitem_roman": join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"[iI]\."),                 # roman-marker
                                optional(join(("#", group("num", r"\d+")))), # # num (optional)
                                group("pad", r"\s+"))),                     # ws...

        "listitem_dot"  : join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"\."),                     # dot-marker
                                group("pad", r"\s*"))),                     # ws... (optional)

        "tablerow"      : r"^\|\|",                                         # ||

        # Region contents:

        # Inline patterns are for markup features that appear within blocks.
        # The patterns below start inline spans that can contain other markup
        # features.

        "fontstyle"     : group("style", repeat("'", 2, 6)),                # ''...

        # Trivial markup balancing is done below using the end features.

        "larger"        : join((r"~\+",                                     # ~+
                                expect(r"\P*?\+~"))),                       # ... +~

        "monospace"     : join((r"`",                                       # `
                                expect(r"\P*?`"))),                         # ... `

        "smaller"       : join((r"~-",                                      # ~-
                                expect(r"\P*?-~"))),                        # ... -~

        "strike"        : join((r"--\(",                                    # --(
                                expect(r"\P*?\)--"))),                      # ... )--

        "sub"           : join((r",,",                                      # ,,
                                expect(r"\P*?,,"))),                        # ... ,,

        "super"         : join((r"\^",                                      # ^
                                expect(r"\P*?\^"))),                        # ... ^

        "underline"     : join((r"__",                                      # __
                                expect(r"\P*?__"))),                        # ... __

        # Rules are treated as inline but, unlike the above, appear without
        # contents.

        "rule"          : group("rule", "-----*"),                          # ----...

        # Links and transclusions may start inline spans.

        "link"          : join((r"\[\[",                                    # [[
                                group("target", r"\P*?"),                   # ...
                                choice((r"\|",                              # |
                                        group("end", r"]]"))))),            # ]]

        "transclusion"  : join((r"\{\{",                                    # {{
                                excl(r"\{"),                                # not-{
                                group("target", r"\P*?"),                   # ...
                                choice((r"\|",                              # |
                                        group("end", r"}}"))))),            # }}

        # Complete inline patterns are for markup features that do not support
        # arbitrary content within them:

        "anchor"        : join((r"\(\(",                                    # ((
                                group("target", ".*?"),                     # target
                                r"\)\)")),                                  # ))

        "linebreak"     : r"\\\\",                                          # \\

        "macro"         : join(("<<",                                       # <<
                                group("name", r"\w+?"),                     # digit-letter...
                                optional(join((r"\(",                       # ( (optional)
                                               group("args", ".*?"),        # not-)...
                                               r"\)"))),                    # ) (optional)
                                ">>")),                                     # >>

        "verbatim"      : join(("<<<",                                      # <<<
                                group("verbatim", r"\P*?"),                 # ...
                                ">>>")),                                    # >>>

        # Ending patterns for inline features:

        "largerend"     : r"\+~",                                           # +~
        "linkend"       : r"]]",                                            # ]]
        "monospaceend"  : r"`",                                             # `
        "smallerend"    : r"-~",                                            # -~
        "strikeend"     : r"\)--",                                          # )--
        "subend"        : r",,",                                            # ,,
        "superend"      : r"\^",                                            # ^
        "transclusionend": r"}}",                                           # }}
        "underlineend"  : r"__",                                            # __

        # Heading contents:

        "headingend"    : join((group("pad", r"\N+"),                       # ws...
                                group("level", "=+"),                       # =...
                                group("extra", r"\N*\n"))),                 # ws (optional) nl

        # Link/transclusion contents:

        "linksep"       : r"\|",                                            # |

        # List contents:

        "deftermend"    : join(("::", group("pad", r"\s*?\n"))),            # ::
                                                                            # ws... (optional)
                                                                            # nl

        "deftermsep"    : join(("::", group("pad", r"\s+"))),               # ::
                                                                            # ws...

        # The decimal marker accepts one or more digits, agreeing with the
        # listitem_num pattern, so that items with multiple-digit markers
        # also terminate the preceding item.

        "listitemend"   : join((r"^",                                       # next line
                          choice((expect(r"[^\s]"),                         # without indent
                                  expect(r"\Z"),                            # end of string
                                  expect(r"\N+\*"),                         # or with ws... list-marker
                                  expect(r"\N+\d+\."),                      # or with ws... decimal-marker
                                  expect(r"\N+[aA]\."),                     # or with ws... alpha-marker
                                  expect(r"\N+[iI]\."),                     # or with ws... roman-marker
                                  expect(r"\N+\."),                         # or with ws... dot-marker
                                  expect(r"\N+.+?::\s"),                    # or with ws... text :: ws (next defterm)
                                  expect(r"\N+::\s"))))),                   # or with ws... :: ws (next defitem)

        # Table contents:

        "tableattrs"    : join(("<",                                        # lt
                                excl("<"))),                                # not-lt

        "tablecell"     : r"\|\|",                                          # ||

        "tableend"      : join((group("extra", r"\s*?"),                    # ws... (optional)
                                "^")),                                      # next line

        # Table attributes:

        "tableattrsend" : r">",                                             # >
        "halign"        : group("value", "[(:)]"),                          # halign-marker
        "valign"        : group("value", "[v^]"),                           # valign-marker
        "colour"        : group("value", join((r"\#",                       # #
                                repeat("[0-9A-Fa-f]", 6, 6)))),             # nnnnnn

        "colspan"       : join(("-",                                        # -
                                group("value", r"\d+"))),                   # n...

        "rowspan"       : join((r"\|",                                      # |
                                group("value", r"\d+"))),                   # n...

        "width"         : group("value", r"\d+%"),                          # n... %

        "attrname"      : join((excl(r"[-\d]"),                             # not-dash-or-digit
                                group("name", r"[-\w]+"))),                 # dash-digit-letter...

        "attrvalue"     : join(("=", group("quote", r"\Q"),                 # quote
                                group("value", ".*?"),                      # non-quote... (optional)
                                recur("quote"))),                           # quote

        "bad_tablecell" : r"\|\|",                                          # ||
        }

    patterns = get_patterns(syntax)



    # Patterns available within certain markup features.

    table_attr_pattern_names = [
        "attrname", "colour", "colspan", "halign", "rowspan", "tableattrsend",
        "valign", "width",
        "bad_tablecell"
        ]

    inline_without_links_pattern_names = [
        "anchor", "fontstyle", "larger", "linebreak", "macro",
        "monospace", "regionstart", "smaller", "strike", "sub", "super",
        "underline", "verbatim"
        ]

    inline_pattern_names = inline_without_links_pattern_names + [
        "link", "transclusion"]

    link_pattern_names = inline_without_links_pattern_names + [
        "linkend", "linksep", "transclusion"]

    list_pattern_names = [
        "listitem", "listitem_alpha", "listitem_dot", "listitem_num",
        "listitem_roman",
        ]

    listitem_pattern_names = inline_pattern_names + ["listitemend"]

    region_without_table_pattern_names = inline_pattern_names + list_pattern_names + [
        "break", "comment", "heading", "defterm", "defterm_empty",
        "regionend", "rule",
        ]

    table_row_pattern_names = inline_pattern_names + [
        "tableattrs", "tablecell", "tableend"
        ]

    transclusion_pattern_names = inline_without_links_pattern_names + [
        "linksep", "transclusionend"]

    # The region pattern names are specifically used by the common parser
    # functionality.

    region_pattern_names = region_without_table_pattern_names + ["tablerow"]



    # Pattern handlers.

    end_region = ParserBase.end_region
    parse_section_end = ParserBase.parse_region_end

    handlers = {
        None                : end_region,
        "anchor"            : parse_anchor,
        "attrname"          : parse_attrname,
        "break"             : parse_break,
        "colour"            : parse_colour,
        "colspan"           : parse_colspan,
        "comment"           : parse_comment,
        "defterm"           : parse_defterm,
        "defterm_empty"     : parse_defterm_empty,
        "deftermend"        : end_region,
        "deftermsep"        : end_region,
        "directive"         : parse_directive,
        "fontstyle"         : parse_fontstyle,
        "halign"            : parse_halign,
        "heading"           : parse_heading,
        "headingend"        : parse_heading_end,
        "larger"            : parse_larger,
        "largerend"         : end_region,
        "linebreak"         : parse_linebreak,
        "link"              : parse_link,
        "linkend"           : end_region,
        "linksep"           : end_region,
        "macro"             : parse_macro,
        "listitemend"       : end_region,
        "listitem"          : parse_listitem,
        "listitem_alpha"    : parse_listitem,
        "listitem_dot"      : parse_listitem,
        "listitem_num"      : parse_listitem,
        "listitem_roman"    : parse_listitem,
        "monospace"         : parse_monospace,
        "monospaceend"      : end_region,
        "regionstart"       : parse_section,
        "regionend"         : parse_section_end,
        "rowspan"           : parse_rowspan,
        "rule"              : parse_rule,
        "smaller"           : parse_smaller,
        "smallerend"        : end_region,
        "strike"            : parse_strike,
        "strikeend"         : end_region,
        "sub"               : parse_sub,
        "subend"            : end_region,
        "super"             : parse_super,
        "superend"          : end_region,
        "tableattrs"        : parse_table_attrs,
        "tableattrsend"     : parse_table_attrs_end,
        "tablerow"          : parse_table_row,
        "tablecell"         : end_region,
        "tableend"          : end_region,
        "transclusion"      : parse_transclusion,
        "transclusionend"   : end_region,
        "underline"         : parse_underline,
        "underlineend"      : end_region,
        "bad_tablecell"     : parse_table_attrs_cell,
        "valign"            : parse_valign,
        "verbatim"          : parse_verbatim,
        "width"             : parse_width,
        }

parser = MoinParser

# vim: tabstop=4 expandtab shiftwidth=4