#!/usr/bin/env python

"""
Moin wiki format parser.

Copyright (C) 2017, 2018, 2019, 2020, 2022 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

# Document transformations.

from moinformat.macros import get_macro

# Parser functionality and pattern definition.

from moinformat.parsers.common import ParserBase, get_patterns, choice, \
                                      excl, expect, group, optional, recur, \
                                      repeat

# Serialisation.

from moinformat.serialisers import serialise

# Document tree nodes.

from moinformat.tree.moin import Anchor, Break, Comment, DefItem, DefTerm, \
                                 Directive, FontStyle, Heading, Larger, \
                                 LineBreak, Link, LinkLabel, LinkParameter, \
                                 List, ListItem, Macro, Monospace, \
                                 NonBreakingSpace, Region, Rule, Smaller, \
                                 Strikethrough, Subscript, Superscript, Table, \
                                 TableAttr, TableAttrs, TableCell, TableRow, \
                                 Text, Transclusion, Underline, Verbatim

# Link parsing.

from moinformat.utils.links import parse_link_target

join = "".join

class MoinParser(ParserBase):

    "A wiki region parser."

    formats = ["moin", "wiki"]

    # Principal parser methods.

    def parse(self, s):

        """
        Parse page text 's'. Pages consist of regions delimited by markers.

        Return the top-level region populated from the text. As side-effects,
        the 'macros', 'headings' and 'link_targets' collections are reset on
        this parser before parsing, and heading identifiers are assigned once
        parsing is complete.
        """

        # Record certain node occurrences for later evaluation.

        self.macros = []

        # Record headings for identifier disambiguation.

        self.headings = []

        # Record link targets for resource identification.

        self.link_targets = []

        # Obtain the token stream and a region to populate.

        self.items = self.get_items(s)
        self.region = Region([], type="moin")

        # Parse page header and directives.

        self.parse_region_header(self.region)
        self.parse_region_directives(self.region)

        # Handle pages directly with this parser. Pages do not need to use an
        # explicit format indicator.

        if not self.region.type:
            self.parse_region_content(self.items, self.region)

        # Otherwise, test the type and find an appropriate parser.

        else:
            self.parse_region_type(self.region)

        # Assign heading identifiers.

        self.identify_headings()

        return self.region



    # Macro evaluation.

    def evaluate_macros(self):

        """
        Evaluate the macro nodes in the document. Nodes naming a macro for
        which no class can be obtained are left untouched.
        """

        for node in self.macros:

            # Obtain a class for the named macro.

            macro_cls = get_macro(node.name)
            if not macro_cls:
                continue

            # Instantiate the class and evaluate the macro.

            macro = macro_cls(node, self.region, self.metadata)
            macro.evaluate()

    # Metadata extraction.

    def update_metadata(self, metadata):

        """
        Update 'metadata' for the document, setting the title to the text of
        the first recorded heading or, without any headings, to the page name
        held by this parser's own metadata.
        """

        if self.headings:
            metadata.set("title", self.headings[0].text_content())
        else:
            metadata.set("title", self.metadata.get("pagename"))

    # Heading disambiguation.

    def identify_headings(self):

        """
        Assign identifiers to headings based on their textual content. The
        first heading having a given text uses that text as its identifier,
        with each later heading having the same text receiving the text plus
        a numeric suffix: "-1", "-2", and so on.
        """

        # Map heading text to the number of repeated occurrences seen so far.

        counts = {}

        for heading in self.headings:
            text = heading.text_content()

            # Test membership using the operator, the has_key method being
            # absent from Python 3 dictionaries.

            if text not in counts:
                counts[text] = 0
                heading.identifier = text
            else:
                counts[text] += 1
                heading.identifier = "%s-%d" % (text, counts[text])



    # Conversion back to text.
159 160 def get_serialiser(self): 161 162 "Return metadata employing Moin as the output format." 163 164 metadata = self.metadata.copy() 165 metadata.set("link_format", None) 166 metadata.set("output_context", "standalone") 167 metadata.set("output_format", "moin") 168 return metadata.get_serialiser() 169 170 171 172 # Parser methods supporting different page features. 173 174 def parse_attrname(self, attrs): 175 176 "Handle an attribute name within 'attrs'." 177 178 name = self.match_group("name") 179 attr = TableAttr(name) 180 181 preceding = self.read_until(["attrvalue"], False) 182 if preceding == "": 183 attr.quote = self.match_group("quote") 184 attr.value = self.match_group("value") 185 186 attrs.append(attr) 187 188 def parse_break(self, region): 189 190 "Handle a paragraph break within 'region'." 191 192 self.add_node(region, Break()) 193 self.new_block(region) 194 195 def parse_comment(self, region): 196 197 "Handle a comment within 'region'." 198 199 comment = self.match_group("comment") 200 extra = self.match_group("extra") 201 self.add_node(region, Comment(comment, extra)) 202 self.new_block(region) 203 204 def parse_defitem(self, region, extra=""): 205 206 "Handle a definition item within 'region'." 207 208 pad = self.match_group("pad") 209 item = DefItem([], pad, extra) 210 self.parse_region_details(item, self.listitem_pattern_names) 211 self.add_node(region, item) 212 self.new_block(region) 213 214 def parse_defterm(self, region): 215 216 "Handle a definition term within 'region'." 217 218 pad = self.match_group("pad") 219 term = DefTerm([], pad) 220 self.parse_region_details(term, ["deftermend", "deftermsep"] + self.inline_pattern_names) 221 self.add_node(region, term) 222 223 if self.matching_pattern() == "deftermsep": 224 self.parse_defitem(region) 225 226 # Add padding from the separator to the term, there being no item. 
227 228 else: 229 term.extra = self.match_group("pad") 230 231 def parse_defterm_empty(self, region): 232 233 "Handle an empty definition term within 'region'." 234 235 extra = self.match_group("pad") 236 self.parse_region_details(region, ["deftermsep"]) 237 self.parse_defitem(region, extra) 238 239 def parse_directive(self, region): 240 241 "Handle a processing directive within 'region'." 242 243 directive = self.match_group("directive") 244 extra = self.match_group("extra") 245 self.add_node(region, Directive(directive, extra)) 246 self.new_block(region) 247 248 def parse_fontstyle(self, region): 249 250 "Handle emphasis and strong styles." 251 252 n = len(self.match_group("style")) 253 254 # Handle endings. 255 256 if isinstance(region, FontStyle): 257 emphasis = n in (2, 4, 5) 258 strong = n in (3, 5, 6) 259 active = True 260 261 if region.emphasis and emphasis: 262 active = region.close_emphasis() 263 n -= 2 264 if region.strong and strong: 265 active = region.close_strong() 266 n -= 3 267 268 if not active: 269 if n: 270 self.items.rewind(n) 271 raise StopIteration 272 273 elif not n: 274 return 275 276 # Handle new styles. 277 278 emphasis = n in (2, 4, 5) 279 strong = n in (3, 5, 6) 280 double = n in (4, 6) 281 282 span = FontStyle([], emphasis, strong) 283 if not double: 284 self.parse_region_details(span, self.inline_pattern_names) 285 region.append_inline(span) 286 287 def parse_halign(self, attrs): 288 289 "Handle horizontal alignment within 'attrs'." 290 291 value = self.match_group("value") 292 attr = TableAttr("align", value == "(" and "left" or value == ")" and "right" or "center", True) 293 attrs.append(attr) 294 295 def parse_heading(self, region): 296 297 "Handle a heading." 
298 299 start_extra = self.match_group("extra") 300 level = len(self.match_group("level")) 301 start_pad = self.match_group("pad") 302 heading = Heading([], level, start_extra, start_pad) 303 self.parse_region_details(heading, ["headingend"] + self.inline_pattern_names) 304 self.add_node(region, heading) 305 self.new_block(region) 306 307 # Record the heading for later processing. 308 309 self.root.headings.append(heading) 310 311 def parse_heading_end(self, heading): 312 313 "Handle the end of a heading." 314 315 level = len(self.match_group("level")) 316 if heading.level == level: 317 heading.end_pad = self.match_group("pad") 318 heading.end_extra = self.match_group("extra") 319 raise StopIteration 320 321 def parse_list(self, item): 322 323 "Create a list, starting with 'item'." 324 325 list = List([item]) 326 self.parse_region_details(list, self.list_pattern_names, True) 327 return list 328 329 def parse_listitem(self, region): 330 331 "Handle a list item marker within 'region'." 332 333 indent = len(self.match_group("indent")) 334 marker = self.match_group("marker") 335 num = self.match_group("num") 336 space = self.match_group("pad") 337 338 last = region.node(-1) 339 340 new_list = not isinstance(last, (List, ListItem)) 341 same_indent = not new_list and indent == last.indent 342 new_marker = not new_list and last.marker != marker and same_indent 343 new_num = not new_list and num is not None and last.num != num and same_indent 344 345 # If the marker or number changes at the same indent, or if the indent 346 # is smaller, queue the item and end the list. 347 348 # Note that Moin format does not seek to support item renumbering, 349 # instead starting new lists on number changes. 350 351 if not new_list and (new_marker or new_num or indent < last.indent): 352 self.queue_match() 353 self.end_region(region) 354 355 # Obtain a list item and populate it. 
356 357 item = ListItem([], indent, marker, space, num) 358 self.parse_region_details(item, self.listitem_pattern_names) 359 360 # Start a new list if not preceded by a list item, adding a trailing 361 # block for new elements. 362 363 if new_list: 364 item = self.parse_list(item) 365 self.add_node(region, item) 366 self.new_block(region) 367 368 # Add a nested list to the last item. 369 370 elif indent > last.indent: 371 item = self.parse_list(item) 372 self.add_node(last, item) 373 374 # Add the item to the current list. 375 376 else: 377 self.add_node(region, item) 378 379 def parse_rule(self, region): 380 381 "Handle a horizontal rule within 'region'." 382 383 height = len(self.match_group("rule")) - 4 384 rule = Rule(height) 385 self.add_node(region, rule) 386 self.new_block(region) 387 388 def parse_section(self, region): 389 390 "Handle the start of a new section within 'region'." 391 392 # Parse the section and start a new block after the section. 393 394 indent = len(self.match_group("indent")) 395 level = len(self.match_group("level")) 396 397 section = self.parse_region(level, indent, "inline") 398 399 # If the section is inline, treat it like any other inline element. 400 401 if section.type == "inline": 402 region.append_inline(section) 403 404 # Otherwise, add it as a new block element. 405 406 else: 407 self.add_node(region, section) 408 if region.allow_blocks: 409 self.new_block(region) 410 411 def parse_table_attrs(self, cell): 412 413 "Handle the start of table attributes within 'cell'." 414 415 attrs = TableAttrs([]) 416 self.parse_region_details(attrs, self.table_attr_pattern_names) 417 418 # If no end marker was found, consider that the text was not table 419 # attributes at all. 420 421 if attrs.incomplete: 422 cell.append_inline(Text(serialise(attrs, self.get_serialiser()))) 423 if attrs.found_cell: 424 self.end_region(cell) 425 return 426 427 # Test the validity of the attributes. 
428 429 last = None 430 431 for node in attrs.nodes: 432 433 # Text separator nodes must be whitespace. 434 435 if isinstance(node, Text): 436 if node.s.strip(): 437 break 438 439 # Named attributes must be preceded by space if not the first. 440 441 elif last and not node.concise and not isinstance(last, Text): 442 break 443 444 last = node 445 446 # All nodes were valid: preserve the collection. 447 448 else: 449 # Add the attributes as a node, also recording their presence. 450 451 cell.attrs = attrs 452 self.add_node(cell, attrs) 453 return 454 455 # Invalid nodes were found: serialise the attributes as text. 456 457 cell.append_inline(Text(serialise(attrs, self.get_serialiser()))) 458 459 def parse_table_row(self, region): 460 461 "Handle the start of a table row within 'region'." 462 463 # Identify any active table. 464 465 table = region.node(-2) 466 block = region.node(-1) 467 468 if not (isinstance(table, Table) and block.empty()): 469 new_table = table = Table([]) 470 else: 471 new_table = None 472 473 row = TableRow([]) 474 475 while True: 476 cell = TableCell([]) 477 self.parse_region_details(cell, self.table_row_pattern_names) 478 479 # Handle the end of the row. 480 481 if self.matching_pattern() == "tableend": 482 trailing = self.match_group("extra") 483 484 # If the cell was started but not finished, convert the row into text. 485 486 if not row.nodes or not cell.empty(): 487 488 # Convert the nodes back to text. 489 490 serialiser = self.get_serialiser() 491 492 for node in row.nodes: 493 region.append_inline(Text(serialise(node, serialiser))) 494 495 region.append_inline(Text(serialise(cell, serialiser) + trailing)) 496 497 self.new_block(region) 498 return 499 500 # Append the final cell, if not empty. 501 502 else: 503 row.trailing = trailing 504 505 if not cell.empty(): 506 row.append(cell) 507 break 508 509 # A cell separator has been found. 510 511 row.append(cell) 512 513 # Add the row to the table and any new table to the region. 
514 515 self.add_node(table, row) 516 if new_table: 517 self.add_node(region, new_table) 518 519 self.new_block(region) 520 521 def parse_valign(self, attrs): 522 523 "Handle vertical alignment within 'attrs'." 524 525 value = self.match_group("value") 526 attr = TableAttr("valign", value == "^" and "top" or "bottom", True) 527 attrs.append(attr) 528 529 530 531 def inline_patterns_for(self, name): 532 533 "Return active patterns for the inline element having the given 'name'." 534 535 names = self.inline_pattern_names[:] 536 names[names.index(name)] = "%send" % name 537 return names 538 539 540 541 # Inline formatting handlers. 542 543 def parse_inline(self, region, cls, pattern_name): 544 545 "Handle an inline region." 546 547 span = cls([]) 548 self.parse_region_details(span, self.inline_patterns_for(pattern_name)) 549 region.append_inline(span) 550 551 def parse_larger(self, region): 552 self.parse_inline(region, Larger, "larger") 553 554 def parse_monospace(self, region): 555 span = Monospace([]) 556 self.parse_region_details(span, ["monospaceend"]) 557 region.append_inline(span) 558 559 def parse_smaller(self, region): 560 self.parse_inline(region, Smaller, "smaller") 561 562 def parse_strike(self, region): 563 self.parse_inline(region, Strikethrough, "strike") 564 565 def parse_sub(self, region): 566 self.parse_inline(region, Subscript, "sub") 567 568 def parse_super(self, region): 569 self.parse_inline(region, Superscript, "super") 570 571 def parse_underline(self, region): 572 self.parse_inline(region, Underline, "underline") 573 574 # Link formatting handlers. 575 576 def _parse_link(self, region, cls, pattern_names): 577 target = self.match_group("target") 578 end = self.match_group("end") 579 580 # Obtain an object for the link target. 581 582 link_target = parse_link_target(target, self.metadata) 583 584 # Obtain an object for the node. 585 586 span = cls([], link_target) 587 588 # Obtain the extra details. 
589 590 if not end: 591 cls = LinkLabel 592 593 # Introduce a label or parameter for each separated region. 594 595 while True: 596 param = cls([]) 597 self.parse_region_details(param, pattern_names) 598 span.append(param) 599 600 if self.matching_pattern() != "linksep": 601 break 602 603 cls = LinkParameter 604 605 region.append_inline(span) 606 607 # Record the link target for later processing. 608 609 self.root.link_targets.append(link_target) 610 611 def parse_link(self, region): 612 self._parse_link(region, Link, self.link_pattern_names) 613 614 def parse_transclusion(self, region): 615 self._parse_link(region, Transclusion, self.transclusion_pattern_names) 616 617 618 619 # Complete inline pattern handlers. 620 621 def parse_anchor(self, region): 622 target = self.match_group("target") 623 anchor = Anchor(target) 624 region.append_inline(anchor) 625 626 def parse_linebreak(self, region): 627 region.append_inline(LineBreak()) 628 629 def parse_macro(self, region): 630 name = self.match_group("name") 631 args = self.match_group("args") 632 633 # Obtain the raw arguments. Moin usually leaves it to the macro to 634 # interpret the individual arguments. 635 636 arglist = args and args.split(",") or [] 637 macro = Macro(name, arglist, region.append_point(), region) 638 region.append_inline(macro) 639 640 # Record the macro for later processing. 641 642 self.root.macros.append(macro) 643 644 def parse_nbsp(self, region): 645 region.append_inline(NonBreakingSpace()) 646 647 def parse_verbatim(self, region): 648 text = self.match_group("verbatim") 649 region.append_inline(Verbatim(text)) 650 651 652 653 # Table attribute handlers. 654 655 def parse_table_attr(self, attrs, pattern_name): 656 657 "Handle a table attribute." 
658 659 attrs.append(TableAttr(pattern_name, self.match_group("value"), True)) 660 661 def parse_colour(self, attrs): 662 self.parse_table_attr(attrs, "bgcolor") 663 664 def parse_colspan(self, attrs): 665 self.parse_table_attr(attrs, "colspan") 666 667 def parse_rowspan(self, attrs): 668 self.parse_table_attr(attrs, "rowspan") 669 670 def parse_width(self, attrs): 671 self.parse_table_attr(attrs, "width") 672 673 def parse_table_attrs_end(self, attrs): 674 attrs.incomplete = False 675 self.end_region(attrs) 676 677 def parse_table_attrs_cell(self, attrs): 678 attrs.found_cell = True 679 self.end_region(attrs) 680 681 682 683 # Regular expressions. 684 685 syntax = { 686 # Page regions: 687 688 "regionstart" : join((group("indent", r"\N*"), # ws... (optional) 689 group("level", repeat("[{]", 3)))), # {{{... 690 691 "regionend" : join((r"\N*", # ws... (optional) 692 group("feature", join(( 693 group("level", repeat("[}]", 3)), # }}}... 694 optional(group("extra", r"\n"))))))), # nl (optional) 695 696 # Region header and directives: 697 698 "header" : join(("#!", # #! 699 group("args", ".*?"), "\n")), # text-excl-nl 700 701 "directive" : join((r"^#", # # 702 group("directive", r".*?$"), # rest of line 703 optional(group("extra", r"\n")))), # nl (optional) 704 705 # Region contents: 706 707 # Line-oriented patterns support features which require their own 708 # separate lines. 709 710 "break" : r"^(\s*?)\n", # blank line 711 712 "comment" : join((r"^##", # ## 713 group("comment", r".*?$"), # rest of line 714 optional(group("extra", r"\n")))), # nl (optional) 715 716 "defterm" : join(("^", 717 group("pad", r"\N+"), # ws... 718 expect(".+?::"))), # text :: 719 720 "defterm_empty" : join(("^", 721 group("pad", r"\N+"), # ws... 722 expect("::\s+"))), # :: ws... 723 724 "heading" : join(("^", 725 group("extra", r"\N*"), # ws... (optional) 726 group("level", "=+"), # =... 727 group("pad", r"\s+"), # ws... 728 expect(join((r".*?\N+", # text 729 recur("level"), # =... 
730 r"\N*$"))))), # ws... (optional) 731 732 "listitem" : join(("^", 733 group("indent", r"\N+"), # ws... 734 group("marker", r"\*"), # list-marker 735 group("pad", r"\s*"))), # ws... (optional) 736 737 "listitem_num" : join(("^", 738 group("indent", r"\N+"), # ws... 739 group("marker", r"\d+\."), # decimal-marker 740 optional(join(("#", group("num", r"\d+")))), # # num (optional) 741 group("pad", r"\s+"))), # ws... 742 743 "listitem_alpha": join(("^", 744 group("indent", r"\N+"), # ws... 745 group("marker", r"[aA]\."), # alpha-marker 746 optional(join(("#", group("num", r"\d+")))), # # num (optional) 747 group("pad", r"\s+"))), # ws... 748 749 "listitem_roman": join(("^", 750 group("indent", r"\N+"), # ws... 751 group("marker", r"[iI]\."), # roman-marker 752 optional(join(("#", group("num", r"\d+")))), # # num (optional) 753 group("pad", r"\s+"))), # ws... 754 755 "listitem_dot" : join(("^", 756 group("indent", r"\N+"), # ws... 757 group("marker", r"\."), # dot-marker 758 group("pad", r"\s*"))), # ws... (optional) 759 760 "tablerow" : r"^\|\|", # || 761 762 # Region contents: 763 764 # Inline patterns are for markup features that appear within blocks. 765 # The patterns below start inline spans that can contain other markup 766 # features. 767 768 "fontstyle" : group("style", repeat("'", 2, 6)), # ''... 769 770 # Trivial markup balancing is done below using the end features. 771 772 "larger" : join((r"~\+", # ~+ 773 expect(r"\P*?\+~"))), # ... +~ 774 775 "monospace" : join((r"`", # ` 776 expect(r"\P*?`"))), # ... ` 777 778 "smaller" : join((r"~-", # ~- 779 expect(r"\P*?-~"))), # ... -~ 780 781 "strike" : join((r"--\(", # --( 782 expect(r"\P*?\)--"))), # ... )-- 783 784 "sub" : join((r",,", # ,, 785 expect(r"\P*?,,"))), # ... ,, 786 787 "super" : join((r"\^", # ^ 788 expect(r"\P*?\^"))), # ... ^ 789 790 "underline" : join((r"__", # __ 791 expect(r"\P*?__"))), # ... __ 792 793 # Rules are treated as inline but, unlike the above, appear without 794 # contents. 
795 796 "rule" : group("rule", "-----*"), # ----... 797 798 # Links and transclusions may start inline spans. 799 800 "link" : join((r"\[\[", # [[ 801 group("target", r"\P*?"), # ... 802 choice((r"\|", # | 803 group("end", r"]]"))))), # ]] 804 805 "transclusion" : join((r"\{\{", # {{ 806 excl(r"\{"), # not-{ 807 group("target", r"\P*?"), # ... 808 choice((r"\|", # | 809 group("end", r"}}"))))), # }} 810 811 # Complete inline patterns are for markup features that do not support 812 # arbitrary content within them: 813 814 "anchor" : join((r"\(\(", # (( 815 group("target", ".*?"), # target 816 r"\)\)")), # )) 817 818 "linebreak" : r"\\\\", # \\ 819 820 "macro" : join(("<<", # << 821 group("name", "\w+?"), # digit-letter... 822 optional(join((r"\(", # ( (optional) 823 group("args", ".*?"), # not-)... 824 r"\)"))), # ) (optional) 825 ">>")), # >> 826 827 "nbsp" : r"\\_", # \_ 828 829 "verbatim" : join(("<<<", # <<< 830 group("verbatim", r"\P*?"), # ... 831 ">>>")), 832 833 # Ending patterns for inline features: 834 835 "largerend" : r"\+~", # +~ 836 "linkend" : r"]]", # ]] 837 "monospaceend" : r"`", # ` 838 "smallerend" : r"-~", # -~ 839 "strikeend" : r"\)--", # )-- 840 "subend" : r",,", # ,, 841 "superend" : r"\^", # ^ 842 "transclusionend": r"}}", # }} 843 "underlineend" : r"__", # __ 844 845 # Heading contents: 846 847 "headingend" : join((group("pad", r"\N+"), # ws... 848 group("level", "=+"), # =... 849 group("extra", r"\N*\n"))), # ws (optional) nl 850 851 # Link/transclusion contents: 852 853 "linksep" : r"\|", # | 854 855 # List contents: 856 857 "deftermend" : join(("::", group("pad", r"\s*?\n"))), # :: 858 # ws... (optional) 859 # nl 860 861 "deftermsep" : join(("::", group("pad", r"\s+"))), # :: 862 # ws... 863 864 "listitemend" : join((r"^", # next line 865 choice((expect(r"[^\s]"), # without indent 866 expect(r"\Z"), # end of string 867 expect(r"\N+\*"), # or with ws... list-marker 868 expect(r"\N+\d\."), # or with ws... 
decimal-marker 869 expect(r"\N+[aA]\."), # or with ws... alpha-marker 870 expect(r"\N+[iI]\."), # or with ws... roman-marker 871 expect(r"\N+\."), # or with ws... dot-marker 872 expect(r"\N+.+?::\s"), # or with ws... text :: ws (next defterm) 873 expect(r"\N+::\s"))))), # or with ws... :: ws (next defitem) 874 875 # Table contents: 876 877 "tableattrs" : join(("<", # lt 878 excl("<"))), # not-lt 879 880 "tablecell" : r"\|\|", # || 881 882 "tableend" : join((group("extra", r"\s*?"), # ws... (optional) 883 "^")), # next line 884 885 # Table attributes: 886 887 "tableattrsend" : r">", # > 888 "halign" : group("value", "[(:)]"), # halign-marker 889 "valign" : group("value", "[v^]"), # valign-marker 890 "colour" : group("value", join(("\#", # # 891 repeat("[0-9A-Fa-f]", 6, 6)))), # nnnnnn 892 893 "colspan" : join(("-", # - 894 group("value", "\d+"))), # n... 895 896 "rowspan" : join((r"\|", # | 897 group("value", "\d+"))), # n... 898 899 "width" : group("value", "\d+%"), # n... % 900 901 "attrname" : join((excl(r"[-\d]"), # not-dash-or-digit 902 group("name", r"[-\w]+"))), # dash-digit-letter... 903 904 "attrvalue" : join(("=", group("quote", r"\Q"), # quote 905 group("value", ".*?"), # non-quote... (optional) 906 recur("quote"))), # quote 907 908 "bad_tablecell" : r"\|\|", # || 909 } 910 911 patterns = get_patterns(syntax) 912 913 914 915 # Patterns available within certain markup features. 
916 917 table_attr_pattern_names = [ 918 "attrname", "colour", "colspan", "halign", "rowspan", "tableattrsend", 919 "valign", "width", 920 "bad_tablecell" 921 ] 922 923 inline_without_links_pattern_names = [ 924 "anchor", "fontstyle", "larger", "linebreak", "macro", 925 "monospace", "nbsp", "regionstart", "smaller", "strike", "sub", "super", 926 "underline", "verbatim" 927 ] 928 929 inline_pattern_names = inline_without_links_pattern_names + [ 930 "link", "transclusion"] 931 932 link_pattern_names = inline_without_links_pattern_names + [ 933 "linkend", "linksep", "transclusion"] 934 935 list_pattern_names = [ 936 "listitem", "listitem_alpha", "listitem_dot", "listitem_num", 937 "listitem_roman", 938 ] 939 940 listitem_pattern_names = inline_pattern_names + ["listitemend"] 941 942 region_without_table_pattern_names = inline_pattern_names + list_pattern_names + [ 943 "break", "comment", "heading", "defterm", "defterm_empty", 944 "regionend", "rule", 945 ] 946 947 table_row_pattern_names = inline_pattern_names + [ 948 "tableattrs", "tablecell", "tableend" 949 ] 950 951 transclusion_pattern_names = inline_without_links_pattern_names + [ 952 "linksep", "transclusionend"] 953 954 # The region pattern names are specifically used by the common parser 955 # functionality. 956 957 region_pattern_names = region_without_table_pattern_names + ["tablerow"] 958 959 960 961 # Pattern handlers. 
    # Bind the inherited handlers to names in the class body so that they can
    # be referenced directly when building the handler table below.

    end_region = ParserBase.end_region
    parse_section_end = ParserBase.parse_region_end

    # Map each pattern name to the function handling matches of that pattern.
    # The functions are referenced here as plain functions, before the class
    # has been created, so each entry must name something already defined
    # above in the class body.
    # NOTE(review): the None entry presumably applies where no pattern
    # matched - confirm against ParserBase.

    handlers = {
        None                    : end_region,
        "anchor"                : parse_anchor,
        "attrname"              : parse_attrname,
        "break"                 : parse_break,
        "colour"                : parse_colour,
        "colspan"               : parse_colspan,
        "comment"               : parse_comment,
        "defterm"               : parse_defterm,
        "defterm_empty"         : parse_defterm_empty,
        "deftermend"            : end_region,
        "deftermsep"            : end_region,
        "directive"             : parse_directive,
        "fontstyle"             : parse_fontstyle,
        "halign"                : parse_halign,
        "heading"               : parse_heading,
        "headingend"            : parse_heading_end,
        "larger"                : parse_larger,
        "largerend"             : end_region,
        "linebreak"             : parse_linebreak,
        "link"                  : parse_link,
        "linkend"               : end_region,
        "linksep"               : end_region,
        "macro"                 : parse_macro,
        "nbsp"                  : parse_nbsp,
        "listitemend"           : end_region,
        "listitem"              : parse_listitem,
        "listitem_alpha"        : parse_listitem,
        "listitem_dot"          : parse_listitem,
        "listitem_num"          : parse_listitem,
        "listitem_roman"        : parse_listitem,
        "monospace"             : parse_monospace,
        "monospaceend"          : end_region,
        "regionstart"           : parse_section,
        "regionend"             : parse_section_end,
        "rowspan"               : parse_rowspan,
        "rule"                  : parse_rule,
        "smaller"               : parse_smaller,
        "smallerend"            : end_region,
        "strike"                : parse_strike,
        "strikeend"             : end_region,
        "sub"                   : parse_sub,
        "subend"                : end_region,
        "super"                 : parse_super,
        "superend"              : end_region,
        "tableattrs"            : parse_table_attrs,
        "tableattrsend"         : parse_table_attrs_end,
        "tablerow"              : parse_table_row,
        "tablecell"             : end_region,
        "tableend"              : end_region,
        "transclusion"          : parse_transclusion,
        "transclusionend"       : end_region,
        "underline"             : parse_underline,
        "underlineend"          : end_region,
        "bad_tablecell"         : parse_table_attrs_cell,
        "valign"                : parse_valign,
        "verbatim"              : parse_verbatim,
        "width"                 : parse_width,
        }

# Expose the parser class under a module-level name.
# NOTE(review): presumably this is how the parser is discovered by other parts
# of the package - confirm against the code loading parser modules.

parser = MoinParser

# vim: tabstop=4 expandtab shiftwidth=4