#!/usr/bin/env python

"""
Moin wiki format parser.

Copyright (C) 2017, 2018, 2019 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# Document transformations.

from moinformat.macros import get_macro

# Parser functionality and pattern definition.

from moinformat.parsers.common import ParserBase, get_patterns, choice, \
                                      excl, expect, group, optional, recur, \
                                      repeat

# Serialisation.

from moinformat.serialisers import serialise

# Document tree nodes.

from moinformat.tree.moin import Anchor, Break, Comment, DefItem, DefTerm, \
                                 Directive, FontStyle, Heading, Larger, \
                                 LineBreak, Link, LinkLabel, LinkParameter, \
                                 List, ListItem, Macro, Monospace, Region, \
                                 Rule, Smaller, Strikethrough, Subscript, \
                                 Superscript, Table, TableAttr, TableAttrs, \
                                 TableCell, TableRow, Text, Transclusion, \
                                 Underline, Verbatim

# Link parsing.

from moinformat.utils.links import parse_link_target

join = "".join

class MoinParser(ParserBase):

    "A wiki region parser."

    format = "moin"

    def __init__(self, metadata, parsers=None, root=None):

        """
        Initialise the parser with the given 'metadata' and optional 'parsers'.
        An optional 'root' indicates the document-level parser.
        """

        ParserBase.__init__(self, metadata, parsers, root)

        # Record certain node occurrences for later evaluation.

        self.macros = []

        # Record headings for identifier disambiguation.

        self.headings = []

        # Record link targets for resource identification.

        self.link_targets = []

    # Principal parser methods.

    def parse(self, s):

        """
        Parse page text 's'. Pages consist of regions delimited by markers.
        Return the top-level region populated from the text.
        """

        self.items = self.get_items(s)
        self.region = Region([], type="moin")

        # Parse page header and directives.

        self.parse_region_header(self.region)
        self.parse_region_directives(self.region)

        # Handle pages directly with this parser. Pages do not need to use an
        # explicit format indicator.

        if not self.region.type:
            self.parse_region_content(self.items, self.region)

        # Otherwise, test the type and find an appropriate parser.

        else:
            self.parse_region_type(self.region)

        # Assign heading identifiers.

        self.identify_headings()

        return self.region



    # Macro evaluation.

    def evaluate_macros(self):

        "Evaluate the macro nodes in the document."

        for node in self.macros:

            # Obtain a class for the named macro, skipping unknown macros.

            macro_cls = get_macro(node.name)
            if not macro_cls:
                continue

            # Instantiate the class and evaluate the macro.

            macro = macro_cls(node, self.region)
            macro.evaluate()

    # Metadata extraction.

    def update_metadata(self, metadata):

        """
        Update 'metadata' for the document, setting the title from the text of
        the first recorded heading, if any heading was recorded.
        """

        if self.headings:
            metadata.set("title", self.headings[0].text_content())

    # Heading disambiguation.

    def identify_headings(self):

        """
        Assign identifiers to headings based on their textual content. The
        first heading with given text uses the text itself; later headings with
        the same text have "-1", "-2", and so on, appended.
        """

        # Map heading text to the number of repeated occurrences seen so far.

        counts = {}

        for heading in self.headings:
            text = heading.text_content()

            # Use a membership test instead of dict.has_key, which is not
            # available in Python 3.

            if text not in counts:
                counts[text] = 0
                heading.identifier = text
            else:
                counts[text] += 1
                heading.identifier = "%s-%d" % (text, counts[text])



    # Conversion back to text.

    def get_serialiser(self):

        """
        Return a serialiser obtained from a copy of the metadata configured to
        employ Moin as the output format.
        """

        metadata = self.metadata.copy()
        metadata.set("link_format", None)
        metadata.set("output_context", "standalone")
        metadata.set("output_format", "moin")
        return metadata.get_serialiser()



    # Parser methods supporting different page features.

    def parse_attrname(self, attrs):

        "Handle an attribute name within 'attrs'."

        name = self.match_group("name")
        attr = TableAttr(name)

        # Only accept a value if it follows the name without intervening text.

        preceding = self.read_until(["attrvalue"], False)
        if preceding == "":
            attr.quote = self.match_group("quote")
            attr.value = self.match_group("value")

        attrs.append(attr)

    def parse_break(self, region):

        "Handle a paragraph break within 'region'."

        self.add_node(region, Break())
        self.new_block(region)

    def parse_comment(self, region):

        "Handle a comment within 'region'."

        comment = self.match_group("comment")
        extra = self.match_group("extra")
        self.add_node(region, Comment(comment, extra))
        self.new_block(region)

    def parse_defitem(self, region, extra=""):

        """
        Handle a definition item within 'region'. Any 'extra' text is passed on
        to the item node.
        """

        pad = self.match_group("pad")
        item = DefItem([], pad, extra)
        self.parse_region_details(item, self.listitem_pattern_names)
        self.add_node(region, item)
        self.new_block(region)

    def parse_defterm(self, region):

        "Handle a definition term within 'region'."

        pad = self.match_group("pad")
        term = DefTerm([], pad)
        self.parse_region_details(term, ["deftermend", "deftermsep"])
        self.add_node(region, term)

        # A separator indicates that an item follows on the same line.

        if self.matching_pattern() == "deftermsep":
            self.parse_defitem(region)

        # Add padding from the separator to the term, there being no item.

        else:
            term.extra = self.match_group("pad")

    def parse_defterm_empty(self, region):

        "Handle an empty definition term within 'region'."

        extra = self.match_group("pad")
        self.parse_region_details(region, ["deftermsep"])
        self.parse_defitem(region, extra)

    def parse_directive(self, region):

        "Handle a processing directive within 'region'."

        directive = self.match_group("directive")
        extra = self.match_group("extra")
        self.add_node(region, Directive(directive, extra))
        self.new_block(region)

    def parse_fontstyle(self, region):

        """
        Handle emphasis and strong styles within 'region'. The number of quote
        characters matched determines which styles are closed or opened.
        """

        n = len(self.match_group("style"))

        # Handle endings.

        if isinstance(region, FontStyle):
            emphasis = n in (2, 4, 5)
            strong = n in (3, 5, 6)
            active = True

            # Consume the quotes corresponding to each closed style.

            if region.emphasis and emphasis:
                active = region.close_emphasis()
                n -= 2
            if region.strong and strong:
                active = region.close_strong()
                n -= 3

            # Where the span is no longer active, return any unused quotes to
            # the input and leave the span.
            # NOTE(review): StopIteration is presumably handled by the common
            # parser functionality as the end of a region - confirm.

            if not active:
                if n:
                    self.items.rewind(n)
                raise StopIteration

            elif not n:
                return

        # Handle new styles.

        emphasis = n in (2, 4, 5)
        strong = n in (3, 5, 6)
        double = n in (4, 6)

        # Doubled markers produce a span that is not populated.

        span = FontStyle([], emphasis, strong)
        if not double:
            self.parse_region_details(span, self.inline_pattern_names)
        region.append_inline(span)

    def parse_halign(self, attrs):

        "Handle horizontal alignment within 'attrs'."

        value = self.match_group("value")

        if value == "(":
            align = "left"
        elif value == ")":
            align = "right"
        else:
            align = "center"

        attrs.append(TableAttr("align", align, True))

    def parse_heading(self, region):

        "Handle a heading within 'region'."

        start_extra = self.match_group("extra")
        level = len(self.match_group("level"))
        start_pad = self.match_group("pad")
        heading = Heading([], level, start_extra, start_pad)
        self.parse_region_details(heading, ["headingend"] + self.inline_pattern_names)
        self.add_node(region, heading)
        self.new_block(region)

        # Record the heading for later processing.

        self.root.headings.append(heading)

    def parse_heading_end(self, heading):

        """
        Handle the end of a heading. Only an end marker having the same level
        as the 'heading' terminates it.
        """

        level = len(self.match_group("level"))
        if heading.level == level:
            heading.end_pad = self.match_group("pad")
            heading.end_extra = self.match_group("extra")
            raise StopIteration

    def parse_list(self, item):

        "Create a list, starting with 'item', and return the list."

        # Avoid the name 'list' so as not to shadow the built-in type.

        new_list = List([item], item.indent, item.marker, item.num)
        self.parse_region_details(new_list, self.list_pattern_names, True)
        return new_list

    def parse_listitem(self, region):

        "Handle a list item marker within 'region'."

        indent = len(self.match_group("indent"))
        marker = self.match_group("marker")
        num = self.match_group("num")
        space = self.match_group("pad")

        last = region.node(-1)

        new_list = not isinstance(last, (List, ListItem))
        same_indent = not new_list and indent == last.indent
        new_marker = not new_list and last.marker != marker and same_indent
        new_num = not new_list and num is not None and last.num != num and same_indent

        # If the marker or number changes at the same indent, or if the indent
        # is smaller, queue the item and end the list.

        # Note that Moin format does not seek to support item renumbering,
        # instead starting new lists on number changes.

        if not new_list and (new_marker or new_num or indent < last.indent):
            self.queue_match()
            self.end_region(region)

        # Obtain a list item and populate it.

        item = ListItem([], indent, marker, space, num)
        self.parse_region_details(item, self.listitem_pattern_names)

        # Start a new list if not preceded by a list item, adding a trailing
        # block for new elements.

        if new_list:
            item = self.parse_list(item)
            self.add_node(region, item)
            self.new_block(region)

        # Add a nested list to the last item.

        elif indent > last.indent:
            item = self.parse_list(item)
            self.add_node(last, item)

        # Add the item to the current list.

        else:
            self.add_node(region, item)

    def parse_rule(self, region):

        "Handle a horizontal rule within 'region'."

        length = len(self.match_group("rule"))
        rule = Rule(length)
        self.add_node(region, rule)
        self.new_block(region)

    def parse_section(self, region):

        "Handle the start of a new section within 'region'."

        # Parse the section and start a new block after the section.

        indent = len(self.match_group("indent"))
        level = len(self.match_group("level"))

        section = self.parse_region(level, indent, "inline")

        # If the section is inline, treat it like any other inline element.

        if section.type == "inline":
            region.append_inline(section)

        # Otherwise, add it as a new block element.

        else:
            self.add_node(region, section)
            if region.allow_blocks:
                self.new_block(region)

    def parse_table_attrs(self, cell):

        "Handle the start of table attributes within 'cell'."

        attrs = TableAttrs([])
        self.parse_region_details(attrs, self.table_attr_pattern_names)

        # Test the validity of the attributes.

        last = None

        for node in attrs.nodes:

            # Text separator nodes must be whitespace.

            if isinstance(node, Text):
                if node.s.strip():
                    break

            # Named attributes must be preceded by space if not the first.

            elif last and not node.concise and not isinstance(last, Text):
                break

            last = node

        # All nodes were valid: preserve the collection.

        else:
            # Add the attributes as a node, also recording their presence.

            cell.append(attrs)
            cell.attrs = attrs
            return

        # Invalid nodes were found: serialise the attributes as text.

        cell.append_inline(Text(serialise(attrs, self.get_serialiser())))

    def parse_table_row(self, region):

        "Handle the start of a table row within 'region'."

        # Identify any active table.

        table = region.node(-2)
        block = region.node(-1)

        if not (isinstance(table, Table) and block.empty()):
            new_table = table = Table([])
        else:
            new_table = None

        row = TableRow([])

        while True:
            cell = TableCell([])
            self.parse_region_details(cell, self.table_row_pattern_names)

            # Handle the end of the row.

            if self.matching_pattern() == "tableend":
                trailing = self.match_group("extra")

                # If the cell was started but not finished, convert the row into text.

                if not row.nodes or not cell.empty():

                    # Convert the nodes back to text.

                    serialiser = self.get_serialiser()

                    for node in row.nodes:
                        region.append_inline(Text(serialise(node, serialiser)))

                    region.append_inline(Text(serialise(cell, serialiser) + trailing))

                    self.new_block(region)
                    return

                # Append the final cell, if not empty.

                else:
                    row.trailing = trailing

                if not cell.empty():
                    row.append(cell)
                break

            # A cell separator has been found.

            row.append(cell)

        # Add the row to the table and any new table to the region.

        table.add(row)
        if new_table:
            self.add_node(region, new_table)

        self.new_block(region)

    def parse_valign(self, attrs):

        "Handle vertical alignment within 'attrs'."

        value = self.match_group("value")
        valign = "top" if value == "^" else "bottom"
        attrs.append(TableAttr("valign", valign, True))



    def inline_patterns_for(self, name):

        """
        Return active patterns for the inline element having the given 'name'.
        The result is a copy of the inline pattern names in which the starting
        pattern for 'name' is replaced by the corresponding ending pattern.
        """

        names = self.inline_pattern_names[:]
        names[names.index(name)] = "%send" % name
        return names



    # Inline formatting handlers.

    def parse_inline(self, region, cls, pattern_name):

        """
        Handle an inline region within 'region', creating a span of the given
        'cls' and populating it using the patterns appropriate for the given
        'pattern_name'.
        """

        span = cls([])
        self.parse_region_details(span, self.inline_patterns_for(pattern_name))
        region.append_inline(span)

    def parse_larger(self, region):

        "Handle larger text within 'region'."

        self.parse_inline(region, Larger, "larger")

    def parse_monospace(self, region):

        """
        Handle monospaced text within 'region'. Only the ending pattern is
        active within the span.
        """

        span = Monospace([])
        self.parse_region_details(span, ["monospaceend"])
        region.append_inline(span)

    def parse_smaller(self, region):

        "Handle smaller text within 'region'."

        self.parse_inline(region, Smaller, "smaller")

    def parse_strike(self, region):

        "Handle strikethrough text within 'region'."

        self.parse_inline(region, Strikethrough, "strike")

    def parse_sub(self, region):

        "Handle subscript text within 'region'."

        self.parse_inline(region, Subscript, "sub")

    def parse_super(self, region):

        "Handle superscript text within 'region'."

        self.parse_inline(region, Superscript, "super")

    def parse_underline(self, region):

        "Handle underlined text within 'region'."

        self.parse_inline(region, Underline, "underline")

    # Link formatting handlers.

    def _parse_link(self, region, cls, pattern_names):

        """
        Handle a link-like element within 'region', creating a node of the
        given 'cls' and using the given 'pattern_names' to parse any label and
        parameters following the target.
        """

        target = self.match_group("target")
        end = self.match_group("end")

        # Obtain an object for the link target.

        link_target = parse_link_target(target, self.metadata)

        # Obtain an object for the node.

        span = cls([], link_target)

        # Obtain the extra details.

        if not end:
            cls = LinkLabel

            # Introduce a label or parameter for each separated region.

            while True:
                param = cls([])
                self.parse_region_details(param, pattern_names)
                span.append(param)

                if self.matching_pattern() != "linksep":
                    break

                # Regions after the first are parameters, not labels.

                cls = LinkParameter

        region.append_inline(span)

        # Record the link target for later processing.

        self.root.link_targets.append(link_target)

    def parse_link(self, region):

        "Handle a link within 'region'."

        self._parse_link(region, Link, self.link_pattern_names)

    def parse_transclusion(self, region):

        "Handle a transclusion within 'region'."

        self._parse_link(region, Transclusion, self.transclusion_pattern_names)



    # Complete inline pattern handlers.

    def parse_anchor(self, region):

        "Handle an anchor within 'region'."

        target = self.match_group("target")
        anchor = Anchor(target)
        region.append_inline(anchor)

    def parse_linebreak(self, region):

        "Handle a line break within 'region'."

        region.append_inline(LineBreak())

    def parse_macro(self, region):

        "Handle a macro within 'region'."

        name = self.match_group("name")
        args = self.match_group("args")

        # Obtain the raw arguments. Moin usually leaves it to the macro to
        # interpret the individual arguments.

        arglist = args.split(",") if args else []
        macro = Macro(name, arglist, region.append_point(), region)
        region.append_inline(macro)

        # Record the macro for later processing.

        self.root.macros.append(macro)

    def parse_verbatim(self, region):

        "Handle verbatim text within 'region'."

        text = self.match_group("verbatim")
        region.append_inline(Verbatim(text))



    # Table attribute handlers.

    def parse_table_attr(self, attrs, pattern_name):

        """
        Handle a table attribute, appending to 'attrs' an attribute named by
        'pattern_name' with the matched value.
        """

        attrs.append(TableAttr(pattern_name, self.match_group("value"), True))

    # NOTE(review): the 'cell' parameter of the following handlers is passed on
    # as the 'attrs' collection of parse_table_attr; it presumably receives the
    # attributes region, these patterns being table attribute patterns -
    # confirm against the common parser functionality.

    def parse_colour(self, cell):

        "Handle a background colour attribute."

        self.parse_table_attr(cell, "bgcolor")

    def parse_colspan(self, cell):

        "Handle a column span attribute."

        self.parse_table_attr(cell, "colspan")

    def parse_rowspan(self, cell):

        "Handle a row span attribute."

        self.parse_table_attr(cell, "rowspan")

    def parse_width(self, cell):

        "Handle a width attribute."

        self.parse_table_attr(cell, "width")



    # Regular expressions.

    # Fragments containing backslashes are written as raw strings so that the
    # regular expression escapes are not subject to string literal escape
    # processing.

    syntax = {
        # Page regions:

        "regionstart"   : join((group("indent", r"\N*"),                    # ws... (optional)
                                group("level", repeat("[{]", 3)))),         # {{{...

        "regionend"     : join((r"\N*",                                     # ws... (optional)
                                group("feature", join((
                                    group("level", repeat("[}]", 3)),       # }}}...
                                    optional(group("extra", r"\n"))))))),   # nl (optional)

        # Region header and directives:

        "header"        : join(("#!",                                       # #!
                                group("args", ".*?"), "\n")),               # text-excl-nl

        "directive"     : join((r"^#",                                      # #
                                group("directive", r".*?$"),                # rest of line
                                optional(group("extra", r"\n")))),          # nl (optional)

        # Region contents:

        # Line-oriented patterns support features which require their own
        # separate lines.

        "break"         : r"^(\s*?)\n",                                     # blank line

        "comment"       : join((r"^##",                                     # ##
                                group("comment", r".*?$"),                  # rest of line
                                optional(group("extra", r"\n")))),          # nl (optional)

        "defterm"       : join(("^",
                                group("pad", r"\N+"),                       # ws...
                                expect(".+?::"))),                          # text ::

        "defterm_empty" : join(("^",
                                group("pad", r"\N+"),                       # ws...
                                expect(r"::\s+"))),                         # :: ws...

        "heading"       : join(("^",
                                group("extra", r"\N*"),                     # ws... (optional)
                                group("level", "=+"),                       # =...
                                group("pad", r"\s+"),                       # ws...
                                expect(join((r".*?\N+",                     # text
                                             recur("level"),                # =...
                                             r"\N*$"))))),                  # ws... (optional)

        "listitem"      : join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"\*"),                     # list-marker
                                group("pad", r"\s*"))),                     # ws... (optional)

        "listitem_num"  : join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"\d+\."),                  # decimal-marker
                                optional(join(("#", group("num", r"\d+")))), # # num (optional)
                                group("pad", r"\s+"))),                     # ws...

        "listitem_alpha": join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"[aA]\."),                 # alpha-marker
                                optional(join(("#", group("num", r"\d+")))), # # num (optional)
                                group("pad", r"\s+"))),                     # ws...

        "listitem_roman": join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"[iI]\."),                 # roman-marker
                                optional(join(("#", group("num", r"\d+")))), # # num (optional)
                                group("pad", r"\s+"))),                     # ws...

        "listitem_dot"  : join(("^",
                                group("indent", r"\N+"),                    # ws...
                                group("marker", r"\."),                     # dot-marker
                                group("pad", r"\s*"))),                     # ws... (optional)

        "tablerow"      : r"^\|\|",                                         # ||

        # Region contents:

        # Inline patterns are for markup features that appear within blocks.
        # The patterns below start inline spans that can contain other markup
        # features.

        "fontstyle"     : group("style", repeat("'", 2, 6)),                # ''...

        # Trivial markup balancing is done below using the end features.

        "larger"        : join((r"~\+",                                     # ~+
                                expect(r"\P*?\+~"))),                       # ... +~

        "monospace"     : join((r"`",                                       # `
                                expect(r"\P*?`"))),                         # ... `

        "smaller"       : join((r"~-",                                      # ~-
                                expect(r"\P*?-~"))),                        # ... -~

        "strike"        : join((r"--\(",                                    # --(
                                expect(r"\P*?\)--"))),                      # ... )--

        "sub"           : join((r",,",                                      # ,,
                                expect(r"\P*?,,"))),                        # ... ,,

        "super"         : join((r"\^",                                      # ^
                                expect(r"\P*?\^"))),                        # ... ^

        "underline"     : join((r"__",                                      # __
                                expect(r"\P*?__"))),                        # ... __

        # Rules are treated as inline but, unlike the above, appear without
        # contents.

        "rule"          : group("rule", "-----*"),                          # ----...

        # Links and transclusions may start inline spans.

        "link"          : join((r"\[\[",                                    # [[
                                group("target", r"\E*?"),                   # ...
                                choice((r"\|",                              # |
                                        group("end", r"]]"))))),            # ]]

        "transclusion"  : join((r"\{\{",                                    # {{
                                excl(r"\{"),                                # not-{
                                group("target", ".*?"),                     # ...
                                choice((r"\|",                              # |
                                        group("end", r"}}"))))),            # }}

        # Complete inline patterns are for markup features that do not support
        # arbitrary content within them:

        "anchor"        : join((r"\(\(",                                    # ((
                                group("target", ".*?"),                     # target
                                r"\)\)")),                                  # ))

        "linebreak"     : r"\\\\",                                          # \\

        "macro"         : join(("<<",                                       # <<
                                group("name", r"\w+?"),                     # digit-letter...
                                optional(join((r"\(",                       # ( (optional)
                                               group("args", ".*?"),        # not-)...
                                               r"\)"))),                    # ) (optional)
                                ">>")),                                     # >>

        "verbatim"      : join(("<<<",                                      # <<<
                                group("verbatim", ".*?"),                   # ...
                                ">>>")),                                    # >>>

        # Ending patterns for inline features:

        "largerend"     : r"\+~",                                           # +~
        "linkend"       : r"]]",                                            # ]]
        "monospaceend"  : r"`",                                             # `
        "smallerend"    : r"-~",                                            # -~
        "strikeend"     : r"\)--",                                          # )--
        "subend"        : r",,",                                            # ,,
        "superend"      : r"\^",                                            # ^
        "transclusionend": r"}}",                                           # }}
        "underlineend"  : r"__",                                            # __

        # Heading contents:

        "headingend"    : join((group("pad", r"\N+"),                       # ws...
                                group("level", "=+"),                       # =...
                                group("extra", r"\N*\n"))),                 # ws (optional) nl

        # Link/transclusion contents:

        "linksep"       : r"\|",                                            # |

        # List contents:

        "deftermend"    : join(("::", group("pad", r"\s*?\n"))),            # ::
                                                                            # ws... (optional)
                                                                            # nl

        "deftermsep"    : join(("::", group("pad", r"\s+"))),               # ::
                                                                            # ws...

        # The decimal marker alternative accepts several digits so that it
        # agrees with the marker accepted by the listitem_num pattern.

        "listitemend"   : join((r"^",                                       # next line
                                choice((expect(r"[^\s]"),                   # without indent
                                        expect(r"\Z"),                      # end of string
                                        expect(r"\N+\*"),                   # or with ws... list-marker
                                        expect(r"\N+\d+\."),                # or with ws... decimal-marker
                                        expect(r"\N+[aA]\."),               # or with ws... alpha-marker
                                        expect(r"\N+[iI]\."),               # or with ws... roman-marker
                                        expect(r"\N+\."),                   # or with ws... dot-marker
                                        expect(r"\N+.+?::\s"),              # or with ws... text :: ws (next defterm)
                                        expect(r"\N+::\s"))))),             # or with ws... :: ws (next defitem)

        # Table contents:

        "tableattrs"    : join(("<",                                        # lt
                                excl("<"))),                                # not-lt

        "tablecell"     : r"\|\|",                                          # ||

        "tableend"      : join((group("extra", r"\s*?"),                    # ws... (optional)
                                "^")),                                      # next line

        # Table attributes:

        "tableattrsend" : r">",                                             # >
        "halign"        : group("value", "[(:)]"),                          # halign-marker
        "valign"        : group("value", "[v^]"),                           # valign-marker
        "colour"        : group("value", join((r"\#",                       # #
                                               repeat("[0-9A-Fa-f]", 6, 6)))), # nnnnnn

        "colspan"       : join(("-",                                        # -
                                group("value", r"\d+"))),                   # n...

        "rowspan"       : join((r"\|",                                      # |
                                group("value", r"\d+"))),                   # n...

        "width"         : group("value", r"\d+%"),                          # n... %

        "attrname"      : join((excl(r"[-\d]"),                             # not-dash-or-digit
                                group("name", r"[-\w]+"))),                 # dash-digit-letter...

        "attrvalue"     : join(("=", group("quote", r"\Q"),                 # quote
                                group("value", ".*?"),                      # non-quote... (optional)
                                recur("quote"))),                           # quote
        }

    patterns = get_patterns(syntax)



    # Patterns available within certain markup features.

    table_attr_pattern_names = [
        "attrname", "colour", "colspan", "halign", "rowspan", "tableattrsend",
        "valign", "width"
        ]

    inline_without_links_pattern_names = [
        "anchor", "fontstyle", "larger", "linebreak", "macro",
        "monospace", "regionstart", "smaller", "strike", "sub", "super",
        "underline", "verbatim"
        ]

    inline_pattern_names = inline_without_links_pattern_names + [
        "link", "transclusion"]

    link_pattern_names = inline_without_links_pattern_names + [
        "linkend", "linksep", "transclusion"]

    list_pattern_names = [
        "listitem", "listitem_alpha", "listitem_dot", "listitem_num",
        "listitem_roman",
        ]

    listitem_pattern_names = inline_pattern_names + ["listitemend"]

    region_without_table_pattern_names = inline_pattern_names + list_pattern_names + [
        "break", "comment", "heading", "defterm", "defterm_empty",
        "regionend", "rule",
        ]

    table_row_pattern_names = inline_pattern_names + [
        "tableattrs", "tablecell", "tableend"
        ]

    transclusion_pattern_names = inline_without_links_pattern_names + [
        "linksep", "transclusionend"]

    # The region pattern names are specifically used by the common parser
    # functionality.

    region_pattern_names = region_without_table_pattern_names + ["tablerow"]



    # Pattern handlers.

    end_region = ParserBase.end_region
    parse_section_end = ParserBase.parse_region_end

    handlers = {
        None                : end_region,
        "anchor"            : parse_anchor,
        "attrname"          : parse_attrname,
        "break"             : parse_break,
        "colour"            : parse_colour,
        "colspan"           : parse_colspan,
        "comment"           : parse_comment,
        "defterm"           : parse_defterm,
        "defterm_empty"     : parse_defterm_empty,
        "deftermend"        : end_region,
        "deftermsep"        : end_region,
        "directive"         : parse_directive,
        "fontstyle"         : parse_fontstyle,
        "halign"            : parse_halign,
        "heading"           : parse_heading,
        "headingend"        : parse_heading_end,
        "larger"            : parse_larger,
        "largerend"         : end_region,
        "linebreak"         : parse_linebreak,
        "link"              : parse_link,
        "linkend"           : end_region,
        "linksep"           : end_region,
        "macro"             : parse_macro,
        "listitemend"       : end_region,
        "listitem"          : parse_listitem,
        "listitem_alpha"    : parse_listitem,
        "listitem_dot"      : parse_listitem,
        "listitem_num"      : parse_listitem,
        "listitem_roman"    : parse_listitem,
        "monospace"         : parse_monospace,
        "monospaceend"      : end_region,
        "regionstart"       : parse_section,
        "regionend"         : parse_section_end,
        "rowspan"           : parse_rowspan,
        "rule"              : parse_rule,
        "smaller"           : parse_smaller,
        "smallerend"        : end_region,
        "strike"            : parse_strike,
        "strikeend"         : end_region,
        "sub"               : parse_sub,
        "subend"            : end_region,
        "super"             : parse_super,
        "superend"          : end_region,
        "tableattrs"        : parse_table_attrs,
        "tableattrsend"     : end_region,
        "tablerow"          : parse_table_row,
        "tablecell"         : end_region,
        "tableend"          : end_region,
        "transclusion"      : parse_transclusion,
        "transclusionend"   : end_region,
        "underline"         : parse_underline,
        "underlineend"      : end_region,
        "valign"            : parse_valign,
        "verbatim"          : parse_verbatim,
        "width"             : parse_width,
        }

parser = MoinParser

# vim: tabstop=4 expandtab shiftwidth=4