#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022,
              2023 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# Conventions used by the concrete node classes in this module:
#
#  * __repr__ produces a constructor-like representation of the node
#  * prettyprint(indent) produces an indented, human-readable description
#  * visit(visitor) calls the visitor method corresponding to the node type

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        "Initialise the container with the given list of child 'nodes'."

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' to the children."

        for node in nodes:
            self.append(node)

    def add(self, node):

        """
        Add 'node' to the children, replacing the last child if that child
        reports itself as empty, appending 'node' otherwise.
        """

        last = self.node(-1)
        if last and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' inline."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        last = self.node(-1)

        # Only a trailing block accepts inline nodes on behalf of this
        # container.

        if isinstance(last, Block):
            return last
        else:
            return self

    def empty(self):

        "Return whether the container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        "Insert after 'old' in the children the 'new' node."

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        "Return the child at 'index' or None if there is no such child."

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        "Combine adjacent text nodes."

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if text is None:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text is not None:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text is not None:
            self.append(text)

    def remove(self, node):

        "Remove 'node' from the children."

        self.nodes.remove(node)

    def replace(self, old, new):

        "Replace 'old' with 'new' in the children."

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.
        """

        i = self.nodes.index(node)

        # The new container is created using only a list of nodes, so this
        # only works for classes that can be instantiated with that single
        # argument.

        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)
            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

    def __str__(self):

        # The prettyprint method is provided by the concrete subclasses.

        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Add descriptions of the children to the list 'l', which should already
        hold a description of this node, using a level of indentation one
        greater than 'indent'. Return the joined descriptions.
        """

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        """
        Initialise the region with the given child 'nodes', recording the
        'level', 'indent', 'type', 'args', 'transparent' and 'extra' details.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def append_point(self):

        "Return the container to which inline nodes are added."

        if self.transparent:
            last = self.node(-1)

            # A transparent region without children has nothing to delegate
            # to, so it accepts the inline nodes itself.

            if last is not None:
                return last

        return self

    def have_end(self, s):

        """
        Return a true value if 's' starts with a closing brace and has a
        length equal to the level of this region, provided the level is
        non-zero.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):
        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.region(self)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):
        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.block(self)

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        "Initialise the item with 'nodes', recording 'pad' and 'extra'."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):
        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.defitem(self)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        "Initialise the term with 'nodes', recording 'pad' and 'extra'."

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):
        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.defterm(self)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        """
        Initialise the styled text with 'nodes', recording whether 'emphasis'
        and 'strong' styles apply.
        """

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Clear the emphasis style of this node. If the strong style is also
        set, the children are first wrapped in a new emphasised node which
        becomes the sole child. Return the strong style setting.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
        self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Clear the strong style of this node. If the emphasis style is also
        set, the children are first wrapped in a new strong node which becomes
        the sole child. Return the emphasis style setting.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
        self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):
        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.fontstyle(self)

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        """
        Initialise the heading with 'nodes' and 'level', recording the
        'start_extra', 'start_pad', 'end_pad', 'end_extra' and 'identifier'
        details.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def prettyprint(self, indent=""):
        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
             " end_extra=%r identifier=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad,
            self.end_extra, self.identifier)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.heading(self)

class LinkLabel(Container):

    "A link or transclusion label."

    def __repr__(self):
        return "LinkLabel(%r)" % self.nodes

    def prettyprint(self, indent=""):
        l = ["%sLinkLabel" % indent]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.link_label(self)

class LinkParameter(Container):

    "A link or transclusion parameter."

    def __repr__(self):
        return "LinkParameter(%r)" % self.nodes

    def prettyprint(self, indent=""):
        l = ["%sLinkParameter" % indent]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.link_parameter(self)

class List(Container):

    "A list."

    def __init__(self, nodes):

        "Initialise the list with 'nodes', obtaining details from the first."

        Container.__init__(self, nodes)
        self.init()

    def init(self):

        """
        Record the first child together with its indent, marker and num
        details, or record None for each of these if there are no children.
        """

        self.first = first = self.nodes[0] if self.nodes else None
        self.indent = first.indent if first else first
        self.marker = first.marker if first else first
        self.num = first.num if first else first

    def __repr__(self):
        return "List(%r)" % self.nodes

    def prettyprint(self, indent=""):

        # Children may have been added since initialisation.

        if not self.first:
            self.init()
        l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):

        # Children may have been added since initialisation.

        if not self.first:
            self.init()
        visitor.list(self)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with 'nodes', recording the 'indent', 'marker',
        'space' and 'num' details.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):
        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.listitem(self)

class TableAttrs(Container):

    "A collection of table attributes."

    def __init__(self, nodes):

        "Initialise the collection with 'nodes'."

        Container.__init__(self, nodes)

        # Parsing state flags, inconsequential to any final document tree.
        # If incomplete remains set, the attributes are discarded.

        self.incomplete = True
        self.found_cell = False

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):
        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.table_attrs(self)

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):
        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.table(self)

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None, leading="", padding=""):

        """
        Initialise the cell with 'nodes', recording the 'attrs', 'leading' and
        'padding' details.
        """

        Container.__init__(self, nodes)
        self.attrs = attrs
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableCell(%r, %r, %r, %r)" % (self.nodes, self.attrs,
            self.leading, self.padding)

    def prettyprint(self, indent=""):
        l = ["%sTableCell: leading=%r padding=%r" % (indent, self.leading,
            self.padding)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.table_cell(self)

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing="", leading="", padding=""):

        """
        Initialise the row with 'nodes', recording the 'trailing', 'leading'
        and 'padding' details.
        """

        Container.__init__(self, nodes)
        self.trailing = trailing
        self.leading = leading
        self.padding = padding

    def __repr__(self):
        return "TableRow(%r, %r, %r, %r)" % (self.nodes, self.trailing,
            self.leading, self.padding)

    def prettyprint(self, indent=""):
        l = ["%sTableRow: trailing=%r leading=%r padding=%r" % (
            indent, self.trailing, self.leading, self.padding)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.table_row(self)



# Inline nodes with children.

class Inline(Container):

    "Generic inline formatting."

    # Subclasses provide the visit method; the representations employ the
    # name of the actual class.

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):
        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def visit(self, visitor):
        visitor.larger(self)

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with 'nodes', recording the 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):
        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.link(self)

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None, inline=True):

        """
        Initialise the macro with the given 'name' and 'args', recording the
        'parent', 'region' and 'inline' details. If 'nodes' is omitted or
        empty, a new list of children is created.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region
        self.inline = inline

    def __repr__(self):
        return "Macro(%r, %r, %r, %r, %r, %r)" % (self.name, self.args,
                                                  self.parent, self.region,
                                                  self.nodes, self.inline)

    def prettyprint(self, indent=""):
        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.macro(self)

class Monospace(Inline):

    "Monospaced text."

    def visit(self, visitor):
        visitor.monospace(self)

class Smaller(Inline):

    "Smaller text."

    def visit(self, visitor):
        visitor.smaller(self)

class Strikethrough(Inline):

    "Crossed-out text."

    def visit(self, visitor):
        visitor.strikethrough(self)

class Subscript(Inline):

    "Subscripted text."

    def visit(self, visitor):
        visitor.subscript(self)

class Superscript(Inline):

    "Superscripted text."

    def visit(self, visitor):
        visitor.superscript(self)

class Transclusion(Container):

    "Transclusion details."

    def __init__(self, nodes, target):

        "Initialise the transclusion with 'nodes', recording the 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Transclusion(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):
        l = ["%sTransclusion: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def visit(self, visitor):
        visitor.transclusion(self)

class Underline(Inline):

    "Underlined text."

    def visit(self, visitor):
        visitor.underline(self)



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return whether the node is empty, this being false by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):
        return "%sAnchor: target=%r" % (indent, self.target)

    def visit(self, visitor):
        visitor.anchor(self)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):
        return "%sBreak" % indent

    def visit(self, visitor):
        visitor.break_(self)

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the given 'comment' and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def prettyprint(self, indent=""):
        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)

    def visit(self, visitor):
        visitor.comment(self)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the given 'directive' and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def prettyprint(self, indent=""):
        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)

    def visit(self, visitor):
        visitor.directive(self)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):
        return "%sLineBreak" % indent

    def visit(self, visitor):
        visitor.linebreak(self)

class NonBreakingSpace(Node):

    "A non-breaking space within a block."

    def __repr__(self):
        return "NonBreakingSpace()"

    def prettyprint(self, indent=""):
        return "%sNonBreakingSpace" % indent

    def visit(self, visitor):
        visitor.nbsp(self)

class Rule(Node):

    "A horizontal rule."

    def __init__(self, height):

        "Initialise the rule with the given 'height'."

        self.height = height

    def __repr__(self):
        return "Rule(%d)" % self.height

    def prettyprint(self, indent=""):
        return "%sRule: height=%d" % (indent, self.height)

    def visit(self, visitor):
        visitor.rule(self)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name', recording the 'value',
        'concise' and 'quote' details.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):
        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def visit(self, visitor):
        visitor.table_attr(self)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node holds no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline character."

        return "\n" in self.s

    def merge(self, text):

        "Add the string held by the 'text' node to the end of this node."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):
        return "%sText: %r" % (indent, self.s)

    def visit(self, visitor):
        visitor.text(self)

class Verbatim(Node):

    "Verbatim text."

    def __init__(self, text):

        "Initialise the node with the given 'text'."

        self.text = text

    def __repr__(self):
        return "Verbatim(%r)" % self.text

    def prettyprint(self, indent=""):
        return "%sVerbatim: text=%r" % (indent, self.text)

    def visit(self, visitor):
        visitor.verbatim(self)

# vim: tabstop=4 expandtab shiftwidth=4