#!/usr/bin/env python

"""
Moin wiki format document tree nodes.

Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

class Container:

    "A container of document nodes."

    def __init__(self, nodes):

        """
        Initialise the container with the given list of child 'nodes'. The list
        is retained, not copied, and is modified by the mutating methods.
        """

        self.nodes = nodes

        # In principle, allow blocks within containers. Some nodes may forbid
        # them to simplify the document structure.

        self.allow_blocks = True

    def append(self, node):

        "Append 'node' to the children of this container."

        self.nodes.append(node)

    def append_many(self, nodes):

        "Append each of the given 'nodes' using the append method."

        for node in nodes:
            self.append(node)

    # NOTE: This alias is bound to Container.append when the class is defined,
    # NOTE: so overriding append in a subclass does not change add.

    add = append

    def append_inline(self, node):

        "Append 'node' inline within the appropriate container."

        n = self.append_point()

        # Redirect the addition if another container is to accept the node.

        if n is not self:
            n.append_inline(node)

        # Otherwise, append within this container.

        else:
            n.append(node)

    def append_inline_many(self, nodes):

        "Append each of the given 'nodes' using the append_inline method."

        for node in nodes:
            self.append_inline(node)

    def append_point(self):

        "Return the container to which inline nodes are added."

        return self

    def empty(self):

        "Return whether this container has no children."

        return not self.nodes

    def insert_after(self, old, new):

        """
        Insert after 'old' in the children the 'new' node. A ValueError is
        raised by the underlying list if 'old' is not a child.
        """

        index = self.nodes.index(old)
        self.nodes.insert(index + 1, new)

    def node(self, index):

        """
        Return the child at the given 'index' (negative values count from the
        end), or None if there is no such child.
        """

        try:
            return self.nodes[index]
        except IndexError:
            return None

    def normalise(self):

        """
        Combine adjacent text nodes. The first text node of each run is kept
        and has the text of the following text nodes merged into it.
        """

        nodes = self.nodes
        self.nodes = []
        text = None

        for node in nodes:

            # Open a text node or merge text into an open node.

            if isinstance(node, Text):
                if not text:
                    text = node
                else:
                    text.merge(node)

            # Close any open text node and append the current node.

            else:
                if text:
                    self.append(text)
                    text = None
                self.append(node)

        # Add any open text node.

        if text:
            self.append(text)

    def remove(self, node):

        """
        Remove 'node' from the children. A ValueError is raised by the
        underlying list if 'node' is not a child.
        """

        self.nodes.remove(node)

    def replace(self, old, new):

        """
        Replace 'old' with 'new' in the children. A ValueError is raised by the
        underlying list if 'old' is not a child.
        """

        i = self.nodes.index(old)
        self.nodes[i] = new

    def split_at(self, node):

        """
        Split the container at 'node', returning a new container holding the
        nodes following 'node' that are moved from this container.

        The new container is made by calling this object's class with only the
        list of following nodes, so attributes other than the children take
        their constructor defaults. Classes whose constructors require further
        arguments cannot be split this way and cause a TypeError.
        """

        i = self.nodes.index(node)
        following = self.__class__(self.nodes[i+1:])

        # Remove the node and the following parts from this container.

        del self.nodes[i:]
        return following

    def text_content(self):

        """
        Return a string containing the content of text nodes within this
        container.
        """

        l = []

        for node in self.nodes:
            if isinstance(node, Text):
                l.append(node.s)

            # Descend into child containers. Other childless nodes contribute
            # nothing.

            elif isinstance(node, Container):
                l.append(node.text_content())

        return "".join(l)

    def whitespace_only(self):

        "Return whether the container provides only whitespace text."

        return not self.text_content().strip()

    def __str__(self):

        """
        Return the prettyprinted form of this node. The prettyprint method is
        provided by subclasses.
        """

        return self.prettyprint()

    def _prettyprint(self, l, indent=""):

        """
        Append to the list 'l' the prettyprinted form of each child, produced
        using a deeper indent than 'indent', and return the items of 'l' joined
        by newlines. Callers supply 'l' already holding this node's own line.
        """

        for node in self.nodes:
            l.append(node.prettyprint(indent + " "))
        return "\n".join(l)

    def _to_string(self, out):

        "Serialise each child in order using 'out'."

        for node in self.nodes:
            node.to_string(out)

class Region(Container):

    "A region of the page."

    def __init__(self, nodes, level=0, indent=0, type=None, args=None,
                 transparent=True, extra=None):

        """
        Initialise the region with the given child 'nodes' together with the
        'level', 'indent', 'type', 'args', 'transparent' and 'extra' details,
        all of which are stored as attributes of the same names.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.indent = indent
        self.type = type
        self.args = args
        self.transparent = transparent
        self.extra = extra

    def add(self, node):

        """
        Add 'node' to the region, replacing the last child if that child is
        empty, appending 'node' otherwise.
        """

        last = self.node(-1)
        if last and last.empty():
            self.nodes[-1] = node
        else:
            self.append(node)

    def append_point(self):

        """
        Return the container to which inline nodes are added. For a transparent
        region this is the last child, and an IndexError is raised if the region
        has no children. Otherwise, it is the region itself.
        """

        if self.transparent:
            return self.nodes[-1]
        else:
            return self

    def have_end(self, s):

        """
        Return a true value if 's' ends this region: the region has a non-zero
        level, 's' starts with a closing brace, and the length of 's' equals the
        level.
        """

        return self.level and s.startswith("}") and self.level == len(s)

    def __repr__(self):
        return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
            self.indent, self.type, self.args, self.transparent, self.extra)

    def prettyprint(self, indent=""):

        """
        Return a multi-line description of the region and its children, with
        each line starting with at least the given 'indent'.
        """

        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
            self.level, self.indent, self.type, self.args, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the region using 'out' for the start and end of the region,
        and a serialiser obtained from 'out' for the region's children.
        """

        out.start_region(self.level, self.indent, self.type, self.extra)

        # Obtain a serialiser for the region from the same format family.
        # Retain the same serialiser if no appropriate serialiser could be
        # obtained.

        serialiser_name = "%s.%s" % (out.format, self.type)
        serialiser = out.get_serialiser(serialiser_name)

        # Serialise the region.

        self._to_string(serialiser)

        # End the region with the previous serialiser.

        out.end_region(self.level, self.indent, self.type, self.extra)



# Block nodes.

class Block(Container):

    "A block in the page."

    def __repr__(self):
        return "Block(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a multi-line description of the block and its children."

        l = ["%sBlock" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the block and its children using 'out'."

        out.start_block()
        self._to_string(out)
        out.end_block()

class DefItem(Container):

    "A definition item."

    def __init__(self, nodes, pad, extra):

        """
        Initialise the item with the given child 'nodes' plus the 'pad' and
        'extra' details that are passed to the serialiser.
        """

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the item and its children."

        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item and its children using 'out'."

        out.start_defitem(self.pad, self.extra)
        self._to_string(out)
        out.end_defitem(self.pad, self.extra)

class DefTerm(Container):

    "A definition term."

    def __init__(self, nodes, pad, extra=""):

        """
        Initialise the term with the given child 'nodes' plus the 'pad' and
        'extra' details that are passed to the serialiser.
        """

        Container.__init__(self, nodes)
        self.pad = pad
        self.extra = extra

    def __repr__(self):
        return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the term and its children."

        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the term and its children using 'out'."

        out.start_defterm(self.pad, self.extra)
        self._to_string(out)
        out.end_defterm(self.pad, self.extra)

class FontStyle(Container):

    "Emphasised and/or strong text."

    def __init__(self, nodes, emphasis=False, strong=False):

        """
        Initialise the node with the given child 'nodes' and flags indicating
        whether 'emphasis' and 'strong' styles apply.
        """

        Container.__init__(self, nodes)
        self.emphasis = emphasis
        self.strong = strong

    def close_emphasis(self):

        """
        Close emphasis for this node. If the node is also strong, the existing
        children are moved into a new emphasis-only node that becomes the sole
        child, and this node ceases to be emphasised. Return the strong flag.
        """

        if self.strong:
            span = FontStyle(self.nodes, emphasis=True)
            self.nodes = [span]
            self.emphasis = False
        return self.strong

    def close_strong(self):

        """
        Close strong styling for this node. If the node is also emphasised, the
        existing children are moved into a new strong-only node that becomes the
        sole child, and this node ceases to be strong. Return the emphasis flag.
        """

        if self.emphasis:
            span = FontStyle(self.nodes, strong=True)
            self.nodes = [span]
            self.strong = False
        return self.emphasis

    def __repr__(self):
        return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the node and its children."

        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the node and its children using 'out'. Only one style is
        emitted: emphasis takes precedence if both flags are set.
        """

        if self.emphasis:
            out.start_emphasis()
        elif self.strong:
            out.start_strong()
        self._to_string(out)
        if self.emphasis:
            out.end_emphasis()
        elif self.strong:
            out.end_strong()

class Heading(Container):

    "A heading."

    def __init__(self, nodes, level, start_extra="", start_pad="", end_pad="", end_extra="",
                 identifier=None):

        """
        Initialise the heading with the given child 'nodes' and 'level', plus
        the 'start_extra', 'start_pad', 'end_pad', 'end_extra' and 'identifier'
        details that are passed to the serialiser.
        """

        Container.__init__(self, nodes)
        self.level = level
        self.start_extra = start_extra
        self.start_pad = start_pad
        self.end_pad = end_pad
        self.end_extra = end_extra
        self.identifier = identifier

    def __repr__(self):
        return "Heading(%r, %d, %r, %r, %r, %r, %r)" % (
            self.nodes, self.level, self.start_extra, self.start_pad,
            self.end_pad, self.end_extra, self.identifier)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the heading and its children."

        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
             " end_extra=%r identifier=%r" % (
            indent, self.level, self.start_extra, self.start_pad, self.end_pad,
            self.end_extra, self.identifier)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the heading and its children using 'out'."

        out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
        self._to_string(out)
        out.end_heading(self.level, self.end_pad, self.end_extra)

class List(Container):

    "A list."

    def __init__(self, nodes, indent, marker, num):

        """
        Initialise the list with the given child 'nodes' plus the 'indent',
        'marker' and 'num' details that are passed to the serialiser.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.num = num

    def __repr__(self):
        return "List(%r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.num)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the list and its children."

        l = ["%sList: indent=%d marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the list and its children using 'out'."

        out.start_list(self.indent, self.marker, self.num)
        self._to_string(out)
        out.end_list(self.indent, self.marker, self.num)

class ListItem(Container):

    "A list item."

    def __init__(self, nodes, indent, marker, space, num):

        """
        Initialise the item with the given child 'nodes' plus the 'indent',
        'marker', 'space' and 'num' details that are passed to the serialiser.
        """

        Container.__init__(self, nodes)
        self.indent = indent
        self.marker = marker
        self.space = space
        self.num = num

        # Forbid blocks within list items for simpler structure.

        self.allow_blocks = False

    def __repr__(self):
        return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the item and its children."

        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the item and its children using 'out'."

        out.start_listitem(self.indent, self.marker, self.space, self.num)
        self._to_string(out)
        out.end_listitem(self.indent, self.marker, self.space, self.num)

class TableAttrs(Container):

    "A collection of table attributes."

    def __repr__(self):
        return "TableAttrs(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a multi-line description of the collection and its children."

        l = ["%sTableAttrs:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the collection using 'out'. The children are passed together
        to the serialiser instead of being serialised individually.
        """

        out.start_table_attrs()
        out.table_attrs(self.nodes)
        out.end_table_attrs()

class Table(Container):

    "A table."

    def __repr__(self):
        return "Table(%r)" % self.nodes

    def prettyprint(self, indent=""):

        "Return a multi-line description of the table and its children."

        l = ["%sTable:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the table and its children using 'out'."

        out.start_table()
        self._to_string(out)
        out.end_table()

class TableCell(Container):

    "A table cell."

    def __init__(self, nodes, attrs=None):

        """
        Initialise the cell with the given child 'nodes' and any 'attrs' to be
        passed to the serialiser.
        """

        Container.__init__(self, nodes)
        self.attrs = attrs

    def __repr__(self):
        return "TableCell(%r, %r)" % (self.nodes, self.attrs)

    def prettyprint(self, indent=""):

        """
        Return a multi-line description of the cell and its children. The cell
        attributes are not included.
        """

        l = ["%sTableCell:" % indent]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the cell and its children using 'out'."

        out.start_table_cell(self.attrs)
        self._to_string(out)
        out.end_table_cell()

class TableRow(Container):

    "A table row."

    def __init__(self, nodes, trailing=""):

        """
        Initialise the row with the given child 'nodes' and any 'trailing'
        detail to be passed to the serialiser when ending the row.
        """

        Container.__init__(self, nodes)
        self.trailing = trailing

    def __repr__(self):
        return "TableRow(%r, %r)" % (self.nodes, self.trailing)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the row and its children."

        l = ["%sTableRow: trailing=%r" % (indent, self.trailing)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the row and its children using 'out'."

        out.start_table_row()
        self._to_string(out)
        out.end_table_row(self.trailing)



# Inline nodes with children.

class Inline(Container):

    """
    Generic inline formatting. Subclasses provide the to_string method, with
    the representations here being labelled using the name of the actual class.
    """

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.nodes)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the node and its children."

        l = ["%s%s" % (indent, self.__class__.__name__)]
        return self._prettyprint(l, indent)

class Larger(Inline):

    "Larger text."

    def to_string(self, out):

        "Serialise the node and its children using 'out'."

        out.start_larger()
        self._to_string(out)
        out.end_larger()

class Link(Container):

    "Link details."

    def __init__(self, nodes, target):

        "Initialise the link with the given child 'nodes' and 'target'."

        Container.__init__(self, nodes)
        self.target = target

    def __repr__(self):
        return "Link(%r, %r)" % (self.nodes, self.target)

    def prettyprint(self, indent=""):

        "Return a multi-line description of the link and its children."

        l = ["%sLink: target=%r" % (indent, self.target)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        """
        Serialise the link using 'out'. The link text section is only emitted
        if the link has children.
        """

        out.start_link(self.target, self.nodes)
        if self.nodes:
            out.start_linktext()
            self._to_string(out)
            out.end_linktext()
        out.end_link()

class Macro(Container):

    "Macro details."

    def __init__(self, name, args, parent, region, nodes=None):

        """
        Initialise the macro with the given 'name' and 'args', the 'parent' and
        'region' objects associated with it, and any child 'nodes'. Where
        'nodes' is omitted or empty, a new empty list is used.
        """

        Container.__init__(self, nodes or [])
        self.name = name
        self.args = args
        self.parent = parent
        self.region = region

    def __repr__(self):

        # The parent and region are shown in full, but if either of them holds
        # this macro among its descendants, showing the macro in full again
        # would recurse without end. A nested occurrence is thus abbreviated.

        if getattr(self, "_repr_active", False):
            return "Macro(%r, %r, ...)" % (self.name, self.args)

        self._repr_active = True
        try:
            return "Macro(%r, %r, %r, %r, %r)" % (self.name, self.args, self.parent, self.region, self.nodes)
        finally:
            self._repr_active = False

    def prettyprint(self, indent=""):

        """
        Return a multi-line description of the macro and its children. The
        parent and region are not included.
        """

        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
        return self._prettyprint(l, indent)

    def to_string(self, out):

        "Serialise the macro and any children using 'out'."

        out.start_macro(self.name, self.args, self.nodes)
        if self.nodes:
            self._to_string(out)
        out.end_macro()

class Monospace(Inline):

    "Monospaced text."

    def to_string(self, out):

        "Serialise the node and its children using 'out'."

        out.start_monospace()
        self._to_string(out)
        out.end_monospace()

class Smaller(Inline):

    "Smaller text."

    def to_string(self, out):

        "Serialise the node and its children using 'out'."

        out.start_smaller()
        self._to_string(out)
        out.end_smaller()

class Strikethrough(Inline):

    "Crossed-out text."

    def to_string(self, out):

        "Serialise the node and its children using 'out'."

        out.start_strikethrough()
        self._to_string(out)
        out.end_strikethrough()

class Subscript(Inline):

    "Subscripted text."

    def to_string(self, out):

        "Serialise the node and its children using 'out'."

        out.start_subscript()
        self._to_string(out)
        out.end_subscript()

class Superscript(Inline):

    "Superscripted text."

    def to_string(self, out):

        "Serialise the node and its children using 'out'."

        out.start_superscript()
        self._to_string(out)
        out.end_superscript()

class Underline(Inline):

    "Underlined text."

    def to_string(self, out):

        "Serialise the node and its children using 'out'."

        out.start_underline()
        self._to_string(out)
        out.end_underline()



# Nodes without children.

class Node:

    "A document node without children."

    def empty(self):

        "Return whether the node is empty, which is never the case by default."

        return False

class Anchor(Node):

    "Anchor details."

    def __init__(self, target):

        "Initialise the anchor with the given 'target'."

        self.target = target

    def __repr__(self):
        return "Anchor(%r)" % self.target

    def prettyprint(self, indent=""):

        "Return a description of the anchor starting with 'indent'."

        return "%sAnchor: target=%r" % (indent, self.target)

    def to_string(self, out):

        "Serialise the anchor using 'out'."

        out.anchor(self.target)

class Break(Node):

    "A paragraph break."

    def __repr__(self):
        return "Break()"

    def prettyprint(self, indent=""):

        "Return a description of the break starting with 'indent'."

        return "%sBreak" % indent

    def to_string(self, out):

        "Serialise the break using 'out'."

        out.break_()

class Comment(Node):

    "A comment."

    def __init__(self, comment, extra):

        "Initialise the node with the given 'comment' and 'extra' details."

        self.comment = comment
        self.extra = extra

    def __repr__(self):
        return "Comment(%r, %r)" % (self.comment, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the comment starting with 'indent'."

        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)

    def to_string(self, out):

        "Serialise the comment using 'out'."

        out.comment(self.comment, self.extra)

class Directive(Node):

    "A processing directive."

    def __init__(self, directive, extra):

        "Initialise the node with the given 'directive' and 'extra' details."

        self.directive = directive
        self.extra = extra

    def __repr__(self):
        return "Directive(%r, %r)" % (self.directive, self.extra)

    def prettyprint(self, indent=""):

        "Return a description of the directive starting with 'indent'."

        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)

    def to_string(self, out):

        "Serialise the directive using 'out'."

        out.directive(self.directive, self.extra)

class LineBreak(Node):

    "A line break within a block."

    def __repr__(self):
        return "LineBreak()"

    def prettyprint(self, indent=""):

        "Return a description of the line break starting with 'indent'."

        return "%sLineBreak" % indent

    def to_string(self, out):

        "Serialise the line break using 'out'."

        out.linebreak()

class Rule(Node):

    "A horizontal rule."

    def __init__(self, length):

        "Initialise the rule with the given 'length'."

        self.length = length

    def __repr__(self):
        return "Rule(%d)" % self.length

    def prettyprint(self, indent=""):

        "Return a description of the rule starting with 'indent'."

        return "%sRule: length=%d" % (indent, self.length)

    def to_string(self, out):

        "Serialise the rule using 'out'."

        out.rule(self.length)

class TableAttr(Node):

    "A table attribute."

    def __init__(self, name, value=None, concise=False, quote=None):

        """
        Initialise the attribute with the given 'name' and optional 'value',
        plus the 'concise' and 'quote' details that are passed to the
        serialiser.
        """

        self.name = name
        self.value = value
        self.concise = concise
        self.quote = quote

    def __repr__(self):
        return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)

    def prettyprint(self, indent=""):

        "Return a description of the attribute starting with 'indent'."

        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)

    def to_string(self, out):

        "Serialise the attribute using 'out'."

        out.table_attr(self.name, self.value, self.concise, self.quote)

class Text(Node):

    "A text node."

    def __init__(self, s):

        "Initialise the node with the string 's'."

        self.s = s

    def empty(self):

        "Return whether the node has no text."

        return not self.s

    def multiline(self):

        "Return whether the text contains a newline."

        return "\n" in self.s

    def merge(self, text):

        "Add the string from the 'text' node to the end of this node's string."

        self.s += text.s

    def __repr__(self):
        return "Text(%r)" % self.s

    def prettyprint(self, indent=""):

        "Return a description of the text starting with 'indent'."

        return "%sText: %r" % (indent, self.s)

    def to_string(self, out):

        "Serialise the text using 'out'."

        out.text(self.s)

# vim: tabstop=4 expandtab shiftwidth=4