1.1 --- a/moinformat/links/common.py Sat Apr 13 00:04:21 2019 +0200
1.2 +++ b/moinformat/links/common.py Sat Apr 13 00:07:45 2019 +0200
1.3 @@ -23,13 +23,19 @@
1.4
1.5 "A link abstraction."
1.6
1.7 - def __init__(self, target, label, type):
1.8 + def __init__(self, target, label, link_target=None):
1.9
1.10 - "Initialise the link with the given 'target', 'label' and 'type'."
1.11 + """
1.12 +        Initialise the link with the given 'target', 'label' and
1.13 + 'link_target' object.
1.14 + """
1.15
1.16 self.target = target
1.17 self.label = label
1.18 - self.type = type
1.19 + self.link_target = link_target
1.20 +
1.21 + def __repr__(self):
1.22 + return "Link(%r, %r, %r)" % (self.target, self.label, self.link_target)
1.23
1.24 def get_target(self):
1.25 return self.target
1.26 @@ -37,8 +43,8 @@
1.27 def get_label(self):
1.28 return self.label or self.target
1.29
1.30 - def get_type(self):
1.31 - return self.type
1.32 + def get_link_target(self):
1.33 + return self.link_target
1.34
1.35 class Linker:
1.36
2.1 --- a/moinformat/links/html.py Sat Apr 13 00:04:21 2019 +0200
2.2 +++ b/moinformat/links/html.py Sat Apr 13 00:07:45 2019 +0200
2.3 @@ -21,7 +21,6 @@
2.4
2.5 from moinformat.links.common import Link, Linker, resolve
2.6 from urllib import quote, quote_plus
2.7 -from urlparse import urlparse
2.8
2.9 class HTMLLinker(Linker):
2.10
2.11 @@ -45,13 +44,6 @@
2.12 levels = pagename.count("/") + 1
2.13 return "/".join([".."] * levels)
2.14
2.15 - def is_url(self, target):
2.16 -
2.17 - "Return whether the 'target' references a URL."
2.18 -
2.19 - scheme, host, path, params, query, fragment = urlparse(target)
2.20 - return scheme and target or None
2.21 -
2.22 def normalise(self, path):
2.23
2.24 "Return a normalised form of 'path'."
2.25 @@ -61,51 +53,51 @@
2.26 def translate(self, target):
2.27
2.28 """
2.29 - Translate the 'target', returning a tuple containing the rewritten
2.30 - target string and a suitable default label.
2.31 + Translate the 'target', returning a link object containing the rewritten
2.32 + target and a suitable default label.
2.33 """
2.34
2.35 - target = target.rstrip("/")
2.36 + identifier = target.get_identifier()
2.37 + text = target.get_text()
2.38 + type = target.get_type()
2.39
2.40 - # Fragments. Remove the leading hash for the label.
2.41 + # Fragments.
2.42
2.43 - if target.startswith("#"):
2.44 - return Link(self.quote(target), target.lstrip("#"), "fragment")
2.45 + if type == "fragment":
2.46 + return Link(self.quote(text), identifier, target)
2.47
2.48 # Sub-pages. Remove the leading slash for the label.
2.49
2.50 - if target.startswith("/"):
2.51 - return Link(self.translate_pagename(target), target.lstrip("/"), "page")
2.52 + if type == "sub-page":
2.53 + return Link(self.translate_pagename(text), identifier, target)
2.54
2.55 # Sibling (of ancestor) pages.
2.56
2.57 - if target.startswith("../"):
2.58 - return Link(self.translate_pagename(target), None, "page")
2.59 -
2.60 - # Attachment or interwiki link.
2.61 -
2.62 - rewritten = self.translate_qualified_link(target)
2.63 - if rewritten:
2.64 - return rewritten # includes label
2.65 + if type == "sibling-page":
2.66 + return Link(self.translate_pagename(text), identifier, target)
2.67
2.68 # Plain URL.
2.69
2.70 - rewritten = self.is_url(target)
2.71 - if rewritten:
2.72 - return Link(rewritten, None, "url")
2.73 + if type == "url":
2.74 + return Link(text, identifier, target)
2.75
2.76 # Top-level pages.
2.77
2.78 - return Link(self.translate_pagename(target), None, "page")
2.79 + if type == "page":
2.80 + return Link(self.translate_pagename(text), identifier, target)
2.81 +
2.82 + # Attachment or interwiki link.
2.83
2.84 - def translate_pagename(self, target):
2.85 +        return self.translate_qualified_link(target) or Link(text, identifier, target)
2.86
2.87 - "Translate the pagename in 'target'."
2.88 + def translate_pagename(self, text):
2.89 +
2.90 + "Translate the pagename in 'text'."
2.91
2.92 # Obtain the target pagename and the fragment.
2.93 # Split the pagename into path components.
2.94
2.95 - t = target.split("#", 1)
2.96 + t = text.split("#", 1)
2.97
2.98 # Determine the actual pagename referenced.
2.99 # Replace the root pagename if it appears.
2.100 @@ -136,22 +128,20 @@
2.101 Return None if the link is not suitable.
2.102 """
2.103
2.104 - t = target.split(":", 1)
2.105 - if len(t) != 2:
2.106 - return None
2.107 -
2.108 - prefix, target = t
2.109 + identifier = target.get_identifier()
2.110 + text = target.get_text()
2.111 + type = target.get_type()
2.112
2.113 # Attachment links.
2.114
2.115 - if prefix == "attachment":
2.116 - return Link(self.translate_attachment(target), target, "attachment")
2.117 + if type == "attachment":
2.118 + return Link(self.translate_attachment(identifier), identifier, target)
2.119
2.120 # Interwiki links.
2.121
2.122 - url = self.mapping.get(prefix)
2.123 + url = self.mapping.get(type)
2.124 if url:
2.125 - return Link(self.translate_interwiki(url, target), target, "interwiki")
2.126 + return Link(self.translate_interwiki(url, identifier), identifier, target)
2.127
2.128 return None
2.129
3.1 --- a/moinformat/parsers/moin.py Sat Apr 13 00:04:21 2019 +0200
3.2 +++ b/moinformat/parsers/moin.py Sat Apr 13 00:07:45 2019 +0200
3.3 @@ -44,6 +44,10 @@
3.4 TableCell, TableRow, Text, Transclusion, \
3.5 Underline, Verbatim
3.6
3.7 +# Link parsing.
3.8 +
3.9 +from moinformat.utils.links import parse_link_target
3.10 +
3.11 join = "".join
3.12
3.13 class MoinParser(ParserBase):
3.14 @@ -69,6 +73,10 @@
3.15
3.16 self.headings = []
3.17
3.18 + # Record link targets for resource identification.
3.19 +
3.20 + self.link_targets = []
3.21 +
3.22 # Principal parser methods.
3.23
3.24 def parse(self, s):
3.25 @@ -565,7 +573,13 @@
3.26 target = self.match_group("target")
3.27 end = self.match_group("end")
3.28
3.29 - span = cls([], target)
3.30 + # Obtain an object for the link target.
3.31 +
3.32 + link_target = parse_link_target(target, self.metadata)
3.33 +
3.34 + # Obtain an object for the node.
3.35 +
3.36 + span = cls([], link_target)
3.37
3.38 # Obtain the extra details.
3.39
3.40 @@ -586,6 +600,10 @@
3.41
3.42 region.append_inline(span)
3.43
3.44 + # Record the link target for later processing.
3.45 +
3.46 + self.root.link_targets.append(link_target)
3.47 +
3.48 def parse_link(self, region):
3.49 self._parse_link(region, Link, self.link_pattern_names)
3.50
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/moinformat/utils/links.py Sat Apr 13 00:07:45 2019 +0200
4.3 @@ -0,0 +1,121 @@
4.4 +#!/usr/bin/env python
4.5 +
4.6 +"""
4.7 +Link target parsing.
4.8 +
4.9 +Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>
4.10 +
4.11 +This program is free software; you can redistribute it and/or modify it under
4.12 +the terms of the GNU General Public License as published by the Free Software
4.13 +Foundation; either version 3 of the License, or (at your option) any later
4.14 +version.
4.15 +
4.16 +This program is distributed in the hope that it will be useful, but WITHOUT
4.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
4.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
4.19 +details.
4.20 +
4.21 +You should have received a copy of the GNU General Public License along with
4.22 +this program. If not, see <http://www.gnu.org/licenses/>.
4.23 +"""
4.24 +
4.25 +from urlparse import urlparse
4.26 +
4.27 +class LinkTarget:
4.28 +
4.29 + "A link target abstraction."
4.30 +
4.31 + def __init__(self, type, text, identifier=None):
4.32 +
4.33 +        "Initialise the link target with the given 'type', 'text' and 'identifier'."
4.34 +
4.35 + self.type = type
4.36 + self.text = text
4.37 + self.identifier = identifier
4.38 +
4.39 + def __repr__(self):
4.40 + return "LinkTarget(%r, %r, %r)" % (self.type, self.text, self.identifier)
4.41 +
4.42 + def __str__(self):
4.43 + return self.text
4.44 +
4.45 + __unicode__ = __str__
4.46 +
4.47 + def get_identifier(self):
4.48 + return self.identifier or self.text
4.49 +
4.50 + def get_text(self):
4.51 + return self.text
4.52 +
4.53 + def get_type(self):
4.54 + return self.type
4.55 +
4.56 +# Parsing and recognition functions.
4.57 +
4.58 +def is_url(target):
4.59 +
4.60 +    "Return 'target' if it references a URL, or None otherwise."
4.61 +
4.62 + scheme, host, path, params, query, fragment = urlparse(target)
4.63 +    return target if scheme else None
4.64 +
4.65 +def parse_link_target(target, metadata=None):
4.66 +
4.67 + """
4.68 + Parse a link 'target', returning a link target object. Use any 'metadata'
4.69 + to identify certain link types.
4.70 + """
4.71 +
4.72 + # Fragments.
4.73 +
4.74 + if target.startswith("#"):
4.75 + return LinkTarget("fragment", target, target.lstrip("#"))
4.76 +
4.77 + # Sub-pages.
4.78 +
4.79 + if target.startswith("/"):
4.80 + return LinkTarget("sub-page", target, target.lstrip("/").rstrip("/"))
4.81 +
4.82 + # Sibling (of ancestor) pages.
4.83 +
4.84 + if target.startswith("../"):
4.85 + return LinkTarget("sibling-page", target, target.rstrip("/"))
4.86 +
4.87 + # Attachment or interwiki link.
4.88 +
4.89 + result = parse_qualified_link_target(target, metadata)
4.90 + if result:
4.91 + return result
4.92 +
4.93 + # Plain URL.
4.94 +
4.95 + if is_url(target):
4.96 + return LinkTarget("url", target)
4.97 +
4.98 + # Top-level pages.
4.99 +
4.100 + return LinkTarget("page", target)
4.101 +
4.102 +def parse_qualified_link_target(target, metadata=None):
4.103 +
4.104 + """
4.105 + Parse a possible qualified link 'target', returning a link target object or
4.106 + None if the target is not suitable. Use any 'metadata' to identify certain
4.107 + link types.
4.108 + """
4.109 +
4.110 + t = target.split(":", 1)
4.111 +
4.112 + if len(t) != 2:
4.113 + return None
4.114 +
4.115 + prefix, identifier = t
4.116 +
4.117 + mapping = metadata and metadata.get("mapping")
4.118 +
4.119 +    if prefix == "attachment" or (mapping and mapping.get(prefix)):
4.120 + return LinkTarget(prefix, target, identifier)
4.121 +
4.122 + return None
4.123 +
4.124 +# vim: tabstop=4 expandtab shiftwidth=4