1.1 --- a/moinformat/links/common.py Sat Apr 13 00:04:21 2019 +0200
1.2 +++ b/moinformat/links/common.py Sat Apr 13 00:07:45 2019 +0200
1.3 @@ -23,13 +23,19 @@
1.4
1.5 "A link abstraction."
1.6
1.7 - def __init__(self, target, label, type):
1.8 + def __init__(self, target, label, link_target=None):
1.9
1.10 - "Initialise the link with the given 'target', 'label' and 'type'."
1.11 + """
1.12 +        Initialise the link with the given 'target', 'label' and
1.13 + 'link_target' object.
1.14 + """
1.15
1.16 self.target = target
1.17 self.label = label
1.18 - self.type = type
1.19 + self.link_target = link_target
1.20 +
1.21 + def __repr__(self):
1.22 + return "Link(%r, %r, %r)" % (self.target, self.label, self.link_target)
1.23
1.24 def get_target(self):
1.25 return self.target
1.26 @@ -37,8 +43,8 @@
1.27 def get_label(self):
1.28 return self.label or self.target
1.29
1.30 - def get_type(self):
1.31 - return self.type
1.32 + def get_link_target(self):
1.33 + return self.link_target
1.34
1.35 class Linker:
1.36
2.1 --- a/moinformat/links/html.py Sat Apr 13 00:04:21 2019 +0200
2.2 +++ b/moinformat/links/html.py Sat Apr 13 00:07:45 2019 +0200
2.3 @@ -21,7 +21,6 @@
2.4
2.5 from moinformat.links.common import Link, Linker, resolve
2.6 from urllib import quote, quote_plus
2.7 -from urlparse import urlparse
2.8
2.9 class HTMLLinker(Linker):
2.10
2.11 @@ -45,13 +44,6 @@
2.12 levels = pagename.count("/") + 1
2.13 return "/".join([".."] * levels)
2.14
2.15 - def is_url(self, target):
2.16 -
2.17 - "Return whether the 'target' references a URL."
2.18 -
2.19 - scheme, host, path, params, query, fragment = urlparse(target)
2.20 - return scheme and target or None
2.21 -
2.22 def normalise(self, path):
2.23
2.24 "Return a normalised form of 'path'."
2.25 @@ -61,51 +53,51 @@
2.26 def translate(self, target):
2.27
2.28 """
2.29 - Translate the 'target', returning a tuple containing the rewritten
2.30 - target string and a suitable default label.
2.31 + Translate the 'target', returning a link object containing the rewritten
2.32 + target and a suitable default label.
2.33 """
2.34
2.35 - target = target.rstrip("/")
2.36 + identifier = target.get_identifier()
2.37 + text = target.get_text()
2.38 + type = target.get_type()
2.39
2.40 - # Fragments. Remove the leading hash for the label.
2.41 + # Fragments.
2.42
2.43 - if target.startswith("#"):
2.44 - return Link(self.quote(target), target.lstrip("#"), "fragment")
2.45 + if type == "fragment":
2.46 + return Link(self.quote(text), identifier, target)
2.47
2.48 # Sub-pages. Remove the leading slash for the label.
2.49
2.50 - if target.startswith("/"):
2.51 - return Link(self.translate_pagename(target), target.lstrip("/"), "page")
2.52 + if type == "sub-page":
2.53 + return Link(self.translate_pagename(text), identifier, target)
2.54
2.55 # Sibling (of ancestor) pages.
2.56
2.57 - if target.startswith("../"):
2.58 - return Link(self.translate_pagename(target), None, "page")
2.59 -
2.60 - # Attachment or interwiki link.
2.61 -
2.62 - rewritten = self.translate_qualified_link(target)
2.63 - if rewritten:
2.64 - return rewritten # includes label
2.65 + if type == "sibling-page":
2.66 + return Link(self.translate_pagename(text), identifier, target)
2.67
2.68 # Plain URL.
2.69
2.70 - rewritten = self.is_url(target)
2.71 - if rewritten:
2.72 - return Link(rewritten, None, "url")
2.73 + if type == "url":
2.74 + return Link(text, identifier, target)
2.75
2.76 # Top-level pages.
2.77
2.78 - return Link(self.translate_pagename(target), None, "page")
2.79 + if type == "page":
2.80 + return Link(self.translate_pagename(text), identifier, target)
2.81 +
2.82 + # Attachment or interwiki link.
2.83
2.84 - def translate_pagename(self, target):
2.85 +        return self.translate_qualified_link(target) or Link(text, identifier, target)
2.86
2.87 - "Translate the pagename in 'target'."
2.88 + def translate_pagename(self, text):
2.89 +
2.90 + "Translate the pagename in 'text'."
2.91
2.92 # Obtain the target pagename and the fragment.
2.93 # Split the pagename into path components.
2.94
2.95 - t = target.split("#", 1)
2.96 + t = text.split("#", 1)
2.97
2.98 # Determine the actual pagename referenced.
2.99 # Replace the root pagename if it appears.
2.100 @@ -136,22 +128,20 @@
2.101 Return None if the link is not suitable.
2.102 """
2.103
2.104 - t = target.split(":", 1)
2.105 - if len(t) != 2:
2.106 - return None
2.107 -
2.108 - prefix, target = t
2.109 + identifier = target.get_identifier()
2.110 + text = target.get_text()
2.111 + type = target.get_type()
2.112
2.113 # Attachment links.
2.114
2.115 - if prefix == "attachment":
2.116 - return Link(self.translate_attachment(target), target, "attachment")
2.117 + if type == "attachment":
2.118 + return Link(self.translate_attachment(identifier), identifier, target)
2.119
2.120 # Interwiki links.
2.121
2.122 - url = self.mapping.get(prefix)
2.123 + url = self.mapping.get(type)
2.124 if url:
2.125 - return Link(self.translate_interwiki(url, target), target, "interwiki")
2.126 + return Link(self.translate_interwiki(url, identifier), identifier, target)
2.127
2.128 return None
2.129
3.1 --- a/moinformat/parsers/moin.py Sat Apr 13 00:04:21 2019 +0200
3.2 +++ b/moinformat/parsers/moin.py Sat Apr 13 00:07:45 2019 +0200
3.3 @@ -44,6 +44,10 @@
3.4 TableCell, TableRow, Text, Transclusion, \
3.5 Underline, Verbatim
3.6
3.7 +# Link parsing.
3.8 +
3.9 +from moinformat.utils.links import parse_link_target
3.10 +
3.11 join = "".join
3.12
3.13 class MoinParser(ParserBase):
3.14 @@ -69,6 +73,10 @@
3.15
3.16 self.headings = []
3.17
3.18 + # Record link targets for resource identification.
3.19 +
3.20 + self.link_targets = []
3.21 +
3.22 # Principal parser methods.
3.23
3.24 def parse(self, s):
3.25 @@ -565,7 +573,13 @@
3.26 target = self.match_group("target")
3.27 end = self.match_group("end")
3.28
3.29 - span = cls([], target)
3.30 + # Obtain an object for the link target.
3.31 +
3.32 + link_target = parse_link_target(target, self.metadata)
3.33 +
3.34 + # Obtain an object for the node.
3.35 +
3.36 + span = cls([], link_target)
3.37
3.38 # Obtain the extra details.
3.39
3.40 @@ -586,6 +600,10 @@
3.41
3.42 region.append_inline(span)
3.43
3.44 + # Record the link target for later processing.
3.45 +
3.46 + self.root.link_targets.append(link_target)
3.47 +
3.48 def parse_link(self, region):
3.49 self._parse_link(region, Link, self.link_pattern_names)
3.50
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/moinformat/utils/links.py Sat Apr 13 00:07:45 2019 +0200
4.3 @@ -0,0 +1,121 @@
4.4 +#!/usr/bin/env python
4.5 +
4.6 +"""
4.7 +Link target parsing.
4.8 +
4.9 +Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>
4.10 +
4.11 +This program is free software; you can redistribute it and/or modify it under
4.12 +the terms of the GNU General Public License as published by the Free Software
4.13 +Foundation; either version 3 of the License, or (at your option) any later
4.14 +version.
4.15 +
4.16 +This program is distributed in the hope that it will be useful, but WITHOUT
4.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
4.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
4.19 +details.
4.20 +
4.21 +You should have received a copy of the GNU General Public License along with
4.22 +this program. If not, see <http://www.gnu.org/licenses/>.
4.23 +"""
4.24 +
4.25 +from urlparse import urlparse
4.26 +
4.27 +class LinkTarget:
4.28 +
4.29 + "A link target abstraction."
4.30 +
4.31 + def __init__(self, type, text, identifier=None):
4.32 +
4.33 +        "Initialise the link target with the given 'type', 'text' and 'identifier'."
4.34 +
4.35 + self.type = type
4.36 + self.text = text
4.37 + self.identifier = identifier
4.38 +
4.39 + def __repr__(self):
4.40 + return "LinkTarget(%r, %r, %r)" % (self.type, self.text, self.identifier)
4.41 +
4.42 + def __str__(self):
4.43 + return self.text
4.44 +
4.45 + __unicode__ = __str__
4.46 +
4.47 + def get_identifier(self):
4.48 + return self.identifier or self.text
4.49 +
4.50 + def get_text(self):
4.51 + return self.text
4.52 +
4.53 + def get_type(self):
4.54 + return self.type
4.55 +
4.56 +# Parsing and recognition functions.
4.57 +
4.58 +def is_url(target):
4.59 +
4.60 +    "Return 'target' if it references a URL, or None otherwise."
4.61 +
4.62 + scheme, host, path, params, query, fragment = urlparse(target)
4.63 +    return target if scheme else None
4.64 +
4.65 +def parse_link_target(target, metadata=None):
4.66 +
4.67 + """
4.68 + Parse a link 'target', returning a link target object. Use any 'metadata'
4.69 + to identify certain link types.
4.70 + """
4.71 +
4.72 + # Fragments.
4.73 +
4.74 + if target.startswith("#"):
4.75 + return LinkTarget("fragment", target, target.lstrip("#"))
4.76 +
4.77 + # Sub-pages.
4.78 +
4.79 + if target.startswith("/"):
4.80 + return LinkTarget("sub-page", target, target.lstrip("/").rstrip("/"))
4.81 +
4.82 + # Sibling (of ancestor) pages.
4.83 +
4.84 + if target.startswith("../"):
4.85 + return LinkTarget("sibling-page", target, target.rstrip("/"))
4.86 +
4.87 + # Attachment or interwiki link.
4.88 +
4.89 + result = parse_qualified_link_target(target, metadata)
4.90 + if result:
4.91 + return result
4.92 +
4.93 + # Plain URL.
4.94 +
4.95 + if is_url(target):
4.96 + return LinkTarget("url", target)
4.97 +
4.98 + # Top-level pages.
4.99 +
4.100 + return LinkTarget("page", target)
4.101 +
4.102 +def parse_qualified_link_target(target, metadata=None):
4.103 +
4.104 + """
4.105 + Parse a possible qualified link 'target', returning a link target object or
4.106 + None if the target is not suitable. Use any 'metadata' to identify certain
4.107 + link types.
4.108 + """
4.109 +
4.110 + t = target.split(":", 1)
4.111 +
4.112 + if len(t) != 2:
4.113 + return None
4.114 +
4.115 + prefix, identifier = t
4.116 +
4.117 + mapping = metadata and metadata.get("mapping")
4.118 +
4.119 +    if prefix == "attachment" or (mapping and mapping.get(prefix)):
4.120 + return LinkTarget(prefix, target, identifier)
4.121 +
4.122 + return None
4.123 +
4.124 +# vim: tabstop=4 expandtab shiftwidth=4