MoinLight

Annotated moinformat/utils/links.py

248:0eb11fcba6d0
2019-04-15 Paul Boddie Attempted to fix interwiki links without page identifiers.
paul@222 1
#!/usr/bin/env python
paul@222 2
paul@222 3
"""
paul@222 4
Link target parsing.
paul@222 5
paul@222 6
Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>
paul@222 7
paul@222 8
This program is free software; you can redistribute it and/or modify it under
paul@222 9
the terms of the GNU General Public License as published by the Free Software
paul@222 10
Foundation; either version 3 of the License, or (at your option) any later
paul@222 11
version.
paul@222 12
paul@222 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@222 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@222 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@222 16
details.
paul@222 17
paul@222 18
You should have received a copy of the GNU General Public License along with
paul@222 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@222 20
"""
paul@222 21
paul@222 22
from urlparse import urlparse
paul@222 23
paul@222 24
class LinkTarget:

    """
    A parsed link target, pairing the original link text with a link type and
    an optional identifier extracted from that text.
    """

    def __init__(self, type, text, identifier=None):

        """
        Record the link 'type', the original link 'text' and any 'identifier'
        obtained from the text. Where 'identifier' is None, the text itself is
        used as the identifier.
        """

        self.identifier = identifier
        self.text = text
        self.type = type

    def __repr__(self):

        "Return a constructor-like representation of this target."

        details = (self.type, self.text, self.identifier)
        return "LinkTarget(%r, %r, %r)" % details

    def __str__(self):

        "Return the original link text."

        return self.text

    __unicode__ = __str__

    def get_identifier(self):

        """
        Return the explicit identifier, falling back to the link text only
        when no identifier was given. An empty identifier is returned as it
        is and does not cause the text to be used instead.
        """

        return self.text if self.identifier is None else self.identifier

    def get_text(self):

        "Return the original link text."

        return self.text

    def get_type(self):

        "Return the link type."

        return self.type
paul@222 55
paul@222 56
# Parsing and recognition functions.
paul@222 57
paul@222 58
def is_url(target):

    """
    Return the 'target' itself if it references a URL, or None otherwise. A
    target is regarded as a URL when URL parsing yields a non-empty scheme.
    """

    # Only the scheme (the first element of the parse result) is of interest.

    scheme = urlparse(target)[0]

    # An explicit conditional avoids the "x and y or z" idiom, which yields
    # the wrong branch whenever the middle operand is false.

    if scheme:
        return target
    return None
paul@222 64
paul@222 65
def parse_link_target(target, metadata=None):

    """
    Classify the given link 'target' and return a link target object
    describing it. Any 'metadata' is passed on when recognising qualified
    (attachment or interwiki) targets.

    The target is tested, in order, as a fragment, a sub-page, a sibling page,
    a qualified target and a URL. Anything else is treated as a top-level
    page.
    """

    # Fragments: the identifier omits the leading "#" characters.

    if target.startswith("#"):
        anchor = target.lstrip("#")
        return LinkTarget("fragment", target, anchor)

    # Sub-pages: the identifier omits leading and trailing "/" characters.

    if target.startswith("/"):
        name = target.strip("/")
        return LinkTarget("sub-page", target, name)

    # Sibling (of ancestor) pages: only trailing "/" characters are removed.

    if target.startswith("../"):
        name = target.rstrip("/")
        return LinkTarget("sibling-page", target, name)

    # Attachment or interwiki link.

    qualified = parse_qualified_link_target(target, metadata)

    if qualified:
        return qualified

    # Plain URL, or failing that, a top-level page. Neither has a separate
    # identifier.

    link_type = "url" if is_url(target) else "page"
    return LinkTarget(link_type, target)
paul@222 101
paul@222 102
def parse_qualified_link_target(target, metadata=None):

    """
    Attempt to interpret 'target' as a qualified link of the form
    "prefix:identifier", returning a link target object whose type is the
    prefix, or None if the target is not recognised as such.

    A prefix is recognised if it is "attachment" or if the "mapping" entry of
    any 'metadata' provides a true value for it.
    """

    # Divide the target at the first colon only, leaving any later colons in
    # the identifier.

    prefix, colon, identifier = target.partition(":")

    if not colon:
        return None

    mapping = metadata.get("mapping") if metadata else None

    # Reject prefixes that are neither the attachment prefix nor known to the
    # mapping. The mapping is only consulted for non-attachment prefixes.

    if prefix != "attachment" and not (mapping and mapping.get(prefix)):
        return None

    return LinkTarget(prefix, target, identifier)
paul@222 123
paul@222 124
# vim: tabstop=4 expandtab shiftwidth=4