MoinLight

Annotated moinformat/utils/links.py

222:90e7ce5b6cef
2019-04-13 Paul Boddie Separated link target parsing from translation; added a link target abstraction. Changed HTML link translation to work with this new abstraction. Added aggregation of link targets in the parser for further processing.
paul@222 1
#!/usr/bin/env python
paul@222 2
paul@222 3
"""
paul@222 4
Link target parsing.
paul@222 5
paul@222 6
Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>
paul@222 7
paul@222 8
This program is free software; you can redistribute it and/or modify it under
paul@222 9
the terms of the GNU General Public License as published by the Free Software
paul@222 10
Foundation; either version 3 of the License, or (at your option) any later
paul@222 11
version.
paul@222 12
paul@222 13
This program is distributed in the hope that it will be useful, but WITHOUT
paul@222 14
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@222 15
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@222 16
details.
paul@222 17
paul@222 18
You should have received a copy of the GNU General Public License along with
paul@222 19
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@222 20
"""
paul@222 21
paul@222 22
from urlparse import urlparse
paul@222 23
paul@222 24
class LinkTarget(object):

    """
    A link target abstraction, recording the type of link, the target text as
    originally given, and an optional identifier extracted from that text.
    """

    def __init__(self, type, text, identifier=None):

        """
        Initialise the link with the given 'type', 'text' and 'identifier'.
        Where 'identifier' is omitted, get_identifier falls back to 'text'.
        """

        self.type = type
        self.text = text
        self.identifier = identifier

    def __repr__(self):

        "Return a constructor-like representation of the link target."

        return "LinkTarget(%r, %r, %r)" % (self.type, self.text, self.identifier)

    def __str__(self):

        "Return the text of the link target."

        return self.text

    # Python 2 consults __unicode__ for unicode(obj); it yields the same text.

    __unicode__ = __str__

    def get_identifier(self):

        """
        Return the identifier or, where it is None or empty, the target text.
        """

        return self.identifier or self.text

    def get_text(self):

        "Return the text of the link target."

        return self.text

    def get_type(self):

        "Return the type of the link target."

        return self.type
paul@222 52
paul@222 53
# Parsing and recognition functions.
paul@222 54
paul@222 55
def is_url(target):

    """
    Return the 'target' itself if it references a URL, this being the case
    when it parses with a scheme (such as "http" or "mailto"), or None
    otherwise.
    """

    # Only the scheme is of interest: targets such as plain page names and
    # relative paths parse without one.

    return target if urlparse(target).scheme else None
paul@222 61
paul@222 62
def parse_link_target(target, metadata=None):

    """
    Parse a link 'target', returning a link target object. Use any 'metadata'
    to identify certain link types.

    The type of the returned object is "fragment", "sub-page", "sibling-page",
    "url" or "page", or it is the prefix of a qualified target accepted by
    parse_qualified_link_target.
    """

    # Fragments: the identifier omits all leading "#" characters.

    if target.startswith("#"):
        return LinkTarget("fragment", target, target.lstrip("#"))

    # Sub-pages: the identifier omits leading and trailing "/" characters.

    if target.startswith("/"):
        return LinkTarget("sub-page", target, target.strip("/"))

    # Sibling (of ancestor) pages: the identifier keeps the leading ".."
    # components and only omits trailing "/" characters.

    if target.startswith("../"):
        return LinkTarget("sibling-page", target, target.rstrip("/"))

    # Attachment or interwiki link. These are tested before plain URLs since
    # a "prefix:identifier" target may also parse as having a URL scheme.

    result = parse_qualified_link_target(target, metadata)
    if result:
        return result

    # Plain URL: no separate identifier is recorded.

    if is_url(target):
        return LinkTarget("url", target)

    # Top-level pages: anything not recognised above.

    return LinkTarget("page", target)
paul@222 98
paul@222 99
def parse_qualified_link_target(target, metadata=None):

    """
    Parse a possible qualified link 'target' of the form "prefix:identifier",
    returning a link target object or None if the target is not suitable. Use
    any 'metadata' to identify certain link types.

    A target is suitable if its prefix is "attachment" or if the "mapping"
    entry of 'metadata' provides a true value for the prefix. The returned
    object has the prefix as its type.
    """

    # Split at the first colon only: the identifier may contain more colons.

    prefix, colon, identifier = target.partition(":")

    if not colon:
        return None

    # Without metadata or a mapping, only attachments are recognised.
    # NOTE(review): the mapping presumably describes interwiki prefixes --
    # confirm against the code supplying the metadata.

    mapping = metadata and metadata.get("mapping")

    if prefix == "attachment" or (mapping and mapping.get(prefix)):
        return LinkTarget(prefix, target, identifier)

    return None
paul@222 120
paul@222 121
# vim: tabstop=4 expandtab shiftwidth=4