paul@222 | 1 | #!/usr/bin/env python |
paul@222 | 2 | |
paul@222 | 3 | """ |
paul@222 | 4 | Link target parsing. |
paul@222 | 5 | |
paul@222 | 6 | Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk> |
paul@222 | 7 | |
paul@222 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@222 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@222 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@222 | 11 | version. |
paul@222 | 12 | |
paul@222 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@222 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@222 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@222 | 16 | details. |
paul@222 | 17 | |
paul@222 | 18 | You should have received a copy of the GNU General Public License along with |
paul@222 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@222 | 20 | """ |
paul@222 | 21 | |
paul@222 | 22 | from urlparse import urlparse |
paul@222 | 23 | |
class LinkTarget:

    """
    A parsed link target: a link type, the original target text and an
    optional identifier extracted from that text.
    """

    def __init__(self, type, text, identifier=None):

        """
        Record the link 'type', the target 'text' as written, and any
        'identifier' derived from the text (None if there is none).
        """

        self.identifier = identifier
        self.text = text
        self.type = type

    # Accessors.

    def get_type(self):

        "Return the link type given at initialisation."

        return self.type

    def get_text(self):

        "Return the target text given at initialisation."

        return self.text

    def get_identifier(self):

        """
        Return the identifier, or the target text where no identifier was
        given. An empty identifier is still returned as-is, since only None
        triggers the fallback.
        """

        if self.identifier is None:
            return self.text
        return self.identifier

    # String representations.

    def __repr__(self):
        details = (self.type, self.text, self.identifier)
        return "LinkTarget(%s)" % ", ".join(map(repr, details))

    def __str__(self):
        return self.text

    # The Unicode conversion yields the same text as the plain conversion.

    __unicode__ = __str__
paul@222 | 55 | |
paul@222 | 56 | # Parsing and recognition functions. |
paul@222 | 57 | |
def is_url(target):

    """
    Return the 'target' itself if it has a URL scheme, or None otherwise.
    The result is therefore usable as a truth value indicating whether the
    'target' references a URL.
    """

    # Only the scheme component of the parsed result matters here.

    scheme = urlparse(target)[0]
    return target if scheme else None
paul@222 | 64 | |
def parse_link_target(target, metadata=None):

    """
    Classify the link 'target' and return a link target object describing it.
    Any 'metadata' is passed on to the qualified link parsing in order to
    identify certain link types.

    The checks are made in this order: fragment ("#..."), sub-page ("/..."),
    sibling page ("../..."), qualified (attachment or mapped prefix) link,
    plain URL and, failing all of these, a top-level page.
    """

    # Links identified by their leading characters. The identifier is the
    # target with the distinguishing punctuation removed.

    if target.startswith("#"):
        link_type = "fragment"
        identifier = target.lstrip("#")

    elif target.startswith("/"):
        link_type = "sub-page"
        identifier = target.strip("/")

    # Sibling (of ancestor) pages keep their leading "../" in the identifier,
    # losing only trailing slashes.

    elif target.startswith("../"):
        link_type = "sibling-page"
        identifier = target.rstrip("/")

    else:

        # Attachment or interwiki link.

        qualified = parse_qualified_link_target(target, metadata)
        if qualified:
            return qualified

        # Plain URL or, by default, a top-level page. Neither has a separate
        # identifier.

        link_type = "url" if is_url(target) else "page"
        identifier = None

    return LinkTarget(link_type, target, identifier)
paul@222 | 101 | |
def parse_qualified_link_target(target, metadata=None):

    """
    Interpret 'target' as a "prefix:identifier" link, returning a link target
    object whose type is the prefix, or None if the target is not suitable.

    A target is suitable if it contains a colon and its prefix is either
    "attachment" or has a true-valued entry in the "mapping" obtained from
    any 'metadata'.
    """

    # Divide the target at the first colon, rejecting targets without one.

    prefix, separator, identifier = target.partition(":")

    if not separator:
        return None

    # Consult any mapping of recognised prefixes (presumably interwiki
    # names - confirm against the code supplying the metadata).

    mapping = metadata and metadata.get("mapping")

    recognised = prefix == "attachment" or (mapping and mapping.get(prefix))

    if not recognised:
        return None

    return LinkTarget(prefix, target, identifier)
paul@222 | 123 | |
paul@222 | 124 | # vim: tabstop=4 expandtab shiftwidth=4 |