1.1 --- a/moinconvert Tue Oct 26 01:32:32 2021 +0200
1.2 +++ b/moinconvert Fri Jun 03 18:42:46 2022 +0200
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 Moin wiki format converter.
1.6
1.7 -Copyright (C) 2018, 2019 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -96,6 +96,7 @@
1.13 mappings = []
1.14 output_dirs = []
1.15 output_encodings = []
1.16 + output_page_seps = []
1.17 pagenames = []
1.18 root_pagenames = []
1.19 theme_names = []
1.20 @@ -215,6 +216,12 @@
1.21 l = output_encodings
1.22 continue
1.23
1.24 + # Switch to collecting output page hierarchy separators.
1.25 +
1.26 + elif arg == "--output-page-sep":
1.27 + l = output_page_seps
1.28 + continue
1.29 +
1.30 # Switch to collecting page names.
1.31
1.32 elif arg == "--pagename":
1.33 @@ -271,6 +278,7 @@
1.34 "output_encoding" : getvalue(output_encodings),
1.35 "output_format" : format,
1.36 "output_filename" : output_dir,
1.37 + "output_separator" : getvalue(output_page_seps),
1.38 "root_pagename" : getvalue(root_pagenames, "FrontPage"),
1.39 "theme_name" : not fragment and \
1.40 "%s.%s" % (getvalue(theme_names, "default"), format) or None,
1.41 @@ -407,6 +415,8 @@
1.42 files
1.43 --output-encoding Indicate the character encoding used in serialised document
1.44 files
1.45 +--output-page-sep Indicate the separator used in filenames to encode
1.46 + hierarchical relationships (subpages and descendant pages)
1.47 --theme Indicate a theme for serialised documents, typically
1.48 requiring an output directory to be useful
1.49 --tree Produce a document tree representation on standard output
2.1 --- a/moinformat/output/directory.py Tue Oct 26 01:32:32 2021 +0200
2.2 +++ b/moinformat/output/directory.py Fri Jun 03 18:42:46 2022 +0200
2.3 @@ -21,7 +21,7 @@
2.4
2.5 from moinformat.output.common import Output
2.6 from moinformat.utils.directory import Directory
2.7 -from os.path import extsep, join
2.8 +from os.path import extsep, join, sep
2.9
2.10 class DirectoryOutput(Output):
2.11
2.12 @@ -40,15 +40,32 @@
2.13 self.dir = Directory(metadata.get("output_filename"))
2.14 self.dir.ensure()
2.15
2.16 + # Support an encoding of the level separator for the filesystem.
2.17 + # Where it is the same as the directory separator, documents are stored
2.18 + # using nested directories, not as a flat list.
2.19 +
2.20 + self.level_sep = metadata.get("output_separator", sep)
2.21 +
2.22 # Use any document index setting as the default for the index filename.
2.23
2.24 document_index = metadata.get("document_index", "index.html")
2.25 -
2.26 self.index_name = metadata.get("index_name", document_index)
2.27 - self.page_suffix = metadata.get("page_suffix", "%shtml" % extsep)
2.28 self.root_pagename = metadata.get("root_pagename")
2.29 self.attachments_dir = metadata.get("attachments")
2.30
2.31 + # Support a common attachments directory.
2.32 +
2.33 + self.common_attachments = metadata.get("common_attachments")
2.34 +
2.35 + def _get_attachments_dir(self, pagename):
2.36 +
2.37 + "Return the attachments directory for 'pagename'."
2.38 +
2.39 + if self.common_attachments:
2.40 + return join(self.dir.filename, self.attachments_dir)
2.41 + else:
2.42 + return join(self.dir.filename, self.attachments_dir, pagename)
2.43 +
2.44 # Convenience methods.
2.45
2.46 def ensure(self, pagename):
2.47 @@ -67,7 +84,7 @@
2.48 if not pagename:
2.49 return None
2.50
2.51 - self.dir.ensure(join(self.to_filename(pagename), self.attachments_dir))
2.52 + self.dir.ensure(self._get_attachments_dir(pagename))
2.53
2.54 def get_attachment_filename(self, pagename, filename):
2.55
2.56 @@ -79,8 +96,7 @@
2.57 if not pagename:
2.58 return None
2.59
2.60 - return self.dir.get_filename(join(self.to_filename(pagename),
2.61 - self.attachments_dir, filename))
2.62 + return self.dir.get_filename(join(self._get_attachments_dir(pagename), filename))
2.63
2.64 def get_filename(self, filename):
2.65
2.66 @@ -97,18 +113,35 @@
2.67
2.68 "Return the filename corresponding to 'pagename'."
2.69
2.70 - # For the root page, use the top-level directory.
2.71 + # Encode hierarchical filenames.
2.72 +
2.73 + if self.level_sep == sep:
2.74 +
2.75 + # For the root page, use the top-level directory.
2.76
2.77 - if pagename == self.root_pagename:
2.78 - return ""
2.79 + if pagename == self.root_pagename:
2.80 + return ""
2.81 + else:
2.82 + return pagename
2.83 +
2.84 + # Encode single-directory filenames.
2.85 +
2.86 else:
2.87 - return pagename
2.88 + return self.level_sep.join(pagename.split("/"))
2.89
2.90 def to_pagename(self, filename):
2.91
2.92 "Return the pagename corresponding to 'filename'."
2.93
2.94 - return self.within(filename)
2.95 + # Encode pagenames from hierarchical filenames.
2.96 +
2.97 + if self.level_sep == sep:
2.98 + return self.within(filename)
2.99 +
2.100 + # Encode pagenames from single-directory filenames.
2.101 +
2.102 + else:
2.103 + return "/".join(filename.split(self.level_sep))
2.104
2.105 # Serialisation methods.
2.106
2.107 @@ -137,14 +170,20 @@
2.108
2.109 filename = self.to_filename(pagename)
2.110
2.111 - # Make a directory for the page.
2.112 + # For hierarchical storage, store the page inside a directory bearing
2.113 + # its name.
2.114 +
2.115 + if self.level_sep == sep:
2.116 +
2.117 + # Make a directory for the page.
2.118
2.119 - if not self.dir.exists(filename):
2.120 - self.dir.makedirs(filename)
2.121 + if not self.dir.exists(filename):
2.122 + self.dir.makedirs(filename)
2.123
2.124 - # Write to an index filename within any existing directory.
2.125 + # Write to an index filename within any existing directory.
2.126
2.127 - filename = join(filename, self.index_name)
2.128 + filename = join(filename, self.index_name)
2.129 +
2.130 self.writefile(text, filename, encoding)
2.131
2.132 output = DirectoryOutput