1 #!/usr/bin/env python 2 3 """ 4 Moin wiki format converter. 5 6 Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk> 7 8 This program is free software; you can redistribute it and/or modify it under 9 the terms of the GNU General Public License as published by the Free Software 10 Foundation; either version 3 of the License, or (at your option) any later 11 version. 12 13 This program is distributed in the hope that it will be useful, but WITHOUT 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 16 details. 17 18 You should have received a copy of the GNU General Public License along with 19 this program. If not, see <http://www.gnu.org/licenses/>. 20 """ 21 22 from moinformat import copy_attachments, errors, make_parser, make_serialiser, \ 23 Metadata, parse, serialise 24 from os.path import split 25 import sys 26 27 # Long messages. 28 29 message_all_with_filenames = """\ 30 Using --all overrides any indicated pagenames. Either --all or the filenames 31 should be omitted.""" 32 33 message_explicit_pagenames = """\ 34 Explicit pagenames (indicated using --pagename) are only to be specified when 35 providing filenames without an input directory (indicated using --input-dir). 36 37 To indicate pagenames within an input directory, omit any --pagename flags.""" 38 39 message_tree_format_usage = """\ 40 The --tree and --format options cannot be used together since the --tree option 41 indicates use of the "pretty" format.""" 42 43 44 45 # Options management. 46 47 def getmapping(mappings): 48 49 """ 50 Return the given 'mappings' - a collection of key-then-value items - as a 51 dictionary. 52 """ 53 54 mapping = {} 55 key = None 56 57 for arg in mappings: 58 if key is None: 59 key = arg 60 else: 61 mapping[key] = arg 62 key = None 63 64 return mapping 65 66 def getvalue(values, default=None): 67 68 """ 69 Return the first value from 'values' or 'default' if 'values' is empty or 70 the first value tests as false. 71 """ 72 73 return values and values[0] or default 74 75 76 77 # Main program. 78 79 def main(): 80 81 "Interpret program options and perform the conversion." 82 83 dirname, progname = split(sys.argv[0]) 84 args = sys.argv[1:] 85 86 if "--help" in args: 87 show_help(progname) 88 sys.exit(0) 89 90 # Option values. 91 92 attachments_dir = [] 93 document_indexes = [] 94 filenames = [] 95 formats = [] 96 input_dir_types = [] 97 input_dirs = [] 98 input_encodings = [] 99 input_page_seps = [] 100 mappings = [] 101 output_dirs = [] 102 output_encodings = [] 103 output_page_seps = [] 104 pagenames = [] 105 root_pagenames = [] 106 theme_names = [] 107 108 # Obtain filenames by default. 109 110 l = filenames 111 112 # Flags. 113 114 all = False 115 bundle = False 116 common = False 117 fragment = False 118 macros = False 119 no_inline = False 120 tree = False 121 122 for arg in args: 123 124 # Flags with no following arguments. 125 126 # Detect all documents. 127 128 if arg == "--all": 129 all = True 130 131 # Detect resource bundling. 132 133 elif arg == "--bundle": 134 bundle = True 135 136 # Detect use of a common attachments directory. 137 138 elif arg == "--common": 139 common = True 140 141 # Detect fragment output (if serialising). 142 143 elif arg == "--fragment": 144 fragment = True 145 146 # Detect macro evaluation. 147 148 elif arg == "--macros": 149 macros = True 150 151 # Detect suppression of inline objects. 152 153 elif arg == "--no-inline": 154 no_inline = True 155 156 # Detect tree output. 157 158 elif arg == "--tree": 159 if formats: 160 print >>sys.stderr, message_tree_format_usage 161 sys.exit(1) 162 tree = True 163 164 # Options with following arguments. 165 166 # Switch to document index. 167 168 elif arg == "--attachments-dir": 169 l = attachments_dir 170 continue 171 172 # Switch to document index. 173 174 elif arg == "--document-index": 175 l = document_indexes 176 continue 177 178 # Switch to collecting formats. 179 180 elif arg == "--format": 181 if tree: 182 print >>sys.stderr, message_tree_format_usage 183 sys.exit(1) 184 l = formats 185 continue 186 187 # Switch to collecting input locations. 188 189 elif arg == "--input-dir": 190 l = input_dirs 191 continue 192 193 # Switch to collecting input context types. 194 195 elif arg == "--input-dir-type": 196 l = input_dir_types 197 continue 198 199 # Switch to collecting input encodings. 200 201 elif arg == "--input-encoding": 202 l = input_encodings 203 continue 204 205 # Switch to collecting input page hierarchy separators. 206 207 elif arg == "--input-page-sep": 208 l = input_page_seps 209 continue 210 211 # Switch to collecting mappings. 212 213 elif arg == "--mapping": 214 l = mappings 215 continue 216 217 # Switch to collecting output locations. 218 219 elif arg == "--output-dir": 220 l = output_dirs 221 continue 222 223 # Switch to collecting output encodings. 224 225 elif arg == "--output-encoding": 226 l = output_encodings 227 continue 228 229 # Switch to collecting output page hierarchy separators. 230 231 elif arg == "--output-page-sep": 232 l = output_page_seps 233 continue 234 235 # Switch to collecting page names. 236 237 elif arg == "--pagename": 238 l = pagenames 239 continue 240 241 # Switch to collecting root page names. 242 243 elif arg == "--root": 244 l = root_pagenames 245 continue 246 247 # Switch to collecting theme names. 248 249 elif arg == "--theme": 250 l = theme_names 251 continue 252 253 # Collect options and arguments. 254 255 else: 256 l.append(arg) 257 258 # Collect multiple mappings. 259 260 if l is mappings: 261 continue 262 263 # Collect filenames normally. 264 265 l = filenames 266 267 format = tree and "pretty" or formats and formats[0] or "html" 268 input_dir = getvalue(input_dirs) 269 output_dir = getvalue(output_dirs) 270 271 # Define metadata. 272 273 metadata = Metadata({ 274 "attachments" : getvalue(attachments_dir, "attachments"), 275 "bundle" : bundle, 276 "common_attachments": common, 277 "document_index" : getvalue(document_indexes), 278 "input_context" : input_dir and \ 279 getvalue(input_dir_types, "directory") or \ 280 "standalone", 281 "input_encoding" : getvalue(input_encodings), 282 "input_filename" : input_dir, 283 "input_separator" : getvalue(input_page_seps), 284 "link_format" : format, 285 "mapping" : getmapping(mappings), 286 "no_inline" : no_inline, 287 "output_context" : output_dir and "directory" or "standalone", 288 "output_encoding" : getvalue(output_encodings), 289 "output_format" : format, 290 "output_filename" : output_dir, 291 "output_separator" : getvalue(output_page_seps), 292 "root_pagename" : getvalue(root_pagenames, "FrontPage"), 293 "theme_name" : not fragment and \ 294 "%s.%s" % (getvalue(theme_names, "default"), format) or None, 295 }) 296 297 # Define the input context and theme. 298 299 input = metadata.get_input() 300 theme = metadata.get_theme() 301 302 # Treat filenames as pagenames if an input directory is indicated and if no 303 # pagenames are explicitly specified. 304 305 if input_dir: 306 if pagenames: 307 print >>sys.stderr, message_explicit_pagenames 308 sys.exit(1) 309 310 if all: 311 if filenames: 312 print >>sys.stderr, message_all_with_filenames 313 sys.exit(1) 314 else: 315 filenames = input.all() 316 317 pagenames = filenames 318 filenames = [] 319 320 # Open each file or page, parse the content, serialise the document. 321 322 for pagename, filename in map(None, pagenames, filenames): 323 324 # Define a pagename if missing. 325 326 pagename = pagename or split(filename)[-1] 327 metadata.set("pagename", pagename) 328 329 # Read either from a filename or using a pagename. 330 331 if filename: 332 pagetext = input.readfile(filename) 333 else: 334 pagetext = input.readpage(pagename) 335 336 # Parse the page content. 337 338 p = make_parser(metadata) 339 d = parse(pagetext, p) 340 341 if macros: 342 p.evaluate_macros() 343 344 p.update_metadata(metadata) 345 346 # Obtain a serialiser using the configuration. 347 348 serialiser = make_serialiser(metadata) 349 outtext = serialise(d, serialiser) 350 351 # Show a document tree for debugging purposes, if requested. 352 353 if tree: 354 print outtext 355 continue 356 357 # With a theme, apply it to the text. 358 359 if theme: 360 outtext = theme.apply(outtext) 361 362 # If reading from a file, show the result. Otherwise, write to the 363 # output context. 364 365 output = metadata.get_output() 366 367 if not output.can_write(): 368 print outtext 369 else: 370 output.writepage(outtext, pagename) 371 print >>sys.stderr, pagename 372 373 copy_attachments(p, input, output, all=True) 374 375 # Install any theme resources. 376 377 if theme: 378 theme.install_resources() 379 380 def show_help(progname): 381 382 "Show the help text." 383 384 print >>sys.stderr, help_text % progname 385 386 help_text = """\ 387 Usage: %s [ <options> ] ( --all | <filename>... ) 388 389 Input file options: 390 391 --all Detect all document files in the specified input directory 392 393 Input options: 394 395 --common Obtain attachments from a common directory for all pages, 396 rather than each page having its own subdirectory of a 397 top-level attachments directory. 398 --input-dir Indicate an input directory containing document files 399 --input-dir-type Indicate the type of input directory involved 400 (default: directory) 401 --input-encoding Indicate the character encoding used in document files 402 --input-page-sep Indicate the separator used in filenames to encode 403 hierarchical relationships (subpages and descendant pages) 404 --pagename Indicate the page name corresponding to an indicated 405 filename, with each successive instance of this option 406 corresponding to each successive filename instance 407 408 Output options: 409 410 --bundle Bundle resources such as stylesheets within every document, 411 useful for publishing documents that need to be copied or 412 distributed individually. 413 --document-index Provide a "DocumentIndex" filename to be used in links in 414 HTML format output, useful for local file browsing instead 415 of Web-published content 416 --format Indicate the format to be used for serialised documents 417 (default: html) 418 --fragment Indicates that an output fragment, not an entire document, 419 is to be generated, skipping any theming activities 420 --no-inline Suppress inline objects in serialised documents, linking to 421 separate objects instead. 422 --output-dir Indicate an output directory to contain serialised document 423 files 424 --output-encoding Indicate the character encoding used in serialised document 425 files 426 --output-page-sep Indicate the separator used in filenames to encode 427 hierarchical relationships (subpages and descendant pages) 428 --theme Indicate a theme for serialised documents, typically 429 requiring an output directory to be useful 430 --tree Produce a document tree representation on standard output 431 instead of generating output files 432 433 Configuration options: 434 435 --macros Perform macro evaluation/expansion before serialising 436 documents 437 --mapping Indicate a name and corresponding URL to be used to 438 translate interwiki links 439 --root Indicate the root page name to be used 440 (default: FrontPage) 441 """ 442 443 if __name__ == "__main__": 444 try: 445 main() 446 except errors.ProcessingError, exc: 447 print str(exc) 448 449 # vim: tabstop=4 expandtab shiftwidth=4