# HG changeset patch # User Paul Boddie # Date 1688155568 -7200 # Node ID e272b98a1aa36dbfc0bccc2695de2d98343e184b # Parent 8f71aeaf34654ca9fc4fdb351bafd2f823c40b12 Support explicit indication of the input format for parsing. diff -r 8f71aeaf3465 -r e272b98a1aa3 moinconvert --- a/moinconvert Fri Jun 30 16:11:00 2023 +0200 +++ b/moinconvert Fri Jun 30 22:06:08 2023 +0200 @@ -3,7 +3,7 @@ """ Moin wiki format converter. -Copyright (C) 2018, 2019, 2021 Paul Boddie +Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -37,8 +37,8 @@ To indicate pagenames within an input directory, omit any --pagename flags.""" message_tree_format_usage = """\ -The --tree and --format options cannot be used together since the --tree option -indicates use of the "pretty" format.""" +The --tree option cannot be used together with the --format or --output-format +options since the --tree option indicates use of the "pretty" format.""" @@ -92,7 +92,7 @@ attachments_dir = [] document_indexes = [] filenames = [] - formats = [] + input_formats = [] input_dir_types = [] input_dirs = [] input_encodings = [] @@ -100,6 +100,7 @@ mappings = [] output_dirs = [] output_encodings = [] + output_formats = [] output_page_seps = [] pagenames = [] root_pagenames = [] @@ -156,7 +157,7 @@ # Detect tree output. elif arg == "--tree": - if formats: + if output_formats: print >>sys.stderr, message_tree_format_usage sys.exit(1) tree = True @@ -177,11 +178,11 @@ # Switch to collecting formats. - elif arg == "--format": + elif arg in ("--format", "--output-format"): if tree: print >>sys.stderr, message_tree_format_usage sys.exit(1) - l = formats + l = output_formats continue # Switch to collecting input locations. @@ -202,6 +203,12 @@ l = input_encodings continue + # Switch to collecting input formats. + + elif arg == "--input-format": + l = input_formats + continue + # Switch to collecting input page hierarchy separators. elif arg == "--input-page-sep": @@ -264,7 +271,8 @@ l = filenames - format = tree and "pretty" or formats and formats[0] or "html" + input_format = input_formats and input_formats[0] or "moin" + output_format = tree and "pretty" or output_formats and output_formats[0] or "html" input_dir = getvalue(input_dirs) output_dir = getvalue(output_dirs) @@ -280,18 +288,19 @@ "standalone", "input_encoding" : getvalue(input_encodings), "input_filename" : input_dir, + "input_format" : input_format, "input_separator" : getvalue(input_page_seps), - "link_format" : format, + "link_format" : output_format, "mapping" : getmapping(mappings), "no_inline" : no_inline, "output_context" : output_dir and "directory" or "standalone", "output_encoding" : getvalue(output_encodings), - "output_format" : format, + "output_format" : output_format, "output_filename" : output_dir, "output_separator" : getvalue(output_page_seps), "root_pagename" : getvalue(root_pagenames, "FrontPage"), "theme_name" : not fragment and \ - "%s.%s" % (getvalue(theme_names, "default"), format) or None, + "%s.%s" % (getvalue(theme_names, "default"), output_format) or None, }) # Define the input context and theme. @@ -394,11 +403,13 @@ --common Obtain attachments from a common directory for all pages, rather than each page having its own subdirectory of a - top-level attachments directory. + top-level attachments directory --input-dir Indicate an input directory containing document files --input-dir-type Indicate the type of input directory involved (default: directory) --input-encoding Indicate the character encoding used in document files +--input-format Indicate the format of the parsed documents + (default: moin) --input-page-sep Indicate the separator used in filenames to encode hierarchical relationships (subpages and descendant pages) --pagename Indicate the page name corresponding to an indicated @@ -409,20 +420,24 @@ --bundle Bundle resources such as stylesheets within every document, useful for publishing documents that need to be copied or - distributed individually. + distributed individually --document-index Provide a "DocumentIndex" filename to be used in links in HTML format output, useful for local file browsing instead of Web-published content ---format Indicate the format to be used for serialised documents +--format Indicate the format to be used for serialised documents; + equivalent to --output-format (default: html) --fragment Indicates that an output fragment, not an entire document, is to be generated, skipping any theming activities --no-inline Suppress inline objects in serialised documents, linking to - separate objects instead. + separate objects instead --output-dir Indicate an output directory to contain serialised document files --output-encoding Indicate the character encoding used in serialised document files +--output-format Indicate the format to be used for serialised documents; + equivalent to --format + (default: html) --output-page-sep Indicate the separator used in filenames to encode hierarchical relationships (subpages and descendant pages) --theme Indicate a theme for serialised documents, typically diff -r 8f71aeaf3465 -r e272b98a1aa3 moinformat/parsers/__init__.py --- a/moinformat/parsers/__init__.py Fri Jun 30 16:11:00 2023 +0200 +++ b/moinformat/parsers/__init__.py Fri Jun 30 22:06:08 2023 +0200 @@ -3,7 +3,7 @@ """ Moin wiki parsers. -Copyright (C) 2017, 2018 Paul Boddie +Copyright (C) 2017, 2018, 2023 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,7 +30,7 @@ return parsers[name] -def make_parser(metadata, name="moin"): +def make_parser(metadata, name=None): "Return a parser instance using the given 'metadata' and optional 'name'."