MoinLight (file moinformat/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki format parser.     5      6 Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.tree import Block, Break, DefItem, DefTerm, FontStyle, Heading, \    23                             ListItem, Monospace, Region, Rule, Text    24 import re    25     26 # Regular expressions.    27     28 syntax = {    29     # Page regions:    30     "regionstart"   : r"((^\s*)([{]{3,}))",                         # {{{...    31     "regionend"     : r"^\s*([}]{3,})",                             # }}}...    32     "header"        : r"#!(.*?)\n",                                 # #! char-excl-nl    33     34     # Region contents:    35     # Line-oriented patterns:    36                       # blank line    37     "break"         : r"^(\s*?)\n",    38                       # ws... expecting text ::    39     "defterm"       : r"^(\s+)(?=.+?::)",    40                       # ws... expecting :: ws...    41     "defterm_empty" : r"^(\s+)(?=::\s+)",    42                       # [ws...] =... ws... expecting headingend    43     "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",    44                       # ws... list-item [ws...]    
45     "listitem"      : r"^(\s+)(\*)(\s*)",    46                       # ws... number-item ws...    47     "listitem_num"  : r"^(\s+)(\d+\.)(\s+)",    48                       # ws... alpha-item ws...    49     "listitem_alpha": r"^(\s+)([aA]\.)(\s+)",    50                       # ws... roman-item ws...    51     "listitem_roman": r"^(\s+)([iI]\.)(\s+)",    52                       # ws... dot-item [ws...]    53     "listitem_dot"  : r"^(\s+)(\.)(\s*)",    54     55     # Region contents:    56     # Inline patterns:    57     "fontstyle"     : r"('{2,6})",    58     "monospace"     : r"`",    59     "rule"          : r"(-----*)",                                  # ----...    60     61     # Inline contents:    62     "monospaceend"  : r"`",    63     64     # Heading contents:    65     "headingend"    : r"(\s+)(=+)(\s*\n)",                          # ws... =... [ws...] nl    66     67     # List contents:    68     "deftermend"    : r"::(\s*?\n)",    69     "deftermsep"    : r"::(\s+)",    70     "listitemend"   : r"^",                                         # next line    71     }    72     73 # Define patterns for the regular expressions.    74     75 patterns = {}    76 for name, value in syntax.items():    77     patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)    78     79     80     81 # Tokenising functions.    82     83 class TokenStream:    84     85     "A stream of tokens taken from a string."    86     87     def __init__(self, s):    88         self.s = s    89         self.pos = 0    90         self.match = None    91         self.matching = None    92     93     def rewind(self, length):    94     95         "Rewind in the string by 'length'."    96     97         self.pos -= min(length, self.pos)    98     99     def read_until(self, pattern_names, remaining=True):   100    101         """   102         Find the first match for the given 'pattern_names'. 
Return the text   103         preceding any match, the remaining text if no match was found, or None   104         if no match was found and 'remaining' is given as a false value.   105         """   106    107         first = None   108         self.matching = None   109    110         # Find the first matching pattern.   111    112         for pattern_name in pattern_names:   113             match = patterns[pattern_name].search(self.s, self.pos)   114             if match:   115                 start, end = match.span()   116                 if self.matching is None or start < first:   117                     first = start   118                     self.matching = pattern_name   119                     self.match = match   120    121         if self.matching is None:   122             if remaining:   123                 return self.s[self.pos:]   124             else:   125                 return None   126         else:   127             return self.s[self.pos:first]   128    129     def read_match(self, group=1):   130    131         """   132         Return the matched text, updating the position in the stream. If 'group'   133         is specified, the indicated group in a match will be returned.   134         Typically, group 1 should contain all pertinent data, but groups defined   135         within group 1 can provide sections of the data.   136         """   137    138         if self.match:   139             _start, self.pos = self.match.span()   140             try:   141                 return self.match.group(group)   142             except IndexError:   143                 return ""   144         else:   145             self.pos = len(self.s)   146             return None   147    148    149    150 # Parser functions.   151    152 def parse_page(s):   153    154     """   155     Parse page text 's'. Pages consist of regions delimited by markers.   
156     """   157    158     return parse_region(TokenStream(s))   159    160 def parse_region(items, level=0, indent=0):   161    162     """   163     Parse the data provided by 'items' to populate a region with the given   164     'level' at the given 'indent'.   165     """   166    167     region = Region([], level, indent)   168    169     # Parse section headers.   170    171     parse_region_header(items, region)   172    173     # Parse section body.   174    175     if region.is_transparent():   176         parse_region_wiki(items, region)   177     else:   178         parse_region_opaque(items, region)   179    180     return region   181    182 def parse_region_header(items, region):   183    184     """   185     Parse the region header from the 'items', setting it for the given 'region'.   186     """   187    188     if items.read_until(["header"], False) == "": # None means no header   189         region.type = items.read_match()   190    191 def parse_region_wiki(items, region):   192    193     "Parse the data provided by 'items' to populate a wiki 'region'."   194    195     new_block(region)   196     parse_region_details(items, region, [   197         "break", "heading",   198         "defterm", "defterm_empty",   199         "fontstyle",   200         "listitem", "listitem_alpha", "listitem_dot", "listitem_num",   201         "listitem_roman",   202         "monospace",   203         "regionstart", "regionend",   204         "rule",   205         ])   206    207 def parse_region_opaque(items, region):   208    209     "Parse the data provided by 'items' to populate an opaque 'region'."   210    211     parse_region_details(items, region, ["regionend"])   212    213 def parse_region_details(items, region, pattern_names):   214    215     "Parse 'items' within 'region' searching using 'pattern_names'."   216    217     try:   218         while True:   219    220             # Obtain text before any marker or the end of the input.   
221    222             preceding = items.read_until(pattern_names)   223             if preceding:   224                 region.append_inline(Text(preceding))   225    226             # End of input.   227    228             if not items.matching:   229                 break   230    231             # Obtain any feature.   232    233             feature = items.read_match()   234             handler = handlers.get(items.matching)   235    236             # Handle each feature or add text to the region.   237    238             if handler:   239                 handler(items, region)   240             else:   241                 region.append_inline(Text(feature))   242    243     except StopIteration:   244         pass   245    246     region.normalise()   247    248 def end_region(items, region):   249    250     "End the parsing of 'region'."   251    252     raise StopIteration   253    254 def parse_break(items, region):   255    256     "Handle a paragraph break within 'region'."   257    258     region.add(Break())   259     new_block(region)   260    261 def parse_defitem(items, region, extra=""):   262    263     "Handle a definition item within 'region'."   264    265     pad = items.read_match(1)   266     item = DefItem([], pad, extra)   267     parse_region_details(items, item, ["listitemend"])   268     region.append(item)   269     new_block(region)   270    271 def parse_defterm(items, region):   272    273     "Handle a definition term within 'region'."   274    275     pad = items.read_match(1)   276     term = DefTerm([], pad)   277     parse_region_details(items, term, ["deftermend", "deftermsep"])   278     region.append(term)   279     if items.matching == "deftermsep":   280         parse_defitem(items, region)   281    282 def parse_defterm_empty(items, region):   283    284     "Handle an empty definition term within 'region'."   
285    286     extra = items.read_match(1)   287     parse_region_details(items, region, ["deftermsep"])   288     parse_defitem(items, region, extra)   289    290 def parse_fontstyle(items, region):   291    292     "Handle emphasis and strong styles."   293    294     n = len(items.read_match(1))   295    296     # Handle endings.   297    298     if isinstance(region, FontStyle):   299         emphasis = n in (2, 4, 5)   300         strong = n in (3, 5, 6)   301         active = True   302    303         if region.emphasis and emphasis:   304             active = region.close_emphasis()   305             n -= 2   306         if region.strong and strong:   307             active = region.close_strong()   308             n -= 3   309    310         if not active:   311             if n:   312                 items.rewind(n)   313             raise StopIteration   314    315         elif not n:   316             return   317    318     # Handle new styles.   319    320     emphasis = n in (2, 4, 5)   321     strong = n in (3, 5, 6)   322     double = n in (4, 6)   323    324     span = FontStyle([], emphasis, strong)   325     if not double:   326         parse_region_details(items, span, ["fontstyle", "monospace"])   327     region.append_inline(span)   328    329 def parse_heading(items, region):   330    331     "Handle a heading."   332    333     start_extra = items.read_match(1)   334     level = len(items.read_match(2))   335     start_pad = items.read_match(3)   336     heading = Heading([], level, start_extra, start_pad)   337     parse_region_details(items, heading, ["headingend"])   338     region.append(heading)   339     new_block(region)   340    341 def parse_heading_end(items, heading):   342    343     "Handle the end of a heading."   
344    345     level = len(items.read_match(2))   346     if heading.level == level:   347         heading.end_pad = items.read_match(1)   348         heading.end_extra = items.read_match(3)   349         raise StopIteration   350    351 def parse_listitem(items, region):   352    353     "Handle a list item marker within 'region'."   354    355     indent = len(items.read_match(1))   356     marker = items.read_match(2)   357     space = items.read_match(3)   358     item = ListItem([], indent, marker, space)   359     parse_region_details(items, item, ["listitemend"])   360     region.append(item)   361     new_block(region)   362    363 def parse_monospace(items, region):   364    365     "Handle monospace."   366    367     span = Monospace([])   368     parse_region_details(items, span, ["fontstyle", "monospaceend"])   369     region.append_inline(span)   370    371 def parse_rule(items, region):   372    373     "Handle a horizontal rule within 'region'."   374    375     length = len(items.read_match(1))   376     rule = Rule(length)   377     region.append(rule)   378     new_block(region)   379    380 def parse_section(items, region):   381    382     "Handle the start of a new section within 'region'."   383    384     # Parse the section and start a new block after the section.   385    386     indent = len(items.read_match(2))   387     level = len(items.read_match(3))   388     region.append(parse_region(items, level, indent))   389     new_block(region)   390    391 def parse_section_end(items, region):   392    393     "Handle the end of a new section within 'region'."   394    395     feature = items.read_match()   396     if region.have_end(feature):   397         raise StopIteration   398     else:   399         region.append_inline(Text(feature))   400    401 # Pattern handlers.   
402    403 handlers = {   404     None : end_region,   405     "break" : parse_break,   406     "defterm" : parse_defterm,   407     "defterm_empty" : parse_defterm_empty,   408     "deftermend" : end_region,   409     "deftermsep" : end_region,   410     "fontstyle" : parse_fontstyle,   411     "heading" : parse_heading,   412     "headingend" : parse_heading_end,   413     "listitemend" : end_region,   414     "listitem" : parse_listitem,   415     "listitem_alpha" : parse_listitem,   416     "listitem_dot" : parse_listitem,   417     "listitem_num" : parse_listitem,   418     "listitem_roman" : parse_listitem,   419     "monospace" : parse_monospace,   420     "monospaceend" : end_region,   421     "regionstart" : parse_section,   422     "regionend" : parse_section_end,   423     "rule" : parse_rule,   424     }   425    426 def new_block(region):   427    428     "Start a new block in 'region'."   429    430     block = Block([])   431     region.add(block)   432    433    434    435 # Top-level functions.   436    437 parse = parse_page   438    439 # vim: tabstop=4 expandtab shiftwidth=4
MoinLight

moinformat/__init__.py

moinformat/init.py