MoinLight (file moinformat/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki format parser.     5      6 Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.tree import Block, Break, DefItem, DefTerm, Emphasis, Heading, \    23                             ListItem, Region, Rule, Strong, Text    24 import re    25     26 # Regular expressions.    27     28 syntax = {    29     # Page regions:    30     "regionstart"   : r"((^\s*)([{]{3,}))",                         # {{{...    31     "regionend"     : r"^\s*([}]{3,})",                             # }}}...    32     "header"        : r"#!(.*?)\n",                                 # #! char-excl-nl    33     34     # Region contents:    35     # Line-oriented patterns:    36                       # blank line    37     "break"         : r"^(\s*?)\n",    38                       # ws... expecting text ::    39     "defterm"       : r"^(\s+)(?=.+?::)",    40                       # ws... expecting :: ws...    41     "defterm_empty" : r"^(\s+)(?=::\s+)",    42                       # [ws...] =... ws... expecting headingend    43     "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",    44                       # ws... list-item [ws...]    
45     "listitem"      : r"^(\s+)(\*)(\s*)",    46                       # ws... number-item ws...    47     "listitem_num"  : r"^(\s+)(\d+\.)(\s+)",    48                       # ws... alpha-item ws...    49     "listitem_alpha": r"^(\s+)([aA]\.)(\s+)",    50                       # ws... roman-item ws...    51     "listitem_roman": r"^(\s+)([iI]\.)(\s+)",    52                       # ws... dot-item [ws...]    53     "listitem_dot"  : r"^(\s+)(\.)(\s*)",    54     55     # Region contents:    56     # Inline patterns:    57     "em"            : r"''(?!')",                                   # '' expecting not '    58     "rule"          : r"(-----*)",                                  # ----...    59     "strong"        : r"'''",                                       # '''    60     61     # Inline contents:    62     "emend"         : r"''(?!')|''(?='')",    63     "strongend"     : r"'''",    64     65     # Heading contents:    66     "headingend"    : r"(\s+)(=+)(\s*\n)",                          # ws... =... [ws...] nl    67     68     # List contents:    69     "deftermend"    : r"::(\s*?\n)",    70     "deftermsep"    : r"::(\s+)",    71     "listitemend"   : r"^",                                         # next line    72     }    73     74 # Define patterns for the regular expressions.    75     76 patterns = {}    77 for name, value in syntax.items():    78     patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)    79     80     81     82 # Tokenising functions.    83     84 class TokenStream:    85     86     "A stream of tokens taken from a string."    87     88     def __init__(self, s):    89         self.s = s    90         self.pos = 0    91         self.match = None    92         self.matching = None    93     94     def read_until(self, pattern_names, remaining=True):    95     96         """    97         Find the first match for the given 'pattern_names'. 
Return the text    98         preceding any match, the remaining text if no match was found, or None    99         if no match was found and 'remaining' is given as a false value.   100         """   101    102         first = None   103         self.matching = None   104    105         # Find the first matching pattern.   106    107         for pattern_name in pattern_names:   108             match = patterns[pattern_name].search(self.s, self.pos)   109             if match:   110                 start, end = match.span()   111                 if self.matching is None or start < first:   112                     first = start   113                     self.matching = pattern_name   114                     self.match = match   115    116         if self.matching is None:   117             if remaining:   118                 return self.s[self.pos:]   119             else:   120                 return None   121         else:   122             return self.s[self.pos:first]   123    124     def read_match(self, group=1):   125    126         """   127         Return the matched text, updating the position in the stream. If 'group'   128         is specified, the indicated group in a match will be returned.   129         Typically, group 1 should contain all pertinent data, but groups defined   130         within group 1 can provide sections of the data.   131         """   132    133         if self.match:   134             _start, self.pos = self.match.span()   135             try:   136                 return self.match.group(group)   137             except IndexError:   138                 return ""   139         else:   140             self.pos = len(self.s)   141             return None   142    143    144    145 # Parser functions.   146    147 def parse_page(s):   148    149     """   150     Parse page text 's'. Pages consist of regions delimited by markers.   
151     """   152    153     return parse_region(TokenStream(s))   154    155 def parse_region(items, level=0, indent=0):   156    157     """   158     Parse the data provided by 'items' to populate a region with the given   159     'level' at the given 'indent'.   160     """   161    162     region = Region([], level, indent)   163    164     # Parse section headers.   165    166     parse_region_header(items, region)   167    168     # Parse section body.   169    170     if region.is_transparent():   171         parse_region_wiki(items, region)   172     else:   173         parse_region_opaque(items, region)   174    175     return region   176    177 def parse_region_header(items, region):   178    179     """   180     Parse the region header from the 'items', setting it for the given 'region'.   181     """   182    183     if items.read_until(["header"], False) == "": # None means no header   184         region.type = items.read_match()   185    186 def parse_region_wiki(items, region):   187    188     "Parse the data provided by 'items' to populate a wiki 'region'."   189    190     new_block(region)   191     parse_region_details(items, region, [   192         "break", "heading",   193         "defterm", "defterm_empty",   194         "em",   195         "listitem", "listitem_alpha", "listitem_dot", "listitem_num",   196         "listitem_roman",   197         "regionstart", "regionend",   198         "rule",   199         "strong"])   200    201 def parse_region_opaque(items, region):   202    203     "Parse the data provided by 'items' to populate an opaque 'region'."   204    205     parse_region_details(items, region, ["regionend"])   206    207 def parse_region_details(items, region, pattern_names):   208    209     "Parse 'items' within 'region' searching using 'pattern_names'."   210    211     try:   212         while True:   213    214             # Obtain text before any marker or the end of the input.   
215    216             preceding = items.read_until(pattern_names)   217             if preceding:   218                 region.append_inline(Text(preceding))   219    220             # End of input.   221    222             if not items.matching:   223                 break   224    225             # Obtain any feature.   226    227             feature = items.read_match()   228             handler = handlers.get(items.matching)   229    230             # Handle each feature or add text to the region.   231    232             if handler:   233                 handler(items, region)   234             else:   235                 region.append_inline(Text(feature))   236    237     except StopIteration:   238         pass   239    240     region.normalise()   241    242 def end_region(items, region):   243    244     "End the parsing of 'region'."   245    246     raise StopIteration   247    248 def parse_break(items, region):   249    250     "Handle a paragraph break within 'region'."   251    252     region.add(Break())   253     new_block(region)   254    255 def parse_defitem(items, region, extra=""):   256    257     "Handle a definition item within 'region'."   258    259     pad = items.read_match(1)   260     item = DefItem([], pad, extra)   261     parse_region_details(items, item, ["listitemend"])   262     region.append(item)   263     new_block(region)   264    265 def parse_defterm(items, region):   266    267     "Handle a definition term within 'region'."   268    269     pad = items.read_match(1)   270     term = DefTerm([], pad)   271     parse_region_details(items, term, ["deftermend", "deftermsep"])   272     region.append(term)   273     if items.matching == "deftermsep":   274         parse_defitem(items, region)   275    276 def parse_defterm_empty(items, region):   277    278     "Handle an empty definition term within 'region'."   
279    280     extra = items.read_match(1)   281     parse_region_details(items, region, ["deftermsep"])   282     parse_defitem(items, region, extra)   283    284 def parse_em(items, region):   285    286     "Handle emphasis."   287    288     span = Emphasis([])   289     parse_region_details(items, span, ["emend", "strong"])   290     region.append_inline(span)   291    292 def parse_heading(items, region):   293    294     "Handle a heading."   295    296     start_extra = items.read_match(1)   297     level = len(items.read_match(2))   298     start_pad = items.read_match(3)   299     heading = Heading([], level, start_extra, start_pad)   300     parse_region_details(items, heading, ["headingend"])   301     region.append(heading)   302     new_block(region)   303    304 def parse_heading_end(items, heading):   305    306     "Handle the end of a heading."   307    308     level = len(items.read_match(2))   309     if heading.level == level:   310         heading.end_pad = items.read_match(1)   311         heading.end_extra = items.read_match(3)   312         raise StopIteration   313    314 def parse_listitem(items, region):   315    316     "Handle a list item marker within 'region'."   317    318     indent = len(items.read_match(1))   319     marker = items.read_match(2)   320     space = items.read_match(3)   321     item = ListItem([], indent, marker, space)   322     parse_region_details(items, item, ["listitemend"])   323     region.append(item)   324     new_block(region)   325    326 def parse_rule(items, region):   327    328     "Handle a horizontal rule within 'region'."   329    330     length = len(items.read_match(1))   331     rule = Rule(length)   332     region.append(rule)   333     new_block(region)   334    335 def parse_section(items, region):   336    337     "Handle the start of a new section within 'region'."   338    339     # Parse the section and start a new block after the section.   
340    341     indent = len(items.read_match(2))   342     level = len(items.read_match(3))   343     region.append(parse_region(items, level, indent))   344     new_block(region)   345    346 def parse_section_end(items, region):   347    348     "Handle the end of a new section within 'region'."   349    350     feature = items.read_match()   351     if region.have_end(feature):   352         raise StopIteration   353     else:   354         region.append_inline(Text(feature))   355    356 def parse_strong(items, region):   357    358     "Handle emboldened text."   359    360     span = Strong([])   361     parse_region_details(items, span, ["em", "strongend"])   362     region.append_inline(span)   363    364 # Pattern handlers.   365    366 handlers = {   367     None : end_region,   368     "break" : parse_break,   369     "defterm" : parse_defterm,   370     "defterm_empty" : parse_defterm_empty,   371     "deftermend" : end_region,   372     "deftermsep" : end_region,   373     "em" : parse_em,   374     "emend" : end_region,   375     "heading" : parse_heading,   376     "headingend" : parse_heading_end,   377     "listitemend" : end_region,   378     "listitem" : parse_listitem,   379     "listitem_alpha" : parse_listitem,   380     "listitem_dot" : parse_listitem,   381     "listitem_num" : parse_listitem,   382     "listitem_roman" : parse_listitem,   383     "regionstart" : parse_section,   384     "regionend" : parse_section_end,   385     "rule" : parse_rule,   386     "strong" : parse_strong,   387     "strongend" : end_region,   388     }   389    390 def new_block(region):   391    392     "Start a new block in 'region'."   393    394     block = Block([])   395     region.add(block)   396    397    398    399 # Top-level functions.   400    401 parse = parse_page   402    403 # vim: tabstop=4 expandtab shiftwidth=4
MoinLight

moinformat/__init__.py

moinformat/init.py