MoinLight (file moinformat/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki format parser.     5      6 Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.tree import Block, Heading, ListItem, Region, Rule, Text    23 import re    24     25 # Regular expressions.    26     27 syntax = {    28     # Page regions:    29     "regionstart"   : r"((^\s*)([{]{3,}))",                         # {{{...    30     "regionend"     : r"^\s*([}]{3,})",                             # }}}...    31     "header"        : r"#!(.*?)\n",                                 # #! char-excl-nl    32     33     # Region contents:    34     # Line-oriented patterns:    35     "break"         : r"^(\s*?)\n",                                 # blank line    36     "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)", # [ws...] =... ws... expecting headingend    37     "listitem"      : r"^((\s+)([*]|\d+[.]))",                      # indent (list-item or number-item)    38     39     # Region contents:    40     # Inline patterns:    41     "rule"          : r"(-----*)",                                  # ----...    42     43     # Heading contents:    44     "headingend"    : r"(\s+)(=+)(\s*\n)",                          # ws... =... [ws...] 
nl    45     46     # List contents:    47     "listitemend"   : r"^",                                         # next line    48     }    49     50 # Define patterns for the regular expressions.    51     52 patterns = {}    53 for name, value in syntax.items():    54     patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)    55     56     57     58 # Tokenising functions.    59     60 class TokenStream:    61     62     "A stream of tokens taken from a string."    63     64     def __init__(self, s):    65         self.s = s    66         self.pos = 0    67         self.match = None    68         self.matching = None    69     70     def read_until(self, pattern_names, remaining=True):    71     72         """    73         Find the first match for the given 'pattern_names'. Return the text    74         preceding any match, the remaining text if no match was found, or None    75         if no match was found and 'remaining' is given as a false value.    76         """    77     78         first = None    79         self.matching = None    80     81         # Find the first matching pattern.    82     83         for pattern_name in pattern_names:    84             match = patterns[pattern_name].search(self.s, self.pos)    85             if match:    86                 start, end = match.span()    87                 if self.matching is None or start < first:    88                     first = start    89                     self.matching = pattern_name    90                     self.match = match    91     92         if self.matching is None:    93             if remaining:    94                 return self.s[self.pos:]    95             else:    96                 return None    97         else:    98             return self.s[self.pos:first]    99    100     def read_match(self, group=1):   101    102         """   103         Return the matched text, updating the position in the stream. 
If 'group'   104         is specified, the indicated group in a match will be returned.   105         Typically, group 1 should contain all pertinent data, but groups defined   106         within group 1 can provide sections of the data.   107         """   108    109         if self.match:   110             _start, self.pos = self.match.span()   111             try:   112                 return self.match.group(group)   113             except IndexError:   114                 return ""   115         else:   116             self.pos = len(self.s)   117             return None   118    119    120    121 # Parser functions.   122    123 def parse_page(s):   124    125     """   126     Parse page text 's'. Pages consist of regions delimited by markers.   127     """   128    129     return parse_region(TokenStream(s))   130    131 def parse_region(items, level=0, indent=0):   132    133     """   134     Parse the data provided by 'items' to populate a region with the given   135     'level' at the given 'indent'.   136     """   137    138     region = Region([], level, indent)   139    140     # Parse section headers.   141    142     parse_region_header(items, region)   143    144     # Parse section body.   145    146     if region.is_transparent():   147         parse_region_wiki(items, region)   148     else:   149         parse_region_opaque(items, region)   150    151     return region   152    153 def parse_region_header(items, region):   154    155     """   156     Parse the region header from the 'items', setting it for the given 'region'.   157     """   158    159     if items.read_until(["header"], False) == "": # None means no header   160         region.type = items.read_match()   161    162 def parse_region_wiki(items, region):   163    164     "Parse the data provided by 'items' to populate a wiki 'region'."   
165    166     new_block(region)   167     parse_region_details(items, region, ["break", "heading", "listitem", "regionstart", "regionend", "rule"])   168    169 def parse_region_opaque(items, region):   170    171     "Parse the data provided by 'items' to populate an opaque 'region'."   172    173     parse_region_details(items, region, ["regionend"])   174    175 def parse_region_details(items, region, pattern_names):   176    177     "Parse 'items' within 'region' searching using 'pattern_names'."   178    179     try:   180         while True:   181    182             # Obtain text before any marker or the end of the input.   183    184             preceding = items.read_until(pattern_names)   185             if preceding:   186                 region.append_text(Text(preceding))   187    188             # End of input.   189    190             if not items.matching:   191                 break   192    193             # Obtain any feature.   194    195             feature = items.read_match()   196             handler = handlers.get(items.matching)   197    198             # Handle each feature or add text to the region.   199    200             if handler:   201                 handler(items, region)   202             else:   203                 region.append_text(Text(feature))   204    205     except StopIteration:   206         pass   207    208     region.normalise()   209    210 def end_region(items, region):   211    212     "End the parsing of 'region'."   213    214     raise StopIteration   215    216 def parse_break(items, region):   217    218     "Handle a paragraph break within 'region'."   219    220     # Mark any previous block as not being the final one in a sequence.   221    222     block = region.nodes[-1]   223     block.final = False   224     new_block(region)   225    226 def parse_heading(items, region):   227    228     "Handle a heading."   
229    230     start_extra = items.read_match(1)   231     level = len(items.read_match(2))   232     start_pad = items.read_match(3)   233     heading = Heading([], level, start_extra, start_pad)   234     parse_region_details(items, heading, ["headingend"])   235     region.append(heading)   236     new_block(region)   237    238 def parse_heading_end(items, heading):   239    240     "Handle the end of a heading."   241    242     level = len(items.read_match(2))   243     if heading.level == level:   244         heading.end_pad = items.read_match(1)   245         heading.end_extra = items.read_match(3)   246         raise StopIteration   247    248 def parse_listitem(items, region):   249    250     "Handle a list item marker within 'region'."   251    252     item = ListItem([])   253     parse_region_details(items, item, ["listitemend"])   254     region.append(item)   255     new_block(region)   256    257 def parse_listitem_end(items, item):   258    259     "Handle the end of a list."   260    261     raise StopIteration   262    263 def parse_rule(items, region):   264    265     "Handle a horizontal rule within 'region'."   266    267     length = len(items.read_match(1))   268     rule = Rule(length)   269     region.append(rule)   270     new_block(region)   271    272 def parse_section(items, region):   273    274     "Handle the start of a new section within 'region'."   275    276     # Parse the section and start a new block after the section.   277    278     indent = len(items.read_match(2))   279     level = len(items.read_match(3))   280     region.append(parse_region(items, level, indent))   281     new_block(region)   282    283 def parse_section_end(items, region):   284    285     "Handle the end of a new section within 'region'."   
286    287     feature = items.read_match()   288     if region.have_end(feature):   289         raise StopIteration   290     else:   291         region.append_text(Text(feature))   292    293 # Pattern handlers.   294    295 handlers = {   296     None : end_region,   297     "break" : parse_break,   298     "heading" : parse_heading,   299     "headingend" : parse_heading_end,   300     "listitemend" : parse_listitem_end,   301     "listitem" : parse_listitem,   302     "regionstart" : parse_section,   303     "regionend" : parse_section_end,   304     "rule" : parse_rule,   305     }   306    307 def new_block(region):   308    309     "Start a new block in 'region'."   310    311     block = Block([])   312     region.append(block)   313    314    315    316 # Top-level functions.   317    318 parse = parse_page   319    320 # vim: tabstop=4 expandtab shiftwidth=4
MoinLight

moinformat/__init__.py

moinformat/__init__.py