MoinLight (file moinformat/__init__.py)

     #!/usr/bin/env python

"""
Moin wiki format parser.

Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat.tree import Block, Break, Heading, ListItem, Region, Rule, Text
import re

# Regular expressions.
# Each entry maps a pattern name to its expression. The names are also the keys
# of the 'handlers' table defined towards the end of this module.

syntax = {
    # Page regions:
    "regionstart"   : r"((^\s*)([{]{3,}))",                         # {{{...
    "regionend"     : r"^\s*([}]{3,})",                             # }}}...
    "header"        : r"#!(.*?)\n",                                 # #! char-excl-nl

    # Region contents:
    # Line-oriented patterns:
                      # blank line
    "break"         : r"^(\s*?)\n",
                      # [ws...] =... ws... expecting headingend
    "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",
                      # indent list-item [ws...]
    "listitem"      : r"^(\s+)(\*)(\s*)",
                      # indent number-item ws...
    "listitem_num"  : r"^(\s+)(\d+\.)(\s+)",
                      # indent alpha-item ws...
    "listitem_alpha": r"^(\s+)([aA]\.)(\s+)",
                      # indent roman-item ws...
    "listitem_roman": r"^(\s+)([iI]\.)(\s+)",
                      # indent dot-item [ws...]
    "listitem_dot"  : r"^(\s+)(\.)(\s*)",

    # Region contents:
    # Inline patterns:
    "rule"          : r"(-----*)",                                  # ----...

    # Heading contents:
    "headingend"    : r"(\s+)(=+)(\s*\n)",                          # ws... =... [ws...] nl

    # List contents:
    "listitemend"   : r"^",                                         # next line
    }

# Define patterns for the regular expressions.
# All patterns are compiled once, with '^' matching at the start of every line.

patterns = {}
for name, value in syntax.items():
    patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)



# Tokenising functions.

class TokenStream:

    "A stream of tokens taken from a string."

    def __init__(self, s):

        "Initialise the stream with the string 's', positioned at its start."

        self.s = s
        self.pos = 0

        # The most recent match object and the name of the pattern producing it.

        self.match = None
        self.matching = None

    def read_until(self, pattern_names, remaining=True):

        """
        Find the first match for the given 'pattern_names'. Return the text
        preceding any match, the remaining text if no match was found, or None
        if no match was found and 'remaining' is given as a false value.

        The stream position is not changed by this method: 'read_match' must be
        used to advance beyond the match or to the end of the input.
        """

        first = None

        # Forget any previous result so that a failed search cannot leave a
        # stale match behind for 'read_match' to reuse.

        self.matching = None
        self.match = None

        # Find the first matching pattern. Where several patterns match at the
        # same position, the one listed earliest in 'pattern_names' is chosen.

        for pattern_name in pattern_names:
            match = patterns[pattern_name].search(self.s, self.pos)
            if match:
                start, end = match.span()
                if self.matching is None or start < first:
                    first = start
                    self.matching = pattern_name
                    self.match = match

        if self.matching is None:
            if remaining:
                return self.s[self.pos:]
            else:
                return None
        else:
            return self.s[self.pos:first]

    def read_match(self, group=1):

        """
        Return the matched text, updating the position in the stream. If 'group'
        is specified, the indicated group in a match will be returned.
        Typically, group 1 should contain all pertinent data, but groups defined
        within group 1 can provide sections of the data.

        An empty string is returned if the match has no such group. Without a
        current match, the position is moved to the end of the input and None
        is returned.
        """

        if self.match:

            # The position always moves to the end of the whole match,
            # regardless of the group requested, so repeated calls for
            # different groups of the same match are safe.

            _start, self.pos = self.match.span()
            try:
                return self.match.group(group)
            except IndexError:
                return ""
        else:
            self.pos = len(self.s)
            return None



# Parser functions.

def parse_page(s):

    """
    Parse page text 's'. Pages consist of regions delimited by markers.
    Return the region object representing the whole page.
    """

    return parse_region(TokenStream(s))

def parse_region(items, level=0, indent=0):

    """
    Parse the data provided by 'items' to populate a region with the given
    'level' at the given 'indent'. Return the populated region.
    """

    region = Region([], level, indent)

    # Parse section headers.

    parse_region_header(items, region)

    # Parse section body.
    # NOTE(review): 'is_transparent' is defined in moinformat.tree; presumably
    # it reflects the region type set from any header - confirm there.

    if region.is_transparent():
        parse_region_wiki(items, region)
    else:
        parse_region_opaque(items, region)

    return region

def parse_region_header(items, region):

    """
    Parse the region header from the 'items', setting it for the given 'region'.
    A header is only recognised when it occurs at the current position.
    """

    # An empty preceding string means that the header starts exactly here.

    if items.read_until(["header"], False) == "": # None means no header
        region.type = items.read_match()

def parse_region_wiki(items, region):

    "Parse the data provided by 'items' to populate a wiki 'region'."

    new_block(region)
    parse_region_details(items, region, [
        "break", "heading", "listitem", "listitem_num", "listitem_alpha",
        "listitem_roman", "listitem_dot", "regionstart", "regionend", "rule"])

def parse_region_opaque(items, region):

    "Parse the data provided by 'items' to populate an opaque 'region'."

    # Only the end of the region is of interest: everything else is text.

    parse_region_details(items, region, ["regionend"])

def parse_region_details(items, region, pattern_names):

    """
    Parse 'items' within 'region' searching using 'pattern_names'.

    Parsing continues until the input is exhausted or until a handler raises
    StopIteration to indicate the end of the 'region'. The 'region' is
    normalised afterwards in both cases.
    """

    try:
        while True:

            # Obtain text before any marker or the end of the input.

            preceding = items.read_until(pattern_names)
            if preceding:
                region.append_text(Text(preceding))

            # End of input.

            if not items.matching:

                # Consume the text just added. Without this, an enclosing
                # region resuming after this one would read and add the same
                # trailing text a second time.

                items.read_match()
                break

            # Obtain any feature.

            feature = items.read_match()
            handler = handlers.get(items.matching)

            # Handle each feature or add text to the region.

            if handler:
                handler(items, region)
            else:
                region.append_text(Text(feature))

    except StopIteration:
        pass

    region.normalise()

def end_region(items, region):

    "End the parsing of 'region' by raising StopIteration."

    raise StopIteration

def parse_break(items, region):

    "Handle a paragraph break within 'region'."

    region.add(Break())
    new_block(region)

def parse_heading(items, region):

    "Handle a heading, adding it to 'region' and starting a new block."

    # Groups of the "heading" pattern: leading space, markers, padding.

    start_extra = items.read_match(1)
    level = len(items.read_match(2))
    start_pad = items.read_match(3)
    heading = Heading([], level, start_extra, start_pad)
    parse_region_details(items, heading, ["headingend"])
    region.append(heading)
    new_block(region)

def parse_heading_end(items, heading):

    """
    Handle the end of a heading. Only an end marker of the same level as the
    'heading' ends it, this being signalled by raising StopIteration.
    """

    level = len(items.read_match(2))
    if heading.level == level:
        heading.end_pad = items.read_match(1)
        heading.end_extra = items.read_match(3)
        raise StopIteration

def parse_listitem(items, region):

    "Handle a list item marker within 'region'."

    # Groups of the list item patterns: indent, marker, following space.

    indent = len(items.read_match(1))
    marker = items.read_match(2)
    space = items.read_match(3)
    item = ListItem([], indent, marker, space)
    parse_region_details(items, item, ["listitemend"])
    region.append(item)
    new_block(region)

def parse_listitem_end(items, item):

    "Handle the end of a list item by raising StopIteration."

    raise StopIteration

def parse_rule(items, region):

    "Handle a horizontal rule within 'region'."

    length = len(items.read_match(1))
    rule = Rule(length)
    region.append(rule)
    new_block(region)

def parse_section(items, region):

    "Handle the start of a new section within 'region'."

    # Parse the section and start a new block after the section.
    # Groups of the "regionstart" pattern: 2 is the indent, 3 the brackets.

    indent = len(items.read_match(2))
    level = len(items.read_match(3))
    region.append(parse_region(items, level, indent))
    new_block(region)

def parse_section_end(items, region):

    """
    Handle the end of a new section within 'region'. A marker not accepted by
    the 'region' as its own end is added to the region as text.
    """

    feature = items.read_match()
    if region.have_end(feature):
        raise StopIteration
    else:
        region.append_text(Text(feature))

# Pattern handlers.
# Patterns without an entry here have their matched text added as plain text.

handlers = {
    None : end_region,
    "break" : parse_break,
    "heading" : parse_heading,
    "headingend" : parse_heading_end,
    "listitemend" : parse_listitem_end,
    "listitem" : parse_listitem,
    "listitem_alpha" : parse_listitem,
    "listitem_dot" : parse_listitem,
    "listitem_num" : parse_listitem,
    "listitem_roman" : parse_listitem,
    "regionstart" : parse_section,
    "regionend" : parse_section_end,
    "rule" : parse_rule,
    }

def new_block(region):

    "Start a new block in 'region'."

    block = Block([])
    region.add(block)



# Top-level functions.

parse = parse_page

# vim: tabstop=4 expandtab shiftwidth=4
MoinLight

moinformat/__init__.py

moinformat/__init__.py