MoinLight (file moinformat/__init__.py)

     1 #!/usr/bin/env python     2      3 """     4 Moin wiki format parser.     5      6 Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.tree import Block, Break, DefItem, DefTerm, Heading, ListItem, Region, Rule, Text    23 import re    24     25 # Regular expressions.    26     27 syntax = {    28     # Page regions:    29     "regionstart"   : r"((^\s*)([{]{3,}))",                         # {{{...    30     "regionend"     : r"^\s*([}]{3,})",                             # }}}...    31     "header"        : r"#!(.*?)\n",                                 # #! char-excl-nl    32     33     # Region contents:    34     # Line-oriented patterns:    35                       # blank line    36     "break"         : r"^(\s*?)\n",    37                       # ws... expecting text ::    38     "defterm"       : r"^(\s+)(?=.+?::)",    39                       # ws... expecting :: ws...    40     "defterm_empty" : r"^(\s+)(?=::\s+)",    41                       # [ws...] =... ws... expecting headingend    42     "heading"       : r"^(\s*)(?P<x>=+)(\s+)(?=.*?\s+(?P=x)\s*\n)",    43                       # ws... list-item [ws...]    44     "listitem"      : r"^(\s+)(\*)(\s*)",    45                       # ws... number-item ws...    
46     "listitem_num"  : r"^(\s+)(\d+\.)(\s+)",    47                       # ws... alpha-item ws...    48     "listitem_alpha": r"^(\s+)([aA]\.)(\s+)",    49                       # ws... roman-item ws...    50     "listitem_roman": r"^(\s+)([iI]\.)(\s+)",    51                       # ws... dot-item [ws...]    52     "listitem_dot"  : r"^(\s+)(\.)(\s*)",    53     54     # Region contents:    55     # Inline patterns:    56     "rule"          : r"(-----*)",                                  # ----...    57     58     # Heading contents:    59     "headingend"    : r"(\s+)(=+)(\s*\n)",                          # ws... =... [ws...] nl    60     61     # List contents:    62     "deftermend"    : r"::(\s*?\n)",    63     "deftermsep"    : r"::(\s+)",    64     "listitemend"   : r"^",                                         # next line    65     }    66     67 # Define patterns for the regular expressions.    68     69 patterns = {}    70 for name, value in syntax.items():    71     patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)    72     73     74     75 # Tokenising functions.    76     77 class TokenStream:    78     79     "A stream of tokens taken from a string."    80     81     def __init__(self, s):    82         self.s = s    83         self.pos = 0    84         self.match = None    85         self.matching = None    86     87     def read_until(self, pattern_names, remaining=True):    88     89         """    90         Find the first match for the given 'pattern_names'. Return the text    91         preceding any match, the remaining text if no match was found, or None    92         if no match was found and 'remaining' is given as a false value.    93         """    94     95         first = None    96         self.matching = None    97     98         # Find the first matching pattern.    
99    100         for pattern_name in pattern_names:   101             match = patterns[pattern_name].search(self.s, self.pos)   102             if match:   103                 start, end = match.span()   104                 if self.matching is None or start < first:   105                     first = start   106                     self.matching = pattern_name   107                     self.match = match   108    109         if self.matching is None:   110             if remaining:   111                 return self.s[self.pos:]   112             else:   113                 return None   114         else:   115             return self.s[self.pos:first]   116    117     def read_match(self, group=1):   118    119         """   120         Return the matched text, updating the position in the stream. If 'group'   121         is specified, the indicated group in a match will be returned.   122         Typically, group 1 should contain all pertinent data, but groups defined   123         within group 1 can provide sections of the data.   124         """   125    126         if self.match:   127             _start, self.pos = self.match.span()   128             try:   129                 return self.match.group(group)   130             except IndexError:   131                 return ""   132         else:   133             self.pos = len(self.s)   134             return None   135    136    137    138 # Parser functions.   139    140 def parse_page(s):   141    142     """   143     Parse page text 's'. Pages consist of regions delimited by markers.   144     """   145    146     return parse_region(TokenStream(s))   147    148 def parse_region(items, level=0, indent=0):   149    150     """   151     Parse the data provided by 'items' to populate a region with the given   152     'level' at the given 'indent'.   153     """   154    155     region = Region([], level, indent)   156    157     # Parse section headers.   
158    159     parse_region_header(items, region)   160    161     # Parse section body.   162    163     if region.is_transparent():   164         parse_region_wiki(items, region)   165     else:   166         parse_region_opaque(items, region)   167    168     return region   169    170 def parse_region_header(items, region):   171    172     """   173     Parse the region header from the 'items', setting it for the given 'region'.   174     """   175    176     if items.read_until(["header"], False) == "": # None means no header   177         region.type = items.read_match()   178    179 def parse_region_wiki(items, region):   180    181     "Parse the data provided by 'items' to populate a wiki 'region'."   182    183     new_block(region)   184     parse_region_details(items, region, [   185         "break", "heading",   186         "defterm", "defterm_empty",   187         "listitem", "listitem_alpha", "listitem_dot", "listitem_num",   188         "listitem_roman",   189         "regionstart", "regionend", "rule"])   190    191 def parse_region_opaque(items, region):   192    193     "Parse the data provided by 'items' to populate an opaque 'region'."   194    195     parse_region_details(items, region, ["regionend"])   196    197 def parse_region_details(items, region, pattern_names):   198    199     "Parse 'items' within 'region' searching using 'pattern_names'."   200    201     try:   202         while True:   203    204             # Obtain text before any marker or the end of the input.   205    206             preceding = items.read_until(pattern_names)   207             if preceding:   208                 region.append_text(Text(preceding))   209    210             # End of input.   211    212             if not items.matching:   213                 break   214    215             # Obtain any feature.   
216    217             feature = items.read_match()   218             handler = handlers.get(items.matching)   219    220             # Handle each feature or add text to the region.   221    222             if handler:   223                 handler(items, region)   224             else:   225                 region.append_text(Text(feature))   226    227     except StopIteration:   228         pass   229    230     region.normalise()   231    232 def end_region(items, region):   233    234     "End the parsing of 'region'."   235    236     raise StopIteration   237    238 def parse_break(items, region):   239    240     "Handle a paragraph break within 'region'."   241    242     region.add(Break())   243     new_block(region)   244    245 def parse_defitem(items, region, extra=""):   246    247     "Handle a definition item within 'region'."   248    249     pad = items.read_match(1)   250     item = DefItem([], pad, extra)   251     parse_region_details(items, item, ["listitemend"])   252     region.append(item)   253     new_block(region)   254    255 def parse_defterm(items, region):   256    257     "Handle a definition term within 'region'."   258    259     pad = items.read_match(1)   260     term = DefTerm([], pad)   261     parse_region_details(items, term, ["deftermend", "deftermsep"])   262     region.append(term)   263     if items.matching == "deftermsep":   264         parse_defitem(items, region)   265    266 def parse_defterm_empty(items, region):   267    268     "Handle an empty definition term within 'region'."   269    270     extra = items.read_match(1)   271     parse_region_details(items, region, ["deftermsep"])   272     parse_defitem(items, region, extra)   273    274 parse_defterm_end = end_region   275 parse_defterm_sep = end_region   276    277 def parse_heading(items, region):   278    279     "Handle a heading."   
280    281     start_extra = items.read_match(1)   282     level = len(items.read_match(2))   283     start_pad = items.read_match(3)   284     heading = Heading([], level, start_extra, start_pad)   285     parse_region_details(items, heading, ["headingend"])   286     region.append(heading)   287     new_block(region)   288    289 def parse_heading_end(items, heading):   290    291     "Handle the end of a heading."   292    293     level = len(items.read_match(2))   294     if heading.level == level:   295         heading.end_pad = items.read_match(1)   296         heading.end_extra = items.read_match(3)   297         raise StopIteration   298    299 def parse_listitem(items, region):   300    301     "Handle a list item marker within 'region'."   302    303     indent = len(items.read_match(1))   304     marker = items.read_match(2)   305     space = items.read_match(3)   306     item = ListItem([], indent, marker, space)   307     parse_region_details(items, item, ["listitemend"])   308     region.append(item)   309     new_block(region)   310    311 parse_listitem_end = end_region   312    313 def parse_rule(items, region):   314    315     "Handle a horizontal rule within 'region'."   316    317     length = len(items.read_match(1))   318     rule = Rule(length)   319     region.append(rule)   320     new_block(region)   321    322 def parse_section(items, region):   323    324     "Handle the start of a new section within 'region'."   325    326     # Parse the section and start a new block after the section.   327    328     indent = len(items.read_match(2))   329     level = len(items.read_match(3))   330     region.append(parse_region(items, level, indent))   331     new_block(region)   332    333 def parse_section_end(items, region):   334    335     "Handle the end of a new section within 'region'."   
336    337     feature = items.read_match()   338     if region.have_end(feature):   339         raise StopIteration   340     else:   341         region.append_text(Text(feature))   342    343 # Pattern handlers.   344    345 handlers = {   346     None : end_region,   347     "break" : parse_break,   348     "defterm" : parse_defterm,   349     "defterm_empty" : parse_defterm_empty,   350     "deftermend" : parse_defterm_end,   351     "deftermsep" : parse_defterm_sep,   352     "heading" : parse_heading,   353     "headingend" : parse_heading_end,   354     "listitemend" : parse_listitem_end,   355     "listitem" : parse_listitem,   356     "listitem_alpha" : parse_listitem,   357     "listitem_dot" : parse_listitem,   358     "listitem_num" : parse_listitem,   359     "listitem_roman" : parse_listitem,   360     "regionstart" : parse_section,   361     "regionend" : parse_section_end,   362     "rule" : parse_rule,   363     }   364    365 def new_block(region):   366    367     "Start a new block in 'region'."   368    369     block = Block([])   370     region.add(block)   371    372    373    374 # Top-level functions.   375    376 parse = parse_page   377    378 # vim: tabstop=4 expandtab shiftwidth=4
MoinLight

moinformat/__init__.py

moinformat/__init__.py