paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - ImprovedMoinSearch library |
paul@0 | 4 | |
paul@0 | 5 | @copyright: 2010 Paul Boddie <paul@boddie.org.uk> |
paul@0 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 7 | """ |
paul@0 | 8 | |
paul@0 | 9 | from MoinMoin.search import searchPages |
paul@0 | 10 | from MoinMoin.Page import Page |
paul@1 | 11 | from MoinMoin import wikiutil |
paul@0 | 12 | import re |
paul@0 | 13 | |
# Matches a heading line: a run of "=" characters, the heading text, and the
# same run of "=" characters closing the line. The "level" group holds the
# opening run; the "heading" group holds the text between the runs, including
# any surrounding spaces.

heading_regexp = re.compile(r"^(?P<level>=+)(?P<heading>.*?)(?P=level)$", re.UNICODE | re.MULTILINE)

# Matches a paragraph: one or more consecutive lines which do not start with
# "#", "=" or whitespace. The newline ending each line is optional so that a
# final paragraph is still found when the text does not end with a newline.

paragraph_regexp = re.compile(r"(?P<paragraph>(?:^[^#=\s].*$\n?)+)", re.UNICODE | re.MULTILINE)
paul@3 | 16 | |
def range_groups(min_name, max_name):

    """
    Return a regular expression fragment matching a parenthesised range such
    as "(1)", "(2-4)" or "()", where the optional lower limit is captured in a
    group called 'min_name' and the optional upper limit is captured in a group
    called 'max_name'. Both limits may be negative numbers.
    """

    lower = r"(?P<%s>-?\d+)?" % min_name
    upper = r"(?:\s*-\s*(?P<%s>-?\d+))?" % max_name
    return r"(?:\(" + lower + upper + r"\))"
paul@3 | 19 | |
# Each format option consists of any number of style prefixes ("link:",
# "strong:", "em:" or their aliases) followed by exactly one item: a heading,
# a page name, a paragraph or a line break.
#
# The page name alternative is tried before the paragraph alternative because
# the paragraph keyword has the single-letter alias "p": were paragraphs tried
# first, "page(...)" would be consumed as the paragraph "p" and the "page"
# alias could never match.

format_options_regexp = re.compile(
    r"("
    r"(?P<link>(link|l):)"
    r"|(?P<strong>(strong|str|bold|b):)"
    r"|(?P<em>(emphasis|em|italic|i):)"
    r")*"
    r"("
    r"(?P<heading>(heading|title|h)\s*" + range_groups("min_heading", "max_heading") + ")"
    r"|(?P<name>(name|page)\s*" + range_groups("first", "last") + ")"
    r"|(?P<paragraph>(paragraph|para|p)\s*" + r"(?:\((?P<paragraph_number>\d+)?\))?" + ")"
    r"|(?P<break>(break|br))"
    r")",
    re.UNICODE)
paul@0 | 33 | |
def convert_index(i, length):

    """
    Translate the 1-based index 'i' into a 0-based index for a sequence having
    the given 'length'.

    Positive indexes are reduced by one, negative indexes are counted back from
    the end of the sequence, and indexes of None or zero are returned
    unchanged.
    """

    # Nothing to translate for an absent or zero index.

    if i is None or i == 0:
        return i

    if i > 0:
        return i - 1

    return length + i
paul@5 | 49 | |
def getSearchResultPages(request, query, **kw):

    """
    Search using the given 'request' and 'query', passing any additional
    keyword arguments on to searchPages, and return the 'hits' attribute of the
    search results.
    """

    return searchPages(request, query, **kw).hits
paul@0 | 59 | |
paul@7 | 60 | # Action functions. |
paul@7 | 61 | |
def getFirstPageHeading(request, page, start=0, min_level=None, max_level=None):

    """
    Using the given 'request', return the first heading in the given 'page'
    from the given 'start' point (optional, defaulting to the start of the page)
    having a heading level of at least 'min_level' (which is undefined if not
    specified) and at most 'max_level' (which is undefined if not specified).

    A tuple containing the heading and the span (the start offset and the end
    offset as a tuple) is returned for a successful retrieval. The offsets in
    the span are relative to the start of the complete page body, not to the
    'start' point, so that the end offset can be used to position a subsequent
    search. Otherwise, None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()

    # Search only the text from 'start' onwards. The body is sliced so that a
    # line start is recognised at the 'start' position itself.

    if start != 0:
        body = body[start:]

    for match in heading_regexp.finditer(body):
        level = len(match.group("level"))

        if (min_level is None or level >= min_level) and \
            (max_level is None or level <= max_level):

            # Match offsets are relative to the sliced text: shift them back
            # so that they refer to the complete page body.

            match_start, match_end = match.span()
            return match.group("heading"), (start + match_start, start + match_end)

    return None
paul@0 | 89 | |
def getParagraph(request, page, start=0, number=None):

    """
    Using the given 'request', return from the given 'page', starting from the
    optional 'start' offset (or the beginning, if no such offset is specified),
    the first paragraph or, if the optional 'number' is given, the paragraph
    whose position corresponds to that number, with a number of 1 being the
    first paragraph found, 2 being the second, and so on. Numbers below 1 also
    select the first paragraph.

    A tuple containing the paragraph text and the span (the start offset and
    the end offset as a tuple) is returned for a successful retrieval. The
    offsets in the span are relative to the start of the complete page body,
    not to the 'start' offset, so that the end offset can be used to position a
    subsequent search. Otherwise, None is returned.
    """

    full_page = Page(request, page.page_name)
    body = full_page.get_raw_body()

    # Search only the text from 'start' onwards. The body is sliced so that a
    # line start is recognised at the 'start' position itself.

    if start != 0:
        body = body[start:]

    # Convert the 1-based paragraph number into a 0-based position.

    if number is None:
        wanted = 0
    else:
        wanted = max(0, number - 1)

    for i, match in enumerate(paragraph_regexp.finditer(body)):
        if i == wanted:

            # Match offsets are relative to the sliced text: shift them back
            # so that they refer to the complete page body.

            match_start, match_end = match.span()
            return match.group("paragraph"), (start + match_start, start + match_end)

    return None
paul@3 | 110 | |
def getPageName(request, page, start=0, first=None, last=None):

    """
    Return a tuple containing the name of the given 'page' and a span in which
    both offsets are the optional 'start' offset. The 'request' is accepted so
    that this function can be called in the same way as the other action
    functions.

    The page name is split into parts at each slash ("/") character. Where the
    optional 'first' or 'last' parameters are given, only the parts from
    'first' to 'last' inclusive are rejoined and returned. Part numbering
    starts at 1, and negative numbers count back from the final part, which is
    -1.
    """

    parts = page.page_name.split("/")
    count = len(parts)

    lower = convert_index(first, count)
    upper = convert_index(last, count)

    # The upper limit is inclusive, so the slice must stop one part later. A
    # missing limit leaves the corresponding end of the slice open.

    if upper is not None:
        upper = upper + 1

    selected = parts[lower:upper]

    return "/".join(selected), (start, start)
paul@3 | 142 | |
paul@7 | 143 | # Formatting styles. |
paul@7 | 144 | |
def asLink(styles, formatter, text, page):

    """
    Return 'text' formatted as a link to the given 'page' using the given
    'formatter', with any remaining 'styles' applied to the text inside the
    link.
    """

    opening = formatter.pagelink(on=1, pagename=page.page_name)

    # asText either emits the plain text or hands over to the next style.

    content = asText(styles, formatter, text, page)
    closing = formatter.pagelink(on=0)

    return u''.join([opening, content, closing])
paul@7 | 154 | |
def _asStyledText(styles, formatter, text, page, fn):

    """
    Return 'text' enclosed by the output of the formatter method 'fn', which is
    called with on=1 before the text and on=0 after it. Any remaining 'styles'
    are applied to the text inside.
    """

    opening = fn(on=1)

    # asText either emits the plain text or hands over to the next style.

    content = asText(styles, formatter, text, page)
    closing = fn(on=0)

    return u''.join([opening, content, closing])
paul@7 | 164 | |
def asStrong(styles, formatter, text, page):

    "Return 'text' styled using the strong method of the given 'formatter'."

    strong = formatter.strong
    return _asStyledText(styles, formatter, text, page, strong)
paul@7 | 167 | |
def asEmphasis(styles, formatter, text, page):

    "Return 'text' styled using the emphasis method of the given 'formatter'."

    emphasis = formatter.emphasis
    return _asStyledText(styles, formatter, text, page, emphasis)
paul@7 | 170 | |
def asText(styles, formatter, text, page):

    """
    Return 'text' as plain text produced by the given 'formatter' or, where
    further 'styles' remain, the result of applying the next of those styles.
    """

    if styles:
        return next_style(styles, formatter, text, page)

    return formatter.text(text)
paul@7 | 176 | |
def asBreak(styles, formatter, text, page):

    """
    Return a line break produced by the given 'formatter'. The 'styles', 'text'
    and 'page' parameters are not used.
    """

    line_break = formatter.linebreak(0)
    return line_break
paul@7 | 179 | |
def next_style(styles, formatter, text, page):

    """
    Call the first of the given 'styles' with the remaining styles, the
    'formatter', the 'text' and the 'page', returning its result.
    """

    style, remaining = styles[0], styles[1:]
    return style(remaining, formatter, text, page)
paul@7 | 182 | |
# A mapping from the names of the style groups in format_options_regexp to the
# functions applying those styles.

style_functions = dict(
    link=asLink,
    strong=asStrong,
    em=asEmphasis,
    )
paul@7 | 188 | |
paul@7 | 189 | # Formatting functions. |
paul@7 | 190 | |
def _getFormatActions(format):

    """
    Return a list of (action, arguments, styles) tuples describing how each
    result is to be shown according to the given 'format' string. Where the
    'format' is empty or contains no recognised options, a single action
    showing the page name as a link is returned.
    """

    actions = []

    if format:
        for match in format_options_regexp.finditer(format):

            # Apply styles by gathering style functions.

            styles = [style_functions[style]
                for style in ("strong", "em", "link") if match.group(style)]
            styles.append(asText)

            # Add actions, arguments and styles. The arguments are held in
            # lists because they are used again for every result page.

            if match.group("heading"):
                args = [int_or_none(match.group(name)) for name in ("min_heading", "max_heading")]
                actions.append((getFirstPageHeading, args, styles))
            elif match.group("paragraph"):
                args = [int_or_none(match.group("paragraph_number"))]
                actions.append((getParagraph, args, styles))
            elif match.group("name"):
                args = [int_or_none(match.group(name)) for name in ("first", "last")]
                actions.append((getPageName, args, styles))
            elif match.group("break"):
                actions.append((None, None, [asBreak]))

    # Without any usable format options, show the linked page name.

    if not actions:
        actions.append((getPageName, (), [asLink]))

    return actions

def _formatResultPage(request, formatter, page, actions):

    """
    Using the given 'request' and 'formatter', return a list of output
    fragments for the given result 'page', produced by performing each of the
    given 'actions' in turn. Fragments are separated by single spaces, and
    actions yielding no result produce no output.
    """

    output = []

    start = 0
    first = 1
    for action, args, styles in actions:

        # Handle null actions, which need no text from the page.

        if action is None:
            text, span = None, None

        # Or process requested actions: there must be a result.

        else:
            result = action(request, page, start, *args)
            if result is None:
                continue
            text, span = result

        if not first:
            output.append(" ")

        output.append(next_style(styles, formatter, text, page))

        # Position the search for the next action.

        if span is not None:
            _start, _end = span
            start = _end + 1

        first = 0

    return output

def formatResultPages(request, formatter, pages, paging, format, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the result 'pages', providing paging controls when 'paging' is set to a true
    value, and providing page details according to the given 'format'. Where
    the 'format' yields no recognised options, each page is shown as a link
    bearing the page name.

    If the optional 'page_from' parameter is set, the result pages from the
    given result (specified within a range from 0 to the length of the 'pages'
    collection) will be shown when paging is in use.
    """

    actions = _getFormatActions(format)

    # Use paging only when there are enough results.

    results_per_page = request.cfg.search_results_per_page
    paging = paging and len(pages) > results_per_page

    if paging:
        pages_to_show = pages[page_from:page_from + results_per_page]
    else:
        pages_to_show = pages

    # Prepare the output.

    output = []
    output.append(formatter.number_list(on=1, start=page_from + 1))

    for page in pages_to_show:
        output.append(formatter.listitem(on=1))
        output.extend(_formatResultPage(request, formatter, page, actions))
        output.append(formatter.listitem(on=0))

    output.append(formatter.number_list(on=0))

    # Show paging navigation.

    if paging:
        output.append(formatPagingNavigation(request, formatter, pages, page_from))

    return "".join(output)
paul@0 | 290 | |
def formatPagingNavigation(request, formatter, pages, page_from=0):

    """
    Using the given 'request' and 'formatter', return a formatted string showing
    the paging navigation for the result 'pages', according to the 'page_from'
    indicator which provides the current position in the result set.

    Each page of results is shown as a number, starting from 1. The number of
    the current page is shown as plain text, whereas the other numbers are shown
    as links to the formatter's page with the "from" query string parameter
    set to the position of the first result on the page concerned.
    """

    page = formatter.page
    pagename = page.page_name
    _ = request.getText

    results_per_page = request.cfg.search_results_per_page
    number_of_results = len(pages)

    # Use floor division so that page numbers remain integers regardless of
    # the division semantics in effect.

    current = max(0, page_from // results_per_page)

    # The final page is the one holding the last result. Dividing the number of
    # results itself would add an empty page whenever the results fill the
    # pages exactly.

    last = (number_of_results - 1) // results_per_page

    querydict = wikiutil.parseQueryString(request.query_string)

    output = []

    output.append(formatter.paragraph(on=1))
    output.append(formatter.text(_("Result pages:")))
    output.append(formatter.text(" "))

    # The current page is always shown, even if it lies beyond the results.

    for n in range(max(current, last) + 1):
        if n == current:
            output.append(formatter.text(str(n + 1)))
        else:
            output.append(formatter.pagelink(on=1, pagename=pagename, querystr=getPagingQueryString(querydict, n * results_per_page)))
            output.append(formatter.text(str(n + 1)))
            output.append(formatter.pagelink(on=0))

        output.append(formatter.text(" "))

    output.append(formatter.paragraph(on=0))

    return "".join(output)
paul@1 | 340 | |
def getPagingQueryString(querydict, page_from):

    """
    Set the "from" entry of the given 'querydict' to 'page_from' and return the
    query string made from the updated 'querydict'. Note that the 'querydict'
    supplied by the caller is itself modified.
    """

    querydict["from"] = page_from
    query_string = wikiutil.makeQueryString(querydict)
    return query_string
paul@1 | 344 | |
def int_or_none(x):

    "Return 'x' converted to an integer, or None if 'x' is None."

    if x is not None:
        return int(x)

    return None
paul@0 | 350 | |
paul@0 | 351 | # vim: tabstop=4 expandtab shiftwidth=4 |