1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/tests/Dict.py Mon Sep 26 21:59:16 2005 +0000
1.3 @@ -0,0 +1,91 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"A simple file indexer."
1.7 +
1.8 +import codecs
1.9 +
class Indexer:

    """Build a character-by-character prefix index of the words in a file.

    The index is a nested dictionary.  Each key is a single character and
    each value is a 2-tuple ``(sub_index, words)``: ``sub_index`` is a
    dictionary of the same shape covering the following character, and
    ``words`` is a list holding the tokens whose final character is reached
    at exactly this point.
    """

    def __init__(self, dict_location, encoding=None):

        """Read the file at 'dict_location' and index it immediately.

        If 'encoding' is None the file is opened with the built-in open();
        otherwise it is opened with codecs.open() using that encoding.
        """

        self.dict_location = dict_location
        self.encoding = encoding

        # Initialisation.

        self.index = self.make_index()

    def get_index(self):

        "Return the index built when this object was created."

        return self.index

    def make_index(self):

        """Return a dictionary containing an index structure for the dict.

        The whole file is read and split on whitespace; every resulting
        token is inserted into the nested index one character at a time.
        Any error raised while opening or reading the file propagates to
        the caller.
        """

        if self.encoding is None:
            f = open(self.dict_location)
        else:
            f = codecs.open(self.dict_location, encoding=self.encoding)

        # Close the file even if reading or decoding fails.
        with f:
            s = f.read()

        index = {}

        for token in s.split():
            slot = index
            # str.split() never yields empty tokens, so this loop always
            # runs at least once and binds 'words'.
            for c in token:
                # 'in' replaces dict.has_key(), which Python 3 removed.
                if c not in slot:
                    slot[c] = {}, []
                slot, words = slot[c]

            # 'words' now belongs to the token's last character; a token
            # repeated in the file is recorded only once.
            if token not in words:
                words.append(token)

        return index
1.47 +
class Searcher:

    """Look up words by prefix in a nested character index.

    The index maps each character to a 2-tuple ``(sub_index, words)``,
    where ``sub_index`` has the same shape for the next character and
    ``words`` lists the words ending at that point.
    """

    def __init__(self, index):

        "Remember the 'index' to be searched."

        self.index = index

    def find(self, pattern):

        """Find words beginning with the given 'pattern'.

        Return a new list containing the words stored at the position
        reached by 'pattern', followed by every word stored beneath it in
        sorted-character order.  An empty list is returned if no indexed
        word starts with 'pattern'; an empty 'pattern' yields every word
        in the index.
        """

        slot = self.index
        words = []

        for c in pattern:
            # 'in' replaces dict.has_key(), which Python 3 removed.
            if c not in slot:
                return []
            slot, words = slot[c]

        # Build a fresh list so callers cannot mutate the index's own lists.
        results = list(words)
        results.extend(self.get_all_words(slot))
        return results

    def get_all_words(self, slot):

        """Get all words under the given index 'slot'.

        Characters are visited in sorted order; for each one its own words
        come first, then the words of everything nested below it.
        """

        all_words = []
        # sorted() works on Python 3 dict views, where keys().sort() fails.
        for c in sorted(slot):
            this_slot, words = slot[c]
            all_words.extend(words)
            all_words.extend(self.get_all_words(this_slot))
        return all_words
1.81 +
def update(index1, index2):

    """Merge the contents of 'index2' into 'index1' in place.

    Both arguments are nested indexes mapping a character to a 2-tuple
    ``(sub_index, words)``.  Entries missing from 'index1' are adopted
    from 'index2' as-is, so the adopted sub-trees are shared between the
    two indexes rather than copied.  Where both indexes have an entry, the
    words from 'index2' not already present are appended and the
    sub-indexes are merged recursively.  Nothing is returned.
    """

    for key in index2:
        # 'in' replaces dict.has_key(), which Python 3 removed.
        if key not in index1:
            index1[key] = index2[key]
        else:
            slot1, words1 = index1[key]
            slot2, words2 = index2[key]
            for word in words2:
                if word not in words1:
                    words1.append(word)
            update(slot1, slot2)
1.93 +
1.94 +# vim: tabstop=4 expandtab shiftwidth=4