1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/examples/Common/Dictionary/Dict.py Fri Sep 09 17:33:19 2005 +0000
1.3 @@ -0,0 +1,70 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"A simple file indexer."
1.7 +
1.8 +import codecs
1.9 +
class Dict:

    """
    A prefix index over the whitespace-separated words of a text file.

    The index is a nested dictionary: each key is a single character mapping
    to a tuple (sub_index, words), where 'sub_index' is the index for the
    following character and 'words' is the list of words ending exactly at
    this character.
    """

    def __init__(self, dict_location, encoding=None):

        """
        Initialise the dictionary using the file at 'dict_location', decoding
        it with the given 'encoding' if one is specified. The file is read and
        indexed immediately.
        """

        self.dict_location = dict_location
        self.encoding = encoding

        # Initialisation.

        self.index = self.get_index()

    def get_index(self):

        "Return a dictionary containing an index structure for the dict."

        if self.encoding is None:
            f = open(self.dict_location)
        else:
            f = codecs.open(self.dict_location, encoding=self.encoding)

        # Make sure the file is closed even if reading or decoding fails.

        try:
            s = f.read()
        finally:
            f.close()

        index = {}

        for token in s.split():
            slot = index
            for c in token:
                if c not in slot:
                    slot[c] = {}, []
                slot, words = slot[c]

            # Here, 'words' is the list belonging to the token's final
            # character; each distinct token is recorded only once.

            if token not in words:
                words.append(token)

        return index

    def find(self, pattern):

        """
        Find words beginning with the given 'pattern', returning them in a new
        list. An empty list is returned if no indexed word has this prefix; an
        empty 'pattern' yields every indexed word.
        """

        slot = self.index
        words = []

        for c in pattern:
            if c not in slot:
                return []
            slot, words = slot[c]

        # The exact match (if any) comes first, followed by all longer words.

        return words + self.get_all_words(slot)

    def get_all_words(self, slot):

        "Get all words under the given index 'slot'."

        all_words = []
        for this_slot, words in slot.values():
            all_words += words
            all_words += self.get_all_words(this_slot)
        return all_words
1.72 +
1.73 +# vim: tabstop=4 expandtab shiftwidth=4