1.1 --- a/iixr/filesystem.py Sat Feb 12 01:23:58 2011 +0100
1.2 +++ b/iixr/filesystem.py Sun Feb 13 02:49:55 2011 +0100
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 File access.
1.6
1.7 -Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -18,9 +18,7 @@
1.13 with this program. If not, see <http://www.gnu.org/licenses/>.
1.14 """
1.15
1.16 -from iixr.fields import *
1.17 from iixr.terms import *
1.18 -from iixr.positions import *
1.19 from os import listdir, remove, rename # partition manipulation
1.20 from shutil import copy # index updating
1.21 from os.path import join
1.22 @@ -32,8 +30,7 @@
1.23
1.24 # Constants.
1.25
1.26 -TERM_FILENAMES = "terms", "terms_index", "positions", "positions_index"
1.27 -FIELD_FILENAMES = "fields", "fields_index"
1.28 +TERM_FILENAMES = "terms",
1.29
1.30 # Utility functions.
1.31
1.32 @@ -49,7 +46,9 @@
1.33 partitions = set()
1.34 for filename in listdir(pathname):
1.35 if filename.startswith(prefix):
1.36 - partitions.add(filename[prefix_length:])
1.37 + partition = filename[prefix_length:]
1.38 + if partition.isdigit():
1.39 + partitions.add(int(partition))
1.40 return partitions
1.41
1.42 def get_term_partitions(pathname):
1.43 @@ -61,95 +60,40 @@
1.44
1.45 return get_partitions(pathname, "terms-")
1.46
1.47 -def get_field_partitions(pathname):
1.48 +def get_next_partition(partitions):
1.49 + return max(partitions or [-1]) + 1
1.50 +
1.51 +def get_term_writer(pathname, partition):
1.52
1.53 """
1.54 - Return a set of field partition identifiers for partitions residing at the
1.55 - given 'pathname'.
1.56 - """
1.57 -
1.58 - return get_partitions(pathname, "fields-")
1.59 -
1.60 -def get_next_partition(partitions):
1.61 - return max([int(n) for n in partitions if n.isdigit()] or [-1]) + 1
1.62 -
1.63 -def get_term_writer(pathname, partition, interval, doc_interval):
1.64 -
1.65 - """
1.66 - Return a term dictionary writer using files under the given 'pathname'
1.67 - labelled according to the given 'partition', using the given indexing
1.68 - 'interval' for terms and 'doc_interval' for document position records.
1.69 + Return a term writer using files under the given 'pathname' labelled
1.70 + according to the given 'partition'.
1.71 """
1.72
1.73 - tdf = open(join(pathname, "terms-%s" % partition), "wb")
1.74 - info_writer = TermWriter(tdf)
1.75 -
1.76 - tdif = open(join(pathname, "terms_index-%s" % partition), "wb")
1.77 - index_writer = TermIndexWriter(tdif)
1.78 -
1.79 - tpf = open(join(pathname, "positions-%s" % partition), "wb")
1.80 - positions_writer = PositionWriter(tpf)
1.81 -
1.82 - tpif = open(join(pathname, "positions_index-%s" % partition), "wb")
1.83 - positions_index_writer = PositionIndexWriter(tpif)
1.84 -
1.85 - positions_dict_writer = PositionDictionaryWriter(positions_writer, positions_index_writer, doc_interval)
1.86 -
1.87 - return TermDictionaryWriter(info_writer, index_writer, positions_dict_writer, interval)
1.88 + f = open(join(pathname, "terms-%s" % partition), "wb")
1.89 + return TermWriter(f)
1.90
1.91 -def get_field_writer(pathname, partition, interval):
1.92 -
1.93 - """
1.94 - Return a field dictionary writer using files under the given 'pathname'
1.95 - labelled according to the given 'partition', using the given indexing
1.96 - 'interval'.
1.97 - """
1.98 -
1.99 - ff = open(join(pathname, "fields-%s" % partition), "wb")
1.100 - field_writer = FieldWriter(ff)
1.101 -
1.102 - fif = open(join(pathname, "fields_index-%s" % partition), "wb")
1.103 - field_index_writer = FieldIndexWriter(fif)
1.104 -
1.105 - return FieldDictionaryWriter(field_writer, field_index_writer, interval)
1.106 +def get_reader(pathname, name, partition, cls):
1.107 + f = open(join(pathname, "%s-%s" % (name, partition)), "rb")
1.108 + return cls(f)
1.109
1.110 def get_term_reader(pathname, partition):
1.111
1.112 """
1.113 - Return a term dictionary reader using files under the given 'pathname'
1.114 + Return a term reader using files under the given 'pathname' labelled
1.115 + according to the given 'partition'.
1.116 + """
1.117 +
1.118 + return get_reader(pathname, "terms", partition, TermIterator)
1.119 +
1.120 +def get_term_data_reader(pathname, partition):
1.121 +
1.122 + """
1.123 + Return a term plus data reader using files under the given 'pathname'
1.124 labelled according to the given 'partition'.
1.125 """
1.126
1.127 - tdf = open(join(pathname, "terms-%s" % partition), "rb")
1.128 - info_reader = TermReader(tdf)
1.129 -
1.130 - tdif = open(join(pathname, "terms_index-%s" % partition), "rb")
1.131 - index_reader = TermIndexReader(tdif)
1.132 -
1.133 - pf = open(join(pathname, "positions-%s" % partition), "rb")
1.134 - position_reader = PositionReader(pf)
1.135 -
1.136 - pif = open(join(pathname, "positions_index-%s" % partition), "rb")
1.137 - position_index_reader = PositionIndexReader(pif)
1.138 -
1.139 - position_dict_reader = PositionDictionaryReader(position_reader, position_index_reader)
1.140 -
1.141 - return TermDictionaryReader(info_reader, index_reader, position_dict_reader)
1.142 -
1.143 -def get_field_reader(pathname, partition):
1.144 -
1.145 - """
1.146 - Return a field dictionary reader using files under the given 'pathname'
1.147 - labelled according to the given 'partition'.
1.148 - """
1.149 -
1.150 - ff = open(join(pathname, "fields-%s" % partition), "rb")
1.151 - field_reader = FieldReader(ff)
1.152 -
1.153 - fif = open(join(pathname, "fields_index-%s" % partition), "rb")
1.154 - field_index_reader = FieldIndexReader(fif)
1.155 -
1.156 - return FieldDictionaryReader(field_reader, field_index_reader)
1.157 + return get_reader(pathname, "terms", partition, TermDataIterator)
1.158
1.159 # Renaming.
1.160
1.161 @@ -160,9 +104,6 @@
1.162 def rename_term_files(pathname, from_partition, to_partition):
1.163 rename_files(pathname, TERM_FILENAMES, from_partition, to_partition)
1.164
1.165 -def rename_field_files(pathname, from_partition, to_partition):
1.166 - rename_files(pathname, FIELD_FILENAMES, from_partition, to_partition)
1.167 -
1.168 # Removal/deletion.
1.169
1.170 def remove_files(pathname, names, partition):
1.171 @@ -172,9 +113,6 @@
1.172 def remove_term_files(pathname, partition):
1.173 remove_files(pathname, TERM_FILENAMES, partition)
1.174
1.175 -def remove_field_files(pathname, partition):
1.176 - remove_files(pathname, FIELD_FILENAMES, partition)
1.177 -
1.178 # Copying.
1.179
1.180 def copy_files(source, names, partition, destination, suffix):
1.181 @@ -185,7 +123,4 @@
1.182 def copy_term_files(source, partition, destination, suffix):
1.183 copy_files(source, TERM_FILENAMES, partition, destination, suffix)
1.184
1.185 -def copy_field_files(source, partition, destination, suffix):
1.186 - copy_files(source, FIELD_FILENAMES, partition, destination, suffix)
1.187 -
1.188 # vim: tabstop=4 expandtab shiftwidth=4