#!/usr/bin/env python

"""
File access.

Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from iixr.fields import *
from iixr.terms import *
from iixr.positions import *
from os import listdir, remove, rename # partition manipulation
from shutil import copy                # index updating
from os.path import join

try:
    set
except NameError:
    from sets import Set as set

# Constants.

# The order of these names is significant: the reader and writer factories
# below unpack the opened files in exactly this order.

TERM_FILENAMES = "terms", "terms_index", "positions", "positions_index"
FIELD_FILENAMES = "fields", "fields_index"

# Utility functions.

def get_partitions(pathname, prefix):

    """
    Return a set of partition names for partitions residing at the given
    'pathname' having the given 'prefix'.

    Each name in the result is the part of a matching filename that follows
    the 'prefix'.
    """

    prefix_length = len(prefix)

    partitions = set()
    for filename in listdir(pathname):
        if filename.startswith(prefix):
            partitions.add(filename[prefix_length:])
    return partitions

def get_term_partitions(pathname):

    """
    Return a set of term partition identifiers for partitions residing at the
    given 'pathname'.
    """

    return get_partitions(pathname, "terms-")

def get_field_partitions(pathname):

    """
    Return a set of field partition identifiers for partitions residing at the
    given 'pathname'.
    """

    return get_partitions(pathname, "fields-")

def get_next_partition(partitions):

    """
    Return the next unused partition number given the 'partitions' names.

    Only names consisting entirely of digits are considered. The result is one
    greater than the largest such number, or 0 where no numeric names exist.
    """

    # The [-1] fallback makes an empty collection yield 0.

    return max([int(n) for n in partitions if n.isdigit()] or [-1]) + 1

def _open_partition_files(pathname, names, partition, mode):

    """
    Open, using the given 'mode', the file for each of the given 'names' under
    'pathname' labelled according to the given 'partition', returning a list of
    the file objects in the same order as 'names'.

    Should any file fail to open, the files already opened by this call are
    closed before the exception propagates, so that no handles are left open.
    """

    files = []
    complete = False

    # A flag with try...finally is used instead of an exception handler so that
    # cleanup happens whatever is raised, and the original exception is kept.

    try:
        for name in names:
            files.append(open(join(pathname, "%s-%s" % (name, partition)), mode))
        complete = True
    finally:
        if not complete:
            for f in files:
                f.close()

    return files

def get_term_writer(pathname, partition, interval, doc_interval):

    """
    Return a term dictionary writer using files under the given 'pathname'
    labelled according to the given 'partition', using the given indexing
    'interval' for terms and 'doc_interval' for document position records.

    The files are opened for binary writing and handed to the writer objects;
    this function does not close them when it succeeds.
    """

    tdf, tdif, tpf, tpif = _open_partition_files(pathname, TERM_FILENAMES, partition, "wb")

    info_writer = TermWriter(tdf)
    index_writer = TermIndexWriter(tdif)
    positions_writer = PositionWriter(tpf)
    positions_index_writer = PositionIndexWriter(tpif)

    positions_dict_writer = PositionDictionaryWriter(positions_writer, positions_index_writer, doc_interval)

    return TermDictionaryWriter(info_writer, index_writer, positions_dict_writer, interval)

def get_field_writer(pathname, partition, interval):

    """
    Return a field dictionary writer using files under the given 'pathname'
    labelled according to the given 'partition', using the given indexing
    'interval'.

    The files are opened for binary writing and handed to the writer objects;
    this function does not close them when it succeeds.
    """

    ff, fif = _open_partition_files(pathname, FIELD_FILENAMES, partition, "wb")

    field_writer = FieldWriter(ff)
    field_index_writer = FieldIndexWriter(fif)

    return FieldDictionaryWriter(field_writer, field_index_writer, interval)

def get_term_reader(pathname, partition):

    """
    Return a term dictionary reader using files under the given 'pathname'
    labelled according to the given 'partition'.

    The files are opened for binary reading and handed to the reader objects;
    this function does not close them when it succeeds.
    """

    tdf, tdif, pf, pif = _open_partition_files(pathname, TERM_FILENAMES, partition, "rb")

    info_reader = TermReader(tdf)
    index_reader = TermIndexReader(tdif)
    position_reader = PositionReader(pf)
    position_index_reader = PositionIndexReader(pif)

    position_dict_reader = PositionDictionaryReader(position_reader, position_index_reader)

    return TermDictionaryReader(info_reader, index_reader, position_dict_reader)

def get_field_reader(pathname, partition):

    """
    Return a field dictionary reader using files under the given 'pathname'
    labelled according to the given 'partition'.

    The files are opened for binary reading and handed to the reader objects;
    this function does not close them when it succeeds.
    """

    ff, fif = _open_partition_files(pathname, FIELD_FILENAMES, partition, "rb")

    field_reader = FieldReader(ff)
    field_index_reader = FieldIndexReader(fif)

    return FieldDictionaryReader(field_reader, field_index_reader)

# Renaming.

def rename_files(pathname, names, from_partition, to_partition):

    """
    Rename, for each of the given 'names', the file under 'pathname' labelled
    with 'from_partition' so that it is labelled with 'to_partition' instead.

    Files are renamed one at a time in the order of 'names'; an error part-way
    through leaves the earlier files renamed.
    """

    for name in names:
        rename(join(pathname, "%s-%s" % (name, from_partition)), join(pathname, "%s-%s" % (name, to_partition)))

def rename_term_files(pathname, from_partition, to_partition):

    """
    Rename the term files under 'pathname' from 'from_partition' to
    'to_partition'.
    """

    rename_files(pathname, TERM_FILENAMES, from_partition, to_partition)

def rename_field_files(pathname, from_partition, to_partition):

    """
    Rename the field files under 'pathname' from 'from_partition' to
    'to_partition'.
    """

    rename_files(pathname, FIELD_FILENAMES, from_partition, to_partition)

# Removal/deletion.

def remove_files(pathname, names, partition):

    """
    Delete, for each of the given 'names', the file under 'pathname' whose
    name is formed from the name and the given 'partition' joined by a hyphen.
    """

    for stem in names:
        labelled = "%s-%s" % (stem, partition)
        remove(join(pathname, labelled))

def remove_term_files(pathname, partition):

    "Delete the term files under 'pathname' for the given 'partition'."

    remove_files(pathname, TERM_FILENAMES, partition)

def remove_field_files(pathname, partition):

    "Delete the field files under 'pathname' for the given 'partition'."

    remove_files(pathname, FIELD_FILENAMES, partition)

# Copying.

def copy_files(source, names, partition, destination, suffix):

    """
    Copy, for each of the given 'names', the file in the 'source' directory
    labelled with the given 'partition' into the 'destination' directory,
    appending 'suffix' to the name of each copy.
    """

    for stem in names:
        labelled = "%s-%s" % (stem, partition)
        origin = join(source, labelled)
        target = join(destination, labelled + suffix)
        copy(origin, target)

def copy_term_files(source, partition, destination, suffix):

    """
    Copy the term files for the given 'partition' from 'source' to
    'destination', appending 'suffix' to the name of each copy.
    """

    copy_files(source, TERM_FILENAMES, partition, destination, suffix)

def copy_field_files(source, partition, destination, suffix):

    """
    Copy the field files for the given 'partition' from 'source' to
    'destination', appending 'suffix' to the name of each copy.
    """

    copy_files(source, FIELD_FILENAMES, partition, destination, suffix)

# vim: tabstop=4 expandtab shiftwidth=4