# Index implementation that works on a Ferret::Index::Index instance it
# creates itself from the :ferret section of the aaf_configuration hash.
# Mixes in MoreLikeThis::IndexMethods.
2 class LocalIndex < AbstractIndex
3 include MoreLikeThis::IndexMethods
# Creates the index object for the given acts_as_ferret configuration hash.
# NOTE(review): the log statement below talks about *reopening* the index, so
# it presumably belongs to a reopen step rather than to construction itself --
# confirm against the complete method bodies.
5 def initialize(aaf_configuration)
# log the configured Ferret index path (aaf_configuration[:ferret][:path])
15 logger.debug "reopening index at #{aaf_configuration[:ferret][:path]}"
19 # The 'real' Ferret Index instance
# The instance is created on first use from aaf_configuration[:ferret] and
# memoized in @ferret_index. The block is attached to `returning`
# (presumably ActiveSupport's Object#returning, which yields and then returns
# its argument), so the two assignments below run every time this expression
# is evaluated, not only when the instance is first created.
22 returning @ferret_index ||= Ferret::Index::Index.new(aaf_configuration[:ferret]) do
# batch size is taken from the :reindex_batch_size setting
23 @ferret_index.batch_size = aaf_configuration[:reindex_batch_size]
# let the Ferret index log through this object's logger
24 @ferret_index.logger = logger
28 # Checks for the presence of a segments file in the index directory
29 # Rebuilds the index if none exists.
# The index directory is read from aaf_configuration[:index_dir].
30 def ensure_index_exists
31 logger.debug "LocalIndex: ensure_index_exists at #{aaf_configuration[:index_dir]}"
# the index counts as existing when "<index_dir>/segments" is a regular file
32 unless File.file? "#{aaf_configuration[:index_dir]}/segments"
# presumably creates the index directory when it is missing -- confirm in ActsAsFerret
33 ActsAsFerret::ensure_directory(aaf_configuration[:index_dir])
39 # Closes the underlying index instance
# nothing to do when no index instance has been created yet
41 @ferret_index.close if @ferret_index
43 # is raised when index already closed
# NOTE(review): the comment above refers to an error raised by #close on an
# index that is already closed -- confirm which exception class is handled.
48 # rebuilds the index from all records of the model class this index belongs
49 # to. Arguments can be given in shared index scenarios to name multiple
50 # model classes to include in the index
# The arguments are class names (they are constantized below); nested arrays
# of names are accepted because the list is flattened.
51 def rebuild_index(*models)
# always include the class this index is configured for
52 models << aaf_configuration[:class_name] unless models.include?(aaf_configuration[:class_name])
# flatten nested lists, drop duplicate names (including a duplicate of the own
# class name that the include? check above cannot see inside a nested array)
# and turn the names into class objects
53 models = models.flatten.uniq.map(&:constantize)
54 logger.debug "rebuild index: #{models.inspect}"
# Build a separate Ferret index instance for the rebuild. Its options are a
# copy of the configured Ferret options (the stored hash is not modified) with
# :auto_flush switched off and :field_infos computed for all given models.
55 index = Ferret::Index::Index.new(aaf_configuration[:ferret].dup.update(:auto_flush => false,
56 :field_infos => ActsAsFerret::field_infos(models),
58 index.batch_size = aaf_configuration[:reindex_batch_size]
# index the records of all model classes into the new index instance
60 index.index_models models
# Hands the given record ids over to the Ferret index for bulk indexing.
# The model class is resolved from the :class_name entry of the
# configuration; ids and options are passed through unchanged.
def bulk_index(ids, options)
  index = ferret_index
  model_class = aaf_configuration[:class_name].constantize
  index.bulk_index(model_class, ids, options)
end
# Parses the given query string into a Ferret Query object.
#
# Works around a Ferret bug in Index#process_query, which does not ensure
# that the index reader is open: the reader is opened explicitly first, and
# both steps run inside the index's synchronize block so they happen as one
# unit.
#
# Returns whatever the index's process_query returns for +query+.
def process_query(query)
  ferret_index.synchronize do
    # ensure_reader_open is not part of the public interface, hence #send
    ferret_index.send(:ensure_reader_open)
    ferret_index.process_query(query)
  end
end
# Total number of hits for the given query.
# To count the results of a multi_search query, specify an array of
# class names with the :multi option.
#
# All other options are passed on to the index's search call. The options
# hash given by the caller is left untouched: :multi is removed from a copy
# only, so the same hash can be reused (or be frozen).
def total_hits(query, options = {})
  search_options = options.dup
  models = search_options.delete(:multi)
  index = models ? multi_index(models) : ferret_index
  index.search(query, search_options).total_hits
end
# Works out which stored fields should be fetched for each hit, based on
# options[:lazy]. Callers in this class use the result as the list of field
# names to read from each document, and skip that step when it is nil/false.
85 def determine_lazy_fields(options = {})
86 stored_fields = options[:lazy]
# An Array given as :lazy is kept as the explicit field list. Any other truthy
# value selects the names of all fields in aaf_configuration[:ferret_fields]
# that are configured with :store => :yes.
87 if stored_fields && !(Array === stored_fields)
88 stored_fields = aaf_configuration[:ferret_fields].select { |field, config| config[:store] == :yes }.map(&:first)
90 logger.debug "stored_fields: #{stored_fields}"
94 # Queries the Ferret index to retrieve model class, id, score and the
95 # values of any fields stored in the index for each hit.
96 # If a block is given, these are yielded and the number of total hits is
97 # returned. Otherwise [total_hits, result_array] is returned.
# Each entry of result_array is a Hash with the keys :model, :id, :score and
# :data. options are passed on to the index's search_each call; the :lazy
# option additionally controls which stored fields end up in :data.
98 def find_id_by_contents(query, options = {})
# the query is parsed for the log message only when debug logging is enabled
101 logger.debug "query: #{ferret_index.process_query query}" if logger.debug?
102 lazy_fields = determine_lazy_fields options
# search_each returns the total number of hits and runs the block once per hit
104 total_hits = index.search_each(query, options) do |hit, score|
# when the class name is stored in the index, take the model name from the
# document; otherwise every hit belongs to this index's configured class
106 model = aaf_configuration[:store_class_name] ? doc[:class_name] : aaf_configuration[:class_name]
107 # fetch stored fields if lazy loading
109 lazy_fields.each { |field| data[field] = doc[field] } if lazy_fields
# presumably the block form: hand the hit to the caller's block
111 yield model, doc[:id], score, data
# presumably the non-block form: collect the hit in the result array
113 result << { :model => model, :id => doc[:id], :score => score, :data => data }
116 #logger.debug "id_score_model array: #{result.inspect}"
117 return block_given? ? total_hits : [total_hits, result]
120 # Queries multiple Ferret indexes to retrieve model class, id and score for
121 # each hit. Use the models parameter to give the list of models to search.
122 # If a block is given, the class name, id, score, the document itself and
123 # the lazily loaded field data are yielded for each hit (five values) and
# the number of total hits is returned.
# Otherwise [total_hits, result_array] is returned, where each entry of
# result_array is a Hash with the keys :model, :id, :score and :data.
# Raises if a hit's document has no class name stored.
124 def id_multi_search(query, models, options = {})
# search through the multi index covering all given models
125 index = multi_index(models)
127 lazy_fields = determine_lazy_fields options
128 total_hits = index.search_each(query, options) do |hit, score|
130 # fetch stored fields if lazy loading
132 lazy_fields.each { |field| data[field] = doc[field] } if lazy_fields
# without a stored class name a hit cannot be mapped back to its model
133 raise "':store_class_name => true' required for multi_search to work" if doc[:class_name].blank?
135 yield doc[:class_name], doc[:id], score, doc, data
137 result << { :model => doc[:class_name], :id => doc[:id], :score => score, :data => data }
140 return block_given? ? total_hits : [ total_hits, result ]
143 ######################################
144 # methods working on a single record
145 # called from instance_methods, here to simplify interfacing with the
146 # remote ferret server
147 # TODO having to pass id and class_name around like this isn't nice
148 ######################################
150 # add record to index
151 # record may be the full AR object, a Ferret document instance or a Hash
# anything that is neither a Hash nor a Ferret::Document is converted with
# #to_doc before it is handed to the index
153 record = record.to_doc unless Hash === record || Ferret::Document === record
154 ferret_index << record
# Removes a record from the index: every document matching the query built
# by query_for_record for the given id / class name is deleted.
def remove(id, class_name)
  index = ferret_index
  index.query_delete(query_for_record(id, class_name))
end
163 # highlight search terms for the record with the given id.
# Returns at most options[:num_excerpts] excerpts.
# Defaults: :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>'.
# NOTE(review): the options hash passed in is modified in place (defaults
# are merged in with reverse_merge!, and :field is assigned per field below).
164 def highlight(id, class_name, query, options = {})
165 options.reverse_merge! :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>'
# look up the document and highlight it inside one synchronize block
167 ferret_index.synchronize do
168 doc_num = document_number(id, class_name)
# highlight using the options as given
170 highlights << ferret_index.highlight(query, doc_num, options)
172 query = process_query(query) # process only once
# highlight field by field, over all configured Ferret fields
173 aaf_configuration[:ferret_fields].each_pair do |field, config|
# skip fields that are not stored or have highlighting switched off
174 next if config[:store] == :no || config[:highlight] == :no
175 options[:field] = field
176 highlights << ferret_index.highlight(query, doc_num, options)
# drop nil results, flatten the per-field results into one list and cut it
# down to the first :num_excerpts entries
180 return highlights.compact.flatten[0..options[:num_excerpts]-1]
# Looks up the Ferret document number of the record with the given id.
# The record query has to match exactly one document; for any other hit
# count an error describing the search result is raised.
def document_number(id, class_name)
  result = ferret_index.search(query_for_record(id, class_name))
  unless result.total_hits == 1
    raise "cannot determine document number for class #{class_name} / primary key: #{id}\nresult was: #{result.inspect}"
  end
  result.hits.first.doc
end
190 # build a ferret query matching only the record with the given id
191 # the class name only needs to be given in case of a shared index configuration
# NOTE(review): class_name is not used by the statement visible here --
# presumably only a shared-index variant takes it into account; confirm.
192 def query_for_record(id, class_name = nil)
# the id is matched as a string term on the :id field
193 Ferret::Search::TermQuery.new(:id, id.to_s)
# returns a MultiIndex instance operating on a MultiReader
#
# model_classes is a list of model classes or of class names; names are
# detected by looking at the first element and are constantized. The list
# passed in is not modified (it may be frozen or reused by the caller).
#
# Instances are cached in ActsAsFerret::multi_indexes under a key made of
# the concatenated class names in alphabetical order, so the same set of
# models yields the same MultiIndex regardless of the order given.
def multi_index(model_classes)
  classes = model_classes
  classes = classes.map(&:constantize) if String === classes.first
  classes = classes.sort { |a, b| a.name <=> b.name }
  key = classes.inject("") { |s, clazz| s + clazz.name }
  multi_config = aaf_configuration[:ferret].dup
  multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
  ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(classes, multi_config)
end