5 # Disables ferret index updates for this model. When a block is given,
6 # Ferret will be re-enabled again after executing the block.
8 aaf_configuration[:enabled] = false
16 aaf_configuration[:enabled] = true
20 aaf_configuration[:enabled]
23 # rebuild the index from all data stored for this model.
24 # This is called automatically when no index exists yet.
26 # When calling this method manually, you can give any additional
27 # model classes that should also go into this index as parameters.
28 # Useful when using the :single_index option.
29 # Note that attributes named the same in different models will share
30 # the same field options in the shared index.
#
# models:: additional model classes to index alongside this one; this class
#          is appended when it is not already listed.
#
# Returns the value of +find_last_index_version+ for the configured
# :index_base_dir, or nil when the :remote option is set.
def rebuild_index(*models)
  models << self unless models.include?(self)
  aaf_index.rebuild_index models.map(&:to_s)
  # NOTE(review): this used to read `index_dir = find_last_index_version(...)`,
  # which only bound a throw-away local. The lookup is kept because its value
  # is this method's return value; if the intent was to switch the class to
  # the newest index directory, an explicit writer call is needed - confirm.
  find_last_index_version(aaf_configuration[:index_base_dir]) unless aaf_configuration[:remote]
end
37 # re-index a number of records specified by the given ids. Use for large
38 # indexing jobs, e.g. after modifying a lot of records with Ferret disabled.
39 # Please note that the state of Ferret (enabled or disabled at class or
40 # record level) is not checked by this method, so if you need to do so
41 # (e.g. because of a custom ferret_enabled? implementation), you have to do
44 options = Hash === ids.last ? ids.pop : {}
45 ids = ids.first if ids.size == 1 && ids.first.is_a?(Enumerable)
46 aaf_index.bulk_index(ids, options)
49 # true if our db and table appear to be suitable for the mysql fast batch
51 # http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord)
53 if connection.class.name =~ /Mysql/ && primary_key == 'id' && aaf_configuration[:mysql_fast_batches]
54 logger.info "using mysql specific batched find :all. Turn off with :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
59 # runs across all records yielding those to be indexed when the index is rebuilt
#
# batch_size:: maximum number of records fetched per query (default 1000).
#
# Two batching strategies are visible below: a loop that repeatedly fetches
# rows whose id is greater than +offset+, and a loop that steps from 0 to
# +count+ in increments of +batch_size+ using :limit/:offset, yielding each
# batch together with its offset.
# NOTE(review): the lines that choose between the two strategies and that
# initialise/advance +offset+ for the first loop are not visible in this
# excerpt - confirm against the full file.
60 def records_for_rebuild(batch_size = 1000)
# fetch the next batch of rows with an id above the current offset; the loop
# ends as soon as a query returns no rows
64 while (rows = find :all, :conditions => [ "#{table_name}.id > ?", offset ], :limit => batch_size).any?
69 # sql server adapter won't batch correctly without defined ordering
# +order+ stays nil for every other adapter
70 order = "#{primary_key} ASC" if connection.class.name =~ /SQLServer/
71 0.step(self.count, batch_size) do |offset|
72 yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
78 # yields the records with the given ids, in batches of batch_size
#
# ids::        collection of record ids; it is walked with +each_slice+.
# batch_size:: number of ids per slice (default 1000).
#
# Yields the records found for each slice together with +offset+.
# NOTE(review): +offset+ is neither initialised nor advanced in the lines
# visible here - presumably that happens on elided lines; confirm.
79 def records_for_bulk_index(ids, batch_size = 1000)
82 ids.each_slice(batch_size) do |id_slice|
83 logger.debug "########## slice: #{id_slice.join(',')}"
# NOTE(review): this query result is only used for the debug output below;
# the same query is issued a second time for the yield. The commented-out
# `yield records, offset` suggests the second find is redundant.
84 records = find( :all, :conditions => ["id in (?)", id_slice] )
85 logger.debug "########## slice records: #{records.inspect}"
86 #yield records, offset
87 yield find( :all, :conditions => ["id in (?)", id_slice] ), offset
93 # Switches this class to a new index located in dir.
94 # Used by the DRb server when switching to a new index version.
96 logger.debug "changing index dir to #{dir}"
97 aaf_configuration[:index_dir] = aaf_configuration[:ferret][:path] = dir
99 logger.debug "index dir is now #{dir}"
102 # Retrieve the index instance for this model class. This can either be a
103 # LocalIndex, or a RemoteIndex instance.
105 # Index instances are stored in a hash, using the index directory
106 # as the key. So model classes sharing a single index will share their
109 ActsAsFerret::ferret_indexes[aaf_configuration[:index_dir]] ||= create_index_instance
112 # Finds instances by searching the Ferret index. Terms are ANDed by default, use
113 # OR between terms for ORed queries. Or specify +:or_default => true+ in the
114 # +:ferret+ options hash of acts_as_ferret.
116 # You may either use the +offset+ and +limit+ options to implement your own
117 # pagination logic, or use the +page+ and +per_page+ options to use the
118 # built in pagination support which is compatible with will_paginate's view
119 # helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be
123 # page:: page of search results to retrieve
124 # per_page:: number of search results that are displayed per page
125 # offset:: first hit to retrieve (useful for paging)
126 # limit:: number of hits to retrieve, or :all to retrieve
128 # lazy:: Array of field names whose contents should be read directly
129 # from the index. Those fields have to be marked
130 # +:store => :yes+ in their field options. Give true to get all
131 # stored fields. Note that if you have a shared index, you have
132 # to explicitly state the fields you want to fetch, true won't
134 # models:: only for single_index scenarios: an Array of other Model classes to
135 # include in this search. Use :all to query all models.
136 # multi:: Specify additional model classes to search through. Each of
137 # these, as well as this class, has to have the
138 # :store_class_name option set to true. This option replaces the
139 # multi_search method.
141 # +find_options+ is a hash passed on to active_record's find when
142 # retrieving the data from db, useful e.g. to prefetch relationships with
143 # :include or to specify additional filter criteria with :conditions.
145 # This method returns a +SearchResults+ instance, which really is an Array that has
146 # been decorated with a total_hits attribute holding the total number of hits.
147 # Additionally, SearchResults is compatible with the pagination helper
148 # methods of the will_paginate plugin.
150 # Please keep in mind that the number of results delivered might be less than
151 # +limit+ if you specify any active record conditions that further limit
152 # the result. Use +limit+ and +offset+ as AR find_options instead.
153 # +page+ and +per_page+ are supposed to work regardless of any
154 # +conditions+ present in +find_options+.
#
# q::            the query handed on to the search helpers.
# options::      search options (see above).
# find_options:: options for the ActiveRecord find.
#
# Returns SearchResults.new(result, total_hits, options[:page], options[:per_page]).
#
# NOTE(review): both +options+ and +find_options+ are modified in place (keys
# are set and deleted on the hashes passed in by the caller).
# NOTE(review): several structural lines (else/end and at least one condition)
# are not visible in this excerpt, so the exact nesting of the branches below
# should be confirmed against the full file.
155 def find_with_ferret(q, options = {}, find_options = {})
156 if options[:per_page]
# page defaults to 1; limit and offset are derived from page and per_page
157 options[:page] = options[:page] ? options[:page].to_i : 1
158 limit = options[:per_page]
159 offset = (options[:page] - 1) * limit
160 if find_options[:conditions] && !options[:multi]
# conditions on a single-model search: the database applies limit and offset
# while the index search itself is unlimited (:all)
161 find_options[:limit] = limit
162 find_options[:offset] = offset
163 options[:limit] = :all
164 options.delete :offset
166 # do pagination with ferret (or after everything is done in the case
168 options[:limit] = limit
169 options[:offset] = offset
171 elsif find_options[:conditions]
173 # multisearch ignores find_options limit and offset
174 options[:limit] ||= find_options.delete(:limit)
175 options[:offset] ||= find_options.delete(:offset)
177 # let the db do the limiting and offsetting for single-table searches
178 unless options[:limit] == :all
179 find_options[:limit] ||= options.delete(:limit)
181 find_options[:offset] ||= options.delete(:offset)
182 options[:limit] = :all
# without :multi a single-model search runs; otherwise the :multi entry is
# removed from options and handed to the multi-model search
186 total_hits, result = if options[:multi].blank?
187 find_records_lazy_or_not q, options, find_options
189 _multi_search q, options.delete(:multi), options, find_options
191 logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
192 SearchResults.new(result, total_hits, options[:page], options[:per_page])
# find_by_contents is kept as a second name for find_with_ferret
194 alias find_by_contents find_with_ferret
198 # Returns the total number of hits for the given query
199 # To count the results of a query across multiple models, specify an array of
200 # class names with the :multi option.
202 # Note that since we don't query the database here, this method won't deliver
203 # the expected results when used on an AR association.
#
# q::       the query, passed on unchanged to the index.
# options:: search options; modified in place (:multi is rewritten).
#
# Returns whatever aaf_index.total_hits returns for the query.
# NOTE(review): the condition guarding the deprecated :models branch is not
# visible in this excerpt - presumably it checks options[:models]; confirm.
204 def total_hits(q, options={})
206 # backwards compatibility
207 logger.warn "the :models option of total_hits is deprecated, please use :multi instead"
208 options[:multi] = options[:models]
# when :multi is given it is replaced by a list of class names (as strings)
# that also contains this class
210 if models = options[:multi]
211 options[:multi] = add_self_to_model_list_if_necessary(models).map(&:to_s)
213 aaf_index.total_hits(q, options)
216 # Finds instance model name, ids and scores by contents.
217 # Useful e.g. if you want to search across models or do not want to fetch
218 # all result records (yet).
220 # Options are the same as for find_by_contents
222 # A block can be given too, it will be executed with every result:
223 # find_id_by_contents(q, options) do |model, id, score|
225 # scores_by_id[id] = score
227 # NOTE: in case a block is given, only the total_hits value will be returned
228 # instead of the [total_hits, results] array!
def find_id_by_contents(q, options = {}, &block)
  # map the deprecated :num_docs / :first_doc keys before querying
  deprecated_options_support(options)
  # the index does the actual work; an optional block is handed straight on
  index = aaf_index
  index.find_id_by_contents(q, options, &block)
end
236 # returns an array of hashes, each containing :class_name,
237 # :id and :score for a hit.
239 # if a block is given, class_name, id and score of each hit will
240 # be yielded, and the total number of hits is returned.
def id_multi_search(query, additional_models = [], options = {}, &proc)
  # map the deprecated :num_docs / :first_doc keys before querying
  deprecated_options_support(options)
  searched = add_self_to_model_list_if_necessary(additional_models)
  index = aaf_index
  # the index is given class names rather than class objects
  class_names = searched.map { |model| model.to_s }
  index.id_multi_search(query, class_names, options, &proc)
end
# Searches across this class and +additional_models+.
#
# Two code paths are visible: one that wraps each hit in a FerretResult (and
# warns that find_options are ignored), and one that collects ids per model,
# loads the records via retrieve_records and then applies limit/offset in
# Ruby.
# NOTE(review): the branch condition and the initialisation of +result+,
# +id_arrays+ and +rank+ are not visible in this excerpt - confirm against
# the full file.
250 def _multi_search(query, additional_models = [], options = {}, find_options = {})
254 logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
255 total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
256 result << FerretResult.new(model, id, score, data)
# limit and offset are taken out of +options+ (mutating the caller's hash)
# and the index is asked for all hits; paging is applied to the records below
262 limit = options.delete(:limit)
263 offset = options.delete(:offset) || 0
264 options[:limit] = :all
265 total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
# per model: id => [ rank, score ]
266 id_arrays[model] ||= {}
267 id_arrays[model][id] = [ rank += 1, score ]
269 result = retrieve_records(id_arrays, find_options)
# with conditions the number of loaded records replaces the index hit count
270 total_hits = result.size if find_options[:conditions]
271 # total_hits += offset if offset
272 if limit && limit != :all
# NOTE(review): Array#[] with a range returns nil when the start index lies
# beyond the end of the array, so +result+ can become nil for a large offset.
273 result = result[offset..limit+offset-1]
# Normalises +models+ to an Array that contains this class.
#
# models:: a single model or an Array of models. An Array argument is
#          extended in place when this class is missing from it.
#
# Returns the Array of models, always including self.
def add_self_to_model_list_if_necessary(models)
  models = [ models ] unless models.is_a? Array
  models << self unless models.include?(self)
  # Return the list explicitly: a trailing `<< ... unless` evaluates to nil
  # when self is already present, and callers chain #map on the result.
  models
end
# Dispatches a single-model search either to lazy_find_by_contents (which
# gets no find_options, hence the warning) or to ar_find_by_contents.
# NOTE(review): the condition selecting the lazy path is not visible in this
# excerpt - the warning text suggests it is the :lazy option; confirm.
285 def find_records_lazy_or_not(q, options = {}, find_options = {})
287 logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
288 lazy_find_by_contents q, options
290 ar_find_by_contents q, options, find_options
# Runs the query against the index, loads the matching records through
# retrieve_records and returns [ total_hits, result ].
#
# NOTE(review): the initialisation/reset of +result_ids+ and some else/end
# lines are not visible in this excerpt - confirm against the full file.
294 def ar_find_by_contents(q, options = {}, find_options = {})
296 total_hits = find_id_by_contents(q, options) do |model, id, score, data|
297 # stores ids, index and score of each hit for later ordering of
# id => [ position of the hit (1-based), score ]
299 result_ids[id] = [ result_ids.size + 1, score ]
302 result = retrieve_records( { self.name => result_ids }, find_options )
304 # count total_hits via sql when using conditions or when we're called
305 # from an ActiveRecord association.
# the association case is detected by looking for active_record/associations
# in the call stack
306 if find_options[:conditions] or caller.find{ |call| call =~ %r{active_record/associations} }
307 # chances are the ferret result count is not our total_hits value, so
308 # we correct this here.
309 if options[:limit] != :all || options[:page] || options[:offset] || find_options[:limit] || find_options[:offset]
310 # our ferret result has been limited, so we need to re-run that
311 # search to get the full result set from ferret.
# note: options.update changes the caller's options hash in place
313 find_id_by_contents(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data|
314 result_ids[id] = [ result_ids.size + 1, score ]
316 # Now ask the database for the total size of the final result set.
317 total_hits = count_records( { self.name => result_ids }, find_options )
319 # what we got from the database is our full result set, so take
321 total_hits = result.length
325 [ total_hits, result ]
# Runs the query against the index and wraps every hit in a FerretResult
# instead of loading records; returns [ total_hits, result ].
# NOTE(review): +result+ is not initialised in the lines visible here -
# presumably an empty Array is set up on an elided line; confirm.
328 def lazy_find_by_contents(q, options = {})
330 total_hits = find_id_by_contents(q, options) do |model, id, score, data|
331 result << FerretResult.new(model, id, score, data)
333 [ total_hits, result ]
# Resolves +model+ through #constantize and calls +find+ on the result with
# the given id and find_options.
def model_find(model, id, find_options = {})
  klass = model.constantize
  klass.find(id, find_options)
end
341 # retrieves search result records from a data structure like this:
342 # { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] }
344 # TODO: in case of STI AR will filter out hits from other
345 # classes for us, but this
346 # will lead to less results retrieved --> scoping of ferret query
347 # to self.class is still needed.
348 # from the ferret ML (thanks Curtis Hatter)
349 # > I created a method in my base STI class so I can scope my query. For scoping
350 # > I used something like the following line:
352 # > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
354 # > Though you could make it more generic by simply asking
355 # > "self.descends_from_active_record?" which is how rails decides if it should
356 # > scope your "find" query for STI models. You can check out "base.rb" in
357 # > activerecord to see that.
358 # but maybe better do the scoping in find_id_by_contents...
#
# id_arrays::    Hash of model name => { id => [ rank, score ] }.
# find_options:: options merged into each model's find call.
#
# For every model with hits, the records are loaded with an
# "<table>.<primary key> in (?)" condition combined with the caller's
# conditions, and each record gets ferret_rank and ferret_score assigned.
# NOTE(review): the initialisation of +result+, the begin/rescue around
# constantize, the bind value of the id condition and the final return
# expression are not visible in this excerpt - confirm against the full file.
359 def retrieve_records(id_arrays, find_options = {})
361 # get objects for each model
362 id_arrays.each do |model, id_array|
363 next if id_array.empty?
365 model = model.constantize
367 raise "Please use ':store_class_name => true' if you want to use multi_search.\n#{$!}"
371 conditions = combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)",
373 find_options[:conditions])
375 # check for include association that might only exist on some models in case of multi_search
376 filtered_include_options = []
377 if include_options = find_options[:include]
378 include_options = [ include_options ] unless include_options.respond_to?(:each)
379 include_options.each do |include_option|
# keep an include only if this model has a reflection of that name; for a
# Hash include the first key is used as the name
380 filtered_include_options << include_option if model.reflections.has_key?(include_option.is_a?(Hash) ? include_option.keys[0].to_sym : include_option.to_sym)
383 filtered_include_options = nil if filtered_include_options.empty?
386 tmp_result = model.find(:all, find_options.merge(:conditions => conditions,
387 :include => filtered_include_options))
389 # set scores and rank
390 tmp_result.each do |record|
# id_array is keyed by the record id as a String
391 record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
393 # merge with result array
394 result.concat tmp_result
397 # order results as they were found by ferret, unless an AR :order
399 result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
# Counts, per model, the records whose primary key is among the given ids and
# that also match find_options[:conditions]; :limit and :offset are dropped
# before counting.
#
# id_arrays::    Hash of model name => { id => [ rank, score ] }.
# find_options:: options for the count; only a merged copy is modified in the
#                lines visible here.
#
# NOTE(review): +count_options+ is prepared below but the visible code then
# builds +opts+ from +find_options+ and strips :limit/:offset again, so
# +count_options+ appears to be unused.
# NOTE(review): the initialisation of +count+, the begin/rescue structure and
# the return expression are not visible in this excerpt - confirm.
403 def count_records(id_arrays, find_options = {})
404 count_options = find_options.dup
405 count_options.delete :limit
406 count_options.delete :offset
408 id_arrays.each do |model, id_array|
409 next if id_array.empty?
411 model = model.constantize
413 conditions = combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
414 find_options[:conditions])
415 opts = find_options.merge :conditions => conditions
416 opts.delete :limit; opts.delete :offset
417 count += model.count opts
419 raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
# Maps the deprecated :num_docs and :first_doc options onto :limit and
# :offset. A warning is logged for each deprecated key that is present; an
# already present :limit / :offset takes precedence. +options+ is modified
# in place.
def deprecated_options_support(options)
  legacy_limit = options[:num_docs]
  if legacy_limit
    logger.warn ":num_docs is deprecated, use :limit instead!"
    options[:limit] ||= legacy_limit
  end

  legacy_offset = options[:first_doc]
  return unless legacy_offset

  logger.warn ":first_doc is deprecated, use :offset instead!"
  options[:offset] ||= legacy_offset
end
436 # creates a new Index instance.
# The class to instantiate is selected by an if/elsif expression on the
# :remote and :single_index settings, and +new+ is called on the selected
# class with the whole aaf_configuration.
# NOTE(review): the class names chosen in each branch are on lines not
# visible in this excerpt - confirm against the full file.
437 def create_index_instance
438 if aaf_configuration[:remote]
440 elsif aaf_configuration[:single_index]
# `end.new(...)` calls new on whatever class the if-expression evaluated to
444 end.new(aaf_configuration)
447 # combine our conditions with those given by user, if any
#
# conditions::            Array of [ sql_fragment, *bind_values ]. Its first
#                         element is extended in place and the Array itself
#                         is returned.
# additional_conditions:: user supplied conditions: a SQL String or an Array
#                         of [ sql_fragment, *bind_values ]. nil, empty and
#                         blank values leave +conditions+ untouched. The
#                         caller's Array is not modified.
#
# The user fragment is wrapped in parentheses so that an OR inside it cannot
# escape the condition it is ANDed with. Plain Ruby replaces ActiveSupport's
# +returning+ and String#any?, which are not available on current Ruby/Rails.
#
# Raises TypeError when the fragment is not a String (e.g. Hash conditions,
# which cannot be merged into a SQL string here).
def combine_conditions(conditions, additional_conditions = [])
  extra = additional_conditions.is_a?(Array) ? additional_conditions.dup : [ additional_conditions ]
  fragment = extra.shift
  blank = !fragment ||
          (fragment.respond_to?(:empty?) && fragment.empty?) ||
          (fragment.is_a?(String) && fragment.strip.empty?)
  unless blank
    # String#+ rejects non-String fragments before anything is modified
    clause = " and (" + fragment + ")"
    conditions.first << clause
    conditions.concat(extra)
  end
  conditions
end