1 module ActsAsFerret #:nodoc:
3 # This module defines the acts_as_ferret method and is included into
# Always answers false.
# NOTE(review): presumably consulted by the Rails dependency reloading
# mechanism so that this module is not reloaded between requests - confirm.
8 def reloadable?; false end
10 # declares a class as ferret-searchable.
13 # fields:: names all fields to include in the index. If not given,
14 # all attributes of the class will be indexed. You may also give
15 # symbols pointing to instance methods of your model here, i.e.
16 # to retrieve and index data from a related model.
18 # additional_fields:: names fields to include in the index, in addition
19 # to those derived from the db schema. Use this if you want
20 # to add custom fields derived from methods to the db
21 # fields (which will be picked up by aaf). This option will
22 # be ignored when the fields option is given; in that
23 # case additional fields get specified there.
25 # index_dir:: declares the directory where to put the index for this class.
26 # The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
27 # The index directory will be created if it doesn't exist.
29 # single_index:: set this to true to let this class use a Ferret
30 # index that is shared by all classes having :single_index set to true.
31 # :store_class_name is set to true implicitly, as well as index_dir, so
32 # don't bother setting these when using this option. The shared index
33 # will be located in index/<RAILS_ENV>/shared.
35 # store_class_name:: to make search across multiple models (with either
36 # single_index or the multi_search method) useful, set
37 # this to true. the model class name will be stored in a keyword field
40 # reindex_batch_size:: reindexing is done in batches of this size, default is 1000
41 # mysql_fast_batches:: set this to false to disable the faster mysql batching
42 # algorithm if this model uses a non-integer primary key named
45 # ferret:: Hash of Options that directly influence the way the Ferret engine works. You
46 # can use most of the options the Ferret::I class accepts here, too. Among the
49 # or_default:: whether query terms are required by
50 # default (the default, false), or not (true)
52 # analyzer:: the analyzer to use for query parsing (default: nil,
53 # which means the ferret StandardAnalyzer gets used)
55 # default_field:: use to set one or more fields that are searched for query terms
56 # that don't have an explicit field list. This list should *not*
57 # contain any untokenized fields. If it does, you're asking
58 # for trouble (i.e. not getting results for queries having
59 # stop words in them). Aaf by default initializes the default field
60 # list to contain all tokenized fields. If you use :single_index => true,
61 # you really should set this option specifying your default field
62 # list (which should be equal in all your classes sharing the index).
63 # Otherwise you might get incorrect search results and you won't get
64 # any lazy loading of stored field data.
66 # For backward compatibility you can also specify the Ferret options in the second parameter hash (ferret_options).
68 def acts_as_ferret(options={}, ferret_options={})
# NOTE(review): this excerpt appears to omit some source lines (for example
# block terminators and branch keywords); the comments added below describe
# only the statements that are visible here.
#
# Remote mode is the default: a missing :remote option is treated as true.
70 options[:remote] = true if options[:remote].nil?
72 # force local mode if running *inside* the Ferret server - somewhere the
73 # real indexing has to be done after all :-)
74 # Usually the automatic detection of server mode works fine, however if you
75 # require your model classes in environment.rb they will get loaded before the
76 # DRb server is started, so this code is executed too early and detection won't
77 # work. In this case you'll get endless loops resulting in "stack level too deep"
79 # To get around this, start the DRb server with the environment variable
80 # FERRET_USE_LOCAL_INDEX set to '1'.
81 logger.debug "Asked for a remote server ? #{options[:remote].inspect}, ENV[\"FERRET_USE_LOCAL_INDEX\"] is #{ENV["FERRET_USE_LOCAL_INDEX"].inspect}, looks like we are#{ActsAsFerret::Remote::Server.running || ENV['FERRET_USE_LOCAL_INDEX'] ? '' : ' not'} the server"
# Drop the :remote option (local mode) when FERRET_USE_LOCAL_INDEX is set or
# ActsAsFerret::Remote::Server.running is truthy.
82 options.delete(:remote) if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
# A :remote value that does not match the druby prefix (such as the default
# true) is replaced by the uri obtained from ActsAsFerret::Remote::Config.
84 if options[:remote] && options[:remote] !~ /^druby/
85 # read server location from config/ferret_server.yml
# NOTE(review): the inline rescue turns any StandardError raised while
# building the config or reading its uri into nil without logging it.
86 options[:remote] = ActsAsFerret::Remote::Config.new.uri rescue nil
90 logger.info "Will use remote index server which should be available at #{options[:remote]}"
92 logger.info "Will use local index."
# Mix in the shared index class methods only when :single_index was requested.
97 extend SharedIndexClassMethods if options[:single_index]
99 include InstanceMethods
100 include MoreLikeThis::InstanceMethods
# Register ferret_create, ferret_update and ferret_destroy as callbacks for
# the create, update and destroy events of the model.
103 after_create :ferret_create
104 after_update :ferret_update
105 after_destroy :ferret_destroy
# Class level accessor holding the configuration hash that is built below.
107 cattr_accessor :aaf_configuration
# Default configuration; entries passed in options replace these defaults in
# the update call further down.
110 self.aaf_configuration = {
111 :index_dir => "#{ActsAsFerret::index_dir}/#{self.name.underscore}",
112 :store_class_name => false,
113 :name => self.table_name,
114 :class_name => self.name,
115 :single_index => false,
116 :reindex_batch_size => 1000,
117 :ferret => {}, # Ferret config Hash
118 :ferret_fields => {}, # list of indexed fields that will be filled later
119 :enabled => true, # used for class-wide disabling of Ferret
120 :mysql_fast_batches => true # turn off to disable the faster, id based batching mechanism for MySQL
123 # merge aaf options with args
124 aaf_configuration.update(options) if options.is_a?(Hash)
125 # apply appropriate settings for shared index
126 if aaf_configuration[:single_index]
127 aaf_configuration[:index_dir] = "#{ActsAsFerret::index_dir}/shared"
128 aaf_configuration[:store_class_name] = true
131 # set ferret default options
# reverse_merge! only supplies keys that the :ferret hash does not contain
# yet, so values given by the caller are kept.
132 aaf_configuration[:ferret].reverse_merge!( :or_default => false,
133 :handle_parse_errors => true,
134 :default_field => nil # will be set later on
135 #:max_clauses => 512,
136 #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
137 # :wild_card_downcase => true
140 # merge ferret options with those from second parameter hash
141 aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash)
# Local mode only: make sure the index directory exists, remember it as
# :index_base_dir and point :index_dir at the directory returned by
# find_last_index_version.
143 unless options[:remote]
144 ActsAsFerret::ensure_directory aaf_configuration[:index_dir]
145 aaf_configuration[:index_base_dir] = aaf_configuration[:index_dir]
146 aaf_configuration[:index_dir] = find_last_index_version(aaf_configuration[:index_dir])
147 logger.debug "using index in #{aaf_configuration[:index_dir]}"
150 # these properties are somewhat vital to the plugin and shouldn't
151 # be overwritten by the user:
# This update runs after the user supplied options were merged, so these
# four keys always win.
152 aaf_configuration[:ferret].update(
153 :key => (aaf_configuration[:single_index] ? [:id, :class_name] : :id),
154 :path => aaf_configuration[:index_dir],
155 :auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
156 :create_if_missing => true
# Field registration. An explicit :fields option is passed to add_fields as
# given; the other two calls derive the field names from the attributes of a
# freshly instantiated model and then add :additional_fields.
# NOTE(review): the keyword separating the first add_fields call from the
# other two (presumably an else) is not visible in this excerpt - confirm
# against the full source.
159 if aaf_configuration[:fields]
160 add_fields(aaf_configuration[:fields])
162 add_fields(self.new.attributes.keys.map { |k| k.to_sym })
163 add_fields(aaf_configuration[:additional_fields])
166 # now that all fields have been added, we can initialize the default
167 # field list to be used by the query parser.
168 # It will include all content fields *not* marked as :untokenized.
169 # This fixes the otherwise failing CommentTest#test_stopwords. Basically
170 # this means that by default only tokenized fields (which all fields are
171 # by default) will be searched. If you want to search inside the contents
172 # of an untokenized field, you'll have to explicitly specify it in your
175 # Unfortunately this is not very useful with a shared index (see
176 # http://projects.jkraemer.net/acts_as_ferret/ticket/85)
177 # You should consider specifying the default field list to search for as
178 # part of the ferret_options hash in your call to acts_as_ferret.
# The ||= keeps a :default_field value that the caller already supplied.
# NOTE(review): the value produced for the shared index branch is not
# visible in this excerpt; only the warning is.
179 aaf_configuration[:ferret][:default_field] ||= if aaf_configuration[:single_index]
180 logger.warn "You really should set the acts_as_ferret :default_field option when using a shared index!"
183 aaf_configuration[:ferret_fields].keys.select do |f|
184 aaf_configuration[:ferret_fields][f][:index] != :untokenized
187 logger.info "default field list: #{aaf_configuration[:ferret][:default_field].inspect}"
# aaf_index is defined outside this excerpt.
# NOTE(review): presumably this builds the index when it is missing - confirm.
190 aaf_index.ensure_index_exists
197 # Find the most recent version of an index.
# basedir:: directory whose entries are scanned for versioned index directories.
# An entry counts as a version when it is a directory, contains a regular
# file named segments, and its name consists of digits optionally followed by
# an underscore and more digits.
# NOTE(review): this excerpt appears to omit lines between the select block
# and the final join, so any sorting of the candidates and the handling of
# an empty candidate list cannot be confirmed from here.
198 def find_last_index_version(basedir)
199 # check for versioned index
200 versions = Dir.entries(basedir).select do |f|
201 dir = File.join(basedir, f)
202 File.directory?(dir) && File.file?(File.join(dir, 'segments')) && f =~ /^\d+(_\d+)?$/
205 # select latest version
# Builds the path of the last candidate below basedir.
207 File.join basedir, versions.last
214 # Helper that defines a <field>_to_ferret method for the given field and
# records the field options in aaf_configuration[:ferret_fields].
# field:: name of the field; also used to build the generated method name.
# options:: per field Ferret options. The hash is modified in place: a Symbol
#           given as :boost is removed from it and missing keys are filled
#           with defaults through reverse_merge!.
# NOTE(review): this excerpt appears to omit some source lines (part of the
# default options, the begin and rescue keywords and the return value of the
# generated method), so only the visible statements are described.
216 def define_to_field_method(field, options = {})
# A Symbol boost is kept aside in dynamic_boost and later handed to
# content_for_field_name instead of being stored with the field options.
217 if options[:boost].is_a?(Symbol)
218 dynamic_boost = options[:boost]
219 options.delete :boost
221 options.reverse_merge!( :store => :no,
224 :term_vector => :with_positions_offsets,
# Fields that are not indexed get their term vector switched off as well.
226 options[:term_vector] = :no if options[:index] == :no
227 aaf_configuration[:ferret_fields][field] = options
# The generated method fetches the field content via content_for_field_name
# and logs it at debug level.
229 define_method("#{field}_to_ferret".to_sym) do
231 val = content_for_field_name(field, dynamic_boost)
# Logged with the current exception taken from $!.
233 logger.warn("Error retrieving value for field #{field}: #{$!}")
236 logger.debug("Adding field #{field} with value '#{val}' to index")
# Registers every field described by field_config through
# define_to_field_method.
# field_config:: either a Hash mapping field names to their option hashes, or
#                any other object responding to each that yields field names
#                (these are registered with the default options).
# NOTE(review): no branch is visible here for values that are neither a Hash
# nor respond to each (for example nil); presumably they are ignored -
# confirm against the full source.
241 def add_fields(field_config)
242 if field_config.is_a? Hash
243 field_config.each_pair do |key,val|
244 define_to_field_method(key,val)
246 elsif field_config.respond_to?(:each)
247 field_config.each do |field|
248 define_to_field_method(field)