lib/ferret_extensions.rb

   1 module Ferret
   2
   3   module Analysis
   4
   5     # = PerFieldAnalyzer
   6     #
   7     # This PerFieldAnalyzer is a workaround to a memory leak in
   8     # ferret 0.11.4. It does basically do the same as the original
   9     # Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
  10     #
  11     # http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
  12     #
  13     # Thanks to Ben from omdb.org for tracking this down and creating this
  14     # workaround.
  15     # You can read more about the issue there:
  16     # http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
  17     class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
  18       def initialize( default_analyzer = StandardAnalyzer.new )
  19         @analyzers = {}
  20         @default_analyzer = default_analyzer
  21       end
  22
  23       def add_field( field, analyzer )
  24         @analyzers[field] = analyzer
  25       end
  26       alias []= add_field
  27
  28       def token_stream(field, string)
  29         @analyzers.has_key?(field) ? @analyzers[field].token_stream(field, string) :
  30           @default_analyzer.token_stream(field, string)
  31       end
  32     end
  33   end
  34
  # Extensions to Ferret's index class: rebuilding the index from whole model
  # classes and bulk indexing of selected records through
  # ActsAsFerret::BulkIndexer.
  class Index::Index
    # batch_size:: handed to the BulkIndexer and to the models' record iterators
    # logger::     receives progress messages; the indexing methods below call
    #              it unconditionally, so it has to be set before using them
    attr_accessor :batch_size, :logger

    # Reindexes each model in +models+, then flushes, optimizes and closes
    # this index and finally calls ActsAsFerret::close_multi_indexes.
    def index_models(models)
      models.each { |model| index_model model }
      flush
      optimize
      close
      ActsAsFerret::close_multi_indexes
    end

    # Reindexes all records of +model+. The records are obtained in batches
    # from <tt>model.records_for_rebuild</tt> and passed, together with their
    # offset, to a BulkIndexer created with <tt>:reindex => true</tt>.
    def index_model(model)
      bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                   :model => model, :index => self, :reindex => true)
      logger.info "reindexing model #{model.name}"

      model.records_for_rebuild(@batch_size) do |records, offset|
        bulk_indexer.index_records(records, offset)
      end
    end

    # Indexes the records of +model+ identified by +ids+ in batches, with auto
    # flush switched off for the duration of the run, then flushes once.
    #
    # options:
    # :optimize:: optimize the index after flushing (default: true)
    #
    # The caller's options hash is not modified. The auto flush setting is
    # restored even if indexing, flushing or optimizing raises; the exception
    # itself is passed on to the caller.
    #
    # Returns the auto flush setting that was in effect before the call.
    def bulk_index(model, ids, options = {})
      # read the option without writing the default back into the caller's hash
      optimize_afterwards = options.fetch(:optimize, true)
      orig_flush = @auto_flush
      @auto_flush = false
      begin
        bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                     :model => model, :index => self, :total => ids.size)
        model.records_for_bulk_index(ids, @batch_size) do |records, offset|
          logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
          bulk_indexer.index_records(records, offset)
        end
        logger.info 'finishing bulk index...'
        flush
        if optimize_afterwards
          logger.info 'optimizing...'
          optimize
        end
      ensure
        # never leave the index with auto flush disabled after a failure
        @auto_flush = orig_flush
      end
      # keep the return value callers got before the ensure block was added
      orig_flush
    end

  end
  76
  # add marshalling support to SortFields
  class Search::SortField
    # Marshal hook: a sort field is serialized as its string representation.
    # +depth+ is supplied by Marshal and not used here.
    def _dump(depth)
      to_s
    end

    # Marshal hook: rebuilds a sort field from the string written by _dump.
    #
    # The document id and score sort fields (normal and reversed, the latter
    # marked by a trailing '!') map to the predefined constants; strings of
    # the form <tt>name:<type></tt> (optionally followed by '!') create a new
    # sort field on +name+ with the given type and reverse flag.
    #
    # Raises a RuntimeError if +string+ has none of these forms.
    def self._load(string)
      # 'then' instead of the ':' separator, which only Ruby 1.8 accepts
      # after a when condition.
      case string
        when /<DOC(_ID)?>!/         then Ferret::Search::SortField::DOC_ID_REV
        when /<DOC(_ID)?>/          then Ferret::Search::SortField::DOC_ID
        when '<SCORE>!'             then Ferret::Search::SortField::SCORE_REV
        when '<SCORE>'              then Ferret::Search::SortField::SCORE
        when /^(\w+):<(\w+)>(!)?$/ then new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
        else raise "invalid value: #{string}"
      end
    end
  end
  94
  # add marshalling support to Sort
  class Search::Sort
    # Marshal hook: a sort is serialized as its string representation.
    # +depth+ is supplied by Marshal and not used here.
    def _dump(depth)
      to_s
    end

    # Marshal hook: rebuilds a sort from the string written by _dump.
    # Each comma separated entry between the brackets is loaded through
    # SortField._load; blank entries are skipped.
    #
    # Raises a RuntimeError if +string+ is not of the form <tt>Sort[...]</tt>.
    def self._load(string)
      # we exclude the last <DOC> sorting as it is appended by new anyway
      raise "invalid value: #{string}" unless string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/

      entries = $1.split(',').each { |entry| entry.strip! }
      present = entries.reject { |entry| entry.blank? }
      fields  = present.map { |entry| Ferret::Search::SortField._load(entry) }
      new fields.compact
    end
  end
 114
 115 end