Changeset 187
- Timestamp:
- 05/13/07 20:47:11 (1 year ago)
- Files:
-
- trunk/plugin/acts_as_ferret/lib/act_methods.rb (modified) (5 diffs)
- trunk/plugin/acts_as_ferret/lib/acts_as_ferret.rb (modified) (1 diff)
- trunk/plugin/acts_as_ferret/lib/class_methods.rb (modified) (1 diff)
- trunk/plugin/acts_as_ferret/lib/ferret_cap_tasks.rb (added)
- trunk/plugin/acts_as_ferret/lib/ferret_server.rb (modified) (1 diff)
- trunk/plugin/acts_as_ferret/lib/local_index.rb (modified) (4 diffs)
- trunk/plugin/acts_as_ferret/script/ferret_start (modified) (2 diffs)
- trunk/plugin/acts_as_ferret/script/ferret_stop (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plugin/acts_as_ferret/lib/act_methods.rb
r185 r187 37 37 # this to true. the model class name will be stored in a keyword field 38 38 # named class_name 39 # 40 # reindex_batch_size:: reindexing is done in batches of this size, default is 1000 39 41 # 40 42 # ferret:: Hash of Options that directly influence the way the Ferret engine works. You … … 107 109 :class_name => self.name, 108 110 :single_index => false, 111 :reindex_batch_size => 1000, 109 112 :ferret => {}, # Ferret config Hash 110 113 :ferret_fields => {} # list of indexed fields that will be filled later … … 130 133 # merge ferret options with those from second parameter hash 131 134 aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash) 135 136 unless options[:remote] 137 ActsAsFerret::ensure_directory aaf_configuration[:index_dir] 138 aaf_configuration[:index_base_dir] = aaf_configuration[:index_dir] 139 aaf_configuration[:index_dir] = find_last_index_version(aaf_configuration[:index_dir]) 140 logger.debug "using index in #{aaf_configuration[:index_dir]}" 141 end 132 142 133 143 # these properties are somewhat vital to the plugin and shouldn't … … 146 156 add_fields(aaf_configuration[:additional_fields]) 147 157 end 148 149 ActsAsFerret::ensure_directory aaf_configuration[:index_dir] unless options[:remote]150 158 151 159 # now that all fields have been added, we can initialize the default … … 175 183 protected 176 184 185 # find the most recent version of an index 186 def find_last_index_version(basedir) 187 # check for versioned index 188 versions = Dir.entries(basedir).select { |f| File.directory?(File.join(basedir, f)) && f =~ /^\d+$/ } 189 if versions.any? 190 # select latest version 191 versions.sort! 192 File.join basedir, versions.last 193 else 194 basedir 195 end 196 end 197 177 198 # helper that defines a method that adds the given field to a ferret 178 199 # document instance trunk/plugin/acts_as_ferret/lib/acts_as_ferret.rb
r176 r187 76 76 @@ferret_indexes = Hash.new 77 77 def self.ferret_indexes; @@ferret_indexes end 78 78 79 79 80 # decorator that adds a total_hits accessor to search result arrays trunk/plugin/acts_as_ferret/lib/class_methods.rb
r184 r187 13 13 def rebuild_index(*models) 14 14 models << self unless models.include?(self) 15 aaf_index.rebuild_index (models.map(&:to_s))15 aaf_index.rebuild_index models.map(&:to_s) 16 16 end 17 18 # Switches this class to a new index located in dir. 19 # Used by the DRb server when switching to a new index version. 20 def index_dir=(dir) 21 aaf_configuration[:index_dir] = aaf_configuration[:ferret][:path] = dir 22 aaf_index.reopen! 23 end 17 24 18 25 # Retrieve the index instance for this model class. This can either be a trunk/plugin/acts_as_ferret/lib/ferret_server.rb
r180 r187 7 7 module ActsAsFerret 8 8 9 module Remote9 module Remote 10 10 11 module Config 12 class << self 13 DEFAULTS = { 14 'host' => 'localhost', 15 'port' => '9009' 16 } 17 # reads connection settings from config file 18 def load(file = "#{RAILS_ROOT}/config/ferret_server.yml") 19 config = DEFAULTS.merge(YAML.load(ERB.new(IO.read(file)).result)) 20 if config = config[RAILS_ENV] 21 config[:uri] = "druby://#{config['host']}:#{config['port']}" 22 return config 11 module Config 12 class << self 13 DEFAULTS = { 14 'host' => 'localhost', 15 'port' => '9009' 16 } 17 # read connection settings from config file 18 def load(file = "#{RAILS_ROOT}/config/ferret_server.yml") 19 config = DEFAULTS.merge(YAML.load(ERB.new(IO.read(file)).result)) 20 if config = config[RAILS_ENV] 21 config[:uri] = "druby://#{config['host']}:#{config['port']}" 22 return config 23 end 24 {} 23 25 end 24 {}25 26 end 26 27 end 27 end28 28 29 # This class acts as a drb server listening for indexing and30 # search requests from models declared to 'acts_as_ferret :remote => true'31 #32 # Usage:33 # - copy doc/ferret_server.yml to RAILS_ROOT/config and modify to suit34 # your needs.35 # - run script/ferret_server (in the plugin directory) via script/runner:36 # RAILS_ENV=production script/runner vendor/plugins/acts_as_ferret/script/ferret_server37 #38 # TODO: automate installation of files to script/ and config/39 class Server29 # This class acts as a drb server listening for indexing and 30 # search requests from models declared to 'acts_as_ferret :remote => true' 31 # 32 # Usage: 33 # - copy doc/ferret_server.yml to RAILS_ROOT/config and modify to suit 34 # your needs. 
environments for which no section in the config file exists 35 # will use the index locally (good for unit tests/development mode) 36 # - run script/ferret_server (in the plugin directory) via script/runner: 37 # RAILS_ENV=production script/runner vendor/plugins/acts_as_ferret/script/ferret_server 38 # 39 class Server 40 40 41 cattr_accessor :running41 cattr_accessor :running 42 42 43 def self.start(uri = nil) 44 ActiveRecord::Base.allow_concurrency = true 45 uri ||= ActsAsFerret::Remote::Config.load[:uri] 46 DRb.start_service(uri, ActsAsFerret::Remote::Server.new) 47 self.running = true 43 def self.start(uri = nil) 44 ActiveRecord::Base.allow_concurrency = true 45 uri ||= ActsAsFerret::Remote::Config.load[:uri] 46 DRb.start_service(uri, ActsAsFerret::Remote::Server.new) 47 self.running = true 48 end 49 50 def initialize 51 @logger = Logger.new("#{RAILS_ROOT}/log/ferret_server.log") 52 end 53 54 # handles all incoming method calls, and sends them on to the LocalIndex 55 # instance of the correct model class. 56 # 57 # Calls are not queued atm, so this will block until the call returned. 58 # Might throw the occasional LockError, too, which most probably means that you're 59 # a) rebuilding your index or 60 # b) have *really* high load. I wasn't able to reproduce this case until 61 # now, if you do, please contact me. 62 # 63 def method_missing(name, *args) 64 @logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})" 65 clazz = args.shift.constantize 66 begin 67 clazz.aaf_index.send name, *args 68 rescue NoMethodError 69 @logger.debug "no luck, trying to call class method instead" 70 clazz.send name, *args 71 end 72 rescue 73 @logger.error "ferret server error #{$!}\n#{$!.backtrace.join '\n'}" 74 raise 75 end 76 77 # def ferret_index(class_name) 78 # # TODO check if in use! 
79 # class_name.constantize.aaf_index.ferret_index 80 # end 81 82 def new_index_for(clazz, models) 83 aaf_configuration = clazz.aaf_configuration 84 ferret_cfg = aaf_configuration[:ferret].dup 85 ferret_cfg.update :auto_flush => false, 86 :create => true, 87 :field_infos => clazz.aaf_index.field_infos(models), 88 :path => File.join(aaf_configuration[:index_base_dir], 'rebuild') 89 Ferret::Index::Index.new ferret_cfg 90 end 91 92 def rebuild_index(class_name, *models) 93 clazz = class_name.constantize 94 models = models.flatten.uniq.map(&:constantize) 95 @logger.debug "rebuild index: #{models.inspect}" 96 index = new_index_for(clazz, models) 97 clazz.aaf_index.do_rebuild_with_index(index, models) 98 new_version = File.join clazz.aaf_configuration[:index_base_dir], Time.now.utc.strftime('%Y%m%d%H%M%S') 99 File.rename index.options[:path], new_version 100 clazz.index_dir = new_version 101 end 102 48 103 end 49 50 def initialize51 @logger = Logger.new("#{RAILS_ROOT}/log/ferret_server.log")52 end53 54 # handles all incoming method calls, and sends them on to the LocalIndex55 # instance of the correct model class.56 #57 # Calls are not queued atm, so this will block until the call returned.58 # Might throw the occasional LockError, too, which most probably means that you're59 # a) rebuilding your index or60 # b) have *really* high load. 
I wasn't able to reproduce this case until61 # now, if you do, please contact me.62 #63 # TODO: rebuild indexes in separate directory so no lock errors in these64 # cases.65 def method_missing(name, *args)66 @logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})"67 clazz = args.shift.constantize68 begin69 clazz.aaf_index.send name, *args70 rescue NoMethodError71 @logger.debug "no luck, trying to call class method instead"72 clazz.send name, *args73 end74 rescue75 @logger.error "ferret server error #{$!}\n#{$!.backtrace.join '\n'}"76 raise77 end78 79 def ferret_index(class_name)80 # TODO check if in use!81 class_name.constantize.aaf_index.ferret_index82 end83 84 # the main loop taking stuff from the queue and running it...85 #def run86 #end87 88 104 end 89 105 end 90 end trunk/plugin/acts_as_ferret/lib/local_index.rb
r168 r187 8 8 super 9 9 ensure_index_exists 10 end 11 12 def reopen! 13 if @ferret_index 14 @ferret_index.close 15 @ferret_index = nil 16 end 17 logger.debug "reopening index at #{aaf_configuration[:ferret][:path]}" 18 ferret_index 10 19 end 11 20 … … 38 47 # model classes to include in the index 39 48 def rebuild_index(*models) 40 logger.debug "rebuild index: #{models.inspect}"41 49 models << aaf_configuration[:class_name] unless models.include?(aaf_configuration[:class_name]) 42 50 models = models.flatten.uniq.map(&:constantize) 51 logger.debug "rebuild index: #{models.inspect}" 43 52 index = Ferret::Index::Index.new(aaf_configuration[:ferret].dup.update(:auto_flush => false, 44 53 :field_infos => field_infos(models), 45 54 :create => true)) 55 do_rebuild_with_index(index, models) 56 end 57 58 def do_rebuild_with_index(index, models) 46 59 models.each do |model| 47 60 reindex_model(index, model) 48 61 end 49 logger.debug("Created Ferret index in: #{aaf_configuration[:index_dir]}")50 62 index.flush 63 logger.debug("Created Ferret index in: #{index.options[:path]}. Will now optimize...") 51 64 index.optimize 52 65 index.close 53 66 close_multi_indexes 67 logger.debug("Done.") 54 68 end 55 69 … … 180 194 end 181 195 182 183 protected184 185 # returns a MultiIndex instance operating on a MultiReader186 def multi_index(model_classes)187 model_classes.sort! 
{ |a, b| a.name <=> b.name }188 key = model_classes.inject("") { |s, clazz| s + clazz.name }189 multi_config = aaf_configuration[:ferret].dup190 multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching191 ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)192 end193 194 def close_multi_indexes195 # close combined index readers, just in case196 # this seems to fix a strange test failure that seems to relate to a197 # multi_index looking at an old version of the content_base index.198 ActsAsFerret::multi_indexes.each_pair do |key, index|199 # puts "#{key} -- #{self.name}"200 # TODO only close those where necessary (watch inheritance, where201 # self.name is base class of a class where key is made from)202 index.close #if key =~ /#{self.name}/203 end204 ActsAsFerret::multi_indexes.clear205 end206 207 def reindex_model(index, model = aaf_configuration[:class_name].constantize)208 # index in batches of 1000 to limit memory consumption (fixes #24)209 # TODO make configurable through options210 batch_size = 1000211 model_count = model.count.to_f212 work_done = 0213 batch_time = 0214 logger.info "reindexing model #{model.name}"215 order = "#{model.primary_key} ASC" # this works around a bug in sqlserver-adapter (where paging only works with an order applied)216 model.transaction do217 0.step(model.count, batch_size) do |i|218 b1 = Time.now.to_f219 model.find(:all, :limit => batch_size, :offset => i, :order => order).each do |rec|220 index << rec.to_doc if rec.ferret_enabled?(true)221 end222 batch_time = Time.now.to_f - b1223 work_done = i.to_f / model_count * 100.0 if model_count > 0224 remaining_time = ( batch_time / batch_size ) * ( model_count - i + batch_size )225 logger.info "reindex model #{model.name} : #{'%.2f' % work_done}% complete : #{'%.2f' % remaining_time} secs to finish"226 end227 end228 end229 230 196 # builds a FieldInfos instance for creation of an index containing fields 
231 197 # for the given model classes. … … 253 219 end 254 220 221 protected 222 223 # returns a MultiIndex instance operating on a MultiReader 224 def multi_index(model_classes) 225 model_classes.sort! { |a, b| a.name <=> b.name } 226 key = model_classes.inject("") { |s, clazz| s + clazz.name } 227 multi_config = aaf_configuration[:ferret].dup 228 multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching 229 ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config) 230 end 231 232 def close_multi_indexes 233 # close combined index readers, just in case 234 # this seems to fix a strange test failure that seems to relate to a 235 # multi_index looking at an old version of the content_base index. 236 ActsAsFerret::multi_indexes.each_pair do |key, index| 237 # puts "#{key} -- #{self.name}" 238 # TODO only close those where necessary (watch inheritance, where 239 # self.name is base class of a class where key is made from) 240 index.close #if key =~ /#{self.name}/ 241 end 242 ActsAsFerret::multi_indexes.clear 243 end 244 245 # indexing is done in batches to limit memory consumption (fixes #24). 246 # The default batch size is 1000, this can be changed with the :reindex_batch_size 247 # option of acts_as_ferret. 
248 def reindex_model(index, model = aaf_configuration[:class_name].constantize) 249 batch_size = aaf_configuration[:reindex_batch_size] 250 model_count = model.count.to_f 251 work_done = 0 252 batch_time = 0 253 logger.info "reindexing model #{model.name}" 254 order = "#{model.primary_key} ASC" # this works around a bug in sqlserver-adapter (where paging only works with an order applied) 255 model.transaction do 256 0.step(model.count, batch_size) do |i| 257 batch_time = measure_time { 258 model.find(:all, :limit => batch_size, :offset => i, :order => order).each do |rec| 259 index << rec.to_doc if rec.ferret_enabled?(true) 260 end 261 }.to_f 262 work_done = i.to_f / model_count * 100.0 if model_count > 0 263 remaining_time = ( batch_time / batch_size ) * ( model_count - i + batch_size ) 264 logger.info "reindex model #{model.name} : #{'%.2f' % work_done}% complete : #{'%.2f' % remaining_time} secs to finish" 265 end 266 end 267 end 268 269 def measure_time 270 t1 = Time.now 271 yield 272 Time.now - t1 273 end 274 275 255 276 end 256 277 trunk/plugin/acts_as_ferret/script/ferret_start
r179 r187 1 #!/usr/bin/env script/runner 2 1 #!/usr/bin/env ruby 3 2 # Ferret DRb server launcher script 4 3 # … … 18 17 # The server writes a log file in log/ferret_server.log, it's 19 18 # STDOUT gets redirected to log/ferret_server.out 19 20 ENV['FERRET_USE_LOCAL_INDEX'] = 'true' 21 require File.dirname(__FILE__) + '/../config/boot' 22 require RAILS_ROOT + '/config/environment' 20 23 21 24 trunk/plugin/acts_as_ferret/script/ferret_stop
r170 r187 18 18 pid_file = config['pid_file'] 19 19 puts "Stopping ferret_server..." 20 send_signal("TERM", pid_file) 20 if File.file?(pid_file) 21 send_signal("TERM", pid_file) 22 else 23 puts "no pid file found" 24 end 21 25 22 26 # vim:set filetype=ruby:
