To edit pages or tickets, please log in with username/password: aaf/aaf

Changeset 187

Show
Ignore:
Timestamp:
05/13/07 20:47:11 (1 year ago)
Author:
jk
Message:

Improved DRb server index rebuild handling by keeping index versions

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/plugin/acts_as_ferret/lib/act_methods.rb

    r185 r187  
    3737    #                    this to true. the model class name will be stored in a keyword field  
    3838    #                    named class_name 
     39    # 
     40    # reindex_batch_size:: reindexing is done in batches of this size, default is 1000 
    3941    # 
    4042    # ferret:: Hash of Options that directly influence the way the Ferret engine works. You  
     
    107109        :class_name => self.name, 
    108110        :single_index => false, 
     111        :reindex_batch_size => 1000, 
    109112        :ferret => {},                    # Ferret config Hash 
    110113        :ferret_fields => {}              # list of indexed fields that will be filled later 
     
    130133      # merge ferret options with those from second parameter hash 
    131134      aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash) 
     135 
     136      unless options[:remote] 
     137        ActsAsFerret::ensure_directory aaf_configuration[:index_dir]  
     138        aaf_configuration[:index_base_dir] = aaf_configuration[:index_dir] 
     139        aaf_configuration[:index_dir] = find_last_index_version(aaf_configuration[:index_dir]) 
     140        logger.debug "using index in #{aaf_configuration[:index_dir]}" 
     141      end 
    132142 
    133143      # these properties are somewhat vital to the plugin and shouldn't 
     
    146156        add_fields(aaf_configuration[:additional_fields]) 
    147157      end 
    148  
    149       ActsAsFerret::ensure_directory aaf_configuration[:index_dir] unless options[:remote] 
    150158 
    151159      # now that all fields have been added, we can initialize the default 
     
    175183    protected 
    176184     
     185    # find the most recent version of an index 
     186    def find_last_index_version(basedir) 
     187      # check for versioned index 
     188      versions = Dir.entries(basedir).select { |f| File.directory?(File.join(basedir, f)) && f =~ /^\d+$/ } 
     189      if versions.any? 
     190        # select latest version 
     191        versions.sort! 
     192        File.join basedir, versions.last 
     193      else 
     194        basedir 
     195      end 
     196    end 
     197 
    177198    # helper that defines a method that adds the given field to a ferret  
    178199    # document instance 
  • trunk/plugin/acts_as_ferret/lib/acts_as_ferret.rb

    r176 r187  
    7676    @@ferret_indexes = Hash.new 
    7777    def self.ferret_indexes; @@ferret_indexes end 
     78 
    7879  
    7980  # decorator that adds a total_hits accessor to search result arrays 
  • trunk/plugin/acts_as_ferret/lib/class_methods.rb

    r184 r187  
    1313    def rebuild_index(*models) 
    1414      models << self unless models.include?(self) 
    15       aaf_index.rebuild_index(models.map(&:to_s))
     15      aaf_index.rebuild_index models.map(&:to_s)
    1616    end                                                             
     17 
     18    # Switches this class to a new index located in dir. 
     19    # Used by the DRb server when switching to a new index version. 
     20    def index_dir=(dir) 
     21      aaf_configuration[:index_dir] = aaf_configuration[:ferret][:path] = dir 
     22      aaf_index.reopen! 
     23    end 
    1724     
    1825    # Retrieve the index instance for this model class. This can either be a 
  • trunk/plugin/acts_as_ferret/lib/ferret_server.rb

    r180 r187  
    77module ActsAsFerret 
    88 
    9 module Remote 
     9  module Remote 
    1010 
    11   module Config 
    12     class << self 
    13       DEFAULTS = { 
    14         'host' => 'localhost', 
    15         'port' => '9009' 
    16       } 
    17       # reads connection settings from config file 
    18       def load(file = "#{RAILS_ROOT}/config/ferret_server.yml") 
    19         config = DEFAULTS.merge(YAML.load(ERB.new(IO.read(file)).result)) 
    20         if config = config[RAILS_ENV] 
    21           config[:uri] = "druby://#{config['host']}:#{config['port']}" 
    22           return config 
     11    module Config 
     12      class << self 
     13        DEFAULTS = { 
     14          'host' => 'localhost', 
     15          'port' => '9009' 
     16        } 
     17        # read connection settings from config file 
     18        def load(file = "#{RAILS_ROOT}/config/ferret_server.yml") 
     19          config = DEFAULTS.merge(YAML.load(ERB.new(IO.read(file)).result)) 
     20          if config = config[RAILS_ENV] 
     21            config[:uri] = "druby://#{config['host']}:#{config['port']}" 
     22            return config 
     23          end 
     24          {} 
    2325        end 
    24         {} 
    2526      end 
    2627    end 
    27   end 
    2828 
    29   # This class acts as a drb server listening for indexing and 
    30   # search requests from models declared to 'acts_as_ferret :remote => true' 
    31  
    32   # Usage:  
    33   # - copy doc/ferret_server.yml to RAILS_ROOT/config and modify to suit 
    34   # your needs. 
    35   # - run script/ferret_server (in the plugin directory) via script/runner: 
    36   # RAILS_ENV=production script/runner vendor/plugins/acts_as_ferret/script/ferret_server 
    37   # 
    38   # TODO: automate installation of files to script/ and config/ 
    39   class Server 
     29    # This class acts as a drb server listening for indexing and 
     30    # search requests from models declared to 'acts_as_ferret :remote => true' 
     31   
     32    # Usage:  
     33    # - copy doc/ferret_server.yml to RAILS_ROOT/config and modify to suit 
     34    # your needs. environments for which no section in the config file exists 
     35    # will use the index locally (good for unit tests/development mode) 
     36    # - run script/ferret_server (in the plugin directory) via script/runner: 
     37    # RAILS_ENV=production script/runner vendor/plugins/acts_as_ferret/script/ferret_server 
     38    # 
     39    class Server 
    4040 
    41     cattr_accessor :running 
     41      cattr_accessor :running 
    4242 
    43     def self.start(uri = nil) 
    44       ActiveRecord::Base.allow_concurrency = true 
    45       uri ||= ActsAsFerret::Remote::Config.load[:uri] 
    46       DRb.start_service(uri, ActsAsFerret::Remote::Server.new) 
    47       self.running = true 
     43      def self.start(uri = nil) 
     44        ActiveRecord::Base.allow_concurrency = true 
     45        uri ||= ActsAsFerret::Remote::Config.load[:uri] 
     46        DRb.start_service(uri, ActsAsFerret::Remote::Server.new) 
     47        self.running = true 
     48      end 
     49 
     50      def initialize 
     51        @logger = Logger.new("#{RAILS_ROOT}/log/ferret_server.log") 
     52      end 
     53 
     54      # handles all incoming method calls, and sends them on to the LocalIndex 
     55      # instance of the correct model class. 
     56      # 
     57      # Calls are not queued atm, so this will block until the call returned. 
     58      # Might throw the occasional LockError, too, which most probably means that you're  
     59      # a) rebuilding your index or  
     60      # b) have *really* high load. I wasn't able to reproduce this case until 
     61      # now, if you do, please contact me. 
     62      # 
     63      def method_missing(name, *args) 
     64        @logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})" 
     65        clazz = args.shift.constantize 
     66        begin 
     67          clazz.aaf_index.send name, *args 
     68        rescue NoMethodError 
     69          @logger.debug "no luck, trying to call class method instead" 
     70          clazz.send name, *args 
     71        end 
     72      rescue 
     73        @logger.error "ferret server error #{$!}\n#{$!.backtrace.join '\n'}" 
     74        raise 
     75      end 
     76 
     77  #    def ferret_index(class_name) 
     78  #      # TODO check if in use! 
     79  #      class_name.constantize.aaf_index.ferret_index 
     80  #    end 
     81 
     82      def new_index_for(clazz, models) 
     83        aaf_configuration = clazz.aaf_configuration 
     84        ferret_cfg = aaf_configuration[:ferret].dup 
     85        ferret_cfg.update :auto_flush  => false,  
     86                          :create      => true, 
     87                          :field_infos => clazz.aaf_index.field_infos(models), 
     88                          :path        => File.join(aaf_configuration[:index_base_dir], 'rebuild') 
     89        Ferret::Index::Index.new ferret_cfg 
     90      end 
     91 
     92      def rebuild_index(class_name, *models) 
     93        clazz = class_name.constantize 
     94        models = models.flatten.uniq.map(&:constantize) 
     95        @logger.debug "rebuild index: #{models.inspect}" 
     96        index = new_index_for(clazz, models) 
     97        clazz.aaf_index.do_rebuild_with_index(index, models) 
     98        new_version = File.join clazz.aaf_configuration[:index_base_dir], Time.now.utc.strftime('%Y%m%d%H%M%S') 
     99        File.rename index.options[:path], new_version 
     100        clazz.index_dir = new_version  
     101      end 
     102 
    48103    end 
    49  
    50     def initialize 
    51       @logger = Logger.new("#{RAILS_ROOT}/log/ferret_server.log") 
    52     end 
    53  
    54     # handles all incoming method calls, and sends them on to the LocalIndex 
    55     # instance of the correct model class. 
    56     # 
    57     # Calls are not queued atm, so this will block until the call returned. 
    58     # Might throw the occasional LockError, too, which most probably means that you're  
    59     # a) rebuilding your index or  
    60     # b) have *really* high load. I wasn't able to reproduce this case until 
    61     # now, if you do, please contact me. 
    62     # 
    63     # TODO: rebuild indexes in separate directory so no lock errors in these 
    64     # cases. 
    65     def method_missing(name, *args) 
    66       @logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})" 
    67       clazz = args.shift.constantize 
    68       begin 
    69         clazz.aaf_index.send name, *args 
    70       rescue NoMethodError 
    71         @logger.debug "no luck, trying to call class method instead" 
    72         clazz.send name, *args 
    73       end 
    74     rescue 
    75       @logger.error "ferret server error #{$!}\n#{$!.backtrace.join '\n'}" 
    76       raise 
    77     end 
    78  
    79     def ferret_index(class_name) 
    80       # TODO check if in use! 
    81       class_name.constantize.aaf_index.ferret_index 
    82     end 
    83  
    84     # the main loop taking stuff from the queue and running it... 
    85     #def run 
    86     #end 
    87  
    88104  end 
    89105end 
    90 end 
  • trunk/plugin/acts_as_ferret/lib/local_index.rb

    r168 r187  
    88      super 
    99      ensure_index_exists 
     10    end 
     11 
     12    def reopen! 
     13      if @ferret_index 
     14        @ferret_index.close 
     15        @ferret_index = nil 
     16      end 
     17      logger.debug "reopening index at #{aaf_configuration[:ferret][:path]}" 
     18      ferret_index 
    1019    end 
    1120 
     
    3847    # model classes to include in the index 
    3948    def rebuild_index(*models) 
    40       logger.debug "rebuild index: #{models.inspect}" 
    4149      models << aaf_configuration[:class_name] unless models.include?(aaf_configuration[:class_name]) 
    4250      models = models.flatten.uniq.map(&:constantize) 
     51      logger.debug "rebuild index: #{models.inspect}" 
    4352      index = Ferret::Index::Index.new(aaf_configuration[:ferret].dup.update(:auto_flush => false,  
    4453                                                                             :field_infos => field_infos(models), 
    4554                                                                             :create => true)) 
     55      do_rebuild_with_index(index, models) 
     56    end 
     57 
     58    def do_rebuild_with_index(index, models) 
    4659      models.each do |model| 
    4760        reindex_model(index, model) 
    4861      end 
    49       logger.debug("Created Ferret index in: #{aaf_configuration[:index_dir]}") 
    5062      index.flush 
     63      logger.debug("Created Ferret index in: #{index.options[:path]}. Will now optimize...") 
    5164      index.optimize 
    5265      index.close 
    5366      close_multi_indexes 
     67      logger.debug("Done.") 
    5468    end 
    5569 
     
    180194    end 
    181195 
    182  
    183     protected 
    184  
    185     # returns a MultiIndex instance operating on a MultiReader 
    186     def multi_index(model_classes) 
    187       model_classes.sort! { |a, b| a.name <=> b.name } 
    188       key = model_classes.inject("") { |s, clazz| s + clazz.name } 
    189       multi_config = aaf_configuration[:ferret].dup 
    190       multi_config.delete :default_field  # we don't want the default field list of *this* class for multi_searching 
    191       ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config) 
    192     end 
    193   
    194     def close_multi_indexes 
    195       # close combined index readers, just in case 
    196       # this seems to fix a strange test failure that seems to relate to a 
    197       # multi_index looking at an old version of the content_base index. 
    198       ActsAsFerret::multi_indexes.each_pair do |key, index| 
    199         # puts "#{key} -- #{self.name}" 
    200         # TODO only close those where necessary (watch inheritance, where 
    201         # self.name is base class of a class where key is made from) 
    202         index.close #if key =~ /#{self.name}/ 
    203       end 
    204       ActsAsFerret::multi_indexes.clear 
    205     end 
    206  
    207     def reindex_model(index, model = aaf_configuration[:class_name].constantize) 
    208       # index in batches of 1000 to limit memory consumption (fixes #24) 
    209       # TODO make configurable through options 
    210       batch_size = 1000 
    211       model_count = model.count.to_f 
    212       work_done = 0 
    213       batch_time = 0 
    214       logger.info "reindexing model #{model.name}" 
    215       order = "#{model.primary_key} ASC" # this works around a bug in sqlserver-adapter (where paging only works with an order applied) 
    216       model.transaction do 
    217         0.step(model.count, batch_size) do |i| 
    218           b1 = Time.now.to_f 
    219           model.find(:all, :limit => batch_size, :offset => i, :order => order).each do |rec| 
    220             index << rec.to_doc if rec.ferret_enabled?(true) 
    221           end 
    222           batch_time = Time.now.to_f - b1 
    223           work_done = i.to_f / model_count * 100.0 if model_count > 0 
    224           remaining_time = ( batch_time / batch_size ) * ( model_count - i + batch_size ) 
    225           logger.info "reindex model #{model.name} : #{'%.2f' % work_done}% complete : #{'%.2f' % remaining_time} secs to finish" 
    226         end 
    227       end 
    228     end 
    229  
    230196    # builds a FieldInfos instance for creation of an index containing fields 
    231197    # for the given model classes. 
     
    253219    end 
    254220 
     221    protected 
     222 
     223    # returns a MultiIndex instance operating on a MultiReader 
     224    def multi_index(model_classes) 
     225      model_classes.sort! { |a, b| a.name <=> b.name } 
     226      key = model_classes.inject("") { |s, clazz| s + clazz.name } 
     227      multi_config = aaf_configuration[:ferret].dup 
     228      multi_config.delete :default_field  # we don't want the default field list of *this* class for multi_searching 
     229      ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config) 
     230    end 
     231  
     232    def close_multi_indexes 
     233      # close combined index readers, just in case 
     234      # this seems to fix a strange test failure that seems to relate to a 
     235      # multi_index looking at an old version of the content_base index. 
     236      ActsAsFerret::multi_indexes.each_pair do |key, index| 
     237        # puts "#{key} -- #{self.name}" 
     238        # TODO only close those where necessary (watch inheritance, where 
     239        # self.name is base class of a class where key is made from) 
     240        index.close #if key =~ /#{self.name}/ 
     241      end 
     242      ActsAsFerret::multi_indexes.clear 
     243    end 
     244 
     245    # indexing is done in batches to limit memory consumption (fixes #24). 
     246    # The default batch size is 1000, this can be changed with the :reindex_batch_size 
     247    # option of acts_as_ferret. 
     248    def reindex_model(index, model = aaf_configuration[:class_name].constantize) 
     249      batch_size = aaf_configuration[:reindex_batch_size] 
     250      model_count = model.count.to_f 
     251      work_done = 0 
     252      batch_time = 0 
     253      logger.info "reindexing model #{model.name}" 
     254      order = "#{model.primary_key} ASC" # this works around a bug in sqlserver-adapter (where paging only works with an order applied) 
     255      model.transaction do 
     256        0.step(model.count, batch_size) do |i| 
     257          batch_time = measure_time { 
     258            model.find(:all, :limit => batch_size, :offset => i, :order => order).each do |rec| 
     259              index << rec.to_doc if rec.ferret_enabled?(true) 
     260            end 
     261          }.to_f 
     262          work_done = i.to_f / model_count * 100.0 if model_count > 0 
     263          remaining_time = ( batch_time / batch_size ) * ( model_count - i + batch_size ) 
     264          logger.info "reindex model #{model.name} : #{'%.2f' % work_done}% complete : #{'%.2f' % remaining_time} secs to finish" 
     265        end 
     266      end 
     267    end 
     268 
     269    def measure_time 
     270      t1 = Time.now 
     271      yield 
     272      Time.now - t1 
     273    end 
     274 
     275 
    255276  end 
    256277 
  • trunk/plugin/acts_as_ferret/script/ferret_start

    r179 r187  
    1 #!/usr/bin/env script/runner 
    2  
     1#!/usr/bin/env ruby 
    32# Ferret DRb server launcher script 
    43# 
     
    1817# The server writes a log file in log/ferret_server.log, it's  
    1918# STDOUT gets redirected to log/ferret_server.out 
     19 
     20ENV['FERRET_USE_LOCAL_INDEX'] = 'true' 
     21require File.dirname(__FILE__) + '/../config/boot' 
     22require RAILS_ROOT + '/config/environment' 
    2023 
    2124 
  • trunk/plugin/acts_as_ferret/script/ferret_stop

    r170 r187  
    1818pid_file = config['pid_file'] 
    1919puts "Stopping ferret_server..." 
    20 send_signal("TERM", pid_file) 
     20if File.file?(pid_file) 
     21  send_signal("TERM", pid_file)  
     22else 
     23  puts "no pid file found" 
     24end 
    2125 
    2226# vim:set filetype=ruby: 

To edit pages or tickets, please log in with username/password: aaf/aaf