To edit pages or tickets, please log in with username/password: aaf/aaf

Changeset 318

Show
Ignore:
Timestamp:
02/18/08 20:36:04 (8 months ago)
Author:
jk
Message:

works except for multi and shared indexes

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/demo/app/models/shared_index1.rb

    r308 r318  
    11class SharedIndex1 < ActiveRecord::Base 
    2   # default field list for all classes sharing the index 
    3   DEFAULT_FIELDS = [ :name ] 
    4   acts_as_ferret( :fields       => { :name => { :store => :yes } },  
    5                   :single_index => true,  
    6                   :remote       => ENV['AAF_REMOTE'] == 'true', 
    7                   :raise_drb_errors => ENV['RAISE_DRB_ERRORS'] == 'true', 
    8                   :ferret       => { :default_field => DEFAULT_FIELDS }  
    9                 ) 
     2  acts_as_ferret :index  => 'shared' 
    103end 
  • trunk/demo/app/models/shared_index2.rb

    r308 r318  
    11class SharedIndex2 < ActiveRecord::Base 
    2   acts_as_ferret( :fields       => { :name => { :store => :yes } },  
    3                   :single_index => true,  
    4                   :remote       => ENV['AAF_REMOTE'] == 'true',  
    5                   :raise_drb_errors => ENV['RAISE_DRB_ERRORS'] == 'true', 
    6                   :ferret => { :default_field => SharedIndex1::DEFAULT_FIELDS } ) 
     2  acts_as_ferret :index => 'shared' 
    73end 
  • trunk/demo/config/environment.rb

    r228 r318  
    5454 
    5555# Include your application configuration below 
     56 
     57# define the index shared by the SharedIndex1 and SharedIndex2 classes 
     58ActsAsFerret::define_index 'shared', :remote           => ENV['AAF_REMOTE'] == 'true', 
     59                                     :raise_drb_errors => ENV['RAISE_DRB_ERRORS'] == 'true', 
     60                                     :fields           => { 
     61                                       :name => { :store => :yes } 
     62                                     },  
     63                                     :ferret           => { 
     64                                       :default_field => [ :name ] 
     65                                     } 
     66 
  • trunk/demo/test/unit/content_test.rb

    r310 r318  
    357357   
    358358  def test_total_hits_multi 
    359     result = Content.total_hits('*:title OR *:comment', :multi => Comment) 
    360     assert_equal 5, result 
     359    q = '*:title OR *:comment' 
     360    assert_equal 3, Comment.total_hits(q) 
     361    assert_equal 2, Content.total_hits(q) 
     362    assert_equal 5, ActsAsFerret::total_hits(q, [ Comment, Content ]) 
    361363  end 
    362364 
     
    476478      remove_index Content 
    477479      i =  ActsAsFerret::MultiIndex.new([Content]) 
    478       assert File.exists?("#{Content.aaf_configuration[:index_dir]}/segments") 
     480      assert File.exists?("#{ActsAsFerret::index_definition(Content)[:index_dir]}/segments") 
    479481      hits = i.search("description:title") 
    480482      assert_equal 1, hits.total_hits, hits.inspect 
  • trunk/plugin/acts_as_ferret/init.rb

    r317 r318  
    2121require 'acts_as_ferret' 
    2222 
    23 ActsAsFerret::logger = RAILS_DEFAULT_LOGGER 
  • trunk/plugin/acts_as_ferret/lib/act_methods.rb

    r317 r318  
    101101 
    102102      extend ClassMethods 
    103       extend SharedIndexClassMethods if options[:single_index] 
    104103 
    105104      include InstanceMethods 
     
    120119      cattr_accessor :aaf_configuration 
    121120 
    122       # shared index defaults 
    123       if options.delete(:single_index) 
    124         options[:store_class_name] = true  
    125         options[:index] = 'shared' 
    126       end 
    127  
    128121      # apply default config for rdig based models 
    129122      if options[:rdig] 
     
    135128      index_name = options.delete(:index) || self.name.underscore 
    136129 
    137       self.aaf_configuration = ActsAsFerret::register_class_with_index(self, index_name, options) 
     130      index = ActsAsFerret::register_class_with_index(self, index_name, options) 
     131      self.aaf_configuration = index.index_definition 
    138132      logger.debug "configured index for class #{self.name}:\n#{aaf_configuration.inspect}" 
    139133 
  • trunk/plugin/acts_as_ferret/lib/acts_as_ferret.rb

    r317 r318  
    7777  class IndexAlreadyDefined < ActsAsFerretError; end 
    7878 
    79   # default field list for use with a shared index. Set it globally to 
    80   # avoid having to specify the same :default_field value in every class using 
    81   # the shared index. 
    82   @@shared_index_default_fields = nil 
    83   mattr_accessor :shared_index_default_fields 
    84  
    85   @@logger = nil 
    86   mattr_accessor :logger 
    87  
    8879  # global Hash containing all multi indexes created by all classes using the plugin 
    8980  # key is the concatenation of alphabetically sorted names of the classes the 
     
    9384 
    9485  # global Hash containing the ferret indexes of all classes using the plugin 
    95   # key is the index directory
     86  # key is the index name
    9687  @@ferret_indexes = Hash.new 
    9788  def self.ferret_indexes; @@ferret_indexes end 
    9889 
    99   # holds per-index configuration, key is the index name 
    100   @@index_definitions = {} 
    10190  # mapping from class name to index name 
    10291  @@index_using_classes = {} 
    103   def self.index_definitions; @@index_definitions end 
    104  
     92 
     93  @@logger = Logger.new "#{RAILS_ROOT}/log/acts_as_ferret.log" 
     94  @@logger.level = ActiveRecord::Base.logger.level rescue Logger::DEBUG 
     95  mattr_accessor :logger 
     96 
     97     
     98  # Default ferret configuration for index fields 
    10599  DEFAULT_FIELD_OPTIONS = { 
    106100    :store       => :no,  
     
    111105  } 
    112106 
    113   def self.field_config_for(fieldname, options = {}) 
    114     config = DEFAULT_FIELD_OPTIONS.merge options 
    115     config[:term_vector] = :no if config[:index] == :no 
    116     config.delete :via 
    117     config.delete :boost if config[:boost].is_a?(Symbol) # dynamic boosts aren't handled here 
    118     return config 
    119   end 
    120  
    121   def self.build_field_config(fields) 
    122     field_config = {} 
    123     case fields 
    124     when Array 
    125       fields.each { |name| field_config[name] = field_config_for name } 
    126     when Hash 
    127       fields.each { |name, options| field_config[name] = field_config_for name, options } 
    128     else raise InvalidArgumentError.new(":fields option must be Hash or Array") 
    129     end if fields 
    130     return field_config 
    131   end 
    132  
    133107  # Globally declares an index. 
    134108  # 
     
    138112  # This method is also used to implicitly declare an index when you use the 
    139113  # acts_as_ferret call without the :index option as usual. 
     114  # Returns the created index instance 
    140115  def self.define_index(name, options = {}) 
    141116    name = name.to_sym 
    142     raise IndexAlreadyDefined.new(name) if index_definitions.has_key?(name) 
     117    raise IndexAlreadyDefined.new(name) if ferret_indexes.has_key?(name) 
    143118    index_definition = { 
    144119      :index_dir => "#{ActsAsFerret::index_dir}/#{name}", 
     
    189164    index_definition[:ferret_fields].update build_field_config( options[:additional_fields] ) 
    190165 
    191     index_definitions[name] = index_definition 
    192     return index_definition 
     166    ferret_indexes[name] = create_index_instance index_definition 
    193167  end 
    194168  
    195169  # called internally by the acts_as_ferret method 
    196170  # 
    197   # TODO part of the given options which might influence the indexing of 
    198   # records of a special class (such as analyzer, field configuration(i.e. 
    199   # dynamic boosts) need to be copied to the returned per-class config so they 
    200   # are taken into account properly even when multiple classes use conflicting 
    201   # settings) 
     171  # returns the index 
    202172  def self.register_class_with_index(clazz, index_name, options = {}) 
    203173    index_name = index_name.to_sym 
    204174    @@index_using_classes[clazz.name] = index_name 
    205     if definition = index_definitions[index_name] 
    206       definition[:shared_index] = true 
    207       # TODO: add class-declared options to the index definition? which? 
    208       # merge fields from this acts_as_ferret call with predefined fields 
    209       already_defined_fields = definition[:ferret_fields] 
    210       field_config = build_field_config options[:fields] 
    211       field_config.update build_field_config( options[:additional_fields] ) 
    212       field_config.each do |field, config| 
    213         if already_defined_fields.has_key?(field) 
    214           logger.info "ignoring redefinition of ferret field #{field}" 
    215         else 
    216           already_defined_fields[field] = config 
    217           logger.info "adding new field #{field} from class #{clazz.name} to index #{index_name}" 
    218         end 
    219       end 
    220     else 
     175    unless index = ferret_indexes[index_name] 
    221176      # index definition on the fly 
    222177      # default to all attributes of this class 
    223178      options[:fields] ||= clazz.new.attributes.keys.map { |k| k.to_sym } 
    224       define_index index_name, options 
    225     end 
    226  
    227     # update default field list to be used by the query parser, unless it  
    228     # was explicitly given by user. 
    229     # 
    230     # It will include all content fields *not* marked as :untokenized. 
    231     # This fixes the otherwise failing CommentTest#test_stopwords. Basically 
    232     # this means that by default only tokenized fields (which all fields are 
    233     # by default) will be searched. If you want to search inside the contents  
    234     # of an untokenized field, you'll have to explicitly specify it in your  
    235     # query. 
    236     definition = index_definitions[index_name] 
    237     unless definition[:user_default_field] 
    238       # grab all tokenized fields 
    239       definition[:ferret][:default_field] = definition[:ferret_fields].keys.select do |field| 
    240         definition[:ferret_fields][field][:index] != :untokenized 
     179      index = define_index index_name, options 
     180    end 
     181    index.register_class(clazz, options) 
     182    if index.shared? 
     183      # make sure all models using this index get proper class methods 
     184      index.index_definition[:registered_models].each do |clazz| 
     185        clazz.extend SharedIndexClassMethods unless clazz.extended_by.include?(ActsAsFerret::SharedIndexClassMethods) 
    241186      end 
    242       logger.info "default field list for index #{index_name}: #{definition[:ferret][:default_field].inspect}" 
    243     end 
    244  
    245     # TODO: duped definition more or less worthless... 
    246     definition[:registered_models] << clazz 
    247     return definition.dup 
     187    end 
     188    return index 
    248189  end 
    249190 
    250191  # returns the index with the given name. 
    251192  def self.get_index(name) 
    252     definition = index_definitions[name] 
    253     path = definition[:index_dir] 
    254     ferret_indexes[path] ||= create_index_instance(definition) 
    255   end 
     193    raise IndexNotDefined.new(name) unless ferret_indexes.has_key?(name) 
     194    ferret_indexes[name] 
     195  end 
     196 
     197  # count hits for a query across multiple models 
     198  def self.total_hits(query, models, options = {}) 
     199    models = [models] unless Array === models 
     200    get_index_for(*models).total_hits query, options.merge( :models => models ) 
     201  end 
     202 
     203  # returns the index used by the given class. 
     204  # 
     205  # If multiple classes are given, either the single index shared by these 
     206  # classes, or a multi index (to be used for search only) across the indexes 
     207  # of all models, is returned. 
     208  def self.get_index_for(*classes) 
     209    raise ArgumentError.new("no class specified") unless classes.any? 
     210    classes.map!(&:constantize) unless Class === classes.first 
     211    logger.debug "index_for #{classes.inspect}" 
     212    index = if classes.size > 1 
     213      indexes = classes.map { |c| get_index_for c }.uniq 
     214      indexes.size > 1 ? multi_index(indexes) : indexes.first 
     215    else 
     216      clazz = classes.first 
     217      clazz = clazz.superclass while clazz && !@@index_using_classes.has_key?(clazz.name) 
     218      get_index @@index_using_classes[clazz.name] 
     219    end 
     220    raise IndexNotDefined.new("no index found for class: #{classes.map(&:name).join(',')}") if index.nil? 
     221    return index 
     222  end 
     223 
    256224 
    257225  # creates a new Index instance. 
    258226  def self.create_index_instance(definition) 
    259     if definition[:remote] 
    260       RemoteIndex 
    261     elsif definition[:shared_index] 
    262       SharedIndex 
    263     else 
    264       LocalIndex 
    265     end.new(definition) 
     227    (definition[:remote] ? RemoteIndex : LocalIndex).new(definition) 
    266228  end 
    267229 
    268230  def self.rebuild_index(name) 
    269     idx = get_index(name) 
    270     idx.rebuild_index 
    271   end 
    272  
    273   # Switches the named index to a new index directory. 
    274   # Used by the DRb server when switching to a new index version. 
     231    get_index(name).rebuild_index 
     232  end 
     233 
    275234  def self.change_index_dir(name, new_dir) 
    276     logger.debug "[#{name}] changing index dir to #{new_dir}" 
    277     definition = @@index_definitions[name] 
    278     idx = get_index(name) 
    279  
    280     # store index with the new dir as key. This prevents the aaf_index method 
    281     # from opening another index instance later on. 
    282     ferret_indexes[new_dir] = idx 
    283  
    284     old_dir = definition[:index_dir] 
    285     definition[:index_dir] = definition[:ferret][:path] = new_dir 
    286  
    287     # clean old reference to index 
    288     ActsAsFerret::ferret_indexes.delete old_dir 
    289     idx.reopen! 
    290     logger.debug "[#{name}] index dir is now #{new_dir}" 
    291   end 
    292  
    293   # returns the index definition for the index used by the given class or 
    294   # index_name 
    295   def self.index_definition(clazz_or_index_name) 
    296     logger.debug "index_definition for #{clazz_or_index_name}" 
    297     # TODO: inheritance hochhangeln (Content, ContentBase) 
    298     index_name = clazz_or_index_name.is_a?(Class) ?  
    299       @@index_using_classes[clazz_or_index_name.name] : clazz_or_index_name 
    300     logger.debug "index_definition for #{index_name}" 
    301     index_definitions[index_name] 
     235    get_index(name).change_index_dir new_dir 
    302236  end 
    303237 
     
    318252  end 
    319253 
     254  # returns a MultiIndex instance operating on a MultiReader 
     255  def self.multi_index(indexes) 
     256    key = indexes.map{ |i| i.index_name.to_s }.sort.join(",") 
     257    ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(indexes) 
     258  end 
     259 
     260  def self.build_field_config(fields) 
     261    field_config = {} 
     262    case fields 
     263    when Array 
     264      fields.each { |name| field_config[name] = field_config_for name } 
     265    when Hash 
     266      fields.each { |name, options| field_config[name] = field_config_for name, options } 
     267    else raise InvalidArgumentError.new(":fields option must be Hash or Array") 
     268    end if fields 
     269    return field_config 
     270  end 
     271 
    320272  def self.ensure_directory(dir) 
    321273    FileUtils.mkdir_p dir unless (File.directory?(dir) || File.symlink?(dir)) 
    322274  end 
    323  
    324275 
    325276   
     
    339290  end 
    340291   
    341   # builds a FieldInfos instance for creation of an index containing fields 
    342   # for the given model classes. 
    343   def self.field_infos(models) 
     292  # builds a FieldInfos instance for creation of an index 
     293  def self.field_infos(index_definition) 
    344294    # default attributes for fields 
    345295    fi = Ferret::Index::FieldInfos.new(:store => :no,  
     
    349299    # primary key 
    350300    fi.add_field(:id, :store => :yes, :index => :untokenized)  
    351     fields = {} 
    352     have_class_name = false 
    353     models.each do |model| 
    354       fields.update(model.aaf_configuration[:ferret_fields]) 
    355       # class_name 
    356       if !have_class_name && model.aaf_configuration[:store_class_name] 
    357         fi.add_field(:class_name, :store => :yes, :index => :untokenized)  
    358         have_class_name = true 
    359       end 
    360     end 
    361     fields.each_pair do |field, options| 
    362       options = options.dup 
    363       options.delete(:boost) if options[:boost].is_a?(Symbol) 
    364       options.delete(:via) 
    365       fi.add_field(field, { :store => :no,  
    366                             :index => :yes }.update(options))  
     301    # class_name 
     302    fi.add_field(:class_name, :store => :yes, :index => :untokenized) if index_definition[:store_class_name] 
     303 
     304    # other fields 
     305    index_definition[:ferret_fields].each_pair do |field, options| 
     306      fi.add_field(field, options) 
    367307    end 
    368308    return fi 
     
    382322  end 
    383323 
     324  protected 
     325 
     326  def self.field_config_for(fieldname, options = {}) 
     327    config = DEFAULT_FIELD_OPTIONS.merge options 
     328    config[:term_vector] = :no if config[:index] == :no 
     329    config.delete :via 
     330    config.delete :boost if config[:boost].is_a?(Symbol) # dynamic boosts aren't handled here 
     331    return config 
     332  end 
     333 
    384334end 
    385335 
  • trunk/plugin/acts_as_ferret/lib/class_methods.rb

    r317 r318  
    2525    # This is called automatically when no index exists yet. 
    2626    # 
    27     # TODO: move into index class and add a method taking an index name to 
    28     # ActsAsFerret module. 
    2927    def rebuild_index 
    30       ActsAsFerret::rebuild_index(aaf_configuration[:name]) 
     28      aaf_index.rebuild_index 
    3129    end 
    3230 
     
    118116    # Index object, too. 
    119117    def aaf_index 
    120       ActsAsFerret::get_index(aaf_configuration[:name]) 
     118      @index ||= ActsAsFerret::get_index(aaf_configuration[:name]) 
    121119    end  
    122120     
     
    208206 
    209207    # Returns the total number of hits for the given query  
    210     # To count the results of a query across multiple models, specify an array of  
    211     # class names with the :multi option. 
    212208    # 
    213209    # Note that since we don't query the database here, this method won't deliver  
    214210    # the expected results when used on an AR association. 
     211    # 
    215212    def total_hits(q, options={}) 
    216213      if options[:models] 
  • trunk/plugin/acts_as_ferret/lib/ferret_server.rb

    r317 r318  
    105105        @logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})" 
    106106        retried = false 
    107         with_class args.shift do |clazz| 
    108           reconnect_when_needed(clazz) do 
    109             # using respond_to? here so we not have to catch NoMethodError 
    110             # which would silently catch those from deep inside the indexing 
    111             # code, too... 
    112             if clazz.aaf_index.respond_to?(name) 
    113               clazz.aaf_index.send name, *args 
    114             elsif clazz.respond_to?(name) 
    115               @logger.debug "no luck, trying to call class method instead" 
    116               clazz.send name, *args 
    117             else 
    118               raise NoMethodError.new("method #{name} not supported by DRb server") 
    119             end 
    120           end 
     107        index_name = args.shift 
     108        index = ActsAsFerret::get_index(index_name) 
     109 
     110        # TODO find another way to implement the reconnection logic (maybe in 
     111        # local_index or class_methods) 
     112        #  reconnect_when_needed(clazz) do 
     113         
     114        # using respond_to? here so we not have to catch NoMethodError 
     115        # which would silently catch those from deep inside the indexing 
     116        # code, too... 
     117 
     118        if index.respond_to?(name) 
     119          index.send name, *args 
     120        # TODO check where we need this: 
     121        #elsif clazz.respond_to?(name) 
     122        #      @logger.debug "no luck, trying to call class method instead" 
     123        #      clazz.send name, *args 
     124        else 
     125          raise NoMethodError.new("method #{name} not supported by DRb server") 
    121126        end 
    122127      rescue => e 
     
    126131 
    127132      # make sure we have a versioned index in place, building one if necessary 
    128       def ensure_index_exists(class_name) 
    129         @logger.debug "DRb server: ensure_index_exists for class #{class_name}" 
    130         with_class class_name do |clazz| 
    131           dir = clazz.aaf_configuration[:index_dir] 
    132           unless File.directory?(dir) && File.file?(File.join(dir, 'segments')) && dir =~ %r{/\d+(_\d+)?$} 
    133             rebuild_index(clazz) 
    134           end 
     133      def ensure_index_exists(index_name) 
     134        @logger.debug "DRb server: ensure_index_exists for index #{index_name}" 
     135        definition = ActsAsFerret::index_definition(index_name) 
     136        dir = definition[:index_dir] 
     137        unless File.directory?(dir) && File.file?(File.join(dir, 'segments')) && dir =~ %r{/\d+(_\d+)?$} 
     138          rebuild_index(index_name) 
    135139        end 
    136140      end 
     
    145149 
    146150      # hides LocalIndex#rebuild_index to implement index versioning 
    147       def rebuild_index(clazz) 
    148         definition = ActsAsFerret::index_definition_for_class(clazz) 
     151      def rebuild_index(index_name) 
     152        definition = ActsAsFerret::get_index(index_name).index_definition.dup 
    149153        models = definition[:registered_models] 
    150         with_class clazz do |clazz| 
    151           index = new_index_for(clazz, models) 
    152           reconnect_when_needed(clazz) do 
    153             @logger.debug "DRb server: rebuild index for class(es) #{models.inspect} in #{index.options[:path]}" 
    154             index.index_models models 
    155           end 
    156           new_version = File.join definition[:index_base_dir], Time.now.utc.strftime('%Y%m%d%H%M%S') 
    157           # create a unique directory name (needed for unit tests where  
    158           # multiple rebuilds per second may occur) 
    159           if File.exists?(new_version) 
    160             i = 0 
    161             i+=1 while File.exists?("#{new_version}_#{i}") 
    162             new_version << "_#{i}" 
    163           end 
     154        index = new_index_for(definition) 
     155        # TODO fix reconnection stuff 
     156        #  reconnect_when_needed(clazz) do 
     157        #    @logger.debug "DRb server: rebuild index for class(es) #{models.inspect} in #{index.options[:path]}" 
     158        index.index_models models 
     159        #  end 
     160        new_version = File.join definition[:index_base_dir], Time.now.utc.strftime('%Y%m%d%H%M%S') 
     161        # create a unique directory name (needed for unit tests where  
     162        # multiple rebuilds per second may occur) 
     163        if File.exists?(new_version) 
     164          i = 0 
     165          i+=1 while File.exists?("#{new_version}_#{i}") 
     166          new_version << "_#{i}" 
     167        end 
    164168           
    165           File.rename index.options[:path], new_version 
    166           ActsAsFerret::change_index_dir definition[:name], new_version  
    167         end 
     169        File.rename index.options[:path], new_version 
     170        ActsAsFerret::change_index_dir index_name, new_version  
    168171      end 
    169172 
    170173 
    171174      protected 
    172  
    173         def with_class(clazz, *args) 
    174           clazz = clazz.constantize if String === clazz 
    175           yield clazz, *args 
    176         end 
    177175 
    178176        def reconnect_when_needed(clazz) 
     
    197195        end 
    198196 
    199         def new_index_for(clazz, models) 
    200           aaf_configuration = clazz.aaf_configuration 
    201           ferret_cfg = aaf_configuration[:ferret].dup 
     197        def new_index_for(index_definition) 
     198          ferret_cfg = index_definition[:ferret].dup 
    202199          ferret_cfg.update :auto_flush  => false,  
    203200                            :create      => true, 
    204                             :field_infos => ActsAsFerret::field_infos(models), 
    205                             :path        => File.join(aaf_configuration[:index_base_dir], 'rebuild') 
     201                            :field_infos => ActsAsFerret::field_infos(index_definition), 
     202                            :path        => File.join(index_definition[:index_base_dir], 'rebuild') 
    206203          returning Ferret::Index::Index.new(ferret_cfg) do |i| 
    207             i.batch_size = aaf_configuration[:reindex_batch_size] 
     204            i.batch_size = index_definition[:reindex_batch_size] 
    208205            i.logger = @logger 
    209206          end 
  • trunk/plugin/acts_as_ferret/lib/index.rb

    r317 r318  
    11module ActsAsFerret 
     2 
     3  class IndexLogger 
     4    def initialize(logger, name) 
     5      @logger = logger 
     6      @index_name = name 
     7    end 
     8    %w(debug info warn error).each do |m| 
     9      define_method(m) do |message| 
     10        @logger.send m, "[#{@index_name}] #{message}" 
     11      end 
     12      question = :"#{m}?" 
     13      define_method(question) do 
     14        @logger.send question 
     15      end 
     16    end 
     17  end 
    218 
    319  # base class for local and remote indexes 
     
    521 
    622    attr_reader :aaf_configuration 
    7     attr_accessor :logger 
    8     def initialize(aaf_configuration) 
    9       @aaf_configuration = aaf_configuration 
    10       @logger = Logger.new("#{RAILS_ROOT}/log/ferret_index.log") 
    11       @logger.level = ActiveRecord::Base.logger.level 
    12       @index_name = aaf_configuration[:name] 
     23    attr_accessor :logger, :index_name, :index_definition 
     24    def initialize(index_definition) 
     25      @index_definition = index_definition 
     26      @index_name = index_definition[:name] 
     27      @logger = IndexLogger.new(ActsAsFerret::logger, @index_name) 
    1328    end 
    14      
     29 
     30    #def index_definition 
     31    #  @index_definition ||= ActsAsFerret::index_definition(@index_name) 
     32    #end 
     33 
     34    def register_class(clazz, options = {}) 
     35      logger.info "register class #{clazz} with index #{index_name}" 
     36      index_definition[:registered_models] << clazz 
     37      index_definition[:store_class_name] = true if shared? 
     38 
     39      # merge fields from this acts_as_ferret call with predefined fields 
     40      already_defined_fields = index_definition[:ferret_fields] 
     41      field_config = ActsAsFerret::build_field_config options[:fields] 
     42      field_config.update ActsAsFerret::build_field_config( options[:additional_fields] ) 
     43      field_config.each do |field, config| 
     44        if already_defined_fields.has_key?(field) 
     45          logger.info "ignoring redefinition of ferret field #{field}" if shared?  
     46        else 
     47          already_defined_fields[field] = config 
     48          logger.info "adding new field #{field} from class #{clazz.name} to index #{index_name}" 
     49        end 
     50      end 
     51       
     52      # update default field list to be used by the query parser, unless it  
     53      # was explicitly given by user. 
     54      # 
     55      # It will include all content fields *not* marked as :untokenized. 
     56      # This fixes the otherwise failing CommentTest#test_stopwords. Basically 
     57      # this means that by default only tokenized fields (which all fields are 
     58      # by default) will be searched. If you want to search inside the contents  
     59      # of an untokenized field, you'll have to explicitly specify it in your  
     60      # query. 
     61      unless index_definition[:user_default_field] 
     62        # grab all tokenized fields 
     63        ferret_fields = index_definition[:ferret_fields] 
     64        index_definition[:ferret][:default_field] = ferret_fields.keys.select do |field| 
     65          ferret_fields[field][:index] != :untokenized 
     66        end 
     67        logger.info "default field list for index #{index_name}: #{index_definition[:ferret][:default_field].inspect}" 
     68      end 
     69 
     70      index_definition 
     71    end 
     72 
     73    # true if this index is used by more than one model class 
     74    def shared? 
     75      index_definition[:registered_models].size > 1 
     76    end 
     77 
     78    # Switches the index to a new index directory. 
     79    # Used by the DRb server when switching to a new index version. 
     80    def change_index_dir(new_dir) 
     81      logger.debug "[#{index_name}] changing index dir to #{new_dir}" 
     82      index_definition[:index_dir] = index_definition[:ferret][:path] = new_dir 
     83      reopen! 
     84      logger.debug "[#{index_name}] index dir is now #{new_dir}" 
     85    end 
     86 
     87    protected 
     88 
    1589  end 
    1690 
  • trunk/plugin/acts_as_ferret/lib/local_index.rb

    r317 r318  
    33    include MoreLikeThis::IndexMethods 
    44 
    5     def initialize(aaf_configuration) 
     5    def initialize(index_name) 
    66      super 
    77      ensure_index_exists 
     
    99 
    1010    def reopen! 
    11       if @ferret_index 
    12         @ferret_index.close 
    13         @ferret_index = nil 
    14       end 
    15       logger.debug "reopening index at #{aaf_configuration[:ferret][:path]}" 
     11      logger.debug "reopening index at #{index_definition[:ferret][:path]}" 
     12      close 
    1613      ferret_index 
    1714    end 
     
    2017    def ferret_index 
    2118      ensure_index_exists 
    22       returning @ferret_index ||= Ferret::Index::Index.new(aaf_configuration[:ferret]) do 
    23         @ferret_index.batch_size = aaf_configuration[:reindex_batch_size] 
     19      returning @ferret_index ||= Ferret::Index::Index.new(index_definition[:ferret]) do 
     20        @ferret_index.batch_size = index_definition[:reindex_batch_size] 
    2421        @ferret_index.logger = logger 
    2522      end 
     
    2926    # Rebuilds the index if none exists. 
    3027    def ensure_index_exists 
    31       logger.debug "LocalIndex: ensure_index_exists at #{aaf_configuration[:index_dir]}" 
    32       unless File.file? "#{aaf_configuration[:index_dir]}/segments" 
    33         ActsAsFerret::ensure_directory(aaf_configuration[:index_dir]) 
    34         close 
     28      #logger.debug "LocalIndex: ensure_index_exists at #{index_definition[:index_dir]}" 
     29      unless File.file? "#{index_definition[:index_dir]}/segments" 
     30        ActsAsFerret::ensure_directory(index_definition[:index_dir]) 
    3531        rebuild_index  
    3632      end 
     
    4844    # rebuilds the index from all records of the model classes associated with this index 
    4945    def rebuild_index 
    50       definition = ActsAsFerret::index_definition(@index_name) 
    51       models = definition[:registered_models] 
    52       logger.debug "rebuild index: #{models.inspect}" 
    53       self.close 
    54       index = Ferret::Index::Index.new(definition[:ferret].dup.update(:auto_flush  => false,  
    55                                                                       # TODO fieldinfos sollten jetzt 
    56                                                                       # auch aus neuer config kommen! 
    57                                                                       :field_infos => ActsAsFerret::field_infos(models), 
    58                                                                       :create      => true)) 
    59       index.batch_size = definition[:reindex_batch_size] 
     46      models = index_definition[:registered_models] 
     47      logger.debug "rebuild index with models: #{models.inspect}" 
     48      close 
     49      index = Ferret::Index::Index.new(index_definition[:ferret].dup.update(:auto_flush  => false,  
     50                                                                            :field_infos => ActsAsFerret::field_infos(index_definition), 
     51                                                                            :create      => true)) 
     52      index.batch_size = index_definition[:reindex_batch_size] 
    6053      index.logger = logger 
    6154      index.index_models models 
    62       ActsAsFerret::change_index_dir @index_name, definition[:ferret][:path] 
     55      reopen! 
    6356    end 
    6457 
     
    7871 
    7972    # Total number of hits for the given query.  
    80     # To count the results of a multi_search query, specify an array of  
    81     # class names with the :multi option. 
    8273    def total_hits(query, options = {}) 
    83       index = (models = options.delete(:multi)) ? multi_index(models) : ferret_index 
    84       index.search(query, options).total_hits 
    85     end 
    86  
    87     def determine_lazy_fields(options = {}) 
    88       stored_fields = options[:lazy] 
    89       if stored_fields && !(Array === stored_fields) 
    90         stored_fields = aaf_configuration[:ferret_fields].select { |field, config| config[:store] == :yes }.map(&:first) 
    91       end 
    92       logger.debug "stored_fields: #{stored_fields}" 
    93       return stored_fields 
    94     end 
    95  
    96     # loads data for fields declared as :lazy from the Ferret document 
    97     def extract_lazy_fields(doc, lazy_fields)  
    98       fields = aaf_configuration[:ferret_fields]  
    99       data = {}  
    100       lazy_fields.each { |field| data[fields[field][:via]] = doc[field] } if lazy_fields  
    101       data  
     74      ferret_index.search(query, options).total_hits 
    10275    end 
    10376 
     
    11083      index = ferret_index 
    11184      logger.debug "query: #{ferret_index.process_query query}" if logger.debug? 
    112       lazy_fields = determine_lazy_fields options 
     85      lazy_fields = determine_stored_fields options 
    11386      logger.debug "lazy_fields: #{lazy_fields}" 
    11487 
    11588      total_hits = index.search_each(query, options) do |hit, score| 
    11689        doc = index[hit] 
    117         model = aaf_configuration[:store_class_name] ? doc[:class_name] : aaf_configuration[:class_name] 
     90        model = index_definition[:store_class_name] ? doc[:class_name] : index_definition[:class_name] 
    11891        # fetch stored fields if lazy loading 
    11992        data = extract_lazy_fields(doc, lazy_fields) 
     
    128101    end 
    129102 
    130     # Queries multiple Ferret indexes to retrieve model class, id and score for  
    131     # each hit. Use the models parameter to give the list of models to search. 
    132     # If a block is given, model, id and score are yielded and the number of  
    133     # total hits is returned. Otherwise [total_hits, result_array] is returned. 
    134     def id_multi_search(query, models, options = {}) 
    135       index = multi_index(models) 
    136       result = [] 
    137       lazy_fields = determine_lazy_fields options 
    138       total_hits = index.search_each(query, options) do |hit, score| 
    139         doc = index[hit] 
    140         # fetch stored fields if lazy loading 
    141         data = extract_lazy_fields(doc, lazy_fields) 
    142         raise "':store_class_name => true' required for multi_search to work" if doc[:class_name].blank? 
    143         if block_given? 
    144           yield doc[:class_name], doc[:id], score, doc, data 
    145         else 
    146           result << { :model => doc[:class_name], :id => doc[:id], :score => score, :data => data } 
    147         end 
    148       end 
    149       return block_given? ? total_hits : [ total_hits, result ] 
    150     end 
    151103 
    152104    ###################################### 
     
    183135        else 
    184136          query = process_query(query) # process only once 
    185           aaf_configuration[:ferret_fields].each_pair do |field, config| 
     137          index_definition[:ferret_fields].each_pair do |field, config| 
    186138            next if config[:store] == :no || config[:highlight] == :no 
    187139            options[:field] = field 
     
    203155    # the class name only needs to be given in case of a shared index configuration 
    204156    def query_for_record(id, class_name = nil) 
    205       Ferret::Search::TermQuery.new(:id, id.to_s) 
     157      if shared? 
     158        raise InvalidArgumentError.new("shared index needs class_name argument") if class_name.nil? 
     159        returning bq = Ferret::Search::BooleanQuery.new do 
     160          bq.add_query(Ferret::Search::TermQuery.new(:id,         id.to_s),    :must) 
     161          bq.add_query(Ferret::Search::TermQuery.new(:class_name, class_name), :must) 
     162        end 
     163      else 
     164        Ferret::Search::TermQuery.new(:id, id.to_s) 
     165      end 
    206166    end 
    207167 
     
    209169    protected 
    210170 
     171    def determine_stored_fields(options = {}) 
     172      stored_fields = options[:lazy] 
     173      if stored_fields && !(Array === stored_fields) 
     174        stored_fields = index_definition[:ferret_fields].select { |field, config| config[:store] == :yes }.map(&:first) 
     175      end 
     176      logger.debug "stored_fields: #{stored_fields}" 
     177      return stored_fields 
     178    end 
     179 
     180    # loads data for fields declared as :lazy from the Ferret document 
     181    def extract_lazy_fields(doc, lazy_fields)  
     182      fields = index_definition[:ferret_fields]  
     183      data = {}  
     184      lazy_fields.each { |field| data[fields[field][:via]] = doc[field] } if lazy_fields  
     185      data  
     186    end 
     187 
    211188    # returns a MultiIndex instance operating on a MultiReader 
    212     def multi_index(model_classes) 
    213       model_classes.map!(&:constantize) if String === model_classes.first 
    214       model_classes.sort! { |a, b| a.name <=> b.name } 
    215       key = model_classes.inject("") { |s, clazz| s + clazz.name } 
    216       multi_config = aaf_configuration[:ferret].dup 
    217       multi_config.delete :default_field  # we don't want the default field list of *this* class for multi_searching 
    218       ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config) 
    219     end 
     189    #def multi_index(model_classes) 
     190    #  model_classes.map!(&:constantize) if String === model_classes.first 
     191    #  model_classes.sort! { |a, b| a.name <=> b.name } 
     192    #  key = model_classes.inject("") { |s, clazz| s + clazz.name } 
     193    #  multi_config = index_definition[:ferret].dup 
     194    #  multi_config.delete :default_field  # we don't want the default field list of *this* class for multi_searching 
     195    #  ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config) 
     196    #end 
    220197  
    221198  end 
  • trunk/plugin/acts_as_ferret/lib/multi_index.rb

    r317 r318  
    44      class MultiIndex 
    55         
    6         def initialize(model_classes, options = {}) 
    7           @model_classes = model_classes 
     6        def initialize(indexes, options = {}) 
    87          # ensure all models indexes exist 
    9           @model_classes.each { |m| m.aaf_index.ensure_index_exists } 
    10           default_fields = @model_classes.inject([]) do |fields, c|  
    11             fields + [ c.aaf_configuration[:ferret][:default_field] ].flatten 
    12           end 
    13           @options = {  
     8          @indexes = indexes 
     9          indexes.each { |i| i.ensure_index_exists } 
     10          default_fields = indexes.inject([]) do |fields, idx|  
     11            fields + [ idx.index_definition[:ferret][:default_field] ] 
     12          end.flatten.uniq 
     13          @options = { 
    1414            :default_field => default_fields 
    1515          }.update(options) 
     16          @logger = IndexLogger.new(ActsAsFerret::logger, "multi: #{indexes.map(&:index_name).join(',')}") 
     17        end 
     18         
     19        # Queries multiple Ferret indexes to retrieve model class, id and score for  
     20        # each hit. Use the models parameter to give the list of models to search. 
     21        # If a block is given, model, id and score are yielded and the number of  
     22        # total hits is returned. Otherwise [total_hits, result_array] is returned. 
     23        def find_ids(query, options = {}) 
     24          result = [] 
     25          lazy_fields = determine_stored_fields options 
     26          total_hits = search_each(query, options) do |hit, score| 
     27            doc = index[hit] 
     28            # fetch stored fields if lazy loading 
     29            data = extract_lazy_fields(doc, lazy_fields) 
     30            raise "':store_class_name => true' required for multi_search to work" if doc[:class_name].blank?