Changeset 77
- Timestamp:
- 08/23/06 16:42:52 (2 years ago)
- Files:
-
- trunk/plugin/acts_as_ferret/lib/acts_as_ferret.rb (modified) (21 diffs)
- trunk/plugin/acts_as_ferret/lib/multi_index.rb (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plugin/acts_as_ferret/lib/acts_as_ferret.rb
r67 r77 22 22 require 'set' 23 23 24 # 0.10 problems 25 # Ferret::Search::Similarity, Ferret::Search::Similarity.default missing 26 # IndexReader#latest? segfaults when used on multiple indexes 27 # :offset and :limit get ignored by search_each 28 # query_parser ignores or_default 24 29 25 30 # Yet another Ferret Mixin. … … 94 99 # document instance 95 100 def define_to_field_method(field, options = {}) 96 default_opts = { :store => Ferret::Document::Field::Store::NO, 97 :index => Ferret::Document::Field::Index::TOKENIZED, 98 :term_vector => Ferret::Document::Field::TermVector::NO, 99 :binary => false, 100 :boost => 1.0 101 } 102 default_opts.update(options) if options.is_a?(Hash) 103 fields_for_ferret << field 101 options = { 102 :store => :no, 103 :index => :yes, 104 :term_vector => :with_positions_offsets, 105 :boost => 1.0 }.update(options) 106 fields_for_ferret[field] = options 104 107 define_method("#{field}_to_ferret".to_sym) do 105 108 begin … … 107 110 val = content_for_field_name(field) 108 111 rescue 109 logger. debug("Error retrieving value for field #{field}: #{$!}")112 logger.warn("Error retrieving value for field #{field}: #{$!}") 110 113 val = '' 111 114 end 112 115 logger.debug("Adding field #{field} with value '#{val}' to index") 113 Ferret::Document::Field.new(field.to_s, val, 114 default_opts[:store], 115 default_opts[:index], 116 default_opts[:term_vector], 117 default_opts[:binary], 118 default_opts[:boost]) 116 val 117 #Ferret::Field.new(val, default_opts[:boost]) 118 end 119 end 120 121 def add_fields(field_config) 122 if field_config.respond_to?(:each_pair) 123 field_config.each_pair do |key,val| 124 define_to_field_method(key,val) 125 end 126 elsif field_config.respond_to?(:each) 127 field_config.each do |field| 128 define_to_field_method(field) 129 end 119 130 end 120 131 end … … 137 148 # all attributes of the class will be indexed. You may also give 138 149 # symbols pointing to instance methods of your model here, i.e. 139 # to retrieve and index data from a related model. 150 # to retrieve and index data from a related model. 151 # 152 # additional_fields:: names fields to include in the index, in addition 153 # to those derived from the db scheme. use if you want to add 154 # custom fields derived from methods to the db fields (which will be picked 155 # by aaf). This option will be ignored when the fields option is given, in 156 # that case additional fields get specified there. 140 157 # 141 158 # index_dir:: declares the directory where to put the index for this class. … … 157 174 # 158 175 # ferret_options may be: 159 # occur_default:: - whether query terms are required by 160 # default (the default), or not. Specify one of 161 # Ferret::Search::BooleanClause::Occur::MUST or 162 # Ferret::Search::BooleanClause::Occur::SHOULD 176 # or_default:: - whether query terms are required by 177 # default (the default, false), or not (true) 163 178 # 164 179 # analyzer:: the analyzer to use for query parsing (default: nil, 165 # wihch means the ferret default Analyzer gets used) 166 # 180 # wihch means the ferret StandardAnalyzer gets used) 181 # 182 # TODO: handle additional_fields 167 183 def acts_as_ferret(options={}, ferret_options={}) 168 184 configuration = { 169 :fields => nil,170 185 :index_dir => "#{FerretMixin::Acts::ARFerret::index_dir}/#{self.name.underscore}", 171 186 :store_class_name => false, … … 174 189 } 175 190 ferret_configuration = { 176 :occur_default => Ferret::Search::BooleanClause::Occur::MUST, 177 :handle_parse_errors => true, 178 :default_search_field => '*', 179 :analyzer => Ferret::Analysis::StandardAnalyzer.new, 180 # :wild_lower => true 191 :or_default => false, 192 :handle_parser_errors => true, 193 #:max_clauses => 512, 194 #:default_field => '*', 195 #:analyzer => Ferret::Analysis::StandardAnalyzer.new, 196 # :wild_card_downcase => true 181 197 } 182 198 configuration.update(options) if options.is_a?(Hash) 199 183 200 # apply appropriate settings for shared index 184 201 if configuration[:single_index] … … 190 207 # be overwritten by the user: 191 208 ferret_configuration.update( 192 :key => (configuration[:single_index] ? ['id', 'class_name'] : 'id'), 209 210 :key => (configuration[:single_index] ? [:id, :class_name] : :id), 193 211 :path => configuration[:index_dir], 194 212 :auto_flush => true, … … 209 227 cattr_accessor :ferret_configuration 210 228 211 @@fields_for_ferret = Array.new229 @@fields_for_ferret = Hash.new 212 230 @@configuration = configuration 213 231 @@ferret_configuration = ferret_configuration 214 215 if configuration[:fields].respond_to?(:each_pair) 216 configuration[:fields].each_pair do |key,val| 217 define_to_field_method(key,val) 218 end 219 elsif configuration[:fields].respond_to?(:each) 220 configuration[:fields].each do |field| 221 define_to_field_method(field) 222 end 232 233 if configuration[:fields] 234 add_fields(configuration[:fields]) 223 235 else 224 @@fields_for_ferret = nil 236 add_fields(self.new.attributes.keys.map { |k| k.to_sym }) 237 add_fields(configuration[:additional_fields]) 225 238 end 226 239 EOV … … 243 256 # model classes that should also go into this index as parameters. 244 257 # Useful when using the :single_index option. 245 def rebuild_index(*additional_models) 246 index = Ferret::Index::Index.new(ferret_configuration.merge(:create => true)) 247 additional_models << self 258 # Note that attributes named the same in different models will share 259 # the same field options in the shared index. 260 def rebuild_index(*models) 261 models << self 262 # default attributes for fields 263 fi = Ferret::Index::FieldInfos.new(:store => :no, 264 :index => :yes, 265 :term_vector => :no, 266 :boost => 1.0) 267 # primary key 268 fi.add_field(:id, :store => :yes, :index => :untokenized) 269 # class_name 270 if configuration[:store_class_name] 271 fi.add_field(:class_name, :store => :yes, :index => :untokenized) 272 end 273 # collect field options from all models 274 fields = {} 275 models.each do |model| 276 fields.update(model.fields_for_ferret) 277 end 278 fields.each_pair do |field, options| 279 fi.add_field(field, { :store => :no, 280 :index => :yes }.update(options)) 281 end 282 fi.create_index(ferret_configuration[:path]) 283 284 index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => false)) 248 285 batch_size = 1000 249 additional_models.each do |model|286 models.each do |model| 250 287 # index in batches of 1000 to limit memory consumption (fixes #24) 251 288 model.transaction do … … 341 378 def single_index_field_names(models) 342 379 @single_index_field_names ||= ( 343 searcher = Ferret::Search:: IndexSearcher.new(class_index_dir)380 searcher = Ferret::Search::Searcher.new(class_index_dir) 344 381 if searcher.reader.respond_to?(:get_field_names) 345 382 (searcher.reader.send(:get_field_names) - ['id', 'class_name']).to_a … … 374 411 #end 375 412 #q << " AND (#{class_clauses.join(' OR ')})" 376 qp = Ferret::QueryParser.new(ferret_configuration[:default_search_field], ferret_configuration.update(:fields => single_index_field_names(options[:models]))) 413 414 qp = Ferret::QueryParser.new (ferret_configuration) 415 qp.fields = ferret_index.send(:reader).field_names 377 416 original_query = qp.parse(q) 378 417 end 379 418 #else 380 419 q = Ferret::Search::BooleanQuery.new 381 q.add_query(original_query, Ferret::Search::BooleanClause::Occur::MUST)420 q.add_query(original_query, :must) 382 421 model_query = Ferret::Search::BooleanQuery.new 383 422 options[:models].each do |model| 384 model_query.add_query(Ferret::Search::TermQuery.new( Ferret::Index::Term.new('class_name', model.name)), Ferret::Search::BooleanClause::Occur::SHOULD)385 end 386 q.add_query(model_query, Ferret::Search::BooleanClause::Occur::MUST)423 model_query.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should) 424 end 425 q.add_query(model_query, :must) 387 426 #end 388 427 end … … 432 471 # 433 472 def find_id_by_contents(q, options = {}) 434 options[:num_docs] = configuration[:max_results] if options[:num_docs] == :all 473 deprecated_options_support(options) 474 options[:limit] = configuration[:max_results] if options[:limit] == :all 475 435 476 result = [] 436 477 index = self.ferret_index 437 hits = index.search(q, options) 438 hits.each do |hit, score| 478 #hits = index.search(q, options) 479 #hits.each do |hit, score| 480 total_hits = index.search_each(q, options) do |hit, score| 439 481 # only collect result data if we intend to return it 440 482 doc = index[hit] … … 447 489 end 448 490 logger.debug "id_score_model array: #{result.inspect}" 449 return block_given? ? hits.total_hits : result491 return block_given? ? total_hits : result 450 492 end 451 493 … … 470 512 # 471 513 def id_multi_search(query, additional_models = [], options = {}) 472 options[:num_docs] = configuration[:max_results] if options[:num_docs] == :all 514 deprecated_options_support(options) 515 options[:limit] = configuration[:max_results] if options[:limit] == :all 473 516 additional_models << self 474 517 searcher = multi_index(additional_models) 475 518 result = [] 476 hits = searcher.search(query, options) 477 hits.each { |hit, score| 478 doc = searcher.doc(hit) 519 total_hits = searcher.search_each (query, options) do |hit, score| 520 doc = searcher[hit] 479 521 if block_given? 480 522 yield doc[:class_name], doc[:id].to_i, score … … 482 524 result << { :model => doc[:class_name], :id => doc[:id], :score => score } 483 525 end 484 }485 return block_given? ? hits.total_hits : result526 end 527 return block_given? ? total_hits : result 486 528 end 487 529 … … 492 534 @@multi_indexes[key] ||= MultiIndex.new(model_classes, ferret_configuration) 493 535 end 494 536 537 def deprecated_options_support(options) 538 if options[:num_docs] 539 logger.warn ":num_docs is deprecated, use :limit instead!" 540 options[:limit] ||= options[:num_docs] 541 end 542 if options[:first_doc] 543 logger.warn ":first_doc is deprecated, use :offset instead!" 544 options[:offset] ||= options[:first_doc] 545 end 546 end 495 547 end 496 548 … … 508 560 def ferret_create 509 561 logger.debug "ferret_create/update: #{self.class.name} : #{self.id}" 510 self.class.ferret_index << self.to_doc if @ferret_reindex 562 if @ferret_reindex 563 self.class.ferret_index << self.to_doc 564 end 511 565 @ferret_reindex = true 512 566 true … … 518 572 logger.debug "ferret_destroy: #{self.class.name} : #{self.id}" 519 573 begin 520 query = Ferret::Search::TermQuery.new( Ferret::Index::Term.new('id',self.id.to_s))574 query = Ferret::Search::TermQuery.new(:id, self.id.to_s) 521 575 if self.class.configuration[:single_index] 522 576 bq = Ferret::Search::BooleanQuery.new 523 bq.add_query(query, Ferret::Search::BooleanClause::Occur::MUST) 524 bq.add_query(Ferret::Search::TermQuery.new(Ferret::Index::Term.new('class_name', self.class.name)), 525 Ferret::Search::BooleanClause::Occur::MUST) 577 bq.add_query(query, :must) 578 bq.add_query(Ferret::Search::TermQuery.new(:class_name, self.class.name), :must) 526 579 query = bq 527 580 end … … 537 590 logger.debug "creating doc for class: #{self.class.name}, id: #{self.id}" 538 591 # Churn through the complete Active Record and add it to the Ferret document 539 doc = Ferret::Document ::Document.new592 doc = Ferret::Document.new 540 593 # store the id of each item 541 doc << Ferret::Document::Field.new( "id", self.id, 542 Ferret::Document::Field::Store::YES, 543 Ferret::Document::Field::Index::UNTOKENIZED ) 594 doc[:id] = self.id 595 544 596 # store the class name if configured to do so 545 597 if configuration[:store_class_name] 546 doc << Ferret::Document::Field.new( "class_name", self.class.name, 547 Ferret::Document::Field::Store::YES, 548 Ferret::Document::Field::Index::UNTOKENIZED ) # have to tokenize to be able to use class_name field in queries ?! 598 doc[:class_name] = self.class.name 549 599 end 550 600 # iterate through the fields and add them to the document 551 if fields_for_ferret601 #if fields_for_ferret 552 602 # have user defined fields 553 fields_for_ferret.each do |field|554 doc << self.send("#{field}_to_ferret")555 end556 else603 fields_for_ferret.each_pair do |field, config| 604 doc[field] = self.send("#{field}_to_ferret") unless config[:ignore] 605 end 606 #else 557 607 # take all fields 558 self.attributes.each_pair do |key,val| 559 unless key == :id 560 logger.debug "add field #{key} with value #{val}" 561 doc << Ferret::Document::Field.new( 562 key, 563 val.to_s, 564 Ferret::Document::Field::Store::NO, 565 Ferret::Document::Field::Index::TOKENIZED) 566 end 567 end 568 end 608 # TODO shouldn't be needed any more 609 # puts "remove me!" 610 # self.attributes.each_pair do |key,val| 611 # unless key == :id 612 # logger.debug "add field #{key} with value #{val}" 613 # doc[key] = val.to_s 614 # end 615 # end 616 #end 569 617 return doc 570 618 end … … 648 696 end 649 697 begin 650 query.add_query(term_query, Ferret::Search::BooleanClause::Occur::SHOULD)698 query.add_query(term_query, :should) 651 699 rescue Ferret::Search::BooleanQuery::TooManyClauses 652 700 break … … 657 705 # exclude ourselves 658 706 t = Ferret::Index::Term.new('id', self.id.to_s) 659 query.add_query(Ferret::Search::TermQuery.new(t), 660 Ferret::Search::BooleanClause::Occur::MUST_NOT) 707 query.add_query(Ferret::Search::TermQuery.new(t), :must_not) 661 708 return query 662 709 end trunk/plugin/acts_as_ferret/lib/multi_index.rb
r61 r77 5 5 class MultiIndex 6 6 7 attr_reader :reader8 9 7 # todo: check for necessary index rebuilds in this place, too 10 8 # idea - each class gets a create_reader method that does this … … 12 10 @model_classes = model_classes 13 11 @options = { 14 :default_search_field => '*', 15 :analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new 12 :default_field => '*', 13 #:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new 14 :analyzer => Ferret::Analysis::StandardAnalyzer.new 16 15 }.update(options) 17 16 end … … 24 23 end 25 24 25 def search_each(query, options={}, &block) 26 query = process_query(query) 27 searcher.search_each(query, options={}, &block) 28 end 29 26 30 # checks if all our sub-searchers still are up to date 27 31 def latest? 28 return false unless @searcher 29 @sub_searchers.each do |s| 30 return false unless s.reader.latest? 32 return false unless @reader 33 # segfaults with 0.10.0 --> TODO report as bug @reader.latest? 34 @sub_readers.each do |r| 35 return false unless r.latest? 31 36 end 32 37 true … … 35 40 def ensure_searcher 36 41 unless latest? 37 field_names = Set.new38 @sub_ searchers = @model_classes.map { |clazz|42 #field_names = Set.new 43 @sub_readers = @model_classes.map { |clazz| 39 44 begin 40 searcher = Ferret::Search::IndexSearcher.new(clazz.class_index_dir)45 reader = Ferret::Index::IndexReader.new(clazz.class_index_dir) 41 46 rescue Exception 42 47 puts "error opening #{clazz.class_index_dir}: #{$!}" 43 48 end 44 if searcher.reader.respond_to?(:get_field_names) 45 field_names << searcher.reader.send(:get_field_names).to_set 46 elsif clazz.fields_for_ferret 47 field_names << clazz.fields_for_ferret.to_set 48 else 49 puts <<-END 50 unable to retrieve field names for class #{clazz.name}, please 51 consider naming all indexed fields in your call to acts_as_ferret! 52 END 53 clazz.content_columns.each { |col| field_names << col.name } 54 end 55 searcher 49 # field_names << reader.field_names.to_set 50 reader 56 51 } 57 @ searcher = Ferret::Search::MultiSearcher.new(@sub_searchers)58 @ field_names = field_names.flatten.to_a52 @reader = Ferret::Index::IndexReader.new(@sub_readers) 53 @searcher = Ferret::Search::Searcher.new(@reader) 59 54 @query_parser = nil # trigger re-creation from new field_name array 60 55 end … … 67 62 68 63 def doc(i) 69 searcher .doc(i)64 searcher[i] 70 65 end 66 alias :[] :doc 71 67 72 68 def query_parser 69 ensure_searcher 73 70 unless @query_parser 74 ensure_searcher # we dont need the searcher, but the @field_names array is built by this function, too 75 @query_parser ||= Ferret::QueryParser.new( 76 @options[:default_search_field], 77 { :fields => @field_names }.merge(@options) 78 ) 71 @query_parser ||= Ferret::QueryParser.new(@options) 79 72 end 73 @query_parser.fields = @reader.field_names 80 74 @query_parser 81 75 end
