diff --git a/.hound.yml b/.hound.yml new file mode 100644 index 0000000..5d0ff60 --- /dev/null +++ b/.hound.yml @@ -0,0 +1,2 @@ +ruby: + config_file: .rubocop.yml diff --git a/.rubocop.yml b/.rubocop.yml index 7efe0b1..76563b3 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -5,3 +5,9 @@ Style/Documentation: Lint/HandleExceptions: Exclude: - Rakefile + +# For Ruby 1.8 compatibility +Style/DotPosition: + EnforcedStyle: trailing +Style/HashSyntax: + EnforcedStyle: hash_rockets diff --git a/.travis.yml b/.travis.yml index 9952f75..9a75a41 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,10 @@ language: ruby rvm: + - 1.8.7 - 1.9.2 - 1.9.3 - 2.0 - 2.1 - 2.2 - jruby-1.7 - - rbx-2.2 - - rbx-2.4 + - rbx-2 diff --git a/Gemfile b/Gemfile index 79b7d6e..b76de99 100644 --- a/Gemfile +++ b/Gemfile @@ -4,5 +4,5 @@ gemspec group :test do gem 'rake', '~> 10.4' - gem 'rubocop', '~> 0.30', platforms: [:ruby_20, :ruby_21] + gem 'rubocop', '~> 0.30', :platforms => [:ruby_20, :ruby_21] end diff --git a/README.md b/README.md index dd9223d..8df520d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# AhoCorasickMatcher [![Build Status](https://travis-ci.org/altmetric/aho_corasick_matcher.svg?branch=master)](https://travis-ci.org/altmetric/aho_corasick_matcher) +# Aho-Corasick Matcher [![Build Status](https://travis-ci.org/altmetric/aho_corasick_matcher.svg?branch=master)](https://travis-ci.org/altmetric/aho_corasick_matcher) A Ruby gem for finding strings in text using the [Aho-Corasick string matching search](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.96.4671&rep=rep1&type=pdf). @@ -22,14 +22,14 @@ matcher = AhoCorasickMatcher.new(['a', 'b', 'ab']) matcher.match('aba') #=> ['a', 'ab', 'b', 'a'] -matcher = AhoCorasickMatcher.new(["thistle", "sift", "thistles"]) -matcher.match("Theophilus thistle, the successful thistle sifter, in sifting a sieve full of un-sifted thistles, thrust three thousand thistles through the thick of his thumb.") +matcher = AhoCorasickMatcher.new(['thistle', 'sift', 'thistles']) +matcher.match('Theophilus thistle, the successful thistle sifter, in sifting a sieve full of un-sifted thistles, thrust three thousand thistles through the thick of his thumb.') #=> ["thistle", "thistle", "sift", "sift", "sift", "thistle", "thistles", "thistle", "thistles"] ``` ## Thanks -Loosely based on Tim Cowlishaw's implementation of the same algorithm https://github.com/timcowlishaw/aho_corasick +Loosely based on [Tim Cowlishaw's implementation of the same algorithm](https://github.com/timcowlishaw/aho_corasick). ## License diff --git a/Rakefile b/Rakefile index 5a194db..3ebaaa7 100644 --- a/Rakefile +++ b/Rakefile @@ -1,6 +1,6 @@ require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) -task default: :spec +task :default => :spec begin require 'rubocop/rake_task' diff --git a/aho_corasick_matcher.gemspec b/aho_corasick_matcher.gemspec index ad00112..3cf8de8 100644 --- a/aho_corasick_matcher.gemspec +++ b/aho_corasick_matcher.gemspec @@ -12,7 +12,6 @@ Gem::Specification.new do |s| s.homepage = 'https://github.com/altmetric/aho_corasick_matcher' s.files = %w(README.md LICENSE lib/aho_corasick_matcher.rb) s.test_files = Dir['spec/**/*.rb'] - s.require_paths = ['lib'] s.add_development_dependency('rspec', '~> 3.2') end diff --git a/lib/aho_corasick_matcher.rb b/lib/aho_corasick_matcher.rb index 5bad711..33548c0 100644 --- a/lib/aho_corasick_matcher.rb +++ b/lib/aho_corasick_matcher.rb @@ -12,13 +12,16 @@ def initialize(dictionary) end def match(string) - [].tap do |matches| - string.each_char.reduce(root) do |node, char| - (node || root).search(char.intern).tap do |child| - matches.push(*child.matches) if child - end - end + matches = [] + string.each_char.reduce(root) do |node, char| + child = (node || root).search(char.intern) + next unless child + + matches.push(*child.matches) + child end + + matches end private @@ -47,8 +50,8 @@ def build_suffix_map end class Node - attr_reader :matches, :child_map, :suffix, :parent - attr_writer :suffix + attr_reader :matches, :child_map, :parent + attr_accessor :suffix def initialize(parent = nil) @matches = [] @@ -57,19 +60,19 @@ def initialize(parent = nil) end def search(char) - @child_map[char] || suffix && suffix.search(char) + child_map[char] || (suffix && suffix.search(char)) end def child_or_create(char) - @child_map[char] ||= self.class.new(self) + child_map[char] ||= self.class.new(self) end def children - @child_map.values + child_map.values end def root? - parent.nil? + !parent end def build_child_suffixes @@ -89,6 +92,7 @@ def build_child_suffixes def find_failure_node(char) failure = suffix failure = failure.suffix until failure.search(char) || failure.root? + failure end end