Statistics
| Revision:

root / tmp / org.txm.statsengine.r.core.win32 / res / win32 / library / BH / include / boost / algorithm / searching / boyer_moore.hpp @ 2486

History | View | Annotate | Download (10.9 kB)

1
/* 
2
   Copyright (c) Marshall Clow 2010-2012.
3

4
   Distributed under the Boost Software License, Version 1.0. (See accompanying
5
   file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6

7
    For more information, see http://www.boost.org
8
*/
9

    
10
#ifndef BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP
11
#define BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP
12

    
13
#include <iterator>     // for std::iterator_traits
14

    
15
#include <boost/assert.hpp>
16
#include <boost/static_assert.hpp>
17

    
18
#include <boost/range/begin.hpp>
19
#include <boost/range/end.hpp>
20

    
21
#include <boost/utility/enable_if.hpp>
22
#include <boost/type_traits/is_same.hpp>
23

    
24
#include <boost/algorithm/searching/detail/bm_traits.hpp>
25
#include <boost/algorithm/searching/detail/debugging.hpp>
26

    
27
namespace boost { namespace algorithm {
28

    
29
/*
30
    A templated version of the boyer-moore searching algorithm.
31
    
32
References:
33
    http://www.cs.utexas.edu/users/moore/best-ideas/string-searching/
34
    http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf
35
    
36
Explanations:
37
    http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm
38
    http://www.movsd.com/bm.htm
39
    http://www.cs.ucdavis.edu/~gusfield/cs224f09/bnotes.pdf
40

41
The Boyer-Moore search algorithm uses two tables, a "bad character" table
42
to tell how far to skip ahead when it hits a character that is not in the pattern,
43
and a "good character" table to tell how far to skip ahead when it hits a
44
mismatch on a character that _is_ in the pattern.
45

46
Requirements:
47
        * Random access iterators
48
        * The two iterator types (patIter and corpusIter) must 
49
            "point to" the same underlying type and be comparable.
50
        * Additional requirements may be imposed but the skip table, such as:
51
        ** Numeric type (array-based skip table)
52
        ** Hashable type (map-based skip table)
53
*/
54

    
55
    template <typename patIter, typename traits = detail::BM_traits<patIter> >
56
    class boyer_moore {
57
        typedef typename std::iterator_traits<patIter>::difference_type difference_type;
58
    public:
59
        boyer_moore ( patIter first, patIter last ) 
60
                : pat_first ( first ), pat_last ( last ),
61
                  k_pattern_length ( std::distance ( pat_first, pat_last )),
62
                  skip_ ( k_pattern_length, -1 ),
63
                  suffix_ ( k_pattern_length + 1 )
64
            {
65
            this->build_skip_table   ( first, last );
66
            this->build_suffix_table ( first, last );
67
            }
68
            
69
        ~boyer_moore () {}
70
        
71
        /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last )
72
        /// \brief Searches the corpus for the pattern that was passed into the constructor
73
        /// 
74
        /// \param corpus_first The start of the data to search (Random Access Iterator)
75
        /// \param corpus_last  One past the end of the data to search
76
        ///
77
        template <typename corpusIter>
78
        corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
79
            BOOST_STATIC_ASSERT (( boost::is_same<
80
                                    typename std::iterator_traits<patIter>::value_type, 
81
                                    typename std::iterator_traits<corpusIter>::value_type>::value ));
82

    
83
            if ( corpus_first == corpus_last ) return corpus_last;  // if nothing to search, we didn't find it!
84
            if (    pat_first ==    pat_last ) return corpus_first; // empty pattern matches at start
85

    
86
            const difference_type k_corpus_length  = std::distance ( corpus_first, corpus_last );
87
        //  If the pattern is larger than the corpus, we can't find it!
88
            if ( k_corpus_length < k_pattern_length ) 
89
                return corpus_last;
90

    
91
        //  Do the search 
92
            return this->do_search   ( corpus_first, corpus_last );
93
            }
94
            
95
        template <typename Range>
96
        typename boost::range_iterator<Range>::type operator () ( Range &r ) const {
97
            return (*this) (boost::begin(r), boost::end(r));
98
            }
99

    
100
    private:
101
/// \cond DOXYGEN_HIDE
102
        patIter pat_first, pat_last;
103
        const difference_type k_pattern_length;
104
        typename traits::skip_table_t skip_;
105
        std::vector <difference_type> suffix_;
106

    
107
        /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
108
        /// \brief Searches the corpus for the pattern that was passed into the constructor
109
        /// 
110
        /// \param corpus_first The start of the data to search (Random Access Iterator)
111
        /// \param corpus_last  One past the end of the data to search
112
        /// \param p            A predicate used for the search comparisons.
113
        ///
114
        template <typename corpusIter>
115
        corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
116
        /*  ---- Do the matching ---- */
117
            corpusIter curPos = corpus_first;
118
            const corpusIter lastPos = corpus_last - k_pattern_length;
119
            difference_type j, k, m;
120

    
121
            while ( curPos <= lastPos ) {
122
        /*  while ( std::distance ( curPos, corpus_last ) >= k_pattern_length ) { */
123
            //  Do we match right where we are?
124
                j = k_pattern_length;
125
                while ( pat_first [j-1] == curPos [j-1] ) {
126
                    j--;
127
                //  We matched - we're done!
128
                    if ( j == 0 )
129
                        return curPos;
130
                    }
131
                
132
            //  Since we didn't match, figure out how far to skip forward
133
                k = skip_ [ curPos [ j - 1 ]];
134
                m = j - k - 1;
135
                if ( k < j && m > suffix_ [ j ] )
136
                    curPos += m;
137
                else
138
                    curPos += suffix_ [ j ];
139
                }
140
        
141
            return corpus_last;     // We didn't find anything
142
            }
143

    
144

    
145
        void build_skip_table ( patIter first, patIter last ) {
146
            for ( std::size_t i = 0; first != last; ++first, ++i )
147
                skip_.insert ( *first, i );
148
            }
149
        
150

    
151
        template<typename Iter, typename Container>
152
        void compute_bm_prefix ( Iter pat_first, Iter pat_last, Container &prefix ) {
153
            const std::size_t count = std::distance ( pat_first, pat_last );
154
            BOOST_ASSERT ( count > 0 );
155
            BOOST_ASSERT ( prefix.size () == count );
156
                            
157
            prefix[0] = 0;
158
            std::size_t k = 0;
159
            for ( std::size_t i = 1; i < count; ++i ) {
160
                BOOST_ASSERT ( k < count );
161
                while ( k > 0 && ( pat_first[k] != pat_first[i] )) {
162
                    BOOST_ASSERT ( k < count );
163
                    k = prefix [ k - 1 ];
164
                    }
165
                    
166
                if ( pat_first[k] == pat_first[i] )
167
                    k++;
168
                prefix [ i ] = k;
169
                }
170
            }
171

    
172
        void build_suffix_table ( patIter pat_first, patIter pat_last ) {
173
            const std::size_t count = (std::size_t) std::distance ( pat_first, pat_last );
174
            
175
            if ( count > 0 ) {  // empty pattern
176
                std::vector<typename std::iterator_traits<patIter>::value_type> reversed(count);
177
                (void) std::reverse_copy ( pat_first, pat_last, reversed.begin ());
178
                
179
                std::vector<difference_type> prefix (count);
180
                compute_bm_prefix ( pat_first, pat_last, prefix );
181
        
182
                std::vector<difference_type> prefix_reversed (count);
183
                compute_bm_prefix ( reversed.begin (), reversed.end (), prefix_reversed );
184
                
185
                for ( std::size_t i = 0; i <= count; i++ )
186
                    suffix_[i] = count - prefix [count-1];
187
         
188
                for ( std::size_t i = 0; i < count; i++ ) {
189
                    const std::size_t     j = count - prefix_reversed[i];
190
                    const difference_type k = i -     prefix_reversed[i] + 1;
191
         
192
                    if (suffix_[j] > k)
193
                        suffix_[j] = k;
194
                    }
195
                }
196
            }
197
/// \endcond
198
        };
199

    
200

    
201
/*  Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
202
    Use a bit of TMP to disambiguate the 3-argument templates */
203

    
204
/// \fn boyer_moore_search ( corpusIter corpus_first, corpusIter corpus_last, 
205
///       patIter pat_first, patIter pat_last )
206
/// \brief Searches the corpus for the pattern.
207
/// 
208
/// \param corpus_first The start of the data to search (Random Access Iterator)
209
/// \param corpus_last  One past the end of the data to search
210
/// \param pat_first    The start of the pattern to search for (Random Access Iterator)
211
/// \param pat_last     One past the end of the data to search for
212
///
213
    template <typename patIter, typename corpusIter>
214
    corpusIter boyer_moore_search ( 
215
                  corpusIter corpus_first, corpusIter corpus_last, 
216
                  patIter pat_first, patIter pat_last )
217
    {
218
        boyer_moore<patIter> bm ( pat_first, pat_last );
219
        return bm ( corpus_first, corpus_last );
220
    }
221

    
222
    template <typename PatternRange, typename corpusIter>
223
    corpusIter boyer_moore_search ( 
224
        corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
225
    {
226
        typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
227
        boyer_moore<pattern_iterator> bm ( boost::begin(pattern), boost::end (pattern));
228
        return bm ( corpus_first, corpus_last );
229
    }
230
    
231
    template <typename patIter, typename CorpusRange>
232
    typename boost::lazy_disable_if_c<
233
        boost::is_same<CorpusRange, patIter>::value, typename boost::range_iterator<CorpusRange> >
234
    ::type
235
    boyer_moore_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
236
    {
237
        boyer_moore<patIter> bm ( pat_first, pat_last );
238
        return bm (boost::begin (corpus), boost::end (corpus));
239
    }
240
    
241
    template <typename PatternRange, typename CorpusRange>
242
    typename boost::range_iterator<CorpusRange>::type
243
    boyer_moore_search ( CorpusRange &corpus, const PatternRange &pattern )
244
    {
245
        typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
246
        boyer_moore<pattern_iterator> bm ( boost::begin(pattern), boost::end (pattern));
247
        return bm (boost::begin (corpus), boost::end (corpus));
248
    }
249

    
250

    
251
    //  Creator functions -- take a pattern range, return an object
252
    template <typename Range>
253
    boost::algorithm::boyer_moore<typename boost::range_iterator<const Range>::type>
254
    make_boyer_moore ( const Range &r ) {
255
        return boost::algorithm::boyer_moore
256
            <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
257
        }
258
    
259
    template <typename Range>
260
    boost::algorithm::boyer_moore<typename boost::range_iterator<Range>::type>
261
    make_boyer_moore ( Range &r ) {
262
        return boost::algorithm::boyer_moore
263
            <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
264
        }
265

    
266
}}
267

    
268
#endif  //  BOOST_ALGORITHM_BOYER_MOORE_SEARCH_HPP