Statistics
| Revision:

root / tmp / org.txm.statsengine.r.core.win32 / res / win32 / library / BH / include / boost / algorithm / searching / knuth_morris_pratt.hpp @ 2486

History | View | Annotate | Download (10.1 kB)

1
/* 
2
   Copyright (c) Marshall Clow 2010-2012.
3

4
   Distributed under the Boost Software License, Version 1.0. (See accompanying
5
   file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6

7
    For more information, see http://www.boost.org
8
*/
9

    
10
#ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
11
#define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
12

    
13
#include <vector>
14
#include <iterator>     // for std::iterator_traits
15

    
16
#include <boost/assert.hpp>
17
#include <boost/static_assert.hpp>
18

    
19
#include <boost/range/begin.hpp>
20
#include <boost/range/end.hpp>
21

    
22
#include <boost/utility/enable_if.hpp>
23
#include <boost/type_traits/is_same.hpp>
24

    
25
#include <boost/algorithm/searching/detail/debugging.hpp>
26

    
27
// #define  BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
28

    
29
namespace boost { namespace algorithm {
30

    
31
// #define  NEW_KMP
32

    
33
/*
34
    A templated version of the Knuth-Morris-Pratt searching algorithm.
35
    
36
    Requirements:
37
        * Random-access iterators
38
        * The two iterator types (I1 and I2) must "point to" the same underlying type.
39

40
    http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
41
    http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
42
*/
43

    
44
    template <typename patIter>
45
    class knuth_morris_pratt {
46
        typedef typename std::iterator_traits<patIter>::difference_type difference_type;
47
    public:
48
        knuth_morris_pratt ( patIter first, patIter last ) 
49
                : pat_first ( first ), pat_last ( last ), 
50
                  k_pattern_length ( std::distance ( pat_first, pat_last )),
51
                  skip_ ( k_pattern_length + 1 ) {
52
#ifdef NEW_KMP
53
            preKmp ( pat_first, pat_last );
54
#else
55
            init_skip_table ( pat_first, pat_last );
56
#endif
57
#ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
58
            detail::PrintTable ( skip_.begin (), skip_.end ());
59
#endif
60
            }
61
            
62
        ~knuth_morris_pratt () {}
63
        
64
        /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
65
        /// \brief Searches the corpus for the pattern that was passed into the constructor
66
        /// 
67
        /// \param corpus_first The start of the data to search (Random Access Iterator)
68
        /// \param corpus_last  One past the end of the data to search
69
        /// \param p            A predicate used for the search comparisons.
70
        ///
71
        template <typename corpusIter>
72
        corpusIter operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
73
            BOOST_STATIC_ASSERT (( boost::is_same<
74
                typename std::iterator_traits<patIter>::value_type, 
75
                typename std::iterator_traits<corpusIter>::value_type>::value ));
76
            if ( corpus_first == corpus_last ) return corpus_last;  // if nothing to search, we didn't find it!
77
            if ( pat_first == pat_last )       return corpus_first; // empty pattern matches at start
78

    
79
            const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
80
        //  If the pattern is larger than the corpus, we can't find it!
81
            if ( k_corpus_length < k_pattern_length ) 
82
                return corpus_last;
83

    
84
            return do_search   ( corpus_first, corpus_last, k_corpus_length );
85
            }
86
    
87
        template <typename Range>
88
        typename boost::range_iterator<Range>::type operator () ( Range &r ) const {
89
            return (*this) (boost::begin(r), boost::end(r));
90
            }
91

    
92
    private:
93
/// \cond DOXYGEN_HIDE
94
        patIter pat_first, pat_last;
95
        const difference_type k_pattern_length;
96
        std::vector <difference_type> skip_;
97

    
98
        /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
99
        /// \brief Searches the corpus for the pattern that was passed into the constructor
100
        /// 
101
        /// \param corpus_first The start of the data to search (Random Access Iterator)
102
        /// \param corpus_last  One past the end of the data to search
103
        /// \param p            A predicate used for the search comparisons.
104
        ///
105
        template <typename corpusIter>
106
        corpusIter do_search ( corpusIter corpus_first, corpusIter corpus_last, 
107
                                                difference_type k_corpus_length ) const {
108
            difference_type match_start = 0;  // position in the corpus that we're matching
109
            
110
#ifdef NEW_KMP
111
            int patternIdx = 0;
112
            while ( match_start < k_corpus_length ) {
113
                while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
114
                    patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch
115

    
116
                patternIdx++;
117
                match_start++; //<--- corpus is always increased by 1
118

    
119
                if ( patternIdx >= (int) k_pattern_length )
120
                    return corpus_first + match_start - patternIdx;
121
                }
122
            
123
#else
124
//  At this point, we know:
125
//          k_pattern_length <= k_corpus_length
126
//          for all elements of skip, it holds -1 .. k_pattern_length
127
//      
128
//          In the loop, we have the following invariants
129
//              idx is in the range 0 .. k_pattern_length
130
//              match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1
131

    
132
            const difference_type last_match = k_corpus_length - k_pattern_length;
133
            difference_type idx = 0;          // position in the pattern we're comparing
134

    
135
            while ( match_start <= last_match ) {
136
                while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
137
                    if ( ++idx == k_pattern_length )
138
                        return corpus_first + match_start;
139
                    }
140
            //  Figure out where to start searching again
141
           //   assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward
142
                match_start += idx - skip_ [ idx ];
143
                idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
144
           //   assert ( idx >= 0 && idx < k_pattern_length );
145
                }
146
#endif
147
                
148
        //  We didn't find anything
149
            return corpus_last;
150
            }
151
    
152

    
153
        void preKmp ( patIter first, patIter last ) {
154
           const /*std::size_t*/ int count = std::distance ( first, last );
155
        
156
           int i, j;
157
        
158
           i = 0;
159
           j = skip_[0] = -1;
160
           while (i < count) {
161
              while (j > -1 && first[i] != first[j])
162
                 j = skip_[j];
163
              i++;
164
              j++;
165
              if (first[i] == first[j])
166
                 skip_[i] = skip_[j];
167
              else
168
                 skip_[i] = j;
169
           }
170
        }
171

    
172

    
173
        void init_skip_table ( patIter first, patIter last ) {
174
            const difference_type count = std::distance ( first, last );
175
    
176
            int j;
177
            skip_ [ 0 ] = -1;
178
            for ( int i = 1; i <= count; ++i ) {
179
                j = skip_ [ i - 1 ];
180
                while ( j >= 0 ) {
181
                    if ( first [ j ] == first [ i - 1 ] )
182
                        break;
183
                    j = skip_ [ j ];
184
                    }
185
                skip_ [ i ] = j + 1;
186
                }
187
            }
188
// \endcond
189
        };
190

    
191

    
192
/*  Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
193
    Use a bit of TMP to disambiguate the 3-argument templates */
194

    
195
/// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last, 
196
///       patIter pat_first, patIter pat_last )
197
/// \brief Searches the corpus for the pattern.
198
/// 
199
/// \param corpus_first The start of the data to search (Random Access Iterator)
200
/// \param corpus_last  One past the end of the data to search
201
/// \param pat_first    The start of the pattern to search for (Random Access Iterator)
202
/// \param pat_last     One past the end of the data to search for
203
///
204
    template <typename patIter, typename corpusIter>
205
    corpusIter knuth_morris_pratt_search ( 
206
                  corpusIter corpus_first, corpusIter corpus_last, 
207
                  patIter pat_first, patIter pat_last )
208
    {
209
        knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
210
        return kmp ( corpus_first, corpus_last );
211
    }
212

    
213
    template <typename PatternRange, typename corpusIter>
214
    corpusIter knuth_morris_pratt_search ( 
215
        corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
216
    {
217
        typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
218
        knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
219
        return kmp ( corpus_first, corpus_last );
220
    }
221
    
222
    template <typename patIter, typename CorpusRange>
223
    typename boost::lazy_disable_if_c<
224
        boost::is_same<CorpusRange, patIter>::value, typename boost::range_iterator<CorpusRange> >
225
    ::type
226
    knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
227
    {
228
        knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
229
        return kmp (boost::begin (corpus), boost::end (corpus));
230
    }
231
    
232
    template <typename PatternRange, typename CorpusRange>
233
    typename boost::range_iterator<CorpusRange>::type
234
    knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
235
    {
236
        typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
237
        knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
238
        return kmp (boost::begin (corpus), boost::end (corpus));
239
    }
240

    
241

    
242
    //  Creator functions -- take a pattern range, return an object
243
    template <typename Range>
244
    boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
245
    make_knuth_morris_pratt ( const Range &r ) {
246
        return boost::algorithm::knuth_morris_pratt
247
            <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
248
        }
249
    
250
    template <typename Range>
251
    boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
252
    make_knuth_morris_pratt ( Range &r ) {
253
        return boost::algorithm::knuth_morris_pratt
254
            <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
255
        }
256
}}
257

    
258
#endif  // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP