2 CLAW - a C++ Library Absolutely Wonderful
4 CLAW is a free library without any particular aim but being useful to
7 Copyright (C) 2005-2011 Julien Jorge
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
19 You should have received a copy of the GNU Lesser General Public
20 License along with this library; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 contact: julien.jorge@gamned.org
27 * \brief Implementation of the kmp class.
28 * \author Julien Jorge
32#include <claw/it_index.hpp>
34/*---------------------------------------------------------------------------*/
36 * \brief Calculate the length of the longest common prefix between two
38 * \param begin_1 Iterator on the first item in the first pattern.
39 * \param begin_2 Iterator on the first item in the second pattern.
40 * \param end_1 Iterator after the last item in the first pattern.
41 * \param end_2 Iterator after the last item in the second pattern.
43template<class RandomIterator>
45claw::text::kmp<RandomIterator>::
46common_prefix_length( const RandomIterator begin_1,
47 const RandomIterator begin_2,
48 const RandomIterator end_1, const RandomIterator end_2
51 unsigned int count = 0;
52 RandomIterator it_1 = begin_1, it_2 = begin_2;
55 while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
66} // common_prefix_length()
68/*---------------------------------------------------------------------------*/
70 * \brief Preprocessing. Calculate the pattern's z-boxes.
71 * \param begin Iterator on the first item in the pattern.
72 * \param end Iterator after the last item in the pattern.
73 * \param out (out) Calculated z-boxes.
74 * \remark out contains only non-zero z-boxes.
76template<class RandomIterator>
77void claw::text::kmp<RandomIterator>::z_boxes(const RandomIterator begin,
78 const RandomIterator end,
79 std::map<unsigned int,unsigned int>& out) const
81 // right and left bounds of the current item's Z-box.
82 claw::it_index<RandomIterator> it_r(begin);
83 claw::it_index<RandomIterator> it_l(begin);
84 claw::it_index<RandomIterator> it_k(begin); // increment on the items
85 unsigned int z; // le Zi of the current position
87 for (++it_k; it_k!=end; ++it_k)
91 z = common_prefix_length(begin, it_k, end, end);
93 if ( z > 0 ) // set the Z-box
97 it_r = it_k.operator+(z) - 1;
102 unsigned int k_bis = it_k - it_l;
103 claw::it_index<RandomIterator> it_b(it_r - it_k);
105 if ( out.find(k_bis) == out.end() )
110 if ( z <= (unsigned int)it_b )
117 claw::it_index<RandomIterator> it_q = it_r + 1;
119 it_q += common_prefix_length(it_q, it_b+1, end, end);
121 out[it_k] = it_q - it_k;
129/*---------------------------------------------------------------------------*/
131 * \brief Preprocessing. Calculate the pattern's spi' values.
132 * \param begin Iterator on the first item in the pattern.
133 * \param end Iterator after the last item in the pattern.
134 * \param out (out) Calculated spi'.
135 * \remark out contains only non-zero spi'.
137template<class RandomIterator>
138void claw::text::kmp<RandomIterator>::spi_prime(const RandomIterator begin,
139 const RandomIterator end,
140 std::map<unsigned int, unsigned int>& out) const
142 std::map<unsigned int, unsigned int> z; // pattern's Z-boxes.
143 unsigned int j; // loop index.
146 z_boxes(begin, end, z);
148 // calculates spi' (from end to begining)
154 if (z.find(j) != z.end())
155 out[j + z[j] - 1] = z[j];
160/*---------------------------------------------------------------------------*/
162 * \brief Pattern matching with the Knuth-Morris-Pratt's algorithm.
163 * \param pattern_begin Iterator on the first item in the pattern.
164 * \param pattern_end Iterator after the last item in the pattern.
165 * \param text_begin Iterator on the first item in the text.
166 * \param text_end Iterator after the last item in the text.
167 * \param action Predicate called with the last found position for the pattern.
168 * \remark Exits if action return false.
169 * \pre pattern_begin != pattern_end
171template<class RandomIterator>
172template<class UnaryPredicate>
173void claw::text::kmp<RandomIterator>::operator()
174 (const RandomIterator pattern_begin, const RandomIterator pattern_end,
175 const RandomIterator text_begin, const RandomIterator text_end,
176 UnaryPredicate& action ) const
178 std::map<unsigned int, unsigned int> spi; // pattern's spi'
179 claw::it_index<RandomIterator> it_p(pattern_begin,1);
180 claw::it_index<RandomIterator> it_t(text_begin,1);
181 bool stop = false; // result of the last call to the predicate action
183 const int pattern_length = pattern_end - pattern_begin;
184 const int text_length = text_end - text_begin;
186 assert(pattern_begin != pattern_end);
188 spi_prime(pattern_begin, pattern_end, spi);
192 while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
196 common = common_prefix_length(it_p, it_t, pattern_end, text_end);
205 if ( (int)it_p == pattern_length+1 )
206 stop = !action( (int)it_t - pattern_length-1 );
209 it_p.set(pattern_begin+i, i+1);