Ninja
edit_distance.cc
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "edit_distance.h"
16
17#include <algorithm>
18#include <vector>
19
21 const StringPiece& s2,
22 bool allow_replacements,
23 int max_edit_distance) {
24 // The algorithm implemented below is the "classic"
25 // dynamic-programming algorithm for computing the Levenshtein
26 // distance, which is described here:
27 //
28 // http://en.wikipedia.org/wiki/Levenshtein_distance
29 //
30 // Although the algorithm is typically described using an m x n
31 // array, only one row plus one element are used at a time, so this
32 // implementation just keeps one vector for the row. To update one entry,
33 // only the entries to the left, top, and top-left are needed. The left
34 // entry is in row[x-1], the top entry is what's in row[x] from the last
35 // iteration, and the top-left entry is stored in previous.
36 int m = static_cast<int>(s1.len_);
37 int n = static_cast<int>(s2.len_);
38
39 std::vector<int> row(n + 1);
40 for (int i = 1; i <= n; ++i)
41 row[i] = i;
42
43 for (int y = 1; y <= m; ++y) {
44 row[0] = y;
45 int best_this_row = row[0];
46
47 int previous = y - 1;
48 for (int x = 1; x <= n; ++x) {
49 int old_row = row[x];
50 if (allow_replacements) {
51 row[x] = std::min(previous + (s1.str_[y - 1] == s2.str_[x - 1] ? 0 : 1),
52 std::min(row[x - 1], row[x]) + 1);
53 }
54 else {
55 if (s1.str_[y - 1] == s2.str_[x - 1])
56 row[x] = previous;
57 else
58 row[x] = std::min(row[x - 1], row[x]) + 1;
59 }
60 previous = old_row;
61 best_this_row = std::min(best_this_row, row[x]);
62 }
63
64 if (max_edit_distance && best_this_row > max_edit_distance)
65 return max_edit_distance + 1;
66 }
67
68 return row[n];
69}
int EditDistance(const StringPiece &s1, const StringPiece &s2, bool allow_replacements, int max_edit_distance)
StringPiece represents a slice of a string whose memory is managed externally.
const char * str_