0.6.2
C++ to UML diagram generator based on Clang
Loading...
Searching...
No Matches
levenshtein.cc
Go to the documentation of this file.
1/**
2 * @file src/util/levenshtein.cc
3 *
4 * Copyright (c) 2021-2025 Bartek Kryza <bkryza@gmail.com>
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19#include "levenshtein.h"
20
21namespace clanguml::util {
22
23size_t levenshtein_distance(const std::string &left, const std::string &right)
24{
25 const size_t m = left.size();
26 const size_t n = right.size();
27
28 std::vector<std::vector<size_t>> dp(m + 1, std::vector<size_t>(n + 1, 0));
29
30 for (size_t i = 0; i <= m; ++i)
31 dp[i][0] = i;
32 for (size_t j = 0; j <= n; ++j)
33 dp[0][j] = j;
34
35 for (size_t i = 1; i <= m; ++i) {
36 for (size_t j = 1; j <= n; ++j) {
37 int substitution_cost = (left[i - 1] == right[j - 1]) ? 0 : 1;
38 dp[i][j] = std::min({dp[i - 1][j] + 1, dp[i][j - 1] + 1,
39 dp[i - 1][j - 1] + substitution_cost});
40 }
41 }
42
43 return dp[m][n];
44}
45
46std::vector<std::string> get_approximate_matches(
47 const std::vector<std::string> &collection, const std::string &pattern,
48 const int max_results)
49{
50 std::vector<std::pair<size_t, std::string>> distance_pairs;
51
52 for (const auto &item : collection) {
53 auto distance = levenshtein_distance(item, pattern);
54 distance_pairs.emplace_back(distance, item);
55 }
56
57 std::sort(distance_pairs.begin(), distance_pairs.end(),
58 [](const auto &a, const auto &b) { return a.first < b.first; });
59
60 std::vector<std::string> result;
61 for (size_t i = 0; i < std::min<size_t>(distance_pairs.size(), max_results);
62 ++i) {
63 result.push_back(distance_pairs[i].second);
64 }
65
66 return result;
67}
68
69} // namespace clanguml::util