Remove assert in get_def_bb_for_const
[official-gcc.git] / gcc / spellcheck.c
blobe4e83a5ae16eaae870b670e808bb3b3ef6dcbd69
1 /* Find near-matches for strings.
2 Copyright (C) 2015-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "tree.h"
25 #include "spellcheck.h"
27 /* The Levenshtein distance is an "edit-distance": the minimal
28 number of one-character insertions, removals or substitutions
29 that are needed to change one string into another.
31 This implementation uses the Wagner-Fischer algorithm. */
33 edit_distance_t
34 levenshtein_distance (const char *s, int len_s,
35 const char *t, int len_t)
37 const bool debug = false;
39 if (debug)
41 printf ("s: \"%s\" (len_s=%i)\n", s, len_s);
42 printf ("t: \"%s\" (len_t=%i)\n", t, len_t);
45 if (len_s == 0)
46 return len_t;
47 if (len_t == 0)
48 return len_s;
50 /* We effectively build a matrix where each (i, j) contains the
51 Levenshtein distance between the prefix strings s[0:j]
52 and t[0:i].
53 Rather than actually build an (len_t + 1) * (len_s + 1) matrix,
54 we simply keep track of the last row, v0 and a new row, v1,
55 which avoids an (len_t + 1) * (len_s + 1) allocation and memory accesses
56 in favor of two (len_s + 1) allocations. These could potentially be
57 statically-allocated if we impose a maximum length on the
58 strings of interest. */
59 edit_distance_t *v0 = new edit_distance_t[len_s + 1];
60 edit_distance_t *v1 = new edit_distance_t[len_s + 1];
62 /* The first row is for the case of an empty target string, which
63 we can reach by deleting every character in the source string. */
64 for (int i = 0; i < len_s + 1; i++)
65 v0[i] = i;
67 /* Build successive rows. */
68 for (int i = 0; i < len_t; i++)
70 if (debug)
72 printf ("i:%i v0 = ", i);
73 for (int j = 0; j < len_s + 1; j++)
74 printf ("%i ", v0[j]);
75 printf ("\n");
78 /* The initial column is for the case of an empty source string; we
79 can reach prefixes of the target string of length i
80 by inserting i characters. */
81 v1[0] = i + 1;
83 /* Build the rest of the row by considering neighbors to
84 the north, west and northwest. */
85 for (int j = 0; j < len_s; j++)
87 edit_distance_t cost = (s[j] == t[i] ? 0 : 1);
88 edit_distance_t deletion = v1[j] + 1;
89 edit_distance_t insertion = v0[j + 1] + 1;
90 edit_distance_t substitution = v0[j] + cost;
91 edit_distance_t cheapest = MIN (deletion, insertion);
92 cheapest = MIN (cheapest, substitution);
93 v1[j + 1] = cheapest;
96 /* Prepare to move on to next row. */
97 for (int j = 0; j < len_s + 1; j++)
98 v0[j] = v1[j];
101 if (debug)
103 printf ("final v1 = ");
104 for (int j = 0; j < len_s + 1; j++)
105 printf ("%i ", v1[j]);
106 printf ("\n");
109 edit_distance_t result = v1[len_s];
110 delete[] v0;
111 delete[] v1;
112 return result;
115 /* Calculate Levenshtein distance between two nil-terminated strings. */
117 edit_distance_t
118 levenshtein_distance (const char *s, const char *t)
120 return levenshtein_distance (s, strlen (s), t, strlen (t));
123 /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to
124 an autovec of non-NULL strings, determine which element within
125 CANDIDATES has the lowest edit distance to TARGET. If there are
126 multiple elements with the same minimal distance, the first in the
127 vector wins.
129 If more than half of the letters were misspelled, the suggestion is
130 likely to be meaningless, so return NULL for this case. */
132 const char *
133 find_closest_string (const char *target,
134 const auto_vec<const char *> *candidates)
136 gcc_assert (target);
137 gcc_assert (candidates);
139 int i;
140 const char *candidate;
141 const char *best_candidate = NULL;
142 edit_distance_t best_distance = MAX_EDIT_DISTANCE;
143 size_t len_target = strlen (target);
144 FOR_EACH_VEC_ELT (*candidates, i, candidate)
146 gcc_assert (candidate);
147 edit_distance_t dist
148 = levenshtein_distance (target, len_target,
149 candidate, strlen (candidate));
150 if (dist < best_distance)
152 best_distance = dist;
153 best_candidate = candidate;
157 /* If more than half of the letters were misspelled, the suggestion is
158 likely to be meaningless. */
159 if (best_candidate)
161 unsigned int cutoff = MAX (len_target, strlen (best_candidate)) / 2;
162 if (best_distance > cutoff)
163 return NULL;
166 return best_candidate;