spellcheck.h: add best_match template; implement early-reject
[official-gcc.git] / gcc / spellcheck.h
blob7379399f1fd24937c1d3017ac2beabf4a284f1d1
1 /* Find near-matches for strings and identifiers.
2 Copyright (C) 2015-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #ifndef GCC_SPELLCHECK_H
21 #define GCC_SPELLCHECK_H
/* Type used to express the edit distance between two strings.  */
typedef unsigned int edit_distance_t;

/* The largest value an edit_distance_t can hold.  best_match uses it
   as the initial "nothing considered yet" value of its best distance,
   so that any real distance compares as an improvement.  */
const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
26 /* spellcheck.c */
27 extern edit_distance_t
28 levenshtein_distance (const char *s, int len_s,
29 const char *t, int len_t);
31 extern edit_distance_t
32 levenshtein_distance (const char *s, const char *t);
34 extern const char *
35 find_closest_string (const char *target,
36 const auto_vec<const char *> *candidates);
/* A traits class for describing a string-like type usable by
   class best_match.

   The primary template is deliberately empty.  Specializations should
   provide the implementations of the following:

     static size_t get_length (TYPE);
     static const char *get_string (TYPE);

   get_string should return a non-NULL ptr, which does not need to be
   0-terminated; get_length gives the number of characters at that
   pointer.  */

template <typename TYPE>
struct edit_distance_traits {};
51 /* A type for use when determining the best match against a string,
52 expressed as a template so that we can match against various
53 string-like types (const char *, frontend identifiers, and preprocessor
54 macros).
56 This type accumulates the best possible match against GOAL_TYPE for
57 a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
58 number of calls to levenshtein_distance and to
59 edit_distance_traits<T>::get_length. */
61 template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
62 class best_match
64 public:
65 typedef GOAL_TYPE goal_t;
66 typedef CANDIDATE_TYPE candidate_t;
67 typedef edit_distance_traits<goal_t> goal_traits;
68 typedef edit_distance_traits<candidate_t> candidate_traits;
70 /* Constructor. */
72 best_match (goal_t goal)
73 : m_goal (goal_traits::get_string (goal)),
74 m_goal_len (goal_traits::get_length (goal)),
75 m_best_candidate (NULL),
76 m_best_distance (MAX_EDIT_DISTANCE)
79 /* Compare the edit distance between CANDIDATE and m_goal,
80 and if it's the best so far, record it. */
82 void consider (candidate_t candidate)
84 size_t candidate_len = candidate_traits::get_length (candidate);
86 /* Calculate a lower bound on the candidate's distance to the goal,
87 based on the difference in lengths; it will require at least
88 this many insertions/deletions. */
89 edit_distance_t min_candidate_distance
90 = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
92 /* If the candidate's length is sufficiently different to that
93 of the goal string, then the number of insertions/deletions
94 may be >= the best distance so far. If so, we can reject
95 the candidate immediately without needing to compute
96 the exact distance, since it won't be an improvement. */
97 if (min_candidate_distance >= m_best_distance)
98 return;
100 /* If the candidate will be unable to beat the criterion in
101 get_best_meaningful_candidate, reject it without computing
102 the exact distance. */
103 unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
104 if (min_candidate_distance > cutoff)
105 return;
107 /* Otherwise, compute the distance and see if the candidate
108 has beaten the previous best value. */
109 edit_distance_t dist
110 = levenshtein_distance (m_goal, m_goal_len,
111 candidate_traits::get_string (candidate),
112 candidate_len);
113 if (dist < m_best_distance)
115 m_best_distance = dist;
116 m_best_candidate = candidate;
117 m_best_candidate_len = candidate_len;
121 /* Get the best candidate so far, but applying a filter to ensure
122 that we return NULL if none of the candidates are close to the goal,
123 to avoid offering nonsensical suggestions to the user. */
125 candidate_t get_best_meaningful_candidate () const
127 /* If more than half of the letters were misspelled, the suggestion is
128 likely to be meaningless. */
129 if (m_best_candidate)
131 unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
132 if (m_best_distance > cutoff)
133 return NULL;
135 return m_best_candidate;
138 private:
139 const char *m_goal;
140 size_t m_goal_len;
141 candidate_t m_best_candidate;
142 edit_distance_t m_best_distance;
143 size_t m_best_candidate_len;
146 #endif /* GCC_SPELLCHECK_H */