Recommend lcov 1.11 as it uses much less memory
[xapian.git] / xapian-core / languages / steminternal.h
blob37a3dfa20440cd8938b357e17c2e6294e68311dc
1 /** @file steminternal.h
2 * @brief Base class for implementations of stemming algorithms
3 */
4 /* Copyright (C) 2007,2009,2010,2016 Olly Betts
5 * Copyright (C) 2010 Evgeny Sizikov
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
23 #define XAPIAN_INCLUDED_STEMINTERNAL_H
25 #include <xapian/stem.h>
27 #include <cstdlib>
28 #include <string>
/// One code unit of the text buffer handled by the stemmer runtime.
typedef unsigned char symbol;

/** Number of bytes reserved immediately before the symbol data.
 *
 *  The space holds two ints: the capacity at index -2 and the size at
 *  index -1, both counted back from the symbol pointer.
 */
#define HEAD (2*sizeof(int))

// The header ints are reached by converting via (void*) to avoid warnings
// about alignment (the pointers *are* appropriately aligned).  Going to
// void* is an implicit conversion and coming back only needs static_cast,
// so no reinterpret_cast is required.

/** Read the size stored in the header of a symbol buffer.
 *
 *  @param p  Pointer to the symbol data; the int directly before it is read.
 *
 *  @return  The stored size.
 */
inline int
SIZE(const symbol* p)
{
    const void * void_p = p;
    return static_cast<const int *>(void_p)[-1];
}

/** Store a size in the header of a symbol buffer.
 *
 *  @param p  Pointer to the symbol data; the int directly before it is
 *	      overwritten.
 *  @param n  The size to store.
 */
inline void
SET_SIZE(symbol* p, int n)
{
    void * void_p = p;
    static_cast<int *>(void_p)[-1] = n;
}

/** Read the capacity stored in the header of a symbol buffer.
 *
 *  @param p  Pointer to the symbol data; the int two places before it is
 *	      read.
 *
 *  @return  The stored capacity.
 */
inline int
CAPACITY(const symbol* p)
{
    const void * void_p = p;
    return static_cast<const int *>(void_p)[-2];
}

/** Store a capacity in the header of a symbol buffer.
 *
 *  @param p  Pointer to the symbol data; the int two places before it is
 *	      overwritten.
 *  @param n  The capacity to store.
 */
inline void
SET_CAPACITY(symbol* p, int n)
{
    void * void_p = p;
    static_cast<int *>(void_p)[-2] = n;
}
65 typedef int (*among_function)(Xapian::StemImplementation *);
/// One entry of a lookup table passed to find_among() / find_among_b().
struct among {
    int s_size;		/* length of search string (in symbols) */
    unsigned s;		/* offset in pool to search string */
    int substring_i;	/* index to longest matching substring */
    int result;		/* result of the lookup */
};
74 extern symbol * create_s();
76 inline void lose_s(symbol * p) {
77 if (p) std::free(reinterpret_cast<char *>(p) - HEAD);
80 extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
82 namespace Xapian {
84 class SnowballStemImplementation : public StemImplementation {
85 int slice_check();
87 protected:
88 symbol * p;
89 int c, l, lb, bra, ket;
91 int get_utf8(int * slot);
92 int get_b_utf8(int * slot);
94 int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
95 int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
96 int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
97 int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
99 int eq_s(int s_size, const symbol * s);
100 int eq_s_b(int s_size, const symbol * s);
101 int eq_v(const symbol * v) { return eq_s(SIZE(v), v); }
102 int eq_v_b(const symbol * v) { return eq_s_b(SIZE(v), v); }
104 int find_among(const symbol *pool, const struct among * v, int v_size,
105 const unsigned char * fnum, const among_function * f);
106 int find_among_b(const symbol *pool, const struct among * v, int v_size,
107 const unsigned char * fnum, const among_function * f);
109 int replace_s(int c_bra, int c_ket, int s_size, const symbol * s);
110 int slice_from_s(int s_size, const symbol * s);
111 int slice_from_v(const symbol * v) { return slice_from_s(SIZE(v), v); }
113 int slice_del() { return slice_from_s(0, 0); }
115 void insert_s(int c_bra, int c_ket, int s_size, const symbol * s);
116 void insert_v(int c_bra, int c_ket, const symbol * v) {
117 insert_s(c_bra, c_ket, SIZE(v), v);
120 symbol * slice_to(symbol * v);
121 symbol * assign_to(symbol * v);
123 int len_utf8(const symbol * v);
125 #if 0
126 void debug(int number, int line_count);
127 #endif
129 public:
130 /// Perform initialisation common to all Snowball stemmers.
131 SnowballStemImplementation()
132 : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
134 /// Perform cleanup common to all Snowball stemmers.
135 virtual ~SnowballStemImplementation();
137 /// Stem the specified word.
138 virtual std::string operator()(const std::string & word);
140 /// Virtual method implemented by the subclass to actually do the work.
141 virtual int stem() = 0;
146 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H