/** @file steminternal.h
 * @brief Base class for implementations of stemming algorithms
 */
/* Copyright (C) 2007,2009,2010,2016 Olly Betts
 * Copyright (C) 2010 Evgeny Sizikov
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 */
22 #ifndef XAPIAN_INCLUDED_STEMINTERNAL_H
23 #define XAPIAN_INCLUDED_STEMINTERNAL_H
25 #include <xapian/stem.h>
/// Element type of the symbol buffers handled by the helpers in this header
/// (a single unsigned byte).
typedef unsigned char symbol;
/// Size in bytes of the header that sits immediately before the data of a
/// symbol buffer: two ints.  CAPACITY()/SET_CAPACITY() use the int at index
/// -2 and SET_SIZE() the int at index -1; lose_s() subtracts HEAD from the
/// data pointer to obtain the address it passes to std::free().
#define HEAD (2*sizeof(int))
// Cast via (void*) to avoid warnings about alignment (the pointers *are*
// appropriately aligned).
40 const void * void_p
= reinterpret_cast<const void *>(p
);
41 return reinterpret_cast<const int *>(void_p
)[-1];
45 SET_SIZE(symbol
* p
, int n
)
47 void * void_p
= reinterpret_cast<void *>(p
);
48 reinterpret_cast<int *>(void_p
)[-1] = n
;
52 CAPACITY(const symbol
* p
)
54 const void * void_p
= reinterpret_cast<const void *>(p
);
55 return reinterpret_cast<const int *>(void_p
)[-2];
59 SET_CAPACITY(symbol
* p
, int n
)
61 void * void_p
= reinterpret_cast<void *>(p
);
62 reinterpret_cast<int *>(void_p
)[-2] = n
;
65 typedef int (*among_function
)(Xapian::StemImplementation
*);
68 int s_size
; /* length of search string (in symbols) */
69 unsigned s
; /* offset in pool to search string */
70 int substring_i
; /* index to longest matching substring */
71 int result
; /* result of the lookup */
74 extern symbol
* create_s();
76 inline void lose_s(symbol
* p
) {
77 if (p
) std::free(reinterpret_cast<char *>(p
) - HEAD
);
80 extern int skip_utf8(const symbol
* p
, int c
, int lb
, int l
, int n
);
84 class SnowballStemImplementation
: public StemImplementation
{
/// Integer state shared by the helper methods; the constructor sets all five
/// to zero.
// NOTE(review): these look like the usual Snowball runtime positions (cursor,
// limit, backward limit, slice start and slice end) - confirm.
int c, l, lb, bra, ket;
// NOTE(review): presumably decodes the UTF-8 character after the current
// position into *slot and returns the number of bytes it occupies - confirm.
int get_utf8(int * slot);

// NOTE(review): presumably the backward counterpart of get_utf8() (decodes
// the character before the current position) - confirm.
int get_b_utf8(int * slot);
// Character-grouping tests.  All four share one signature.
// NOTE(review): presumably s is a bitmap describing the grouping for the
// character range min..max, the _b variants work backwards, and repeat
// selects repeated matching - confirm these and the meaning of the return
// value against the implementation.
int in_grouping_U(const unsigned char * s, int min, int max, int repeat);
int in_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
int out_grouping_U(const unsigned char * s, int min, int max, int repeat);
int out_grouping_b_U(const unsigned char * s, int min, int max, int repeat);
99 int eq_s(int s_size
, const symbol
* s
);
100 int eq_s_b(int s_size
, const symbol
* s
);
101 int eq_v(const symbol
* v
) { return eq_s(SIZE(v
), v
); }
102 int eq_v_b(const symbol
* v
) { return eq_s_b(SIZE(v
), v
); }
104 int find_among(const symbol
*pool
, const struct among
* v
, int v_size
,
105 const unsigned char * fnum
, const among_function
* f
);
106 int find_among_b(const symbol
*pool
, const struct among
* v
, int v_size
,
107 const unsigned char * fnum
, const among_function
* f
);
109 int replace_s(int c_bra
, int c_ket
, int s_size
, const symbol
* s
);
110 int slice_from_s(int s_size
, const symbol
* s
);
111 int slice_from_v(const symbol
* v
) { return slice_from_s(SIZE(v
), v
); }
113 int slice_del() { return slice_from_s(0, 0); }
115 void insert_s(int c_bra
, int c_ket
, int s_size
, const symbol
* s
);
116 void insert_v(int c_bra
, int c_ket
, const symbol
* v
) {
117 insert_s(c_bra
, c_ket
, SIZE(v
), v
);
120 symbol
* slice_to(symbol
* v
);
121 symbol
* assign_to(symbol
* v
);
123 int len_utf8(const symbol
* v
);
// NOTE(review): debugging aid; the roles of number and line_count are not
// visible from this declaration - see the implementation.
void debug(int number, int line_count);
130 /// Perform initialisation common to all Snowball stemmers.
131 SnowballStemImplementation()
132 : p(create_s()), c(0), l(0), lb(0), bra(0), ket(0) { }
134 /// Perform cleanup common to all Snowball stemmers.
135 virtual ~SnowballStemImplementation();
137 /// Stem the specified word.
138 virtual std::string
operator()(const std::string
& word
);
140 /// Virtual method implemented by the subclass to actually do the work.
141 virtual int stem() = 0;
146 #endif // XAPIAN_INCLUDED_STEMINTERNAL_H