6 // First some base level utility routines
13 #include "nscore.h" // for mozalloc headers
// Default encoding name and default key string.
// NOTE(review): the '|' characters presumably separate keyboard rows —
// confirm against the code that consumes SPELL_KEYSTRING.
#define SPELL_ENCODING  "ISO8859-1"
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
// Default morphological field tags. Every tag below is a three-character
// string literal ending in ':', so they all share one length.
#define MORPH_STEM      "st:"
#define MORPH_ALLOMORPH "al:"
#define MORPH_POS       "po:"
#define MORPH_DERI_PFX  "dp:"
#define MORPH_INFL_PFX  "ip:"
#define MORPH_TERM_PFX  "tp:"
#define MORPH_DERI_SFX  "ds:"
#define MORPH_INFL_SFX  "is:"
#define MORPH_TERM_SFX  "ts:"
#define MORPH_SURF_PFX  "sp:"
#define MORPH_FREQ      "fr:"
#define MORPH_PHON      "ph:"
#define MORPH_HYPH      "hy:"
#define MORPH_PART      "pa:"
#define MORPH_FLAG      "fl:"
#define MORPH_HENTRY    "_H:"
// Length of a tag, not counting the terminating NUL. Derived with sizeof so
// that it is a parenthesised compile-time constant and does not require a
// strlen() declaration wherever the macro is expanded.
#define MORPH_TAG_LEN   (sizeof(MORPH_STEM) - 1)
// Flag constants.
// NOTE(review): DEFAULTFLAGS and FORBIDDENWORD share the value 65510 —
// confirm that this is intentional.
#define DEFAULTFLAGS   65510
#define FORBIDDENWORD  65510
#define ONLYUPCASEFLAG 65511
55 // convert UTF-16 characters to UTF-8
56 LIBHUNSPELL_DLL_EXPORTED
char * u16_u8(char * dest
, int size
, const w_char
* src
, int srclen
);
58 // convert UTF-8 characters to UTF-16
59 LIBHUNSPELL_DLL_EXPORTED
int u8_u16(w_char
* dest
, int size
, const char * src
);
62 LIBHUNSPELL_DLL_EXPORTED
void flag_qsort(unsigned short flags
[], int begin
, int end
);
64 // binary search in 2-byte vector
65 LIBHUNSPELL_DLL_EXPORTED
int flag_bsearch(unsigned short flags
[], unsigned short flag
, int right
);
67 // remove end of line char(s)
68 LIBHUNSPELL_DLL_EXPORTED
void mychomp(char * s
);
71 LIBHUNSPELL_DLL_EXPORTED
char * mystrdup(const char * s
);
73 // strcat for limited length destination string
74 LIBHUNSPELL_DLL_EXPORTED
char * mystrcat(char * dest
, const char * st
, int max
);
76 // duplicate reverse of string
77 LIBHUNSPELL_DLL_EXPORTED
char * myrevstrdup(const char * s
);
79 // parse into tokens with char delimiter
80 LIBHUNSPELL_DLL_EXPORTED
char * mystrsep(char ** sptr
, const char delim
);
81 // parse into tokens with char delimiter
82 LIBHUNSPELL_DLL_EXPORTED
char * mystrsep2(char ** sptr
, const char delim
);
84 // parse into tokens with char delimiter
85 LIBHUNSPELL_DLL_EXPORTED
char * mystrrep(char *, const char *, const char *);
87 // append s to ends of every lines in text
88 LIBHUNSPELL_DLL_EXPORTED
void strlinecat(char * lines
, const char * s
);
90 // tokenize into lines with new line
91 LIBHUNSPELL_DLL_EXPORTED
int line_tok(const char * text
, char *** lines
, char breakchar
);
93 // tokenize into lines with new line and uniq in place
94 LIBHUNSPELL_DLL_EXPORTED
char * line_uniq(char * text
, char breakchar
);
95 LIBHUNSPELL_DLL_EXPORTED
char * line_uniq_app(char ** text
, char breakchar
);
97 // change oldchar to newchar in place
98 LIBHUNSPELL_DLL_EXPORTED
char * tr(char * text
, char oldc
, char newc
);
101 LIBHUNSPELL_DLL_EXPORTED
int reverseword(char *);
104 LIBHUNSPELL_DLL_EXPORTED
int reverseword_utf(char *);
107 LIBHUNSPELL_DLL_EXPORTED
int uniqlist(char ** list
, int n
);
109 // free character array list
110 LIBHUNSPELL_DLL_EXPORTED
void freelist(char *** list
, int n
);
112 // character encoding information
// NOTE(review): the enclosing declaration is not visible in this extract; the
// two unsigned char members below presumably belong to struct cs_info, which
// later prototypes in this header reference — confirm against the original.
115 unsigned char clower
;
116 unsigned char cupper
;
119 LIBHUNSPELL_DLL_EXPORTED
int initialize_utf_tbl();
120 LIBHUNSPELL_DLL_EXPORTED
void free_utf_tbl();
121 LIBHUNSPELL_DLL_EXPORTED
unsigned short unicodetoupper(unsigned short c
, int langnum
);
122 LIBHUNSPELL_DLL_EXPORTED
unsigned short unicodetolower(unsigned short c
, int langnum
);
123 LIBHUNSPELL_DLL_EXPORTED
int unicodeisalpha(unsigned short c
);
125 LIBHUNSPELL_DLL_EXPORTED
struct cs_info
* get_current_cs(const char * es
);
127 // get language identifiers of language codes
128 LIBHUNSPELL_DLL_EXPORTED
int get_lang_num(const char * lang
);
130 // get characters of the given 8bit encoding with lower- and uppercase forms
131 LIBHUNSPELL_DLL_EXPORTED
char * get_casechars(const char * enc
);
133 // convert null terminated string to all caps using encoding
134 LIBHUNSPELL_DLL_EXPORTED
void enmkallcap(char * d
, const char * p
, const char * encoding
);
136 // convert null terminated string to all little using encoding
137 LIBHUNSPELL_DLL_EXPORTED
void enmkallsmall(char * d
, const char * p
, const char * encoding
);
139 // convert null terminated string to have initial capital using encoding
140 LIBHUNSPELL_DLL_EXPORTED
void enmkinitcap(char * d
, const char * p
, const char * encoding
);
142 // convert null terminated string to all caps
143 LIBHUNSPELL_DLL_EXPORTED
void mkallcap(char * p
, const struct cs_info
* csconv
);
145 // convert null terminated string to all little
146 LIBHUNSPELL_DLL_EXPORTED
void mkallsmall(char * p
, const struct cs_info
* csconv
);
148 // convert null terminated string to have initial capital
149 LIBHUNSPELL_DLL_EXPORTED
void mkinitcap(char * p
, const struct cs_info
* csconv
);
151 // convert first nc characters of UTF-8 string to little
152 LIBHUNSPELL_DLL_EXPORTED
void mkallsmall_utf(w_char
* u
, int nc
, int langnum
);
154 // convert first nc characters of UTF-8 string to capital
155 LIBHUNSPELL_DLL_EXPORTED
void mkallcap_utf(w_char
* u
, int nc
, int langnum
);
157 // get type of capitalization
158 LIBHUNSPELL_DLL_EXPORTED
int get_captype(char * q
, int nl
, cs_info
*);
160 // get type of capitalization (UTF-8)
161 LIBHUNSPELL_DLL_EXPORTED
int get_captype_utf8(w_char
* q
, int nl
, int langnum
);
163 // strip all ignored characters in the string
164 LIBHUNSPELL_DLL_EXPORTED
void remove_ignored_chars_utf(char * word
, unsigned short ignored_chars
[], int ignored_len
);
166 // strip all ignored characters in the string
167 LIBHUNSPELL_DLL_EXPORTED
void remove_ignored_chars(char * word
, char * ignored_chars
);
169 LIBHUNSPELL_DLL_EXPORTED
int parse_string(char * line
, char ** out
, int ln
);
171 LIBHUNSPELL_DLL_EXPORTED
int parse_array(char * line
, char ** out
, unsigned short ** out_utf16
,
172 int * out_utf16_len
, int utf8
, int ln
);
174 LIBHUNSPELL_DLL_EXPORTED
int fieldlen(const char * r
);
175 LIBHUNSPELL_DLL_EXPORTED
char * copy_field(char * dest
, const char * morph
, const char * var
);
177 LIBHUNSPELL_DLL_EXPORTED
int morphcmp(const char * s
, const char * t
);
179 LIBHUNSPELL_DLL_EXPORTED
int get_sfxcount(const char * morph
);
181 // conversion function for protected memory
182 LIBHUNSPELL_DLL_EXPORTED
void store_pointer(char * dest
, char * source
);
184 // conversion function for protected memory
185 LIBHUNSPELL_DLL_EXPORTED
char * get_stored_pointer(const char * s
);
// Visible logic: when H_OPT_ALIASM is set in h->var, ret is obtained by
// passing the address HENTRY_WORD(h) + h->blen + 1 to get_stored_pointer();
// the other visible assignment sets ret to that address directly.
// NOTE(review): the braces, the declaration of ret, the branch preceding the
// `else if`, and the return statement are not present in this extract —
// restore them from the original header before relying on this text.
188 LIBHUNSPELL_DLL_EXPORTED
inline char* HENTRY_DATA(struct hentry
*h
)
193 else if (h
->var
& H_OPT_ALIASM
)
194 ret
= get_stored_pointer(HENTRY_WORD(h
) + h
->blen
+ 1);
196 ret
= HENTRY_WORD(h
) + h
->blen
+ 1;
200 // NULL-free version for warning-free OOo build
// Visible logic: when H_OPT_ALIASM is set in h->var, ret is obtained by
// passing the address HENTRY_WORD(h) + h->blen + 1 to get_stored_pointer();
// the other visible assignment sets ret to that address directly.
// NOTE(review): the braces, the declaration of ret, the branch preceding the
// `else if`, and the return statement are not present in this extract, so
// what is returned in place of NULL cannot be seen here.
201 LIBHUNSPELL_DLL_EXPORTED
inline const char* HENTRY_DATA2(const struct hentry
*h
)
206 else if (h
->var
& H_OPT_ALIASM
)
207 ret
= get_stored_pointer(HENTRY_WORD(h
) + h
->blen
+ 1);
209 ret
= HENTRY_WORD(h
) + h
->blen
+ 1;
// Search the data of entry h for the string p: yields
// strstr(HENTRY_DATA(h), p), or NULL when HENTRY_DATA(h) is null.
// HENTRY_DATA(h) is evaluated twice when it is non-null.
// NOTE(review): the function's braces are not present in this extract.
213 LIBHUNSPELL_DLL_EXPORTED
inline char* HENTRY_FIND(struct hentry
*h
, const char *p
)
215 return (HENTRY_DATA(h
) ? strstr(HENTRY_DATA(h
), p
) : NULL
);
// Equality test for two values that have l and h members (w_char, going by
// the macro's name): true only when both members match.
// Each argument is expanded twice, so pass expressions without side effects.
#define w_char_eq(a, b) ((a).l == (b).l && (a).h == (b).h)