2 * (C) Copyright 2009-2010 John J. Foerch
4 * Use, modification, and distribution are subject to the terms specified in the
/**
 * This module provides a pattern matcher for hints_text_match which lets you
 * type ASCII characters in the hints minibuffer to match Unicode characters
 * such as accented letters and ligatures.
 */
// NOTE(review): in_module is defined outside this file; presumably it
// selects the module scope for the definitions that follow -- confirm.
in_module("casual_spelling");
/**
 * Inspect one translation entry and report which matcher features it
 * requires.
 *
 * Only object (array) entries are inspected; a plain string entry never
 * sets a feature.  For each value `t` of the entry:
 *   - a string longer than one character sets `ligatures`;
 *   - an object (array of alternatives) sets `multiples`, and also sets
 *     `ligatures` when any of its members is longer than one character.
 *
 * @param entry  a string, or an object/array whose values are strings
 *               or arrays of strings.
 * @return {{ligatures: boolean, multiples: boolean}}
 */
function table_entry_features (entry) {
    var ret = { ligatures: false, multiples: false };
    if (typeof entry == "object") {
        // Iterate values via a standard for-in loop (portable replacement
        // for the SpiderMonkey-only `for each` statement).
        for (var k in entry) {
            var t = entry[k];
            if (typeof t == "string" && t.length > 1)
                ret.ligatures = true;
            else if (typeof t == "object") {
                ret.multiples = true;
                if (t.some(function (x) { return x.length > 1; }))
                    ret.ligatures = true;
            }
        }
    }
    return ret;
}
/**
 * Turn a plain object mapping characters to translations into a
 * casual-spelling table by annotating it, in place, with the feature
 * flags its entries require.
 *
 * @param table  object whose values are translation entries.
 * @return the same `table` object, with `ligatures` and/or `multiples`
 *         set to true when table_entry_features reports them for any
 *         entry.
 */
function make_table (table) {
    for (var k in table) {
        var features = table_entry_features(table[k]);
        if (features.ligatures) table.ligatures = true;
        if (features.multiples) table.multiples = true;
    }
    return table;
}
/**
 * make_table_from_ranges is a constructor of a casual-spelling
 * table which generates the table from a shorthand form
 * called a "range table".  A range table is an array where each element
 * is an array of three elements: range-low, range-high, and range-spec.
 * Low and high are integer codepoints of Unicode characters to be
 * translated (both bounds are inclusive).  The spec can be a string, or
 * an array.  If it is a string, it should be a single character.  The
 * designated range will have that character as its low-point
 * translation, and each next character in the range will be incremented
 * from that point.  This is how you can compactly denote alphabetic
 * ranges, for example.  If the spec is an array, the strings in that
 * array will be repeated (cyclically) over every codepoint of the
 * designated range.  Multi-character translations, such as for
 * ligatures, must be given in the array form of range-spec.
 *
 * @param table  the range table, an array of [low, high, spec] triples.
 * @return a new object mapping each character in the ranges to its
 *         translation, with `ligatures` / `multiples` set to true when
 *         table_entry_features reports them for any spec.
 */
function make_table_from_ranges (table) {
    var ret = {};
    table.forEach(function (a) {
        var low = a[0];
        var high = a[1];
        var spec = a[2];
        var features = table_entry_features(spec);
        if (features.ligatures) ret.ligatures = true;
        if (features.multiples) ret.multiples = true;
        for (var c = low; c <= high; c++) {
            var chr = String.fromCharCode(c);
            if (typeof spec == "string")
                // string spec: consecutive characters starting at spec[0]
                ret[chr] = String.fromCharCode(spec.charCodeAt(0) + c - low);
            else
                // array spec: cycle through the listed translations
                ret[chr] = spec[(c - low) % spec.length];
        }
    });
    return ret;
}
/**
 * The active translation table.  add_table replaces this with the most
 * recently added table, whose prototype chain leads to the tables added
 * before it.
 */
var tables = {};

/**
 * Look up the casual spelling of a single character.
 *
 * @param chr  one character of the text being matched.
 * @return the translation found in `tables` (following its prototype
 *         chain), or `chr` itself when there is none.
 */
function translate (chr) {
    return tables[chr] || chr;
}
/**
 * Basic matcher, for tables that contain neither ligatures nor
 * multiples: every text character translates to exactly one character.
 *
 * @param text     the hint text to search in.
 * @param pattern  the string typed by the user.
 * @return [start, end] offsets into `text` of the first match
 *         ([0, 0] for an empty pattern), or false when there is none.
 */
function hints_text_match (text, pattern) {
    if (pattern == "")
        return [0, 0];
    var plen = pattern.length;
    for (var i = 0, tlen = text.length - plen; i <= tlen; i++) {
        for (var j = 0;; j++) {
            // a pattern character matches either the literal text
            // character or its translation
            if (pattern[j] != text[i+j] &&
                pattern[j] != translate(text[i+j]))
                break;
            if (j == plen - 1)
                return [i, i+plen];
        }
    }
    return false;
}
/**
 * Matcher for tables that contain ligatures: one text character may
 * translate to several pattern characters.
 *
 * @param text     the hint text to search in.
 * @param pattern  the string typed by the user.
 * @return [start, end] offsets into `text` of the first match
 *         ([0, 0] for an empty pattern), or false when there is none.
 */
function hints_text_match_ligatures (text, pattern) {
    if (pattern == "")
        return [0, 0];
    var tlen = text.length;
    var plen = pattern.length;
    // translation of every text character, indexed like `text`
    var decoded = Array.prototype.map.call(text, function (chr) {
        return translate(chr);
    });
    for (var i = 0; i < tlen; i++) {
        // e counts consumed text characters, j consumed pattern characters
        for (var e = 0, j = 0; i + e < tlen; e++) {
            var elen = 1;
            // elen is only widened to the translation's length when the
            // literal character did not match
            if (pattern[j] != text[i+e] &&
                pattern.substring(j, j+(elen = decoded[i+e].length)) != decoded[i+e])
                break;
            j += elen;
            if (j == plen)
                return [i, i+e+1];
        }
    }
    return false;
}
/**
 * Matcher for tables that contain multiples: one text character may
 * have several alternative single-character translations.
 *
 * @param text     the hint text to search in.
 * @param pattern  the string typed by the user.
 * @return [start, end] offsets into `text` of the first match
 *         ([0, 0] for an empty pattern), or false when there is none.
 */
function hints_text_match_multiples (text, pattern) {
    if (pattern == "")
        return [0, 0];
    var plen = pattern.length;
    // for every text character: the character itself followed by its
    // translation(s); concat flattens an array of alternatives
    var decoded = Array.prototype.map.call(text, function (x) {
        return [x].concat(translate(x));
    });
    // `<=` so that a match ending at the last character of the text
    // (including text and pattern of equal length) is still tried
    for (var i = 0, last = text.length - plen; i <= last; i++) {
        for (var j = 0; j < plen; j++) {
            var wanted = pattern[j];
            if (! decoded[i+j].some(function (x) { return x == wanted; }))
                break;
        }
        if (j == plen)
            return [i, i+plen];
    }
    return false;
}
/**
 * Matcher for tables that contain both ligatures and multiples: one
 * text character may have several alternative translations, each of
 * which may be more than one character long.
 *
 * Alternatives are tried in order and the first one that fits the
 * pattern at the current position is taken; there is no backtracking.
 *
 * @param text     the hint text to search in.
 * @param pattern  the string typed by the user.
 * @return [start, end] offsets into `text` of the first match
 *         ([0, 0] for an empty pattern), or false when there is none.
 */
function hints_text_match_ligatures_multiples (text, pattern) {
    if (pattern == "")
        return [0, 0];
    var tlen = text.length;
    var plen = pattern.length;
    // for every text character: the character itself followed by its
    // translation(s); concat flattens an array of alternatives
    var decoded = Array.prototype.map.call(text, function (x) {
        return [x].concat(translate(x));
    });
    for (var i = 0; i < tlen; i++) {
        // e counts consumed text characters, j consumed pattern characters
        for (var e = 0, j = 0; i + e < tlen; e++) {
            var mlen = 0;
            var found = decoded[i+e].some(function (x) {
                mlen = x.length;
                return pattern.substring(j, j + mlen) == x;
            });
            if (! found)
                break;
            // mlen now holds the length of the alternative that matched
            j += mlen;
            if (j == plen)
                return [i, i+e+1];
        }
    }
    return false;
}
/**
 * Install a casual-spelling table.
 *
 * The new table is chained in front of the previously installed tables
 * through its prototype, so a lookup that misses in `table` falls
 * through to the older ones, and the `ligatures` / `multiples` flags of
 * older tables stay visible.  conkeror.hints_text_match is then set to
 * the matcher that handles the features present in the chain.
 *
 * @param table  a table as produced by make_table or
 *               make_table_from_ranges; its prototype is modified.
 */
function add_table (table) {
    table.__proto__ = tables;
    tables = table;
    if (tables.ligatures && tables.multiples)
        conkeror.hints_text_match = hints_text_match_ligatures_multiples;
    else if (tables.ligatures)
        conkeror.hints_text_match = hints_text_match_ligatures;
    else if (tables.multiples)
        conkeror.hints_text_match = hints_text_match_multiples;
    else
        conkeror.hints_text_match = hints_text_match;
}
// Install an empty base table, so that conkeror.hints_text_match is
// assigned one of this module's matchers.
add_table(make_table({}));
/**
 * Translations from accented, decorated, enclosed and fullwidth Latin
 * letters to plain ASCII letters, in range-table form (see
 * make_table_from_ranges): [low codepoint, high codepoint, spec].
 *
 * NOTE(review): an array spec is repeated over *every* codepoint
 * between low and high.  Several ranges below (for example
 * 0x018a-0x0192, 0x01b2-0x01d2, 0x01f0-0x01f5, 0x0253-0x0257,
 * 0x0260-0x0269, 0x026b-0x0273, 0x027c-0x028b, 0x029d-0x02a0) span
 * more codepoints than their spec has entries and appear to skip
 * over unrelated characters, so some translations in them look
 * misaligned -- audit against the Unicode code charts.
 */
var accents_table = make_table_from_ranges(
    [[0x00a9, 0x00a9, "C"],//copyright
     [0x00c0, 0x00c5, ["A"]],
     [0x00c7, 0x00c7, "C"],
     [0x00c8, 0x00cb, ["E"]],
     [0x00cc, 0x00cf, ["I"]],
     [0x00d1, 0x00d1, "N"],
     [0x00d2, 0x00d6, ["O"]],
     [0x00d8, 0x00d8, "O"],
     [0x00d9, 0x00dc, ["U"]],
     [0x00dd, 0x00dd, "Y"],
     [0x00e0, 0x00e5, ["a"]],
     [0x00e7, 0x00e7, "c"],
     [0x00e8, 0x00eb, ["e"]],
     [0x00ec, 0x00ef, ["i"]],
     [0x00f1, 0x00f1, "n"],
     [0x00f2, 0x00f6, ["o"]],
     [0x00f8, 0x00f8, "o"],
     [0x00f9, 0x00fc, ["u"]],
     [0x00fd, 0x00fd, "y"],
     [0x00ff, 0x00ff, "y"],
     [0x0100, 0x0105, ["A", "a"]],
     [0x0106, 0x010d, ["C", "c"]],
     [0x010e, 0x0111, ["D", "d"]],
     [0x0112, 0x011b, ["E", "e"]],
     [0x011c, 0x0123, ["G", "g"]],
     [0x0124, 0x0127, ["H", "h"]],
     [0x0128, 0x0130, ["I", "i"]],
     [0x0134, 0x0135, ["J", "j"]],
     // range extended to 0x0137 so the lowercase "k" of the spec is
     // actually assigned (U+0137, small k with cedilla)
     [0x0136, 0x0137, ["K", "k"]],
     [0x0139, 0x0142, ["L", "l"]],
     [0x0143, 0x0148, ["N", "n"]],
     [0x0149, 0x0149, "n"],
     [0x014c, 0x0151, ["O", "o"]],
     [0x0154, 0x0159, ["R", "r"]],
     [0x015a, 0x0161, ["S", "s"]],
     [0x0162, 0x0167, ["T", "t"]],
     [0x0168, 0x0173, ["U", "u"]],
     [0x0174, 0x0175, ["W", "w"]],
     [0x0176, 0x0178, ["Y", "y", "Y"]],
     [0x0179, 0x017e, ["Z", "z"]],
     [0x0180, 0x0183, ["b", "B", "B", "b"]],
     [0x0187, 0x0189, ["C", "c", "D"]],
     [0x018a, 0x0192, ["D", "D", "d", "F", "f"]],
     [0x0193, 0x0194, ["G"]],
     [0x0197, 0x019b, ["I", "K", "k", "l", "l"]],
     [0x019d, 0x01a1, ["N", "n", "O", "O", "o"]],
     [0x01a4, 0x01a5, ["P", "p"]],
     [0x01ab, 0x01ab, ["t"]],
     [0x01ac, 0x01b0, ["T", "t", "T", "U", "u"]],
     [0x01b2, 0x01d2, ["V", "Y", "y", "Z", "z", "D", "L",
                       "N", "A", "a", "I", "i", "O", "o"]],
     [0x01d3, 0x01dc, ["U", "u"]],
     [0x01de, 0x01e1, ["A", "a"]],
     [0x01e4, 0x01ed, ["G", "g", "G", "g", "K", "k", "O", "o", "O", "o"]],
     [0x01f0, 0x01f5, ["j", "D", "G", "g"]],
     [0x01fa, 0x01fb, ["A", "a"]],
     [0x01fe, 0x0217, ["O", "o", "A", "a", "A", "a", "E", "e", "E",
                       "e", "I", "i", "I", "i", "O", "o", "O", "o",
                       "R", "r", "R", "r", "U", "u", "U", "u"]],
     [0x0253, 0x0257, ["b", "c", "d", "d"]],
     [0x0260, 0x0269, ["g", "h", "h", "i", "i"]],
     [0x026b, 0x0273, ["l", "l", "l", "l", "m", "n", "n"]],
     [0x027c, 0x028b, ["r", "r", "r", "r", "s", "t", "u", "u", "v"]],
     [0x0290, 0x0291, ["z"]],
     [0x029d, 0x02a0, ["j", "q"]],
     [0x1e00, 0x1e09, ["A", "a", "B", "b", "B", "b", "B", "b", "C", "c"]],
     [0x1e0a, 0x1e13, ["D", "d"]],
     [0x1e14, 0x1e1d, ["E", "e"]],
     [0x1e1e, 0x1e21, ["F", "f", "G", "g"]],
     [0x1e22, 0x1e2b, ["H", "h"]],
     [0x1e2c, 0x1e8f, ["I", "i", "I", "i", "K", "k", "K", "k", "K", "k",
                       "L", "l", "L", "l", "L", "l", "L", "l", "M", "m",
                       "M", "m", "M", "m", "N", "n", "N", "n", "N", "n",
                       "N", "n", "O", "o", "O", "o", "O", "o", "O", "o",
                       "P", "p", "P", "p", "R", "r", "R", "r", "R", "r",
                       "R", "r", "S", "s", "S", "s", "S", "s", "S", "s",
                       "S", "s", "T", "t", "T", "t", "T", "t", "T", "t",
                       "U", "u", "U", "u", "U", "u", "U", "u", "U", "u",
                       "V", "v", "V", "v", "W", "w", "W", "w", "W", "w",
                       "W", "w", "W", "w", "X", "x", "X", "x", "Y", "y"]],
     [0x1e90, 0x1e9a, ["Z", "z", "Z", "z", "Z", "z", "h", "t", "w", "y", "a"]],
     [0x1ea0, 0x1eb7, ["A", "a"]],
     [0x1eb8, 0x1ec7, ["E", "e"]],
     [0x1ec8, 0x1ecb, ["I", "i"]],
     [0x1ecc, 0x1ee3, ["O", "o"]],
     [0x1ee4, 0x1ef1, ["U", "u"]],
     [0x1ef2, 0x1ef9, ["Y", "y"]],
     [0x2071, 0x2071, "i"],
     [0x207f, 0x207f, "n"],
     [0x249c, 0x24b5, "a"],
     [0x24b6, 0x24cf, "A"],
     [0x24d0, 0x24e9, "a"],
     [0xff21, 0xff3a, "A"],
     [0xff41, 0xff5a, "a"]]);
/**
 * Translations from single-codepoint ligatures and digraphs to the
 * ASCII letter sequences they stand for, in range-table form (see
 * make_table_from_ranges).  Multi-character translations have to use
 * the array form of the spec, hence the one-element arrays.
 */
var ligatures_table = make_table_from_ranges(
    [[0x00c6, 0x00c6, ["AE"]],
     [0x00df, 0x00df, ["ss"]],
     [0x00e6, 0x00e6, ["ae"]],
     [0x0132, 0x0132, ["IJ"]],
     [0x0133, 0x0133, ["ij"]],
     [0x0152, 0x0152, ["OE"]],
     [0x0153, 0x0153, ["oe"]],
     [0x01e2, 0x01e2, ["AE"]],
     [0x01e3, 0x01e3, ["ae"]],
     [0x01fc, 0x01fc, ["AE"]],
     [0x01fd, 0x01fd, ["ae"]],
     [0xfb00, 0xfb00, ["ff"]],
     [0xfb01, 0xfb01, ["fi"]],
     [0xfb02, 0xfb02, ["fl"]],
     [0xfb03, 0xfb03, ["ffi"]],
     [0xfb04, 0xfb04, ["ffl"]],
     [0xfb05, 0xfb05, ["st"]],
     [0xfb06, 0xfb06, ["st"]]]);
// Install the built-in tables.  Each call chains the new table in front
// of those already installed and re-selects conkeror.hints_text_match.
add_table(accents_table);
add_table(ligatures_table);

provide("casual-spelling");