2 * (C) Copyright 2009-2010 John J. Foerch
4 * Use, modification, and distribution are subject to the terms specified in the
/*
 * This module provides a pattern matcher for hints_text_match which lets you
 * type ASCII characters in the hints minibuffer to match Unicode characters
 * such as accented letters and ligatures.
 */
/**
 * Inspect one table entry (or one range-spec) and report which matcher
 * features it needs.
 *
 * Only object entries (in practice arrays) are examined; any other value,
 * including a plain string, yields both flags false.  Within an object
 * entry:
 *   - a string element longer than one character sets `ligatures`;
 *   - an object element (an array of alternatives) sets `multiples`, and
 *     additionally `ligatures` if any alternative is longer than one
 *     character.
 *
 * NOTE(review): the flag assignments, the `return` and the closing braces
 * were missing from this copy of the file and have been restored from the
 * visible structure -- confirm against the upstream module.
 *
 * @param {*} entry  table value or range-spec to inspect
 * @returns {{ligatures: boolean, multiples: boolean}}
 */
function table_entry_features (entry) {
    var ret = { ligatures: false, multiples: false };
    if (typeof entry != "object")
        return ret;
    // Standard for..in over the values replaces the SpiderMonkey-only
    // `for each (... in ...)`; for..in over null simply does not iterate.
    for (var key in entry) {
        var t = entry[key];
        if (typeof t == "string") {
            if (t.length > 1)
                ret.ligatures = true;
        } else if (t !== null && typeof t == "object") {
            ret.multiples = true;
            if (t.some(function (x) { return x.length > 1; }))
                ret.ligatures = true;
        }
    }
    return ret;
}
/**
 * Annotate a ready-made casual-spelling table with the feature flags that
 * add_table uses to choose a matcher.
 *
 * Every enumerable value of `table` is passed to table_entry_features; if
 * any of them reports a feature, the corresponding `ligatures` /
 * `multiples` property is set to true on `table` itself.  The table is
 * modified in place.
 *
 * Note that table_entry_features only inspects object values, so a table
 * whose values are all plain strings never gets either flag from here.
 *
 * NOTE(review): `return table;` and the closing braces were missing from
 * this copy of the file; they are restored on the evidence that the result
 * is passed straight to add_table elsewhere in the file -- confirm against
 * the upstream module.
 *
 * @param {Object} table  map from a character to its translation
 * @returns {Object} the same `table` object
 */
function make_table (table) {
    var ligatures = false;
    var multiples = false;
    for (var k in table) {
        var features = table_entry_features(table[k]);
        if (features.ligatures) ligatures = true;
        if (features.multiples) multiples = true;
    }
    // Flags are only ever set, never cleared, exactly as before.
    if (ligatures) table.ligatures = true;
    if (multiples) table.multiples = true;
    return table;
}
/**
 * make_table_from_ranges is a constructor of a casual-spelling
 * table which generates the table from a shorthand form
 * called a "range table".  A range table is an array where each element
 * is an array of three elements: range-low, range-high, and range-spec.
 * Low and high are integer codepoints of unicode characters to be
 * translated (both ends inclusive).  The spec can be a string, or an
 * array.  If it is a string, it should be a single character.  The
 * designated range will have that character as its low-point translation,
 * and each next character in the range will be incremented from that
 * point.  This is how you can compactly denote alphabetic ranges, for
 * example.  If the spec is an array, the strings in that array will be
 * repeated (cyclically) over the designated range.  Multi-character
 * translations, such as for ligatures, must be given in the array form of
 * range-spec.
 *
 * Keys are built with String.fromCharCode, so codepoints are treated as
 * single UTF-16 code units.
 *
 * NOTE(review): `var ret = {};`, the `else` and the `return ret;` were
 * missing from this copy of the file and have been restored from the
 * visible structure -- confirm against the upstream module.
 *
 * @param {Array} table  range table: array of [low, high, spec] triples
 * @returns {Object} map from character to translation, with `ligatures`
 *     and/or `multiples` set to true when table_entry_features reports
 *     them for any spec
 */
function make_table_from_ranges (table) {
    var ret = {};
    // forEach rather than map: the callback is run for its side effects
    // on `ret` only.
    table.forEach(function (range) {
        var low = range[0];
        var high = range[1];
        var spec = range[2];
        var features = table_entry_features(spec);
        if (features.ligatures) ret.ligatures = true;
        if (features.multiples) ret.multiples = true;
        for (var c = low; c <= high; c++) {
            var chr = String.fromCharCode(c);
            if (typeof spec == "string")
                ret[chr] = String.fromCharCode(spec.charCodeAt(0) + c - low);
            else
                ret[chr] = spec[(c - low) % spec.length];
        }
    });
    return ret;
}
/**
 * Look up the casual spelling of a single character.
 *
 * `tables` is a variable defined outside this function (its declaration is
 * not visible in this copy of the file); add_table links each added table
 * to the previous one through __proto__, so one property lookup consults
 * all of them.
 *
 * @param {string} chr  character to translate
 * @returns {*} the table value for `chr`, or `chr` itself when there is no
 *     entry (or the entry is falsy)
 */
function translate (chr) {
    return tables[chr] || chr;
}
/**
 * Basic matcher: find `pattern` in `text`, where each pattern character
 * may match either the text character itself or its translation.
 *
 * NOTE(review): the empty-pattern guard, the `break`, and both `return`
 * statements were missing from this copy of the file.  They are restored
 * as "[start, end) index pair on success, false on failure, [0, 0] for an
 * empty pattern" -- confirm the exact return values against the upstream
 * module before merging.
 *
 * @param {string} text     text to search
 * @param {string} pattern  characters typed by the user
 * @returns {Array|boolean} [start, end] of the first match, or false
 */
function hints_text_match (text, pattern) {
    if (pattern === "")
        return [0, 0];
    var plen = pattern.length;
    // `last` is the final start offset at which the pattern still fits;
    // it is negative (no iterations) when the pattern is longer than text.
    for (var i = 0, last = text.length - plen; i <= last; i++) {
        var j = 0;
        // The comparison against the translated value is deliberately
        // loose (==), as in the original `!=` test.
        while (j < plen &&
               (pattern[j] === text[i+j] ||
                pattern[j] == translate(text[i+j])))
            j++;
        if (j === plen)
            return [i, i + plen];
    }
    return false;
}
/**
 * Matcher used when some table entry translates one text character into
 * several pattern characters (a ligature).
 *
 * Each text character consumes either one pattern character (a literal
 * match) or as many pattern characters as its translation is long.
 *
 * NOTE(review): the empty-pattern guard, `var elen = 1;`, the `break`, the
 * pattern-offset update and both `return` statements were missing from
 * this copy of the file.  They are restored as "[start, end) index pair
 * into `text` on success, false on failure" -- confirm against the
 * upstream module before merging.
 *
 * @param {string} text     text to search
 * @param {string} pattern  characters typed by the user
 * @returns {Array|boolean} [start, end] of the first match, or false
 */
function hints_text_match_ligatures (text, pattern) {
    if (pattern === "")
        return [0, 0];
    var tlen = text.length;
    var plen = pattern.length;
    // Array.prototype.map.call replaces the non-standard `Array.map`
    // generic; translate every character once up front.
    var decoded = Array.prototype.map.call(text, function (chr) {
        return translate(chr);
    });
    for (var i = 0; i < tlen; i++) {
        // e walks the text from i, j walks the pattern; they advance at
        // different rates once a ligature has been matched.
        for (var e = 0, j = 0; i + e < tlen; e++) {
            var elen = 1;
            if (pattern[j] != text[i+e]) {
                var spelled = decoded[i+e];
                elen = spelled.length;
                if (pattern.substring(j, j + elen) != spelled)
                    break;
            }
            j += elen;
            if (j === plen)
                return [i, i + e + 1];
        }
    }
    return false;
}
/**
 * Matcher used when some table entry offers several alternative
 * single-character spellings for one text character.
 *
 * For every text character the candidates are the character itself
 * followed by its translation(s); a pattern character matches if it equals
 * any candidate.
 *
 * Fix: the start-offset loop previously ran while `i < text.length -
 * pattern.length`, which skipped the last possible start position, so a
 * match ending at the final character of `text` (including a pattern as
 * long as the whole text) was never found.  The bound is now inclusive,
 * in line with hints_text_match.
 *
 * NOTE(review): the empty-pattern guard, the `break` and the `return`
 * statements were missing from this copy of the file.  They are restored
 * as "[start, end) index pair on success, false on failure" -- confirm
 * against the upstream module before merging.
 *
 * @param {string} text     text to search
 * @param {string} pattern  characters typed by the user
 * @returns {Array|boolean} [start, end] of the first match, or false
 */
function hints_text_match_multiples (text, pattern) {
    if (pattern === "")
        return [0, 0];
    var plen = pattern.length;
    // Standard replacements for the non-standard `Array.map` /
    // `Array.concat` generics: concat flattens an array translation, so
    // each element of `decoded` is a flat list of candidate strings.
    var decoded = Array.prototype.map.call(text, function (chr) {
        return [chr].concat(translate(chr));
    });
    for (var i = 0, last = text.length - plen; i <= last; i++) {
        var j = 0;
        while (j < plen) {
            var wanted = pattern[j];
            // Loose comparison kept from the original.
            var found = decoded[i+j].some(function (x) {
                return x == wanted;
            });
            if (!found)
                break;
            j++;
        }
        if (j === plen)
            return [i, i + plen];
    }
    return false;
}
/**
 * Matcher used when the tables contain both ligatures (multi-character
 * translations) and multiples (several alternative translations).
 *
 * For every text character the candidates are the character itself
 * followed by its translation(s).  The FIRST candidate that equals the
 * pattern text at the current pattern offset wins and consumes that many
 * pattern characters; there is no backtracking to try a later candidate.
 *
 * NOTE(review): the empty-pattern guard, the `break`, the pattern-offset
 * update and both `return` statements were missing from this copy of the
 * file.  They are restored as "[start, end) index pair into `text` on
 * success, false on failure" -- confirm against the upstream module before
 * merging.
 *
 * @param {string} text     text to search
 * @param {string} pattern  characters typed by the user
 * @returns {Array|boolean} [start, end] of the first match, or false
 */
function hints_text_match_ligatures_multiples (text, pattern) {
    if (pattern === "")
        return [0, 0];
    var tlen = text.length;
    var plen = pattern.length;
    // Standard replacements for the non-standard `Array.map` /
    // `Array.concat` generics.
    var decoded = Array.prototype.map.call(text, function (chr) {
        return [chr].concat(translate(chr));
    });
    for (var i = 0; i < tlen; i++) {
        // e walks the text from i, j walks the pattern.
        for (var e = 0, j = 0; i + e < tlen; e++) {
            var mlen = 0;
            // `some` stops at the first candidate that matches, leaving
            // that candidate's length in mlen.
            var hit = decoded[i+e].some(function (candidate) {
                mlen = candidate.length;
                return pattern.substring(j, j + mlen) == candidate;
            });
            if (!hit)
                break;
            j += mlen;
            if (j === plen)
                return [i, i + e + 1];
        }
    }
    return false;
}
/**
 * Register a casual-spelling table and install the matcher that fits the
 * features of all tables registered so far.
 *
 * The new table is chained in front of the current `tables` object through
 * __proto__, so lookups in `tables` (and the `ligatures` / `multiples`
 * flags) fall through to every earlier table.  `tables` and `conkeror` are
 * defined outside this function.
 *
 * NOTE(review): `tables = table;` and the final `else` were missing from
 * this copy of the file and have been restored from the visible structure
 * -- confirm against the upstream module.
 *
 * @param {Object} table  table built by make_table or
 *     make_table_from_ranges
 */
function add_table (table) {
    table.__proto__ = tables;
    tables = table;

    var matcher;
    if (tables.ligatures && tables.multiples)
        matcher = hints_text_match_ligatures_multiples;
    else if (tables.ligatures)
        matcher = hints_text_match_ligatures;
    else if (tables.multiples)
        matcher = hints_text_match_multiples;
    else
        matcher = hints_text_match;
    conkeror.hints_text_match = matcher;
}
// Register an empty table at load time: this runs add_table once, which
// assigns conkeror.hints_text_match before any real table has been added.
add_table(make_table({}));
/*
 * Accented, decorated, enclosed and fullwidth Latin letters, each mapped
 * to a single plain ASCII letter.  See make_table_from_ranges for the
 * [low, high, spec] row format; an array spec is repeated cyclically over
 * its range.
 *
 * Rows corrected here, all of which previously had a spec that did not
 * line up with the codepoints in its range:
 *   - 0x0136: the range stopped at 0x0136 although the spec lists both
 *     "K" and "k", so U+0137 (k with cedilla) had no entry.
 *   - 0x018a-0x0192: five spec entries were spread over nine codepoints,
 *     which mapped U+0191/U+0192 (F/f with hook) to "d"/"F" and gave
 *     unrelated letters (U+018D-U+0190) D/F translations.  Split into the
 *     D rows and the F rows.
 *   - 0x01b2-0x01d2: fourteen spec entries were spread over thirty-three
 *     codepoints, which mis-mapped U+01CD-U+01D2 (A/a/I/i/O/o with caron).
 *     Split into the contiguous runs the spec letters belong to.
 *   - 0x01f0-0x01f5: mapped U+01F4/U+01F5 (G/g with acute) to "j"/"D".
 *     Split so that they map to "G"/"g".
 *
 * NOTE(review): U+01C5 keeps the "D" it already had; mapping U+01C8 to "L"
 * and U+01CB to "N" is inferred from the old spec, which listed D, L, N
 * between the z and A entries -- confirm.
 *
 * NOTE(review): the IPA rows 0x0253, 0x0260, 0x026b, 0x027c and 0x029d
 * also have specs shorter than their ranges, so the spec wraps around
 * (for example U+0257 maps to "b").  Their intended layout is not clear
 * from the data, so they are left unchanged.
 */
var accents_table = make_table_from_ranges(
    [[0x00a9, 0x00a9, "C"],//copyright
     [0x00c0, 0x00c5, ["A"]],
     [0x00c7, 0x00c7, "C"],
     [0x00c8, 0x00cb, ["E"]],
     [0x00cc, 0x00cf, ["I"]],
     [0x00d1, 0x00d1, "N"],
     [0x00d2, 0x00d6, ["O"]],
     [0x00d8, 0x00d8, "O"],
     [0x00d9, 0x00dc, ["U"]],
     [0x00dd, 0x00dd, "Y"],
     [0x00e0, 0x00e5, ["a"]],
     [0x00e7, 0x00e7, "c"],
     [0x00e8, 0x00eb, ["e"]],
     [0x00ec, 0x00ef, ["i"]],
     [0x00f1, 0x00f1, "n"],
     [0x00f2, 0x00f6, ["o"]],
     [0x00f8, 0x00f8, "o"],
     [0x00f9, 0x00fc, ["u"]],
     [0x00fd, 0x00fd, "y"],
     [0x00ff, 0x00ff, "y"],
     [0x0100, 0x0105, ["A", "a"]],
     [0x0106, 0x010d, ["C", "c"]],
     [0x010e, 0x0111, ["D", "d"]],
     [0x0112, 0x011b, ["E", "e"]],
     [0x011c, 0x0123, ["G", "g"]],
     [0x0124, 0x0127, ["H", "h"]],
     [0x0128, 0x0130, ["I", "i"]],
     [0x0134, 0x0135, ["J", "j"]],
     [0x0136, 0x0137, ["K", "k"]],
     [0x0139, 0x0142, ["L", "l"]],
     [0x0143, 0x0148, ["N", "n"]],
     [0x0149, 0x0149, "n"],
     [0x014c, 0x0151, ["O", "o"]],
     [0x0154, 0x0159, ["R", "r"]],
     [0x015a, 0x0161, ["S", "s"]],
     [0x0162, 0x0167, ["T", "t"]],
     [0x0168, 0x0173, ["U", "u"]],
     [0x0174, 0x0175, ["W", "w"]],
     [0x0176, 0x0178, ["Y", "y", "Y"]],
     [0x0179, 0x017e, ["Z", "z"]],
     [0x0180, 0x0183, ["b", "B", "B", "b"]],
     [0x0187, 0x0189, ["C", "c", "D"]],
     [0x018a, 0x018c, ["D", "D", "d"]],
     [0x0191, 0x0192, ["F", "f"]],
     [0x0193, 0x0194, ["G"]],
     [0x0197, 0x019b, ["I", "K", "k", "l", "l"]],
     [0x019d, 0x01a1, ["N", "n", "O", "O", "o"]],
     [0x01a4, 0x01a5, ["P", "p"]],
     [0x01ab, 0x01ab, ["t"]],
     [0x01ac, 0x01b0, ["T", "t", "T", "U", "u"]],
     [0x01b2, 0x01b6, ["V", "Y", "y", "Z", "z"]],
     [0x01c5, 0x01c5, "D"],
     [0x01c8, 0x01c8, "L"],
     [0x01cb, 0x01cb, "N"],
     [0x01cd, 0x01d2, ["A", "a", "I", "i", "O", "o"]],
     [0x01d3, 0x01dc, ["U", "u"]],
     [0x01de, 0x01e1, ["A", "a"]],
     [0x01e4, 0x01ed, ["G", "g", "G", "g", "K", "k", "O", "o", "O", "o"]],
     [0x01f0, 0x01f1, ["j", "D"]],
     [0x01f4, 0x01f5, ["G", "g"]],
     [0x01fa, 0x01fb, ["A", "a"]],
     [0x01fe, 0x0217, ["O", "o", "A", "a", "A", "a", "E", "e", "E",
                       "e", "I", "i", "I", "i", "O", "o", "O", "o",
                       "R", "r", "R", "r", "U", "u", "U", "u"]],
     [0x0253, 0x0257, ["b", "c", "d", "d"]],
     [0x0260, 0x0269, ["g", "h", "h", "i", "i"]],
     [0x026b, 0x0273, ["l", "l", "l", "l", "m", "n", "n"]],
     [0x027c, 0x028b, ["r", "r", "r", "r", "s", "t", "u", "u", "v"]],
     [0x0290, 0x0291, ["z"]],
     [0x029d, 0x02a0, ["j", "q"]],
     [0x1e00, 0x1e09, ["A", "a", "B", "b", "B", "b", "B", "b", "C", "c"]],
     [0x1e0a, 0x1e13, ["D", "d"]],
     [0x1e14, 0x1e1d, ["E", "e"]],
     [0x1e1e, 0x1e21, ["F", "f", "G", "g"]],
     [0x1e22, 0x1e2b, ["H", "h"]],
     [0x1e2c, 0x1e8f, ["I", "i", "I", "i", "K", "k", "K", "k", "K", "k",
                       "L", "l", "L", "l", "L", "l", "L", "l", "M", "m",
                       "M", "m", "M", "m", "N", "n", "N", "n", "N", "n",
                       "N", "n", "O", "o", "O", "o", "O", "o", "O", "o",
                       "P", "p", "P", "p", "R", "r", "R", "r", "R", "r",
                       "R", "r", "S", "s", "S", "s", "S", "s", "S", "s",
                       "S", "s", "T", "t", "T", "t", "T", "t", "T", "t",
                       "U", "u", "U", "u", "U", "u", "U", "u", "U", "u",
                       "V", "v", "V", "v", "W", "w", "W", "w", "W", "w",
                       "W", "w", "W", "w", "X", "x", "X", "x", "Y", "y"]],
     [0x1e90, 0x1e9a, ["Z", "z", "Z", "z", "Z", "z", "h", "t", "w", "y", "a"]],
     [0x1ea0, 0x1eb7, ["A", "a"]],
     [0x1eb8, 0x1ec7, ["E", "e"]],
     [0x1ec8, 0x1ecb, ["I", "i"]],
     [0x1ecc, 0x1ee3, ["O", "o"]],
     [0x1ee4, 0x1ef1, ["U", "u"]],
     [0x1ef2, 0x1ef9, ["Y", "y"]],
     [0x2071, 0x2071, "i"],
     [0x207f, 0x207f, "n"],
     [0x249c, 0x24b5, "a"],
     [0x24b6, 0x24cf, "A"],
     [0x24d0, 0x24e9, "a"],
     [0xff21, 0xff3a, "A"],
     [0xff41, 0xff5a, "a"]]);
/*
 * Ligature and digraph characters, each mapped to the multi-character
 * ASCII spelling that can be typed for it.  Every spec uses the array form
 * because make_table_from_ranges treats a string spec as a single starting
 * character.
 */
var ligatures_table = make_table_from_ranges(
    [[0x00c6, 0x00c6, ["AE"]],
     [0x00df, 0x00df, ["ss"]],
     [0x00e6, 0x00e6, ["ae"]],
     [0x0132, 0x0132, ["IJ"]],
     [0x0133, 0x0133, ["ij"]],
     [0x0152, 0x0152, ["OE"]],
     [0x0153, 0x0153, ["oe"]],
     [0x01e2, 0x01e2, ["AE"]],
     [0x01e3, 0x01e3, ["ae"]],
     [0x01fc, 0x01fc, ["AE"]],
     [0x01fd, 0x01fd, ["ae"]],
     [0xfb00, 0xfb00, ["ff"]],
     [0xfb01, 0xfb01, ["fi"]],
     [0xfb02, 0xfb02, ["fl"]],
     [0xfb03, 0xfb03, ["ffi"]],
     [0xfb04, 0xfb04, ["ffl"]],
     [0xfb05, 0xfb05, ["st"]],
     [0xfb06, 0xfb06, ["st"]]]);
// Register the built-in tables.  Each add_table call chains the new table
// in front of the ones already registered and re-selects the matcher, so
// after the ligatures table is added a ligature-aware matcher is in use.
add_table(accents_table);
add_table(ligatures_table);

provide("casual-spelling");