* etc/NEWS: Adjust to match previous patch.
[emacs.git] / src / casefiddle.c
blob11d594449166407c3b71a9e2ddcf21ca55b12a93
1 /* GNU Emacs case conversion functions.
3 Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation,
4 Inc.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
22 #include <config.h>
24 #include "lisp.h"
25 #include "character.h"
26 #include "buffer.h"
27 #include "commands.h"
28 #include "syntax.h"
29 #include "composite.h"
30 #include "keymap.h"
32 enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
34 static Lisp_Object
35 casify_object (enum case_action flag, Lisp_Object obj)
37 int c, c1;
38 bool inword = flag == CASE_DOWN;
40 /* If the case table is flagged as modified, rescan it. */
41 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
42 Fset_case_table (BVAR (current_buffer, downcase_table));
44 if (NATNUMP (obj))
46 int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
47 | CHAR_SHIFT | CHAR_CTL | CHAR_META);
48 int flags = XINT (obj) & flagbits;
49 bool multibyte = ! NILP (BVAR (current_buffer,
50 enable_multibyte_characters));
52 /* If the character has higher bits set
53 above the flags, return it unchanged.
54 It is not a real character. */
55 if (UNSIGNED_CMP (XFASTINT (obj), >, flagbits))
56 return obj;
58 c1 = XFASTINT (obj) & ~flagbits;
59 /* FIXME: Even if enable-multibyte-characters is nil, we may
60 manipulate multibyte chars. This means we have a bug for latin-1
61 chars since when we receive an int 128-255 we can't tell whether
62 it's an eight-bit byte or a latin-1 char. */
63 if (c1 >= 256)
64 multibyte = 1;
65 if (! multibyte)
66 MAKE_CHAR_MULTIBYTE (c1);
67 c = flag == CASE_DOWN ? downcase (c1) : upcase (c1);
68 if (c != c1)
70 if (! multibyte)
71 MAKE_CHAR_UNIBYTE (c);
72 XSETFASTINT (obj, c | flags);
74 return obj;
77 if (!STRINGP (obj))
78 wrong_type_argument (Qchar_or_string_p, obj);
79 else if (!STRING_MULTIBYTE (obj))
81 ptrdiff_t i;
82 ptrdiff_t size = SCHARS (obj);
84 obj = Fcopy_sequence (obj);
85 for (i = 0; i < size; i++)
87 c = SREF (obj, i);
88 MAKE_CHAR_MULTIBYTE (c);
89 c1 = c;
90 if (inword && flag != CASE_CAPITALIZE_UP)
91 c = downcase (c);
92 else if (!uppercasep (c)
93 && (!inword || flag != CASE_CAPITALIZE_UP))
94 c = upcase (c1);
95 if ((int) flag >= (int) CASE_CAPITALIZE)
96 inword = (SYNTAX (c) == Sword);
97 if (c != c1)
99 MAKE_CHAR_UNIBYTE (c);
100 /* If the char can't be converted to a valid byte, just don't
101 change it. */
102 if (c >= 0 && c < 256)
103 SSET (obj, i, c);
106 return obj;
108 else
110 ptrdiff_t i, i_byte, size = SCHARS (obj);
111 int len;
112 USE_SAFE_ALLOCA;
113 ptrdiff_t o_size;
114 if (INT_MULTIPLY_WRAPV (size, MAX_MULTIBYTE_LENGTH, &o_size))
115 o_size = PTRDIFF_MAX;
116 unsigned char *dst = SAFE_ALLOCA (o_size);
117 unsigned char *o = dst;
119 for (i = i_byte = 0; i < size; i++, i_byte += len)
121 if (o_size - MAX_MULTIBYTE_LENGTH < o - dst)
122 string_overflow ();
123 c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
124 if (inword && flag != CASE_CAPITALIZE_UP)
125 c = downcase (c);
126 else if (!inword || flag != CASE_CAPITALIZE_UP)
127 c = upcase (c);
128 if ((int) flag >= (int) CASE_CAPITALIZE)
129 inword = (SYNTAX (c) == Sword);
130 o += CHAR_STRING (c, o);
132 eassert (o - dst <= o_size);
133 obj = make_multibyte_string ((char *) dst, size, o - dst);
134 SAFE_FREE ();
135 return obj;
139 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
140 doc: /* Convert argument to upper case and return that.
141 The argument may be a character or string. The result has the same type.
142 The argument object is not altered--the value is a copy.
143 See also `capitalize', `downcase' and `upcase-initials'. */)
144 (Lisp_Object obj)
146 return casify_object (CASE_UP, obj);
149 DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
150 doc: /* Convert argument to lower case and return that.
151 The argument may be a character or string. The result has the same type.
152 The argument object is not altered--the value is a copy. */)
153 (Lisp_Object obj)
155 return casify_object (CASE_DOWN, obj);
158 DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
159 doc: /* Convert argument to capitalized form and return that.
160 This means that each word's first character is upper case
161 and the rest is lower case.
162 The argument may be a character or string. The result has the same type.
163 The argument object is not altered--the value is a copy. */)
164 (Lisp_Object obj)
166 return casify_object (CASE_CAPITALIZE, obj);
169 /* Like Fcapitalize but change only the initials. */
171 DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
172 doc: /* Convert the initial of each word in the argument to upper case.
173 Do not change the other letters of each word.
174 The argument may be a character or string. The result has the same type.
175 The argument object is not altered--the value is a copy. */)
176 (Lisp_Object obj)
178 return casify_object (CASE_CAPITALIZE_UP, obj);
181 /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
182 b and e specify range of buffer to operate on. */
184 static void
185 casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
187 int c;
188 bool inword = flag == CASE_DOWN;
189 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
190 ptrdiff_t start, end;
191 ptrdiff_t start_byte;
193 /* Position of first and last changes. */
194 ptrdiff_t first = -1, last;
196 ptrdiff_t opoint = PT;
197 ptrdiff_t opoint_byte = PT_BYTE;
199 if (EQ (b, e))
200 /* Not modifying because nothing marked */
201 return;
203 /* If the case table is flagged as modified, rescan it. */
204 if (NILP (XCHAR_TABLE (BVAR (current_buffer, downcase_table))->extras[1]))
205 Fset_case_table (BVAR (current_buffer, downcase_table));
207 validate_region (&b, &e);
208 start = XFASTINT (b);
209 end = XFASTINT (e);
210 modify_text (start, end);
211 record_change (start, end - start);
212 start_byte = CHAR_TO_BYTE (start);
214 SETUP_BUFFER_SYNTAX_TABLE (); /* For syntax_prefix_flag_p. */
216 while (start < end)
218 int c2, len;
220 if (multibyte)
222 c = FETCH_MULTIBYTE_CHAR (start_byte);
223 len = CHAR_BYTES (c);
225 else
227 c = FETCH_BYTE (start_byte);
228 MAKE_CHAR_MULTIBYTE (c);
229 len = 1;
231 c2 = c;
232 if (inword && flag != CASE_CAPITALIZE_UP)
233 c = downcase (c);
234 else if (!inword || flag != CASE_CAPITALIZE_UP)
235 c = upcase (c);
236 if ((int) flag >= (int) CASE_CAPITALIZE)
237 inword = ((SYNTAX (c) == Sword)
238 && (inword || !syntax_prefix_flag_p (c)));
239 if (c != c2)
241 last = start;
242 if (first < 0)
243 first = start;
245 if (! multibyte)
247 MAKE_CHAR_UNIBYTE (c);
248 FETCH_BYTE (start_byte) = c;
250 else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
251 FETCH_BYTE (start_byte) = c;
252 else
254 int tolen = CHAR_BYTES (c);
255 int j;
256 unsigned char str[MAX_MULTIBYTE_LENGTH];
258 CHAR_STRING (c, str);
259 if (len == tolen)
261 /* Length is unchanged. */
262 for (j = 0; j < len; ++j)
263 FETCH_BYTE (start_byte + j) = str[j];
265 else
267 /* Replace one character with the other,
268 keeping text properties the same. */
269 replace_range_2 (start, start_byte,
270 start + 1, start_byte + len,
271 (char *) str, 1, tolen,
273 len = tolen;
277 start++;
278 start_byte += len;
281 if (PT != opoint)
282 TEMP_SET_PT_BOTH (opoint, opoint_byte);
284 if (first >= 0)
286 signal_after_change (first, last + 1 - first, last + 1 - first);
287 update_compositions (first, last + 1, CHECK_ALL);
291 DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
292 "(list (region-beginning) (region-end) (region-noncontiguous-p))",
293 doc: /* Convert the region to upper case. In programs, wants two arguments.
294 These arguments specify the starting and ending character numbers of
295 the region to operate on. When used as a command, the text between
296 point and the mark is operated on.
297 See also `capitalize-region'. */)
298 (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
300 Lisp_Object bounds = Qnil;
302 if (!NILP (region_noncontiguous_p))
304 bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
305 intern ("bounds"));
307 while (CONSP (bounds))
309 casify_region (CASE_UP, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
310 bounds = XCDR (bounds);
313 else
314 casify_region (CASE_UP, beg, end);
316 return Qnil;
319 DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 3,
320 "(list (region-beginning) (region-end) (region-noncontiguous-p))",
321 doc: /* Convert the region to lower case. In programs, wants two arguments.
322 These arguments specify the starting and ending character numbers of
323 the region to operate on. When used as a command, the text between
324 point and the mark is operated on. */)
325 (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
327 Lisp_Object bounds = Qnil;
329 if (!NILP (region_noncontiguous_p))
331 bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
332 intern ("bounds"));
334 while (CONSP (bounds))
336 casify_region (CASE_DOWN, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
337 bounds = XCDR (bounds);
340 else
341 casify_region (CASE_DOWN, beg, end);
343 return Qnil;
346 DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
347 doc: /* Convert the region to capitalized form.
348 Capitalized form means each word's first character is upper case
349 and the rest of it is lower case.
350 In programs, give two arguments, the starting and ending
351 character positions to operate on. */)
352 (Lisp_Object beg, Lisp_Object end)
354 casify_region (CASE_CAPITALIZE, beg, end);
355 return Qnil;
358 /* Like Fcapitalize_region but change only the initials. */
360 DEFUN ("upcase-initials-region", Fupcase_initials_region,
361 Supcase_initials_region, 2, 2, "r",
362 doc: /* Upcase the initial of each word in the region.
363 Subsequent letters of each word are not changed.
364 In programs, give two arguments, the starting and ending
365 character positions to operate on. */)
366 (Lisp_Object beg, Lisp_Object end)
368 casify_region (CASE_CAPITALIZE_UP, beg, end);
369 return Qnil;
372 static Lisp_Object
373 casify_word (enum case_action flag, Lisp_Object arg)
375 CHECK_NUMBER (arg);
376 ptrdiff_t farend = scan_words (PT, XINT (arg));
377 if (!farend)
378 farend = XINT (arg) <= 0 ? BEGV : ZV;
379 ptrdiff_t newpoint = max (PT, farend);
380 casify_region (flag, make_number (PT), make_number (farend));
381 SET_PT (newpoint);
382 return Qnil;
385 DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
386 doc: /* Convert to upper case from point to end of word, moving over.
388 If point is in the middle of a word, the part of that word before point
389 is ignored when moving forward.
391 With negative argument, convert previous words but do not move.
392 See also `capitalize-word'. */)
393 (Lisp_Object arg)
395 return casify_word (CASE_UP, arg);
398 DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
399 doc: /* Convert to lower case from point to end of word, moving over.
401 If point is in the middle of a word, the part of that word before point
402 is ignored when moving forward.
404 With negative argument, convert previous words but do not move. */)
405 (Lisp_Object arg)
407 return casify_word (CASE_DOWN, arg);
410 DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
411 doc: /* Capitalize from point to the end of word, moving over.
412 With numerical argument ARG, capitalize the next ARG-1 words as well.
413 This gives the word(s) a first character in upper case
414 and the rest lower case.
416 If point is in the middle of a word, the part of that word before point
417 is ignored when moving forward.
419 With negative argument, capitalize previous words but do not move. */)
420 (Lisp_Object arg)
422 return casify_word (CASE_CAPITALIZE, arg);
425 void
426 syms_of_casefiddle (void)
428 DEFSYM (Qidentity, "identity");
429 defsubr (&Supcase);
430 defsubr (&Sdowncase);
431 defsubr (&Scapitalize);
432 defsubr (&Supcase_initials);
433 defsubr (&Supcase_region);
434 defsubr (&Sdowncase_region);
435 defsubr (&Scapitalize_region);
436 defsubr (&Supcase_initials_region);
437 defsubr (&Supcase_word);
438 defsubr (&Sdowncase_word);
439 defsubr (&Scapitalize_word);
442 void
443 keys_of_casefiddle (void)
445 initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
446 Fput (intern ("upcase-region"), Qdisabled, Qt);
447 initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
448 Fput (intern ("downcase-region"), Qdisabled, Qt);
450 initial_define_key (meta_map, 'u', "upcase-word");
451 initial_define_key (meta_map, 'l', "downcase-word");
452 initial_define_key (meta_map, 'c', "capitalize-word");