Change to flush and close logic to fix #1760556.
[python.git] / Modules / stropmodule.c
blob8b00fed69a8434da5f5495a45bbadae6f8554b68
1 /* strop module */
3 #define PY_SSIZE_T_CLEAN
4 #include "Python.h"
5 #include <ctype.h>
7 PyDoc_STRVAR(strop_module__doc__,
8 "Common string manipulations, optimized for speed.\n"
9 "\n"
10 "Always use \"import string\" rather than referencing\n"
11 "this module directly.");
13 /* XXX This file assumes that the <ctype.h> is*() functions
14 XXX are defined for all 8-bit characters! */
/* Issue the module-wide DeprecationWarning; if PyErr_Warn() reports
   failure (nonzero), make the calling function return NULL.

   The body is wrapped in do { } while (0) so that "WARN;" is exactly
   one statement and the hidden "if" cannot capture a following "else". */
#define WARN \
    do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
                       "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
/* lstrip(), rstrip() and strip() all funnel into do_strip(); these
   selectors tell it which end(s) of the string to trim. */
enum {
    LEFTSTRIP = 0,
    RIGHTSTRIP = 1,
    BOTHSTRIP = 2
};
29 static PyObject *
30 split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
32 Py_ssize_t i = 0, j;
33 int err;
34 Py_ssize_t countsplit = 0;
35 PyObject* item;
36 PyObject *list = PyList_New(0);
38 if (list == NULL)
39 return NULL;
41 while (i < len) {
42 while (i < len && isspace(Py_CHARMASK(s[i]))) {
43 i = i+1;
45 j = i;
46 while (i < len && !isspace(Py_CHARMASK(s[i]))) {
47 i = i+1;
49 if (j < i) {
50 item = PyString_FromStringAndSize(s+j, i-j);
51 if (item == NULL)
52 goto finally;
54 err = PyList_Append(list, item);
55 Py_DECREF(item);
56 if (err < 0)
57 goto finally;
59 countsplit++;
60 while (i < len && isspace(Py_CHARMASK(s[i]))) {
61 i = i+1;
63 if (maxsplit && (countsplit >= maxsplit) && i < len) {
64 item = PyString_FromStringAndSize(
65 s+i, len - i);
66 if (item == NULL)
67 goto finally;
69 err = PyList_Append(list, item);
70 Py_DECREF(item);
71 if (err < 0)
72 goto finally;
74 i = len;
78 return list;
79 finally:
80 Py_DECREF(list);
81 return NULL;
85 PyDoc_STRVAR(splitfields__doc__,
86 "split(s [,sep [,maxsplit]]) -> list of strings\n"
87 "splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
88 "\n"
89 "Return a list of the words in the string s, using sep as the\n"
90 "delimiter string. If maxsplit is nonzero, splits into at most\n"
91 "maxsplit words. If sep is not specified, any whitespace string\n"
92 "is a separator. Maxsplit defaults to 0.\n"
93 "\n"
94 "(split and splitfields are synonymous)");
96 static PyObject *
97 strop_splitfields(PyObject *self, PyObject *args)
99 Py_ssize_t len, n, i, j, err;
100 Py_ssize_t splitcount, maxsplit;
101 char *s, *sub;
102 PyObject *list, *item;
104 WARN;
105 sub = NULL;
106 n = 0;
107 splitcount = 0;
108 maxsplit = 0;
109 if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
110 return NULL;
111 if (sub == NULL)
112 return split_whitespace(s, len, maxsplit);
113 if (n == 0) {
114 PyErr_SetString(PyExc_ValueError, "empty separator");
115 return NULL;
118 list = PyList_New(0);
119 if (list == NULL)
120 return NULL;
122 i = j = 0;
123 while (i+n <= len) {
124 if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
125 item = PyString_FromStringAndSize(s+j, i-j);
126 if (item == NULL)
127 goto fail;
128 err = PyList_Append(list, item);
129 Py_DECREF(item);
130 if (err < 0)
131 goto fail;
132 i = j = i + n;
133 splitcount++;
134 if (maxsplit && (splitcount >= maxsplit))
135 break;
137 else
138 i++;
140 item = PyString_FromStringAndSize(s+j, len-j);
141 if (item == NULL)
142 goto fail;
143 err = PyList_Append(list, item);
144 Py_DECREF(item);
145 if (err < 0)
146 goto fail;
148 return list;
150 fail:
151 Py_DECREF(list);
152 return NULL;
156 PyDoc_STRVAR(joinfields__doc__,
157 "join(list [,sep]) -> string\n"
158 "joinfields(list [,sep]) -> string\n"
159 "\n"
160 "Return a string composed of the words in list, with\n"
161 "intervening occurrences of sep. Sep defaults to a single\n"
162 "space.\n"
163 "\n"
164 "(join and joinfields are synonymous)");
166 static PyObject *
167 strop_joinfields(PyObject *self, PyObject *args)
169 PyObject *seq;
170 char *sep = NULL;
171 Py_ssize_t seqlen, seplen = 0;
172 Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
173 PyObject *res = NULL;
174 char* p = NULL;
175 ssizeargfunc getitemfunc;
177 WARN;
178 if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
179 return NULL;
180 if (sep == NULL) {
181 sep = " ";
182 seplen = 1;
185 seqlen = PySequence_Size(seq);
186 if (seqlen < 0 && PyErr_Occurred())
187 return NULL;
189 if (seqlen == 1) {
190 /* Optimization if there's only one item */
191 PyObject *item = PySequence_GetItem(seq, 0);
192 if (item && !PyString_Check(item)) {
193 PyErr_SetString(PyExc_TypeError,
194 "first argument must be sequence of strings");
195 Py_DECREF(item);
196 return NULL;
198 return item;
201 if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
202 return NULL;
203 p = PyString_AsString(res);
205 /* optimize for lists, since it's the most common case. all others
206 * (tuples and arbitrary sequences) just use the sequence abstract
207 * interface.
209 if (PyList_Check(seq)) {
210 for (i = 0; i < seqlen; i++) {
211 PyObject *item = PyList_GET_ITEM(seq, i);
212 if (!PyString_Check(item)) {
213 PyErr_SetString(PyExc_TypeError,
214 "first argument must be sequence of strings");
215 Py_DECREF(res);
216 return NULL;
218 slen = PyString_GET_SIZE(item);
219 while (reslen + slen + seplen >= sz) {
220 if (_PyString_Resize(&res, sz * 2) < 0)
221 return NULL;
222 sz *= 2;
223 p = PyString_AsString(res) + reslen;
225 if (i > 0) {
226 memcpy(p, sep, seplen);
227 p += seplen;
228 reslen += seplen;
230 memcpy(p, PyString_AS_STRING(item), slen);
231 p += slen;
232 reslen += slen;
234 _PyString_Resize(&res, reslen);
235 return res;
238 if (seq->ob_type->tp_as_sequence == NULL ||
239 (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
241 PyErr_SetString(PyExc_TypeError,
242 "first argument must be a sequence");
243 return NULL;
245 /* This is now type safe */
246 for (i = 0; i < seqlen; i++) {
247 PyObject *item = getitemfunc(seq, i);
248 if (!item || !PyString_Check(item)) {
249 PyErr_SetString(PyExc_TypeError,
250 "first argument must be sequence of strings");
251 Py_DECREF(res);
252 Py_XDECREF(item);
253 return NULL;
255 slen = PyString_GET_SIZE(item);
256 while (reslen + slen + seplen >= sz) {
257 if (_PyString_Resize(&res, sz * 2) < 0) {
258 Py_DECREF(item);
259 return NULL;
261 sz *= 2;
262 p = PyString_AsString(res) + reslen;
264 if (i > 0) {
265 memcpy(p, sep, seplen);
266 p += seplen;
267 reslen += seplen;
269 memcpy(p, PyString_AS_STRING(item), slen);
270 p += slen;
271 reslen += slen;
272 Py_DECREF(item);
274 _PyString_Resize(&res, reslen);
275 return res;
279 PyDoc_STRVAR(find__doc__,
280 "find(s, sub [,start [,end]]) -> in\n"
281 "\n"
282 "Return the lowest index in s where substring sub is found,\n"
283 "such that sub is contained within s[start,end]. Optional\n"
284 "arguments start and end are interpreted as in slice notation.\n"
285 "\n"
286 "Return -1 on failure.");
288 static PyObject *
289 strop_find(PyObject *self, PyObject *args)
291 char *s, *sub;
292 Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
294 WARN;
295 if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
296 return NULL;
298 if (last > len)
299 last = len;
300 if (last < 0)
301 last += len;
302 if (last < 0)
303 last = 0;
304 if (i < 0)
305 i += len;
306 if (i < 0)
307 i = 0;
309 if (n == 0 && i <= last)
310 return PyInt_FromLong((long)i);
312 last -= n;
313 for (; i <= last; ++i)
314 if (s[i] == sub[0] &&
315 (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
316 return PyInt_FromLong((long)i);
318 return PyInt_FromLong(-1L);
322 PyDoc_STRVAR(rfind__doc__,
323 "rfind(s, sub [,start [,end]]) -> int\n"
324 "\n"
325 "Return the highest index in s where substring sub is found,\n"
326 "such that sub is contained within s[start,end]. Optional\n"
327 "arguments start and end are interpreted as in slice notation.\n"
328 "\n"
329 "Return -1 on failure.");
331 static PyObject *
332 strop_rfind(PyObject *self, PyObject *args)
334 char *s, *sub;
335 Py_ssize_t len, n, j;
336 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
338 WARN;
339 if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
340 return NULL;
342 if (last > len)
343 last = len;
344 if (last < 0)
345 last += len;
346 if (last < 0)
347 last = 0;
348 if (i < 0)
349 i += len;
350 if (i < 0)
351 i = 0;
353 if (n == 0 && i <= last)
354 return PyInt_FromLong((long)last);
356 for (j = last-n; j >= i; --j)
357 if (s[j] == sub[0] &&
358 (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
359 return PyInt_FromLong((long)j);
361 return PyInt_FromLong(-1L);
365 static PyObject *
366 do_strip(PyObject *args, int striptype)
368 char *s;
369 Py_ssize_t len, i, j;
372 if (PyString_AsStringAndSize(args, &s, &len))
373 return NULL;
375 i = 0;
376 if (striptype != RIGHTSTRIP) {
377 while (i < len && isspace(Py_CHARMASK(s[i]))) {
378 i++;
382 j = len;
383 if (striptype != LEFTSTRIP) {
384 do {
385 j--;
386 } while (j >= i && isspace(Py_CHARMASK(s[j])));
387 j++;
390 if (i == 0 && j == len) {
391 Py_INCREF(args);
392 return args;
394 else
395 return PyString_FromStringAndSize(s+i, j-i);
399 PyDoc_STRVAR(strip__doc__,
400 "strip(s) -> string\n"
401 "\n"
402 "Return a copy of the string s with leading and trailing\n"
403 "whitespace removed.");
405 static PyObject *
406 strop_strip(PyObject *self, PyObject *args)
408 WARN;
409 return do_strip(args, BOTHSTRIP);
413 PyDoc_STRVAR(lstrip__doc__,
414 "lstrip(s) -> string\n"
415 "\n"
416 "Return a copy of the string s with leading whitespace removed.");
418 static PyObject *
419 strop_lstrip(PyObject *self, PyObject *args)
421 WARN;
422 return do_strip(args, LEFTSTRIP);
426 PyDoc_STRVAR(rstrip__doc__,
427 "rstrip(s) -> string\n"
428 "\n"
429 "Return a copy of the string s with trailing whitespace removed.");
431 static PyObject *
432 strop_rstrip(PyObject *self, PyObject *args)
434 WARN;
435 return do_strip(args, RIGHTSTRIP);
439 PyDoc_STRVAR(lower__doc__,
440 "lower(s) -> string\n"
441 "\n"
442 "Return a copy of the string s converted to lowercase.");
444 static PyObject *
445 strop_lower(PyObject *self, PyObject *args)
447 char *s, *s_new;
448 Py_ssize_t i, n;
449 PyObject *newstr;
450 int changed;
452 WARN;
453 if (PyString_AsStringAndSize(args, &s, &n))
454 return NULL;
455 newstr = PyString_FromStringAndSize(NULL, n);
456 if (newstr == NULL)
457 return NULL;
458 s_new = PyString_AsString(newstr);
459 changed = 0;
460 for (i = 0; i < n; i++) {
461 int c = Py_CHARMASK(*s++);
462 if (isupper(c)) {
463 changed = 1;
464 *s_new = tolower(c);
465 } else
466 *s_new = c;
467 s_new++;
469 if (!changed) {
470 Py_DECREF(newstr);
471 Py_INCREF(args);
472 return args;
474 return newstr;
478 PyDoc_STRVAR(upper__doc__,
479 "upper(s) -> string\n"
480 "\n"
481 "Return a copy of the string s converted to uppercase.");
483 static PyObject *
484 strop_upper(PyObject *self, PyObject *args)
486 char *s, *s_new;
487 Py_ssize_t i, n;
488 PyObject *newstr;
489 int changed;
491 WARN;
492 if (PyString_AsStringAndSize(args, &s, &n))
493 return NULL;
494 newstr = PyString_FromStringAndSize(NULL, n);
495 if (newstr == NULL)
496 return NULL;
497 s_new = PyString_AsString(newstr);
498 changed = 0;
499 for (i = 0; i < n; i++) {
500 int c = Py_CHARMASK(*s++);
501 if (islower(c)) {
502 changed = 1;
503 *s_new = toupper(c);
504 } else
505 *s_new = c;
506 s_new++;
508 if (!changed) {
509 Py_DECREF(newstr);
510 Py_INCREF(args);
511 return args;
513 return newstr;
517 PyDoc_STRVAR(capitalize__doc__,
518 "capitalize(s) -> string\n"
519 "\n"
520 "Return a copy of the string s with only its first character\n"
521 "capitalized.");
523 static PyObject *
524 strop_capitalize(PyObject *self, PyObject *args)
526 char *s, *s_new;
527 Py_ssize_t i, n;
528 PyObject *newstr;
529 int changed;
531 WARN;
532 if (PyString_AsStringAndSize(args, &s, &n))
533 return NULL;
534 newstr = PyString_FromStringAndSize(NULL, n);
535 if (newstr == NULL)
536 return NULL;
537 s_new = PyString_AsString(newstr);
538 changed = 0;
539 if (0 < n) {
540 int c = Py_CHARMASK(*s++);
541 if (islower(c)) {
542 changed = 1;
543 *s_new = toupper(c);
544 } else
545 *s_new = c;
546 s_new++;
548 for (i = 1; i < n; i++) {
549 int c = Py_CHARMASK(*s++);
550 if (isupper(c)) {
551 changed = 1;
552 *s_new = tolower(c);
553 } else
554 *s_new = c;
555 s_new++;
557 if (!changed) {
558 Py_DECREF(newstr);
559 Py_INCREF(args);
560 return args;
562 return newstr;
566 PyDoc_STRVAR(expandtabs__doc__,
567 "expandtabs(string, [tabsize]) -> string\n"
568 "\n"
569 "Expand tabs in a string, i.e. replace them by one or more spaces,\n"
570 "depending on the current column and the given tab size (default 8).\n"
571 "The column number is reset to zero after each newline occurring in the\n"
572 "string. This doesn't understand other non-printing characters.");
574 static PyObject *
575 strop_expandtabs(PyObject *self, PyObject *args)
577 /* Original by Fredrik Lundh */
578 char* e;
579 char* p;
580 char* q;
581 Py_ssize_t i, j;
582 PyObject* out;
583 char* string;
584 Py_ssize_t stringlen;
585 int tabsize = 8;
587 WARN;
588 /* Get arguments */
589 if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
590 return NULL;
591 if (tabsize < 1) {
592 PyErr_SetString(PyExc_ValueError,
593 "tabsize must be at least 1");
594 return NULL;
597 /* First pass: determine size of output string */
598 i = j = 0; /* j: current column; i: total of previous lines */
599 e = string + stringlen;
600 for (p = string; p < e; p++) {
601 if (*p == '\t')
602 j += tabsize - (j%tabsize);
603 else {
604 j++;
605 if (*p == '\n') {
606 i += j;
607 j = 0;
612 /* Second pass: create output string and fill it */
613 out = PyString_FromStringAndSize(NULL, i+j);
614 if (out == NULL)
615 return NULL;
617 i = 0;
618 q = PyString_AS_STRING(out);
620 for (p = string; p < e; p++) {
621 if (*p == '\t') {
622 j = tabsize - (i%tabsize);
623 i += j;
624 while (j-- > 0)
625 *q++ = ' ';
626 } else {
627 *q++ = *p;
628 i++;
629 if (*p == '\n')
630 i = 0;
634 return out;
638 PyDoc_STRVAR(count__doc__,
639 "count(s, sub[, start[, end]]) -> int\n"
640 "\n"
641 "Return the number of occurrences of substring sub in string\n"
642 "s[start:end]. Optional arguments start and end are\n"
643 "interpreted as in slice notation.");
645 static PyObject *
646 strop_count(PyObject *self, PyObject *args)
648 char *s, *sub;
649 Py_ssize_t len, n;
650 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
651 Py_ssize_t m, r;
653 WARN;
654 if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
655 return NULL;
656 if (last > len)
657 last = len;
658 if (last < 0)
659 last += len;
660 if (last < 0)
661 last = 0;
662 if (i < 0)
663 i += len;
664 if (i < 0)
665 i = 0;
666 m = last + 1 - n;
667 if (n == 0)
668 return PyInt_FromLong((long) (m-i));
670 r = 0;
671 while (i < m) {
672 if (!memcmp(s+i, sub, n)) {
673 r++;
674 i += n;
675 } else {
676 i++;
679 return PyInt_FromLong((long) r);
683 PyDoc_STRVAR(swapcase__doc__,
684 "swapcase(s) -> string\n"
685 "\n"
686 "Return a copy of the string s with upper case characters\n"
687 "converted to lowercase and vice versa.");
689 static PyObject *
690 strop_swapcase(PyObject *self, PyObject *args)
692 char *s, *s_new;
693 Py_ssize_t i, n;
694 PyObject *newstr;
695 int changed;
697 WARN;
698 if (PyString_AsStringAndSize(args, &s, &n))
699 return NULL;
700 newstr = PyString_FromStringAndSize(NULL, n);
701 if (newstr == NULL)
702 return NULL;
703 s_new = PyString_AsString(newstr);
704 changed = 0;
705 for (i = 0; i < n; i++) {
706 int c = Py_CHARMASK(*s++);
707 if (islower(c)) {
708 changed = 1;
709 *s_new = toupper(c);
711 else if (isupper(c)) {
712 changed = 1;
713 *s_new = tolower(c);
715 else
716 *s_new = c;
717 s_new++;
719 if (!changed) {
720 Py_DECREF(newstr);
721 Py_INCREF(args);
722 return args;
724 return newstr;
728 PyDoc_STRVAR(atoi__doc__,
729 "atoi(s [,base]) -> int\n"
730 "\n"
731 "Return the integer represented by the string s in the given\n"
732 "base, which defaults to 10. The string s must consist of one\n"
733 "or more digits, possibly preceded by a sign. If base is 0, it\n"
734 "is chosen from the leading characters of s, 0 for octal, 0x or\n"
735 "0X for hexadecimal. If base is 16, a preceding 0x or 0X is\n"
736 "accepted.");
738 static PyObject *
739 strop_atoi(PyObject *self, PyObject *args)
741 char *s, *end;
742 int base = 10;
743 long x;
744 char buffer[256]; /* For errors */
746 WARN;
747 if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
748 return NULL;
750 if ((base != 0 && base < 2) || base > 36) {
751 PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
752 return NULL;
755 while (*s && isspace(Py_CHARMASK(*s)))
756 s++;
757 errno = 0;
758 if (base == 0 && s[0] == '0')
759 x = (long) PyOS_strtoul(s, &end, base);
760 else
761 x = PyOS_strtol(s, &end, base);
762 if (end == s || !isalnum(Py_CHARMASK(end[-1])))
763 goto bad;
764 while (*end && isspace(Py_CHARMASK(*end)))
765 end++;
766 if (*end != '\0') {
767 bad:
768 PyOS_snprintf(buffer, sizeof(buffer),
769 "invalid literal for atoi(): %.200s", s);
770 PyErr_SetString(PyExc_ValueError, buffer);
771 return NULL;
773 else if (errno != 0) {
774 PyOS_snprintf(buffer, sizeof(buffer),
775 "atoi() literal too large: %.200s", s);
776 PyErr_SetString(PyExc_ValueError, buffer);
777 return NULL;
779 return PyInt_FromLong(x);
783 PyDoc_STRVAR(atol__doc__,
784 "atol(s [,base]) -> long\n"
785 "\n"
786 "Return the long integer represented by the string s in the\n"
787 "given base, which defaults to 10. The string s must consist\n"
788 "of one or more digits, possibly preceded by a sign. If base\n"
789 "is 0, it is chosen from the leading characters of s, 0 for\n"
790 "octal, 0x or 0X for hexadecimal. If base is 16, a preceding\n"
791 "0x or 0X is accepted. A trailing L or l is not accepted,\n"
792 "unless base is 0.");
794 static PyObject *
795 strop_atol(PyObject *self, PyObject *args)
797 char *s, *end;
798 int base = 10;
799 PyObject *x;
800 char buffer[256]; /* For errors */
802 WARN;
803 if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
804 return NULL;
806 if ((base != 0 && base < 2) || base > 36) {
807 PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
808 return NULL;
811 while (*s && isspace(Py_CHARMASK(*s)))
812 s++;
813 if (s[0] == '\0') {
814 PyErr_SetString(PyExc_ValueError, "empty string for atol()");
815 return NULL;
817 x = PyLong_FromString(s, &end, base);
818 if (x == NULL)
819 return NULL;
820 if (base == 0 && (*end == 'l' || *end == 'L'))
821 end++;
822 while (*end && isspace(Py_CHARMASK(*end)))
823 end++;
824 if (*end != '\0') {
825 PyOS_snprintf(buffer, sizeof(buffer),
826 "invalid literal for atol(): %.200s", s);
827 PyErr_SetString(PyExc_ValueError, buffer);
828 Py_DECREF(x);
829 return NULL;
831 return x;
835 PyDoc_STRVAR(atof__doc__,
836 "atof(s) -> float\n"
837 "\n"
838 "Return the floating point number represented by the string s.");
840 static PyObject *
841 strop_atof(PyObject *self, PyObject *args)
843 char *s, *end;
844 double x;
845 char buffer[256]; /* For errors */
847 WARN;
848 if (!PyArg_ParseTuple(args, "s:atof", &s))
849 return NULL;
850 while (*s && isspace(Py_CHARMASK(*s)))
851 s++;
852 if (s[0] == '\0') {
853 PyErr_SetString(PyExc_ValueError, "empty string for atof()");
854 return NULL;
856 errno = 0;
857 PyFPE_START_PROTECT("strop_atof", return 0)
858 x = PyOS_ascii_strtod(s, &end);
859 PyFPE_END_PROTECT(x)
860 while (*end && isspace(Py_CHARMASK(*end)))
861 end++;
862 if (*end != '\0') {
863 PyOS_snprintf(buffer, sizeof(buffer),
864 "invalid literal for atof(): %.200s", s);
865 PyErr_SetString(PyExc_ValueError, buffer);
866 return NULL;
868 else if (errno != 0) {
869 PyOS_snprintf(buffer, sizeof(buffer),
870 "atof() literal too large: %.200s", s);
871 PyErr_SetString(PyExc_ValueError, buffer);
872 return NULL;
874 return PyFloat_FromDouble(x);
878 PyDoc_STRVAR(maketrans__doc__,
879 "maketrans(frm, to) -> string\n"
880 "\n"
881 "Return a translation table (a string of 256 bytes long)\n"
882 "suitable for use in string.translate. The strings frm and to\n"
883 "must be of the same length.");
885 static PyObject *
886 strop_maketrans(PyObject *self, PyObject *args)
888 unsigned char *c, *from=NULL, *to=NULL;
889 Py_ssize_t i, fromlen=0, tolen=0;
890 PyObject *result;
892 if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
893 return NULL;
895 if (fromlen != tolen) {
896 PyErr_SetString(PyExc_ValueError,
897 "maketrans arguments must have same length");
898 return NULL;
901 result = PyString_FromStringAndSize((char *)NULL, 256);
902 if (result == NULL)
903 return NULL;
904 c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
905 for (i = 0; i < 256; i++)
906 c[i]=(unsigned char)i;
907 for (i = 0; i < fromlen; i++)
908 c[from[i]]=to[i];
910 return result;
914 PyDoc_STRVAR(translate__doc__,
915 "translate(s,table [,deletechars]) -> string\n"
916 "\n"
917 "Return a copy of the string s, where all characters occurring\n"
918 "in the optional argument deletechars are removed, and the\n"
919 "remaining characters have been mapped through the given\n"
920 "translation table, which must be a string of length 256.");
922 static PyObject *
923 strop_translate(PyObject *self, PyObject *args)
925 register char *input, *table, *output;
926 Py_ssize_t i;
927 int c, changed = 0;
928 PyObject *input_obj;
929 char *table1, *output_start, *del_table=NULL;
930 Py_ssize_t inlen, tablen, dellen = 0;
931 PyObject *result;
932 int trans_table[256];
934 WARN;
935 if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
936 &table1, &tablen, &del_table, &dellen))
937 return NULL;
938 if (tablen != 256) {
939 PyErr_SetString(PyExc_ValueError,
940 "translation table must be 256 characters long");
941 return NULL;
944 table = table1;
945 inlen = PyString_GET_SIZE(input_obj);
946 result = PyString_FromStringAndSize((char *)NULL, inlen);
947 if (result == NULL)
948 return NULL;
949 output_start = output = PyString_AsString(result);
950 input = PyString_AsString(input_obj);
952 if (dellen == 0) {
953 /* If no deletions are required, use faster code */
954 for (i = inlen; --i >= 0; ) {
955 c = Py_CHARMASK(*input++);
956 if (Py_CHARMASK((*output++ = table[c])) != c)
957 changed = 1;
959 if (changed)
960 return result;
961 Py_DECREF(result);
962 Py_INCREF(input_obj);
963 return input_obj;
966 for (i = 0; i < 256; i++)
967 trans_table[i] = Py_CHARMASK(table[i]);
969 for (i = 0; i < dellen; i++)
970 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
972 for (i = inlen; --i >= 0; ) {
973 c = Py_CHARMASK(*input++);
974 if (trans_table[c] != -1)
975 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
976 continue;
977 changed = 1;
979 if (!changed) {
980 Py_DECREF(result);
981 Py_INCREF(input_obj);
982 return input_obj;
984 /* Fix the size of the resulting string */
985 if (inlen > 0)
986 _PyString_Resize(&result, output - output_start);
987 return result;
991 /* What follows is used for implementing replace(). Perry Stoll. */
994 mymemfind
996 strstr replacement for arbitrary blocks of memory.
998 Locates the first occurrence in the memory pointed to by MEM of the
999 contents of memory pointed to by PAT. Returns the index into MEM if
1000 found, or -1 if not found. If len of PAT is greater than length of
1001 MEM, the function returns -1.
1003 static Py_ssize_t
1004 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1006 register Py_ssize_t ii;
1008 /* pattern can not occur in the last pat_len-1 chars */
1009 len -= pat_len;
1011 for (ii = 0; ii <= len; ii++) {
1012 if (mem[ii] == pat[0] &&
1013 (pat_len == 1 ||
1014 memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1015 return ii;
1018 return -1;
1022 mymemcnt
1024 Return the number of distinct times PAT is found in MEM.
1025 meaning mem=1111 and pat==11 returns 2.
1026 mem=11111 and pat==11 also return 2.
1028 static Py_ssize_t
1029 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1031 register Py_ssize_t offset = 0;
1032 Py_ssize_t nfound = 0;
1034 while (len >= 0) {
1035 offset = mymemfind(mem, len, pat, pat_len);
1036 if (offset == -1)
1037 break;
1038 mem += offset + pat_len;
1039 len -= offset + pat_len;
1040 nfound++;
1042 return nfound;
1046 mymemreplace
1048 Return a string in which all occurrences of PAT in memory STR are
1049 replaced with SUB.
1051 If length of PAT is less than length of STR or there are no occurrences
1052 of PAT in STR, then the original string is returned. Otherwise, a new
1053 string is allocated here and returned.
1055 on return, out_len is:
1056 the length of output string, or
1057 -1 if the input string is returned, or
1058 unchanged if an error occurs (no memory).
1060 return value is:
1061 the new string allocated locally, or
1062 NULL if an error occurred.
1064 static char *
1065 mymemreplace(const char *str, Py_ssize_t len, /* input string */
1066 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
1067 const char *sub, Py_ssize_t sub_len, /* substitution string */
1068 Py_ssize_t count, /* number of replacements */
1069 Py_ssize_t *out_len)
1071 char *out_s;
1072 char *new_s;
1073 Py_ssize_t nfound, offset, new_len;
1075 if (len == 0 || pat_len > len)
1076 goto return_same;
1078 /* find length of output string */
1079 nfound = mymemcnt(str, len, pat, pat_len);
1080 if (count < 0)
1081 count = PY_SSIZE_T_MAX;
1082 else if (nfound > count)
1083 nfound = count;
1084 if (nfound == 0)
1085 goto return_same;
1087 new_len = len + nfound*(sub_len - pat_len);
1088 if (new_len == 0) {
1089 /* Have to allocate something for the caller to free(). */
1090 out_s = (char *)PyMem_MALLOC(1);
1091 if (out_s == NULL)
1092 return NULL;
1093 out_s[0] = '\0';
1095 else {
1096 assert(new_len > 0);
1097 new_s = (char *)PyMem_MALLOC(new_len);
1098 if (new_s == NULL)
1099 return NULL;
1100 out_s = new_s;
1102 for (; count > 0 && len > 0; --count) {
1103 /* find index of next instance of pattern */
1104 offset = mymemfind(str, len, pat, pat_len);
1105 if (offset == -1)
1106 break;
1108 /* copy non matching part of input string */
1109 memcpy(new_s, str, offset);
1110 str += offset + pat_len;
1111 len -= offset + pat_len;
1113 /* copy substitute into the output string */
1114 new_s += offset;
1115 memcpy(new_s, sub, sub_len);
1116 new_s += sub_len;
1118 /* copy any remaining values into output string */
1119 if (len > 0)
1120 memcpy(new_s, str, len);
1122 *out_len = new_len;
1123 return out_s;
1125 return_same:
1126 *out_len = -1;
1127 return (char *)str; /* cast away const */
1131 PyDoc_STRVAR(replace__doc__,
1132 "replace (str, old, new[, maxsplit]) -> string\n"
1133 "\n"
1134 "Return a copy of string str with all occurrences of substring\n"
1135 "old replaced by new. If the optional argument maxsplit is\n"
1136 "given, only the first maxsplit occurrences are replaced.");
1138 static PyObject *
1139 strop_replace(PyObject *self, PyObject *args)
1141 char *str, *pat,*sub,*new_s;
1142 Py_ssize_t len,pat_len,sub_len,out_len;
1143 Py_ssize_t count = -1;
1144 PyObject *newstr;
1146 WARN;
1147 if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
1148 &str, &len, &pat, &pat_len, &sub, &sub_len,
1149 &count))
1150 return NULL;
1151 if (pat_len <= 0) {
1152 PyErr_SetString(PyExc_ValueError, "empty pattern string");
1153 return NULL;
1155 /* CAUTION: strop treats a replace count of 0 as infinity, unlke
1156 * current (2.1) string.py and string methods. Preserve this for
1157 * ... well, hard to say for what <wink>.
1159 if (count == 0)
1160 count = -1;
1161 new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1162 if (new_s == NULL) {
1163 PyErr_NoMemory();
1164 return NULL;
1166 if (out_len == -1) {
1167 /* we're returning another reference to the input string */
1168 newstr = PyTuple_GetItem(args, 0);
1169 Py_XINCREF(newstr);
1171 else {
1172 newstr = PyString_FromStringAndSize(new_s, out_len);
1173 PyMem_FREE(new_s);
1175 return newstr;
1179 /* List of functions defined in the module */
1181 static PyMethodDef
1182 strop_methods[] = {
1183 {"atof", strop_atof, METH_VARARGS, atof__doc__},
1184 {"atoi", strop_atoi, METH_VARARGS, atoi__doc__},
1185 {"atol", strop_atol, METH_VARARGS, atol__doc__},
1186 {"capitalize", strop_capitalize, METH_O, capitalize__doc__},
1187 {"count", strop_count, METH_VARARGS, count__doc__},
1188 {"expandtabs", strop_expandtabs, METH_VARARGS, expandtabs__doc__},
1189 {"find", strop_find, METH_VARARGS, find__doc__},
1190 {"join", strop_joinfields, METH_VARARGS, joinfields__doc__},
1191 {"joinfields", strop_joinfields, METH_VARARGS, joinfields__doc__},
1192 {"lstrip", strop_lstrip, METH_O, lstrip__doc__},
1193 {"lower", strop_lower, METH_O, lower__doc__},
1194 {"maketrans", strop_maketrans, METH_VARARGS, maketrans__doc__},
1195 {"replace", strop_replace, METH_VARARGS, replace__doc__},
1196 {"rfind", strop_rfind, METH_VARARGS, rfind__doc__},
1197 {"rstrip", strop_rstrip, METH_O, rstrip__doc__},
1198 {"split", strop_splitfields, METH_VARARGS, splitfields__doc__},
1199 {"splitfields", strop_splitfields, METH_VARARGS, splitfields__doc__},
1200 {"strip", strop_strip, METH_O, strip__doc__},
1201 {"swapcase", strop_swapcase, METH_O, swapcase__doc__},
1202 {"translate", strop_translate, METH_VARARGS, translate__doc__},
1203 {"upper", strop_upper, METH_O, upper__doc__},
1204 {NULL, NULL} /* sentinel */
1208 PyMODINIT_FUNC
1209 initstrop(void)
1211 PyObject *m, *s;
1212 char buf[256];
1213 int c, n;
1214 m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1215 (PyObject*)NULL, PYTHON_API_VERSION);
1216 if (m == NULL)
1217 return;
1219 /* Create 'whitespace' object */
1220 n = 0;
1221 for (c = 0; c < 256; c++) {
1222 if (isspace(c))
1223 buf[n++] = c;
1225 s = PyString_FromStringAndSize(buf, n);
1226 if (s)
1227 PyModule_AddObject(m, "whitespace", s);
1229 /* Create 'lowercase' object */
1230 n = 0;
1231 for (c = 0; c < 256; c++) {
1232 if (islower(c))
1233 buf[n++] = c;
1235 s = PyString_FromStringAndSize(buf, n);
1236 if (s)
1237 PyModule_AddObject(m, "lowercase", s);
1239 /* Create 'uppercase' object */
1240 n = 0;
1241 for (c = 0; c < 256; c++) {
1242 if (isupper(c))
1243 buf[n++] = c;
1245 s = PyString_FromStringAndSize(buf, n);
1246 if (s)
1247 PyModule_AddObject(m, "uppercase", s);