Initial commit of newLISP.
[newlisp.git] / nl-string.c
blob77d26a2aa6668b64fe27bb3f303bacef53249bda
1 /* nl-string.c
3 Copyright (C) 2008 Lutz Mueller
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "newlisp.h"
21 #ifdef SUPPORT_UTF8
22 #include <wctype.h>
23 #endif
24 #include "pcre.h"
25 #include "protos.h"
27 extern SYMBOL * sysSymbol[];
28 #define OVECCOUNT (MAX_REGEX_EXP * 3) /* max sub expressions in PCRE */
30 void regexError(char * msg_1, int num, const char * msg_2);
31 pcre * pcreCachedCompile(char * pattern, int options);
33 CELL * cellTokenString(char * *source, size_t * srclen, char *separator, pcre *re);
35 #ifdef USE_WIN_UTF16PATH
36 int open_utf16(const char* a, int b, int c);
37 #endif
39 /* ---------------------------- string processing ----------------------- */
41 int my_strnicmp(char * s1, char * s2, ssize_t size)
43 while(toupper(*s1) == toupper(*s2))
45 if(--size <= 0) return(0);
46 s1++, s2++;
48 return(toupper(*s1) - toupper(*s2));
52 CELL * substring(char * string, ssize_t slen, ssize_t offset, ssize_t len)
54 if(offset < 0)
56 offset = slen + offset;
57 if(offset < 0) offset = 0;
59 else
60 offset = (offset > slen) ? slen : offset;
62 if(len < 0)
64 len = slen - offset + len;
65 if(len < 0) len = 0;
68 if(len == MAX_LONG)
69 len = slen - offset;
70 else
71 len = ((offset + len) > slen) ? (slen - offset) : len;
73 return(stuffStringN(string + offset, len));
76 #define SEARCH_SIZE 0x1000
78 CELL * p_search(CELL * params)
80 UINT fileHandle;
81 ssize_t bytesRead;
82 ssize_t position;
83 int flag = 0;
84 CELL * next;
86 #ifdef LFS
87 off_t filePosition;
88 off_t foundPosition;
89 double result;
90 #else
91 ssize_t filePosition;
92 ssize_t foundPosition;
93 #endif
95 char * searchString;
96 char * buffer;
97 long options;
98 size_t len;
100 params = getInteger(params, (UINT *)&fileHandle);
101 params = getStringSize(params, &searchString, &len, TRUE);
102 if(len == 0) return(nilCell);
104 options = -1;
105 if(params != nilCell)
107 next = params->next;
108 params = evaluateExpression(params);
109 if(isNumber(params->type))
110 getIntegerExt(params, (UINT *)&options, FALSE);
111 else
113 flag = (params->type != CELL_NIL);
114 if(next != nilCell)
115 getInteger(next, (UINT *)&options);
119 buffer = (char *)allocMemory(SEARCH_SIZE + 1);
120 if((filePosition = lseek((int)fileHandle, 0, SEEK_CUR)) == -1)
121 return(nilCell);
123 foundPosition = 0;
127 memset(buffer, 0, SEARCH_SIZE + 1);
128 bytesRead = read((int)fileHandle, buffer, SEARCH_SIZE);
130 if(options == -1)
131 position = searchBuffer(buffer, bytesRead, searchString, len, 1);
132 else
133 position = searchBufferRegex(buffer, 0, searchString, (int)bytesRead , options, (int *)&len);
135 if(position != -1)
137 if(flag)
138 foundPosition = filePosition + position + len;
139 else
140 foundPosition = filePosition + position;
141 break;
144 filePosition = filePosition + bytesRead - len;
145 lseek((int)fileHandle, filePosition, SEEK_SET);
146 } while (bytesRead == SEARCH_SIZE);
148 freeMemory(buffer);
150 if(position == -1) return(nilCell);
152 lseek((int)fileHandle, foundPosition, SEEK_SET);
153 #ifdef LFS
154 result = foundPosition;
155 return(stuffFloat(&result));
156 #else
157 return(stuffInteger(foundPosition));
158 #endif
163 CELL * implicitIndexString(CELL * cell, CELL * params)
165 ssize_t index;
166 char * ptr;
167 #ifndef SUPPORT_UTF8
168 char str[2];
169 #else
170 ssize_t i, p;
171 #endif
173 ptr = (char*)cell->contents;
174 getInteger(params, (UINT *)&index);
176 #ifndef SUPPORT_UTF8
177 index = adjustNegativeIndex(index, cell->aux - 1);
178 str[0] = *(ptr + index);
179 str[1] = 0;
180 return(stuffString(str));
181 #else
182 index = adjustNegativeIndex(index, utf8_wlen(ptr));
183 for(i = 0; i < index; i++)
185 p = utf8_1st_len(ptr);
186 ptr += p;
188 return(stuffStringN(ptr, utf8_1st_len(ptr)));
189 #endif
193 CELL * p_char(CELL * params)
195 char * string;
196 ssize_t offset;
197 size_t len;
198 int num;
199 CELL * datCell;
200 char buff[2];
203 datCell = evaluateExpression(params);
204 switch(datCell->type)
206 case CELL_STRING:
207 string = (char *)datCell->contents;
209 #ifndef SUPPORT_UTF8
210 len = (size_t)datCell->aux - 1;
211 #else
212 len = utf8_wlen(string);
213 #endif
215 if(params->next != nilCell)
216 getInteger(params->next, (UINT*)&offset);
217 else offset = 0;
219 offset = adjustNegativeIndex(offset, len);
221 #ifndef SUPPORT_UTF8
222 return(stuffInteger((UINT)*((unsigned char *)string + (UINT)offset)));
224 case CELL_LONG:
225 buff[0] = (int)datCell->contents;
226 break;
227 #ifndef NEWLISP64
228 case CELL_INT64:
229 num = *(INT64 *)&datCell->aux;
230 buff[0] = num;
231 break;
232 case CELL_FLOAT:
233 num = *(double*)&datCell->aux;
234 buff[0] = num;
235 #else /* NEWLISP64 */
236 case CELL_FLOAT:
237 num = *(double*)&datCell->contents;
238 buff[0] = num;
239 #endif /* NEWLISP64 */
240 break;
243 #else /* SUPPORT_UTF8 */
244 while(offset--) string += utf8_1st_len(string);
245 utf8_wchar(string, &num);
246 return(stuffInteger(num));
248 #ifndef NEWLISP64
249 case CELL_FLOAT:
250 case CELL_LONG:
251 case CELL_INT64:
252 string = allocMemory(UTF8_MAX_BYTES + 1);
253 if(datCell->type == CELL_FLOAT)
254 num = *(double*)&datCell->aux;
255 else if(datCell->type == CELL_INT64)
256 num = *(INT64 *)&datCell->aux;
257 else
258 num = datCell->contents;
259 #else /* NEWLISP64 */
260 case CELL_FLOAT:
261 case CELL_LONG:
262 string = allocMemory(UTF8_MAX_BYTES + 1);
263 if(datCell->type == CELL_FLOAT)
264 num = *(double*)&datCell->contents;
265 else
266 num = (int)datCell->contents;
267 #endif /* NEWLISP 64 */
268 len = wchar_utf8(num, string);
269 datCell = stuffStringN(string, len);
270 free(string);
271 return(datCell);
272 #endif /* SUPPORT_UTF8 */
273 default:
274 buff[0] = 0;
277 return(stuffStringN(buff, 1));
281 CELL * p_explode(CELL * params)
283 char * string;
284 CELL * cell;
285 CELL * result;
286 ssize_t size;
287 ssize_t len = 1;
288 int flag = 0;
289 #ifdef SUPPORT_UTF8
290 int clen, i;
291 #endif
293 cell = evaluateExpression(params);
294 if(isList(cell->type))
295 return(explodeList((CELL*)cell->contents, params->next));
297 getStringSize(cell, &string, (size_t *)&size, TRUE);
298 if(params->next != nilCell)
300 params = getInteger(params->next, (UINT *)&len);
301 flag = getFlag(params);
302 if(!flag && len > size) len = size;
305 result = cell = getCell(CELL_EXPRESSION);
306 if(size == 0 || len <= 0) return(result);
308 #ifndef SUPPORT_UTF8
309 if(flag && size < len) return(result);
311 cell->contents = (UINT)stuffStringN(string, len);
312 cell = (CELL *)cell->contents;
313 string += len;
315 while((size -= len) > 0)
317 if(flag && size < len) break;
318 cell->next = stuffStringN(string, (size >= len) ? len : size);
319 cell = cell->next;
320 string += len;
322 #else
323 size = utf8_wlen(string);
324 for(i = 0, clen = 0; i < len; i++)
325 clen += utf8_1st_len(string + clen);
327 if(flag && size < len) return(result);
329 cell->contents = (UINT)stuffStringN(string, clen);
330 cell = (CELL *)cell->contents;
331 string += clen;
332 while((size -= len) > 0)
334 if(flag && size < len) break;
335 for(i = 0, clen = 0; i < len; i++)
336 clen += utf8_1st_len(string + clen);
337 cell->next = stuffStringN(string, clen);
338 cell = cell->next;
339 string += clen;
341 #endif
343 return(result);
347 #define STR_LOWER 0
348 #define STR_UPPER 1
349 #define STR_TITLE 2
352 CELL * strUpperLower(CELL * params, int type)
354 #ifndef SUPPORT_UTF8
355 char * string;
356 char * ptr;
357 #else
358 char * utf8str;
359 int * unicode;
360 int * ptr;
361 size_t size;
362 #endif
363 CELL * cell;
364 int option = FALSE;
366 #ifndef SUPPORT_UTF8
367 params = getString(params, &string);
368 if(params != nilCell)
370 params = evaluateExpression(params);
371 if(params->contents != (UINT)nilCell)
372 option = TRUE;
375 cell = stuffString(string);
376 ptr = (void *)cell->contents;
378 if(type == STR_UPPER)
379 while(*ptr) { *ptr = toupper(*ptr); ptr++; }
380 else
382 if(type == STR_TITLE)
383 if(*ptr) { *ptr = toupper(*ptr); ptr++; }
384 if(((type == STR_TITLE) && option) || (type == STR_LOWER))
385 while(*ptr) { *ptr = tolower(*ptr); ptr++; }
387 #else
389 params = getString(params, &utf8str);
390 option = getFlag(params);
392 size = utf8_wlen(utf8str);
394 unicode = allocMemory((size + 1) * sizeof(int));
395 size = utf8_wstr(unicode, utf8str, size);
397 ptr = unicode;
399 /* Note that on many platforms towupper/towlower
400 do not work correctly for non-ascii unicodes */
402 if(type == STR_UPPER)
403 while(*ptr) { *ptr = towupper(*ptr); ptr++; }
404 else
406 if(type == STR_TITLE)
407 if(*ptr) { *ptr = towupper(*ptr); ptr++; }
408 if(((type == STR_TITLE) && option) || (type == STR_LOWER))
409 while(*ptr) { *ptr = towlower(*ptr); ptr++; }
412 utf8str = allocMemory(size * UTF8_MAX_BYTES + 1);
413 size = wstr_utf8(utf8str, unicode, size * UTF8_MAX_BYTES + 1);
414 utf8str = reallocMemory(utf8str, size + 1);
416 free(unicode);
417 cell = getCell(CELL_STRING);
418 cell->contents = (UINT)utf8str;
419 cell->aux = size + 1;
420 #endif
422 return(cell);
425 CELL * p_upper(CELL * params) {return(strUpperLower(params, STR_UPPER));}
426 CELL * p_lower(CELL * params) {return(strUpperLower(params, STR_LOWER));}
427 CELL * p_title(CELL * params) {return(strUpperLower(params, STR_TITLE));}
429 char * getFormatType(char * fmt, int * type)
431 PARSE_FORMAT:
433 /* get % */
434 while(*fmt != '%' && *fmt != 0) fmt++;
435 if(*fmt == 0)
437 *type = 0;
438 return(fmt);
440 fmt++;
442 /* is it double %% for '%' */
443 if(*fmt == '%')
445 fmt++;
446 goto PARSE_FORMAT;
449 /* get width spec */
451 /* force + before numbers */
452 if(*fmt == '+')
453 fmt++;
454 else
456 /* left align numbers or strings */
457 if(*fmt == '-')
459 fmt++;
460 /* force + before numbers */
461 if(*fmt == '+') fmt++;
465 while(isdigit((int)*fmt) && *fmt != 0) fmt++;
466 if(*fmt == 0) return(NULL);
468 /* its a float or string with dot precision*/
469 if(*fmt == '.')
471 fmt++;
472 while(isdigit((int)*fmt) && *fmt !=0) fmt++;
473 if(*fmt == 0) return(NULL);
475 if(*fmt == 'f' || *fmt == 'g' || *fmt =='G' || *fmt == 'e' || *fmt == 'E')
477 *type = CELL_FLOAT;
478 return(++fmt);
480 if(*fmt == 's')
482 *type = CELL_STRING;
483 return(++fmt);
485 else return(NULL);
488 /* its a float without dot */
489 if(*fmt == 'f' || *fmt == 'g' || *fmt =='G' || *fmt == 'e' || *fmt == 'E')
491 *type = CELL_FLOAT;
492 return(++fmt);
495 /* its an integer or character */
496 #ifdef TRU64
497 if(*fmt == 'd' || *fmt == 'i' || *fmt == 'u' || *fmt == 'x' || *fmt == 'X' || *fmt == 'c' || *fmt == 'o')
498 #else
499 if(*fmt == 'd' || *fmt == 'u' || *fmt == 'x' || *fmt == 'X' || *fmt == 'c' || *fmt == 'o')
500 #endif
502 #ifdef TRU64
503 #ifndef NEWLISP64
504 *type = CELL_INT64;
505 #else
506 *type = CELL_LONG;
507 #endif
508 #else
509 *type = CELL_LONG;
510 #endif
511 return(++fmt);
514 /* its a string */
515 if(*fmt == 's')
517 *type = CELL_STRING;
518 return(++fmt);
521 #ifndef WIN_32
522 #ifdef TRU64 /* supporting ld, li, lu, lx, lX formats */
523 if(*fmt == 'l' &&
524 (*(fmt + 1) == 'd' || *(fmt + 1) == 'i' || *(fmt + 1) == 'u' || *(fmt + 1) =='x' || *(fmt + 1) == 'X'))
526 #ifndef NEWLISP64
527 *type = CELL_INT64;
528 #else
529 *type = CELL_LONG;
530 #endif
531 return(fmt+2);
533 #else /* all other UNIX suporting lld, llu, llx, llX formats */
534 if(*fmt == 'l' && *(fmt + 1) == 'l' &&
535 (*(fmt + 2) == 'd' || *(fmt + 2) == 'u' || *(fmt + 2) =='x' || *(fmt + 2) == 'X'))
537 #ifndef NEWLISP64
538 *type = CELL_INT64;
539 #else
540 *type = CELL_LONG;
541 #endif
542 return(fmt+3);
544 #endif
545 #else /* MinGW uses MS conventions */
546 if(memcmp(fmt, "I64", 3) == 0 &&
547 (*(fmt + 3) == 'd' || *(fmt + 3) == 'u' || *(fmt + 3) =='x' || *(fmt + 3) == 'X'))
549 *type = CELL_INT64;
550 return(fmt+4);
552 #endif
554 /* L and q seem not to be suported on most GCC although in the docs
555 if(*fmt == 'L' && (*(fmt + 1) == 'd' || *(fmt + 1) =='x' || *(fmt + 1) == 'X'))
557 *type = CELL_INT64;
558 return(fmt+3);
562 /* its a wchar_t unicode string */
563 if(*fmt == 'l' && *(fmt + 1) == 's')
565 *type = CELL_STRING;
566 return(fmt+2);
569 return(NULL);
572 CELL * p_format(CELL * params)
574 char * format;
575 char * fmt;
576 char * nextfmt;
577 CELL * cell;
578 STREAM fmtStream;
579 int fType;
580 double floatNum;
581 UINT intNum;
582 #ifndef NEWLISP64
583 INT64 bigNum;
584 #endif
585 int evalFlag = TRUE;
586 char saveChar;
588 params = getString(params, &format);
589 fmt = format;
591 openStrStream(&fmtStream, MAX_STRING, 0);
593 while(params->type != CELL_NIL)
595 /* printf("entry>%s<\n", fmt); */
596 nextfmt = getFormatType(fmt, &fType);
597 /* printf("exit>%s<\n", nextfmt); */
599 if(nextfmt == NULL)
601 closeStrStream(&fmtStream);
602 return(errorProcExt2(ERR_FORMAT_STRING, stuffString(format)));
605 saveChar = *nextfmt;
606 *nextfmt = 0;
608 if(evalFlag)
609 cell = evaluateExpression(params);
610 else
611 cell = params;
613 if(cell->type == CELL_EXPRESSION && fmt == format)
615 params = (CELL *)cell->contents;
616 evalFlag = FALSE;
617 cell = params;
620 if(fType == 0)
622 *nextfmt = saveChar;
623 closeStrStream(&fmtStream);
624 return(errorProcExt(ERR_FORMAT_NUM_ARGS, params));
627 if(fType == CELL_LONG)
629 if(isNumber(cell->type))
630 cell = getIntegerExt(cell, &intNum, FALSE);
631 else goto FORMAT_DATA_ERROR;
633 varPrintf((UINT)&fmtStream, fmt, intNum);
634 goto NEXT_FORMAT;
636 #ifndef NEWLISP64
637 if(fType == CELL_INT64)
639 if(isNumber(cell->type))
640 cell = getInteger64(cell, &bigNum);
641 else goto FORMAT_DATA_ERROR;
643 varPrintf((UINT)&fmtStream, fmt, bigNum);
644 goto NEXT_FORMAT;
646 #endif
647 if(fType == CELL_FLOAT)
649 if(cell->type == CELL_FLOAT)
650 #ifndef NEWLISP64
651 floatNum = *(double *)&cell->aux;
652 else if(cell->type == CELL_INT64)
653 floatNum = *(INT64 *)&cell->aux;
654 #else
655 floatNum = *(double *)&cell->contents;
656 #endif
657 else if(cell->type == CELL_LONG)
658 floatNum = (long)cell->contents;
659 else goto FORMAT_DATA_ERROR;
661 varPrintf((UINT)&fmtStream, fmt, floatNum);
662 goto NEXT_FORMAT;
665 if(fType != cell->type)
666 goto FORMAT_DATA_ERROR;
668 /* printf("stream>%s< with >%s<\n", (char *)cell->contents, fmt); */
669 varPrintf((UINT)&fmtStream, fmt, cell->contents);
671 NEXT_FORMAT:
672 *nextfmt = saveChar;
673 fmt = nextfmt;
674 params = params->next;
675 continue;
677 FORMAT_DATA_ERROR:
678 *nextfmt = saveChar;
679 closeStrStream(&fmtStream);
680 return(errorProcExt(ERR_FORMAT_DATA_TYPE, params));
683 getFormatType(fmt, &fType);
684 if(fType != 0)
686 closeStrStream(&fmtStream);
687 errorProcExt2(ERR_FORMAT_NUM_ARGS, stuffString(format));
690 varPrintf((UINT)&fmtStream, fmt);
691 /* writeStreamStr(&fmtStream, fmt, 0); */
693 cell = getCell(CELL_STRING);
694 cell->aux = fmtStream.position + 1;
695 cell->contents = (UINT)fmtStream.buffer;
697 return(cell);
700 void openStrStream(STREAM * stream, size_t buffSize, int reopenFlag)
702 if(stream->buffer != NULL && reopenFlag)
703 freeMemory(stream->buffer);
704 stream->buffer = stream->ptr = callocMemory(buffSize + 1);
705 stream->size = buffSize;
706 stream->position = stream->handle = 0;
709 void closeStrStream(STREAM * stream)
711 if(stream->buffer != NULL)
712 freeMemory(stream->buffer);
713 stream->buffer = stream->ptr = NULL;
714 stream->size = stream->position = 0;
715 if(stream->handle != 0)
717 close((int)stream->handle);
718 stream->handle = 0;
723 void writeStreamChar(STREAM * stream, char chr)
725 if(stream->position == stream->size)
727 stream->size += stream->size / 2;
728 stream->buffer = reallocMemory(stream->buffer, stream->size + 1);
729 memset(stream->buffer + stream->position, 0, stream->size - stream->position + 1);
730 stream->ptr = stream->buffer + stream->position;
732 *(stream->ptr++) = chr;
733 stream->position++;
737 void writeStreamStr(STREAM * stream, char * buff, size_t length)
739 size_t newPosition;
741 if(length == 0) length = strlen(buff);
742 newPosition = stream->position + length;
744 if(newPosition >= stream->size)
746 while(newPosition >= stream->size)
747 stream->size += stream->size / 2;
748 stream->buffer = reallocMemory(stream->buffer, stream->size + 1);
749 memset(stream->buffer + stream->position, 0, stream->size - stream->position + 1);
750 stream->ptr = stream->buffer + stream->position;
753 memcpy(stream->ptr, buff, length);
754 stream->ptr += length;
755 stream->position = newPosition;
759 /* creates a memory buffer and reads byte into it until
760 the limiter is found
763 char * readStreamText(STREAM * stream, char * limit)
765 STREAM outStream;
766 ssize_t findPos = -1;
767 size_t searchLen;
768 size_t llen = strlen(limit);
769 char * result;
771 memset(&outStream, 0, sizeof(STREAM));
772 openStrStream(&outStream, MAX_STRING, 0);
773 while(findPos == -1)
775 if((searchLen = strlen(stream->ptr)) < llen)
776 break;
777 findPos = searchBuffer(stream->ptr, searchLen, limit, llen, TRUE);
778 if(findPos != -1)
780 if(findPos > 0)
781 writeStreamStr(&outStream, stream->ptr, findPos);
782 stream->ptr += findPos + llen;
783 result = allocMemory(outStream.position + 1);
784 memcpy(result, outStream.buffer, outStream.position);
785 *(result + outStream.position) = 0;
786 closeStrStream(&outStream);
787 return(result);
790 writeStreamStr(&outStream, stream->ptr, searchLen - llen);
792 /* adjustment for the first time, after it will be 0 always */
793 stream->position += (stream->ptr - stream->buffer);
795 stream->position += searchLen - llen;
797 if(stream->handle == 0) /* its not a file */
799 stream->buffer = stream->ptr;
800 break;
802 else
804 lseek(stream->handle, stream->position, SEEK_SET);
805 memset(stream->buffer, 0, stream->size + 1);
806 if(read(stream->handle, stream->buffer, stream->size) > 0)
807 stream->ptr = stream->buffer;
808 else
810 *stream->ptr = 0;
811 break;
816 closeStrStream(&outStream);
818 return(NULL);
822 /* this is only used for reading with getToken()
823 stream->size does not reflect the real size of
824 the buffer as in makeStreamFromFile()
826 void makeStreamFromString(STREAM * stream, char * str)
828 memset(stream, 0, sizeof(STREAM));
829 stream->buffer = stream->ptr = str;
830 /* make getToken work to the end of str */
831 stream->size = strlen(str) + 4 * MAX_STRING;
834 int makeStreamFromFile(STREAM * stream, char * fileName, size_t size, size_t offset)
836 if((stream->handle = open(fileName, O_RDONLY | O_BINARY, 0)) == -1)
837 return(0);
839 stream->ptr = stream->buffer = (char *)callocMemory(size + 1);
840 if(offset != 0)
841 lseek((int)stream->handle, offset, SEEK_SET);
843 stream->position = offset;
844 stream->size = size;
846 /* load first buffer */
847 if(read(stream->handle, stream->buffer, size) <= 0)
849 closeStrStream(stream);
850 return(0);
853 return(TRUE);
857 ssize_t searchBuffer(char * buffer, size_t length, char * string, size_t size, int caseFlag)
859 size_t position = 0;
861 if(caseFlag == FALSE)
863 while(position < length)
865 if(toupper(*buffer) == toupper(*string))
866 if(my_strnicmp(buffer, string, size) == 0) break;
867 position++;
868 buffer++;
871 else /* case sensitive */
873 while(position < length)
875 if(*buffer == *string)
876 if(memcmp(buffer, string, size) == 0) break;
877 position++;
878 buffer++;
882 if(position == length) return(-1);
883 return(position);
886 /* eliminated in v.8.4.1
887 CELL * patternMatchS(char * pattern, char * string)
889 CELL * cell;
890 CELL * match;
891 CELL * star, * plus;
892 char * start;
893 int starLen, len;
895 start = string; starLen = len = 0;
896 MATCH:
897 switch(*pattern)
899 case '\\':
900 ++pattern;
901 if(*pattern != *string) return(nilCell);
902 break;
903 case 0:
904 if(*string != 0) return(nilCell);
905 return(stuffStringN(start, len));
906 case '?':
907 if(*string == 0) return(nilCell);
908 break;
909 case '#':
910 if(*string == 0) return(nilCell);
911 if(!isDigit((unsigned char)*string)) return(nilCell);
912 break;
914 case '+':
915 if(*(pattern + 1) == 0)
917 if(*string == 0)
918 plus = stuffString("");
919 else if(*(string + 1) == 0)
920 plus = stuffStringN(string, 1);
921 else return(nilCell);
923 if(len == 0) return(plus);
924 cell = stuffStringN(start, len);
925 cell->next = plus;
926 return(cell);
929 if((match = patternMatchS(pattern+1, string)) != nilCell)
930 plus = stuffString("");
931 else if((match = patternMatchS(pattern+1, string+1)) != nilCell)
932 plus = stuffStringN(string, 1);
933 else return(nilCell);
935 if(len == 0) cell = plus;
936 else
938 cell = stuffStringN(start, len);
939 cell->next = plus;
941 plus->next = match;
942 return(cell);
944 case '*':
945 if(*(pattern + 1) == 0)
947 if(len == 0) return(stuffString(string));
948 cell = stuffStringN(start, len);
949 cell->next = stuffString(string);
950 return(cell);
953 if((match = patternMatchS(pattern+1, string)) != nilCell)
955 star = stuffStringN(start+len, starLen);
956 if(len == 0) cell = star;
957 else
959 cell = stuffStringN(start, len);
960 cell->next = star;
962 star->next = match;
963 return(cell);
966 if(*string != 0) ++string, ++starLen;
967 else return(nilCell);
968 goto MATCH;
970 default:
971 if(*pattern != *string) return(nilCell);
972 break;
974 ++pattern;
975 ++string;
976 ++len;
977 goto MATCH;
982 CELL * p_integer(CELL * params)
984 char * intString;
985 INT64 num;
986 long base;
987 CELL * deflt;
988 INT64 result;
990 deflt = params->next;
991 params = evaluateExpression(params);
993 if(params->type == CELL_STRING)
994 intString = (char *)params->contents;
995 else if(isNumber(params->type))
997 getInteger64(params, &num);
998 return(stuffInteger64(num));
1000 else
1001 return(copyCell(evaluateExpression(deflt)));
1003 while(isspace((int)*intString)) intString++;
1004 if(!isDigit((unsigned char)*intString))
1006 if(*intString != '-' && *intString != '+')
1007 return(copyCell(evaluateExpression(deflt)));
1008 if(!isDigit((unsigned char)*(intString+1)))
1009 return(copyCell(evaluateExpression(deflt)));
1012 if(deflt->next != nilCell)
1013 getInteger(deflt->next, (UINT *)&base);
1014 else base = 0;
1015 #ifdef TRU64
1016 result = strtoul(intString, NULL, base);
1017 #else
1018 result = strtoull(intString,(char **)0, base);
1019 #endif
1020 return(stuffInteger64(result));
1023 CELL * p_float(CELL * params)
1025 char * fltString;
1026 double value;
1027 CELL * deflt;
1029 deflt = params->next;
1030 params = evaluateExpression(params);
1032 if(params->type == CELL_STRING)
1033 fltString = (char *)params->contents;
1034 else if(isNumber(params->type))
1036 getFloat(params, &value);
1037 return(stuffFloat(&value));
1039 else
1040 return(copyCell(evaluateExpression(deflt)));
1042 while(isspace((int)*fltString)) fltString++;
1043 if(!isDigit((unsigned char)*fltString))
1045 if(*fltString != '-' && *fltString != '+' && *fltString != lc_decimal_point)
1046 return(copyCell(evaluateExpression(deflt)));
1047 if(!isDigit((unsigned char)*(fltString+1)))
1048 return(copyCell(evaluateExpression(deflt)));
1051 value = atof(fltString);
1052 return( stuffFloat(&value) );
1056 CELL * p_symbol(CELL * params)
1058 char * token;
1059 char number[32];
1060 SYMBOL * context;
1061 SYMBOL * sPtr;
1062 CELL * cell;
1063 #ifdef WIN_32
1064 char * fmt = "%I64d";
1065 #endif
1068 cell = evaluateExpression(params);
1069 switch(cell->type)
1071 case CELL_LONG:
1072 snprintf(number, 30, "%ld", cell->contents);
1073 token = number;
1074 break;
1075 #ifndef NEWLISP64
1076 case CELL_INT64:
1077 #ifdef TRU64
1078 snprintf(number, 30, "%ld", *(INT64 *)&cell->aux);
1079 #else
1081 #ifdef WIN_32
1082 snprintf(number, 30, fmt, *(INT64 *)&cell->aux);
1083 #else
1084 snprintf(number, 30, "%lld", *(INT64 *)&cell->aux);
1085 #endif /* WIN_32 */
1087 #endif /* TRUE64 */
1088 token = number;
1089 break;
1090 #endif /* NEWLISP64 */
1091 case CELL_FLOAT:
1092 #ifndef NEWLISP64
1093 snprintf(number, 30, "%1.10g",*(double *)&cell->aux);
1094 #else
1095 snprintf(number, 30, "%1.10g",*(double *)&cell->contents);
1096 #endif
1097 token = number;
1098 break;
1099 case CELL_STRING:
1100 token = (char*)cell->contents;
1101 break;
1102 case CELL_SYMBOL:
1103 sPtr = (SYMBOL*)cell->contents;
1104 token = sPtr->name;
1105 break;
1106 default:
1107 return(errorProcExt(ERR_NUMBER_OR_STRING_EXPECTED, params));
1109 params = params->next;
1111 if(params == nilCell)
1112 context = currentContext;
1113 else if((context = getCreateContext(params, TRUE)) == NULL)
1114 return(errorProcExt(ERR_SYMBOL_OR_CONTEXT_EXPECTED, params));
1116 if(params->next != nilCell)
1118 cell = evaluateExpression(params->next);
1119 if(cell->type == CELL_NIL)
1121 sPtr = lookupSymbol(token, context);
1122 if(sPtr == NULL) return(nilCell);
1123 return(stuffSymbol(sPtr));
1127 sPtr = translateCreateSymbol(token, CELL_NIL, context, TRUE);
1128 return(stuffSymbol(sPtr));
1133 CELL * p_symbolSource(CELL * params)
1135 STREAM strStream;
1136 CELL * cell;
1138 openStrStream(&strStream, MAX_STRING, 0);
1139 serializeSymbols(params, (UINT)&strStream);
1140 cell = stuffString(strStream.buffer);
1141 closeStrStream(&strStream);
1142 return(cell);
1146 CELL * p_string(CELL * params)
1148 CELL * cell;
1149 STREAM strStream;
1151 openStrStream(&strStream, MAX_STRING, 0);
1152 prettyPrintFlags |= PRETTYPRINT_STRING;
1153 while (params != nilCell)
1155 cell = evaluateExpression(params);
1156 if(cell->type == CELL_STRING) /* speed optimization for strings */
1157 writeStreamStr(&strStream, (char *)cell->contents, 0);
1158 else
1159 printCell(cell , FALSE, (UINT)&strStream);
1160 params = params->next;
1162 prettyPrintFlags &= ~PRETTYPRINT_STRING;
1163 cell = stuffString(strStream.buffer);
1165 closeStrStream(&strStream);
1166 return(cell);
1170 UINT getAddress(CELL * params)
1172 UINT num;
1174 params = evaluateExpression(params);
1176 #ifndef NEWLISP64
1177 if(params->type == CELL_INT64)
1179 num = *(INT64 *)&params->aux;
1180 return(num);
1182 else if(params->type == CELL_FLOAT)
1184 num = *(INT64 *)&params->aux;
1185 return(num);
1187 #else
1188 if(params->type == CELL_FLOAT)
1190 num = *(double *)&params->contents;
1191 return(num);
1193 #endif
1195 return(params->contents);
1199 CELL * p_getChar(CELL * params)
1201 return(stuffInteger((UINT)*(char*)getAddress(params)));
1204 CELL * p_getString(CELL * params)
1206 return(stuffString((char *)getAddress(params)));
1209 CELL * p_getInteger(CELL * params)
1211 return(stuffInteger(*(unsigned int *)getAddress(params)));
1214 CELL * p_getLong(CELL * params)
1216 #ifndef NEWLISP64
1217 return(stuffInteger64(*(INT64 *)getAddress(params)));
1218 #else
1219 return(stuffInteger(*(UINT *)getAddress(params)));
1220 #endif
1223 CELL * p_getFloat(CELL * params)
1225 return(stuffFloat((double*)getAddress(params)));
1229 CELL * p_address(CELL * params)
1231 params = evaluateExpression(params);
1232 switch(params->type)
1234 case CELL_LONG:
1235 return(stuffInteger((UINT)&params->contents));
1236 #ifndef NEWLISP64
1237 case CELL_INT64:
1238 case CELL_FLOAT:
1239 return(stuffInteger((UINT)&params->aux));
1240 #else
1241 case CELL_FLOAT:
1242 return(stuffInteger((UINT)&params->contents));
1243 #endif
1244 default:
1245 break;
1247 return(stuffInteger(params->contents));
1251 CELL * p_copyMemory(CELL * params)
1253 UINT toAddress, fromAddress, nBytes;
1254 CELL * cell;
1256 cell = evaluateExpression(params);
1257 params = params->next;
1258 fromAddress = getAddress(cell);
1260 cell = evaluateExpression(params);
1261 toAddress = getAddress(cell);
1263 getInteger(params->next, &nBytes);
1265 memcpy((char*)toAddress, (char*)fromAddress, nBytes);
1267 return(stuffInteger(nBytes));
1271 CELL * cellTokenString(char * * source, size_t * srclen, char * separator, pcre * re)
1273 char * start;
1274 char * src;
1275 ssize_t len, slen;
1276 int ovector[OVECCOUNT];
1277 int rc;
1279 if(*source == NULL) return(stuffString(""));
1280 start = src = *source;
1281 len = 0;
1282 slen = strlen(separator);
1284 if(re == NULL)
1286 while(*src != 0)
1288 if(*src == *separator)
1290 if(strncmp(src, separator, slen) == 0)
1292 *source = src+slen;
1293 if(**source == 0) /* last token is separator */
1294 *source = NULL;
1295 return(stuffStringN(start, len));
1298 src++;
1299 len++;
1301 *source = src;
1302 if(len == 0) return(NULL);
1303 return(stuffStringN(start, len));
1305 else if(*src != 0)
1307 len = *srclen;
1308 rc = pcre_exec(re, NULL, src, len, 0, 0, ovector, OVECCOUNT);
1310 /* matching failed */
1311 if (rc == -1)
1313 *source = src + len;
1314 return(stuffStringN(src, len));
1317 /* error in pcre_exec() */
1318 if (rc < 0)
1319 regexError("error in pcre_exec()", rc, "");
1321 *source = src + ovector[1];
1322 *srclen -= ovector[1];
1323 if(**source == 0) /* last token is separator */
1324 *source = NULL;
1326 if(ovector[1] - ovector[0] == 0)
1327 return(NULL);
1329 return(stuffStringN(src, ovector[0]));
1332 return(NULL);
1336 CELL * p_parse(CELL * params)
1338 CELL * cell;
1339 CELL * newCell;
1340 CELL * result;
1341 char * string;
1342 char * separator;
1343 char token[MAX_STRING];
1344 STREAM stream;
1345 int tklen;
1346 size_t srclen;
1347 /* PCRE stuff */
1348 long options = 0;
1349 pcre *re = NULL;
1351 params = getStringSize(params, &string, &srclen, TRUE);
1352 if(params == nilCell) separator = NULL;
1353 else
1355 params = getString(params, &separator);
1356 if(params != nilCell)
1358 getInteger(params, (UINT *)&options);
1359 /* Compile the regular expression in separator */
1360 re = pcreCachedCompile(separator, options);
1362 else re = NULL;
1365 if(separator == NULL)
1366 makeStreamFromString(&stream, string);
1368 result = cell = getCell(CELL_EXPRESSION);
1369 while(string != NULL)
1371 if(separator != NULL)
1373 if((newCell = cellTokenString(&string, &srclen, separator, re)) == NULL)
1374 break;
1376 else
1378 if(getToken(&stream, token, &tklen) == TKN_EMPTY) break;
1379 newCell = stuffString(token);
1382 if(cell == result)
1383 result->contents = (UINT)newCell;
1384 else
1385 cell->next = newCell;
1387 cell = newCell;
1388 if(string == NULL)
1389 newCell->next = stuffString("");
1392 return(result);
1395 #define PACK_NONE 0
1396 #define PACK_CHAR 1
1397 #define PACK_BYTE 2
1398 #define PACK_INT 3
1399 #define PACK_UNSIGNED_INT 4
1400 #define PACK_LONG 5
1401 #define PACK_UNSIGNED_LONG 6
1402 #define PACK_LONG_LONG 7
1403 #define PACK_UNSIGNED_LONG_LONG 8
1404 #define PACK_FLOAT 9
1405 #define PACK_DOUBLE 10
1406 #define PACK_STRING 11
1407 #define PACK_NULL 12
1408 #define PACK_BIG_ENDIAN 13
1409 #define PACK_LITTLE_ENDIAN 14
1412 void swapEndian(char * data, int n)
1414 char tmp[8];
1415 int i;
1417 i = n;
1418 while(i) { tmp[n - i] = data[i - 1]; i--; }
1420 memcpy(data, &tmp[0], n);
1424 CELL * p_pack(CELL * params)
1426 char * format;
1427 char * source;
1428 char * packed;
1429 char * pPtr;
1430 CELL * cell;
1431 ssize_t length, ln;
1432 int len, type;
1433 int listFlag = 0;
1434 int bigEndian = 1, endianSwitch = 0;
1435 char chrV;
1436 unsigned char byteV;
1437 short int shortV;
1438 unsigned short int uint16V; /* 16 bit */
1439 unsigned int uint32V; /* 32 bit */
1440 unsigned long long uint64V; /* 64 bit */
1441 float floatV;
1442 double doubleV;
1444 /* find out endianess */
1445 bigEndian = (*((char *)&bigEndian) == 0) ;
1447 params = getString(params, &format);
1448 source = format;
1449 length = 0;
1450 while((source = parsePackFormat(source, &len, &type)) != NULL)
1451 length += len;
1453 if(length == 0) return(stuffString(""));
1454 pPtr = packed = allocMemory(length);
1455 source = format;
1456 length = 0;
1458 while((source = parsePackFormat(source, &len, &type)) != NULL)
1460 if(type == PACK_NULL)
1462 memset(pPtr, 0, len);
1463 pPtr += len;
1464 length += len;
1465 continue;
1468 else if(type == PACK_LITTLE_ENDIAN || type == PACK_BIG_ENDIAN)
1470 endianSwitch = ((type == PACK_BIG_ENDIAN) != bigEndian);
1471 continue;
1474 if(params->type == CELL_NIL) break;
1475 if(listFlag)
1476 cell = params;
1477 else
1478 cell = evaluateExpression(params);
1479 /* accept data in a list, (will recurse into it but never come out) */
1480 if(isList(cell->type))
1482 cell = (CELL *)cell->contents;
1483 params = cell;
1484 listFlag = 1;
1486 #ifndef NEWLISP64
1487 if(cell->type == CELL_FLOAT || cell->type == CELL_INT64)
1488 uint64V = *(INT64 *)&cell->aux;
1489 else /* CELL_LONG and CELL_STRING */
1490 uint64V = cell->contents;
1491 #else
1492 uint64V = cell->contents;
1493 #endif
1495 switch(type)
1497 case PACK_NONE:
1498 break;
1500 case PACK_BYTE:
1501 byteV = (char)uint64V;
1502 memcpy(pPtr, &byteV, 1);
1503 break;
1505 case PACK_CHAR:
1506 chrV = (char)uint64V;
1507 memcpy(pPtr, &chrV, 1);
1508 break;
1510 case PACK_INT:
1511 shortV = (short int)uint64V;
1512 memcpy(pPtr, &shortV, 2);
1513 if(endianSwitch) swapEndian(pPtr, 2);
1514 break;
1516 case PACK_UNSIGNED_INT:
1517 uint16V = (unsigned short int)uint64V;
1518 memcpy(pPtr, &uint16V, 2);
1519 if(endianSwitch) swapEndian(pPtr, 2);
1520 break;
1522 case PACK_LONG:
1523 case PACK_UNSIGNED_LONG:
1524 uint32V = (unsigned int)uint64V;
1525 memcpy(pPtr, &uint32V, 4);
1526 if(endianSwitch) swapEndian(pPtr, 4);
1527 break;
1529 case PACK_LONG_LONG:
1530 case PACK_UNSIGNED_LONG_LONG:
1531 memcpy(pPtr, &uint64V, 8);
1532 if(endianSwitch) swapEndian(pPtr, 8);
1533 break;
1535 case PACK_FLOAT:
1536 case PACK_DOUBLE:
1537 if(cell->type == CELL_FLOAT)
1538 doubleV = *(double *)&uint64V;
1539 else doubleV = (double)uint64V;
1540 if(type == PACK_FLOAT)
1542 floatV = doubleV;
1543 memcpy(pPtr, &floatV, 4);
1544 if(endianSwitch) swapEndian(pPtr, 4);
1546 else
1548 memcpy(pPtr, &doubleV, 8);
1549 if(endianSwitch) swapEndian(pPtr, 8);
1551 break;
1553 case PACK_STRING:
1554 if(cell->type == CELL_STRING)
1556 ln = cell->aux - 1;
1557 if(len <= ln)
1558 memcpy(pPtr, (void *)cell->contents, len);
1559 else
1561 memcpy(pPtr, (void*)cell->contents, ln);
1562 memset(pPtr + ln, 0, len - ln);
1565 else memset(pPtr, 0, len);
1566 break;
1568 default:
1569 break;
1572 pPtr += len;
1573 params = params->next;
1574 length += len;
1577 cell = stuffStringN(packed, length);
1578 free(packed);
1579 return(cell);
1583 char * parsePackFormat(char * format, int * length, int * type)
1585 *length = 0;
1586 while(*format == ' ') format++;
1588 if(*format == 0) return(NULL);
1590 switch(*format)
1592 case '<':
1593 *type = PACK_LITTLE_ENDIAN;
1594 format++;
1595 break;
1597 case '>':
1598 *type = PACK_BIG_ENDIAN;
1599 format++;
1600 break;
1602 case 'b':
1603 *length = 1;
1604 *type = PACK_BYTE;
1605 format++;
1606 break;
1608 case 'c':
1609 *length = 1;
1610 *type = PACK_CHAR;
1611 format++;
1612 break;
1613 case 's':
1614 case 'n':
1615 *type = (*format == 's') ? PACK_STRING : PACK_NULL;
1616 format++;
1617 if(isdigit((int)*format) )
1619 *length = atol(format);
1620 while(isdigit((int)*format)) format++;
1622 else *length = 1;
1624 break;
1625 case 'd':
1626 case 'u':
1627 *type = (*format == 'd') ? PACK_INT : PACK_UNSIGNED_INT;
1628 *length = 2;
1629 format++;
1630 break;
1631 case 'l':
1632 if(*(format + 1) == 'd' || *(format + 1) == 'u')
1634 *length = 4;
1635 *type = (*(format + 1) == 'd') ? PACK_LONG : PACK_UNSIGNED_LONG;
1636 format += 2;
1638 else if(*(format + 1) == 'f')
1640 *length = 8;
1641 *type = PACK_DOUBLE;
1642 format += 2;
1644 else
1646 *type = PACK_NONE;
1647 format++;
1649 break;
1650 case 'L':
1651 if(*(format + 1) == 'd' || *(format + 1) == 'u')
1653 *length = 8;
1654 *type = (*(format + 1) == 'd') ? PACK_LONG_LONG : PACK_UNSIGNED_LONG_LONG;
1655 format += 2;
1657 else
1659 *type = PACK_NONE;
1660 format++;
1662 break;
1663 case 'f':
1664 *length = 4;
1665 *type = PACK_FLOAT;
1666 format++;
1667 break;
1668 default:
1669 *type = PACK_NONE;
1670 format++;
1671 break;
1674 return(format);
1678 CELL * p_unpack(CELL * params)
1680 char * format;
1681 char * source;
1682 char * pPtr;
1683 CELL * cell;
1684 CELL * result;
1685 CELL * next;
1686 size_t length, maxlen;
1687 int len;
1688 int type;
1689 int bigEndian = 1, endianSwitch = 0;
1690 char chrV;
1691 unsigned char byteV;
1692 short shortV;
1693 unsigned short uint16V;
1694 int int32V;
1695 unsigned int uint32V;
1696 unsigned long long int uint64V;
1697 float floatV;
1698 double doubleV;
1700 /* find out endianess */
1701 bigEndian = (*((char *)&bigEndian) == 0) ;
1703 params = getString(params, &format);
1704 params = evaluateExpression(params);
1706 if(params->type == CELL_STRING)
1708 pPtr = (char *)params->contents;
1709 maxlen = params->aux - 1;
1711 else
1713 getIntegerExt(params, (void*)&pPtr, FALSE);
1714 maxlen = MAX_LONG;
1717 length = 0;
1718 source = format;
1719 result = getCell(CELL_EXPRESSION);
1720 next = NULL;
1722 while( (source = parsePackFormat(source, &len, &type))!= NULL)
1724 if(length + len > maxlen) break;
1726 if(type == PACK_LITTLE_ENDIAN || type == PACK_BIG_ENDIAN)
1728 endianSwitch = ((type == PACK_BIG_ENDIAN) != bigEndian);
1729 continue;
1732 switch(type)
1734 case PACK_NULL:
1735 pPtr += len;
1736 length += len;
1737 continue;
1739 case PACK_BYTE:
1740 memcpy(&byteV, pPtr, 1);
1741 cell = getCell(CELL_LONG);
1742 cell->contents = byteV;
1743 break;
1745 case PACK_CHAR:
1746 memcpy(&chrV, pPtr, 1);
1747 cell = getCell(CELL_LONG);
1748 cell->contents = chrV;
1749 break;
1751 case PACK_INT:
1752 memcpy(&shortV, pPtr, 2);
1753 if(endianSwitch) swapEndian((char*)&shortV, 2);
1754 cell = getCell(CELL_LONG);
1755 cell->contents = (int)shortV;
1756 break;
1758 case PACK_UNSIGNED_INT:
1759 memcpy(&uint16V, pPtr, 2);
1760 if(endianSwitch) swapEndian((char*)&uint16V, 2);
1761 cell = getCell(CELL_LONG);
1762 cell->contents = uint16V;
1763 break;
1765 case PACK_LONG:
1766 memcpy(&int32V, pPtr, 4);
1767 if(endianSwitch) swapEndian((char *)&int32V, 4);
1768 #ifndef NEWLISP64
1769 cell = getCell(CELL_INT64);
1770 *(INT64 *)&cell->aux = int32V;
1771 #else
1772 cell = getCell(CELL_LONG);
1773 *(long *)&cell->contents = int32V;
1774 #endif
1775 break;
1777 case PACK_UNSIGNED_LONG:
1778 memcpy(&uint32V, pPtr, 4);
1779 if(endianSwitch) swapEndian((char*)&uint32V, 4);
1780 #ifndef NEWLISP64
1781 cell = getCell(CELL_INT64);
1782 *(INT64 *)&cell->aux = uint32V;
1783 #else
1784 cell = getCell(CELL_LONG);
1785 cell->contents = uint32V;
1786 #endif
1787 break;
1789 case PACK_LONG_LONG:
1790 case PACK_UNSIGNED_LONG_LONG:
1791 memcpy(&uint64V, pPtr, 8);
1792 if(endianSwitch) swapEndian((char*)&uint64V, 8);
1793 #ifndef NEWLISP64
1794 cell = getCell(CELL_INT64);
1795 memcpy(&cell->aux, &uint64V, 8);
1796 #else
1797 cell = getCell(CELL_LONG);
1798 cell->contents = uint64V;
1799 #endif
1800 break;
1802 case PACK_FLOAT:
1803 memcpy(&floatV, pPtr, 4);
1804 if(endianSwitch) swapEndian((char*)&floatV, 4);
1805 doubleV = floatV;
1806 cell = stuffFloat(&doubleV);
1807 break;
1809 case PACK_DOUBLE:
1810 memcpy(&doubleV, pPtr, 8);
1811 if(endianSwitch) swapEndian((char*)&doubleV, 8);
1812 cell = stuffFloat(&doubleV);
1813 break;
1815 case PACK_STRING:
1816 cell = stuffStringN(pPtr, len);
1817 break;
1819 default:
1820 cell = getCell(CELL_NIL);
1821 break;
1824 pPtr += len; length += len;
1825 if(next == NULL)
1826 result->contents = (UINT)cell;
1827 else
1828 next->next = cell;
1830 next = cell;
1833 return(result);
1837 CELL * p_trim(CELL * params)
1839 char * str;
1840 size_t left, right, len;
1841 char * trimChr;
1842 #ifndef SUPPORT_UTF8
1843 char * ptr;
1844 char lchr, rchr;
1845 #else
1846 int * wstr;
1847 int * wptr;
1848 int lchr, rchr;
1849 CELL * result;
1850 #endif
1852 params = getString(params, &str);
1854 #ifndef SUPPORT_UTF8
1855 len = strlen(str);
1856 ptr = str;
1857 #else
1858 len = utf8_wlen(str);
1859 wptr = wstr = allocMemory((len + 1) * sizeof(int));
1860 len = utf8_wstr(wstr, str, len);
1861 #endif
1863 if(len == 0)
1864 return(stuffString(str));
1866 if(params == nilCell)
1867 lchr = rchr = 32;
1868 else
1870 params = getString(params, &trimChr);
1871 #ifndef SUPPORT_UTF8
1872 lchr = *trimChr;
1873 #else
1874 utf8_wchar(trimChr, &lchr);
1875 #endif
1876 if(params != nilCell)
1878 getString(params, &trimChr);
1879 #ifndef SUPPORT_UTF8
1880 rchr = *trimChr;
1881 #else
1882 utf8_wchar(trimChr, &rchr);
1883 #endif
1885 else rchr = lchr;
1888 left = right = 0;
1889 #ifndef SUPPORT_UTF8
1890 while(*str == lchr) str++, left++;
1891 #else
1892 while(*wstr == lchr) wstr++, left++;
1893 #endif
1895 if(left == len)
1896 return(stuffString(""));
1898 #ifndef SUPPORT_UTF8
1899 str = ptr + len - 1;
1900 while(*str == rchr && right < len) str--, ++right;
1902 return(stuffStringN(ptr + left, len - left - right));
1904 #else
1906 wstr = wptr + len - 1;
1907 while(*wstr == rchr && right < len) wstr--, ++right;
1909 *(wptr + len - right) = 0;
1911 str = allocMemory((len - left - right + 1) * UTF8_MAX_BYTES);
1912 len = wstr_utf8(str, wptr + left, (len - left - right) * UTF8_MAX_BYTES);
1913 free(wptr);
1914 result = stuffStringN(str, len);
1915 free(str);
1916 return(result);
1917 #endif
1923 /* ------- PRCE Perl Compatible Regular Expressions for all platforms ------- */
1924 /* see also http://www.pcre.org/ */
1927 void regexError(char * msg_1, int num, const char * msg_2);
1931 CELL * p_regex(CELL * params)
1933 pcre *re;
1934 int ovector[OVECCOUNT];
1935 int rc, idx;
1936 char * pattern;
1937 char * string;
1938 long options = 0;
1939 int len;
1940 CELL * cell, * result, * strCell;
1942 cell = getString(params, &pattern);
1943 strCell = evaluateExpression(cell);
1944 if(strCell->type != CELL_STRING)
1945 return(errorProcExt(ERR_STRING_EXPECTED, cell));
1947 string = (char *)strCell->contents;
1949 params = cell->next;
1950 if(params != nilCell)
1951 params = getInteger(params, (UINT *)&options);
1953 /* Compile the regular expression in the first argument */
1954 re = pcreCachedCompile(pattern, (int)options);
1956 /* Compilation succeeded: match the subject in the second argument */
1957 rc = pcre_exec(
1958 re, /* the compiled pattern */
1959 NULL, /* no extra data - we didn't study the pattern */
1960 string, /* the subject string */
1961 (int)strCell->aux - 1, /* the length of the subject */
1962 0, /* start at offset 0 in the subject */
1963 0, /* default options */
1964 ovector, /* output vector for substring information */
1965 OVECCOUNT); /* number of elements in the output vector */
1967 /* Matching failed */
1968 if (rc == -1)
1969 return(nilCell);
1971 /* error in pcre_exec() */
1972 if (rc < 0)
1973 regexError("error in pcre_exec()", rc, "");
1975 /* Match succeded */
1977 /* Show substrings stored in the output vector */
1978 result = cell = getCell(CELL_EXPRESSION);
1979 for(idx = 0; idx < rc; idx++)
1981 len = ovector[2*idx+1] - ovector[2*idx];
1982 strCell = stuffStringN(string + ovector[2*idx], len);
1984 deleteList((CELL*)sysSymbol[idx]->contents);
1985 sysSymbol[idx]->contents = (UINT)copyCell(strCell);
1987 if(idx == 0)
1989 cell->contents = (UINT)strCell;
1990 cell = (CELL *)cell->contents;
1992 else
1994 cell->next = strCell;
1995 cell = cell->next;
1998 cell->next = stuffInteger(ovector[2*idx]);
1999 cell = cell->next;
2000 cell->next = stuffInteger(len);
2001 cell = cell->next;
2004 return(result);
2008 ssize_t searchBufferRegex(char * string, int offset,
2009 char * pattern, int length, int options, int * len)
2011 pcre *re;
2012 int ovector[OVECCOUNT];
2013 int rc, idx;
2014 CELL * cell;
2016 options &= ~REPLACE_ONCE; /* turn custom bit off for PCRE */
2018 /* Compile the regular expression in the first argument */
2019 re = pcreCachedCompile(pattern, options);
2021 /* Compilation succeeded: match the subject in the second argument */
2022 rc = pcre_exec(re, NULL, string, length, offset, 0, ovector, OVECCOUNT);
2024 /* matching failed */
2025 if (rc == -1)
2026 return(-1);
2028 /* error in pcre_exec() */
2029 if (rc < 0)
2030 regexError("error in pcre_exec()", rc, "");
2033 for(idx = 0; idx < rc; idx++)
2035 cell = stuffStringN(string + ovector[2*idx], ovector[2*idx+1] - ovector[2*idx]);
2036 deleteList((CELL*)sysSymbol[idx]->contents);
2037 sysSymbol[idx]->contents = (UINT)cell;
2040 if(len != NULL)
2041 *len = ovector[1] - ovector[0];
2043 return((UINT)ovector[0]);
2047 pcre * pcreCachedCompile(char * pattern, int options)
2049 const char * error;
2050 int errOffset;
2051 static char * cPattern = NULL;
2052 static pcre * re = NULL;
2053 static int cacheOptions = -1;
2054 UINT len;
2056 if((cPattern == NULL) || (strcmp(cPattern, pattern) != 0) || (options != cacheOptions))
2058 cacheOptions = options;
2059 if(cPattern != NULL) freeMemory(cPattern);
2060 len = strlen(pattern);
2061 cPattern = (char *)allocMemory(len + 1);
2062 memcpy(cPattern, pattern, len + 1);
2063 #ifdef WIN_32
2064 if(re != NULL) free(re);
2065 #else
2066 if(re != NULL) (pcre_free)(re);
2067 #endif
2068 re = pcre_compile(pattern, options, &error, &errOffset, NULL);
2070 /* Compilation failed: print the error message and exit */
2071 if (re == NULL)
2073 freeMemory(cPattern);
2074 cPattern = NULL;
2075 regexError("offset", errOffset, error);
2079 return(re);
2083 void regexError(char * msg_1, int number, const char * msg_2)
2085 CELL * cell;
2086 char * errorBuff = malloc(256);
2087 snprintf(errorBuff, 256, "%s %d %s:", msg_1, number, msg_2);
2088 cell = stuffString(errorBuff);
2089 free(errorBuff);
2090 fatalError(ERR_REGEX, cell, 0);
2095 /* replace string with or without (options = -1) regular expressions */
2097 typedef struct {
2098 int offset;
2099 int length;
2100 char * repStr;
2101 int repLen;
2102 void * next;
2103 } REGEX;
2105 void freeRegex(REGEX * regex);
2107 char * replaceString
2108 (char * keyStr, int keyLen, char * buff, size_t buffLen, CELL * exprCell,
2109 UINT * cnt, int options, size_t * newLen)
2111 ssize_t oldLen, findPos;
2112 size_t count, offset;
2113 size_t repLen;
2114 char * newBuff;
2115 CELL * cell;
2116 REGEX * start_rx = NULL, * end_rx = NULL;
2117 int resultStackIdxSave;
2118 int errNo;
2119 int bias = 0;
2121 *newLen = oldLen = 0;
2122 *cnt = 0;
2123 offset = count = 0;
2125 /* save all found string and fill sys variables $0, $1 ... etc */
2126 while(offset <= buffLen)
2128 if(options == -1)
2129 findPos = searchBuffer(buff + offset, buffLen - offset, keyStr, keyLen, TRUE);
2130 else
2131 findPos = searchBufferRegex(buff, (int)offset, keyStr, (int)buffLen, options, &keyLen);
2133 if(findPos == -1) break;
2135 if(options != -1) findPos -= offset;
2137 if(count == 0)
2138 start_rx = end_rx = (REGEX *)callocMemory(sizeof(REGEX));
2139 else
2141 end_rx->next = callocMemory(sizeof(REGEX));
2142 end_rx = end_rx->next;
2145 end_rx->next = NULL;
2146 end_rx->offset = findPos + offset; /* pos where pattern found */
2148 resultStackIdxSave = resultStackIdx;
2150 if((cell = evaluateExpressionSafe(exprCell, &errNo)) == NULL)
2152 freeRegex(start_rx);
2153 longjmp(errorJump, errNo);
2156 end_rx->length = keyLen; /* length of pattern found */
2157 if(cell->type == CELL_STRING)
2159 repLen = end_rx->repLen = cell->aux-1;
2160 end_rx->repStr = (char *)allocMemory(repLen + 1); /* replacement string */
2161 memcpy(end_rx->repStr, (char *)cell->contents, cell->aux);
2163 else /* if replacement expression is not string leave old content */
2165 repLen = end_rx->repLen = keyLen;
2166 end_rx->repStr = (char *)callocMemory(repLen + 1);
2167 memcpy(end_rx->repStr, buff + offset + findPos, keyLen);
2170 cleanupResults(resultStackIdxSave);
2172 count++;
2173 offset += findPos + keyLen; /* next start for search */
2174 oldLen += keyLen; /* space occupied by old content */
2175 *newLen += repLen; /* space needed by new replacement content */
2177 bias = (keyLen == 0);
2178 offset += bias;
2180 if(options != -1)
2181 if(options & REPLACE_ONCE) break;
2184 if(count == 0) return(NULL);
2186 *newLen = buffLen - oldLen + *newLen;
2187 newBuff = callocMemory(*newLen + 1);
2189 end_rx = start_rx;
2191 /* count is now offset into the new buffer */
2192 *cnt = count;
2193 count = start_rx->offset;
2194 memcpy(newBuff, buff, count);
2196 while(start_rx != NULL) /* replace */
2198 repLen = start_rx->repLen;
2199 memcpy(newBuff + count, start_rx->repStr, repLen);
2200 count += repLen;
2201 freeMemory(start_rx->repStr);
2203 end_rx = start_rx->next;
2204 if(end_rx != NULL) /* copy from buffer */
2206 repLen = end_rx->offset - start_rx->offset - start_rx->length;
2207 memcpy(newBuff + count, buff + start_rx->offset + start_rx->length, repLen);
2208 count += repLen;
2211 freeMemory(start_rx);
2212 start_rx = end_rx;
2215 printf("count %d buffLen %d offset %d bias %d str %s lencpy %d\n",
2216 count, buffLen, offset, bias, buff + offset - bias, buffLen - offset + bias);
2218 memcpy(newBuff + count, buff + offset - bias, buffLen - offset + bias);
2219 return(newBuff);
2223 void freeRegex(REGEX * regex)
2225 REGEX * oldRegex;
2227 while(regex != NULL)
2229 if(regex->repStr != NULL)
2230 freeMemory(regex->repStr);
2231 oldRegex = regex;
2232 regex = regex->next;
2233 free(oldRegex);
2237 /* eof */