Convert xml-protected spaces to real spaces
[odt2txt.git] / regex.h
blob4940aa33c902e9420650689a461ab61593918ec5
1 /*
2 * regex.h: String and regex operations for odt2txt
4 * Copyright (c) 2006-2009 Dennis Stosberg <dennis@stosberg.net>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License,
8 * version 2 as published by the Free Software Foundation
9 */
11 #ifndef REGEX_H
12 #define REGEX_H
14 #include <regex.h>
15 #include <stdio.h>
16 #include <stddef.h>
17 #include <stdlib.h>
18 #include <string.h>
20 #include "strbuf.h"
22 #define _REG_DEFAULT 0 /* Stop after first match, to be removed */
23 #define _REG_GLOBAL 1 /* Find all matches of regexp */
24 #define _REG_EXEC 2 /* subst is a function pointer */
27 * Deletes match(es) of regex from *buf.
29 * Returns the number of matches that were deleted.
31 int regex_rm(STRBUF *buf,
32 const char *regex, int regopt);
35 * Replaces match(es) of regex in *buf with subst.
37 int regex_subst(STRBUF *buf,
38 const char *regex, int regopt,
39 const void *subst);
42 * Returns a pointer to a new string with two lines. The first line
43 * contains str, the second line contains strlen(str) copies of
44 * linechar.
46 char *underline(char linechar, const char *str);
49 * Wrappers around underline, to be used as argument to regex_subst
50 * when regopt is _REG_EXEC.
52 * They replace the match in buf with underline('=',match) or
53 * underline('-',match) respectively.
55 char *h1(const char *buf, regmatch_t matches[], size_t nmatch, size_t off);
56 char *h2(const char *buf, regmatch_t matches[], size_t nmatch, size_t off);
59 * Replaces the match with the name of the image frame.
61 char *image(const char *buf, regmatch_t matches[], size_t nmatch, size_t off);
64 * Copies the contents of buf to a new string buffer, wrapped to a
65 * maximal line width of width characters.
67 STRBUF *wrap(STRBUF *buf, int width);
70 * Number of bytes that follow a lead byte in a UTF-8 sequence (entries cover bytes 0x80-0xff)
72 static const char utf8_length[128] =
74 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
75 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
76 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
77 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
78 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
79 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
80 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
81 3,3,3,3,3,3,3,3,4,4,4,4,5,5,0,0 /* 0xf0-0xff */
84 #endif /* REGEX_H */