pdf: add splines
[neatpost.git] / pdfext.c
blobdcefd52b12d0545c2d4eb0754a11a64cb6de8cf1
1 /* Parse and extract PDF objects */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "post.h"
/*
 * The number of white space characters starting at pos.
 *
 * Counts the bytes of pdf[pos..len) for which isspace() is true, stopping
 * at the first other byte; returns 0 if pos is already at or past len.
 */
int pdf_ws(char *pdf, int len, int pos)
{
	int i = pos;
	/* the cast keeps bytes >= 0x80 from reaching isspace() as negative values */
	while (i < len && isspace((unsigned char) pdf[i]))
		i++;
	return i - pos;
}
/*
 * The type of the pdf object at pos, after skipping leading white space:
 * s: string, d: dictionary, l: list, n: number, /: name, r: reference.
 *
 * Returns -1 if pos is negative, if nothing but white space is left before
 * len, or if the first byte does not start any of the types above.
 */
int pdf_type(char *pdf, int len, int pos)
{
	int c;
	if (pos < 0)
		return -1;
	pos += pdf_ws(pdf, len, pos);
	if (pos >= len)
		return -1;
	c = (unsigned char) pdf[pos];
	if (c == '/')
		return '/';
	if (c == '(')
		return 's';
	if (c == '<')	/* "<<" opens a dictionary, a single "<" a hex string */
		return pos + 1 < len && pdf[pos + 1] == '<' ? 'd' : 's';
	if (c == '[')
		return 'l';
	/* c != 0: strchr() would also match the terminating NUL of the set */
	if (c && strchr("0123456789+-.", c)) {
		if (!isdigit(c))
			return 'n';
		/* digits, digits, then R make a reference; anything else is a number */
		while (pos < len && isdigit((unsigned char) pdf[pos]))
			pos++;
		pos += pdf_ws(pdf, len, pos);
		if (pos >= len || !isdigit((unsigned char) pdf[pos]))
			return 'n';
		while (pos < len && isdigit((unsigned char) pdf[pos]))
			pos++;
		pos += pdf_ws(pdf, len, pos);
		return pos < len && pdf[pos] == 'R' ? 'r' : 'n';
	}
	return -1;
}
/*
 * The length of the pdf object at pos.
 *
 * The returned length is measured from pos, so it includes any white space
 * that precedes the object.  Numbers, references ("N G R"), literal and hex
 * strings, names, lists, dictionaries and bare keywords (true, false, null)
 * are recognised; lists and dictionaries are measured by recursing over
 * their members.  For a byte that starts none of these only the leading
 * white space is counted.  The result never extends past len, and 0 is
 * returned when pos is at or past len.
 */
int pdf_len(char *pdf, int len, int pos)
{
	/* bytes that end a name or a keyword; strchr() also stops them at NUL */
	char *delim = " \t\r\n\f()<>[]{}/%";
	int old = pos;
	int beg;
	int c;
	if (pos >= len)
		return 0;
	pos += pdf_ws(pdf, len, pos);
	if (pos >= len)		/* nothing but white space was left */
		return pos - old;
	c = (unsigned char) pdf[pos];
	/* c != 0: strchr() would also match the terminating NUL of the set */
	if (c && strchr("0123456789+-.", c)) {
		if (pdf_type(pdf, len, pos) == 'r') {
			char *r = memchr(pdf + pos, 'R', len - pos);
			if (r)
				return r - (pdf + old) + 1;
		}
		pos++;
		while (pos < len && pdf[pos] &&
				strchr("0123456789.", (unsigned char) pdf[pos]))
			pos++;
	} else if (c == '(') {
		int depth = 1;
		pos++;
		while (pos < len && depth > 0) {
			if (pdf[pos] == '(')
				depth++;
			if (pdf[pos] == ')')
				depth--;
			if (pdf[pos] == '\\')	/* skip the escaped character too */
				pos++;
			pos++;
		}
	} else if (c == '<' && pos + 1 < len && pdf[pos + 1] == '<') {
		pos += 2;
		while (pos + 1 < len && (pdf[pos] != '>' || pdf[pos + 1] != '>')) {
			beg = pos;
			pos += pdf_len(pdf, len, pos);	/* key */
			pos += pdf_len(pdf, len, pos);	/* value */
			pos += pdf_ws(pdf, len, pos);
			if (pos <= beg)		/* malformed entry; do not spin */
				break;
		}
		if (pos + 1 < len && pdf[pos] == '>' && pdf[pos + 1] == '>')
			pos += 2;
	} else if (c == '<') {
		while (pos < len && pdf[pos] != '>')
			pos++;
		if (pos < len)
			pos++;
	} else if (c == '/') {
		pos++;
		while (pos < len && !strchr(delim, (unsigned char) pdf[pos]))
			pos++;
	} else if (c == '[') {
		pos++;
		while (pos < len && pdf[pos] != ']') {
			beg = pos;
			pos += pdf_len(pdf, len, pos);
			pos += pdf_ws(pdf, len, pos);
			if (pos <= beg)		/* malformed entry; do not spin */
				break;
		}
		if (pos < len && pdf[pos] == ']')
			pos++;
	} else if (c && !strchr(delim, c)) {
		/* a bare keyword such as true, false or null */
		while (pos < len && !strchr(delim, (unsigned char) pdf[pos]))
			pos++;
	}
	if (pos > len)		/* a trailing backslash may step past the end */
		pos = len;
	return pos - old;
}
/*
 * Compare s and t up to the end of the shorter one.
 *
 * Returns 0 at the first differing byte and 1 if either string ends before
 * a difference is found.  The test is therefore symmetric: it is also 1
 * when s is a proper prefix of t, or when either string is empty.
 */
static int startswith(char *s, char *t)
{
	while (*s && *t)
		if (*s++ != *t++)
			return 0;
	return 1;
}
/*
 * Read an indirect reference ("N G R") at pos.
 *
 * Stores the object number in *obj and the generation number in *rev and
 * returns 0.  Returns -1, leaving *obj and *rev untouched, if pos is
 * negative or pdf_type() does not report a reference there.
 */
int pdf_obj(char *pdf, int len, int pos, int *obj, int *rev)
{
	if (pos < 0 || pdf_type(pdf, len, pos) != 'r')
		return -1;
	pos += pdf_ws(pdf, len, pos);
	*obj = atoi(pdf + pos);
	/*
	 * Step over the object number only: pdf_len() measures the whole
	 * reference, which would leave pos after the R.
	 */
	while (pos < len && isdigit((unsigned char) pdf[pos]))
		pos++;
	pos += pdf_ws(pdf, len, pos);
	*rev = atoi(pdf + pos);
	return 0;
}
/*
 * The value of a pdf dictionary key.
 *
 * pos is taken to be the position of the opening "<<"; its two bytes are
 * skipped without being checked.  key is compared byte for byte with each
 * dictionary key as it appears in the file (callers pass the leading '/').
 * Returns the position of the matching value, after any white space, or -1
 * if the key is absent or the dictionary cannot be walked.
 */
int pdf_dval(char *pdf, int len, int pos, char *key)
{
	int klen = strlen(key);
	pos += 2;
	while (pos + 1 < len && (pdf[pos] != '>' || pdf[pos + 1] != '>')) {
		int beg = pos;
		int n;
		pos += pdf_ws(pdf, len, pos);
		n = pdf_len(pdf, len, pos);
		if (n == klen && !memcmp(pdf + pos, key, klen)) {
			pos += n;
			pos += pdf_ws(pdf, len, pos);
			return pos;
		}
		pos += n;			/* skip the key */
		pos += pdf_len(pdf, len, pos);	/* skip its value */
		pos += pdf_ws(pdf, len, pos);
		if (pos <= beg)		/* malformed entry; do not spin */
			return -1;
	}
	return -1;
}
/*
 * Return a dictionary key.
 *
 * pos is taken to be the position of the opening "<<"; its two bytes are
 * skipped without being checked.  Returns the position of the key with
 * zero-based index `key`, or -1 if the dictionary has fewer entries or
 * cannot be walked.
 */
int pdf_dkey(char *pdf, int len, int pos, int key)
{
	int i = 0;
	pos += 2;
	for (;;) {
		int beg;
		/* skip white space first, so that "<< >>" has no key 0 */
		pos += pdf_ws(pdf, len, pos);
		if (pos + 1 >= len || (pdf[pos] == '>' && pdf[pos + 1] == '>'))
			break;
		if (i++ == key)
			return pos;
		beg = pos;
		pos += pdf_len(pdf, len, pos);	/* skip the key */
		pos += pdf_len(pdf, len, pos);	/* skip its value */
		if (pos <= beg)		/* malformed entry; do not spin */
			break;
	}
	return -1;
}
/*
 * Return a list entry.
 *
 * pos is taken to be the position of the opening '[', which is skipped
 * without being checked.  Returns the position of the entry with
 * zero-based index idx (white space before it is skipped), or -1 if the
 * list has fewer entries or cannot be walked.
 */
int pdf_lval(char *pdf, int len, int pos, int idx)
{
	int i = 0;
	pos++;
	for (;;) {
		int beg;
		/* skip white space first, so that "[ ]" has no entry 0 */
		pos += pdf_ws(pdf, len, pos);
		if (pos >= len || pdf[pos] == ']')
			break;
		if (i++ == idx)
			return pos;
		beg = pos;
		pos += pdf_len(pdf, len, pos);
		if (pos <= beg)		/* malformed entry; do not spin */
			break;
	}
	return -1;
}
/*
 * Find the last occurrence of byte c in the first n bytes of m.
 *
 * Returns a pointer to that byte, or NULL if it does not occur or n <= 0.
 */
static void *my_memrchr(void *m, int c, long n)
{
	/* index through a byte pointer: arithmetic on void * is not standard C */
	unsigned char *s = m;
	long i;
	for (i = n - 1; i >= 0; i--)
		if (s[i] == c)
			return s + i;
	return NULL;
}
/*
 * The start of the line that ends at the last newline before off.
 *
 * Finds the last '\n' in pdf[0..off) and returns the offset just after the
 * newline that precedes it, or 0 if that line is the first one in the
 * buffer.  The byte directly before the found newline is not examined, so
 * an empty line there is treated as part of the returned line.  Returns -1
 * if there is no newline before off.
 */
static int prevline(char *pdf, int len, int off)
{
	char *nl, *nl2;
	if (off > len)		/* never search past the end of the buffer */
		off = len;
	nl = my_memrchr(pdf, '\n', off);
	if (!nl)
		return -1;
	nl2 = my_memrchr(pdf, '\n', nl - pdf - 1);
	return nl2 ? nl2 - pdf + 1 : 0;
}
/*
 * The start of the line following the one that contains off.
 *
 * Returns the offset just after the first '\n' in pdf[off..len), which may
 * equal len, or -1 if off is outside the buffer or no newline follows.
 */
static int nextline(char *pdf, int len, int off)
{
	char *nl;
	/* len - off must not go negative: memchr() takes an unsigned count */
	if (off < 0 || off >= len)
		return -1;
	nl = memchr(pdf + off, '\n', len - off);
	return nl ? nl - pdf + 1 : -1;
}
/*
 * The position of the trailer.
 *
 * Walks backwards from the end of the file, one line at a time, to the
 * last line that begins with "trailer" and returns the offset of the line
 * after it.  Returns -1 if no such line is found.
 */
int pdf_trailer(char *pdf, int len)
{
	int pos = prevline(pdf, len, len);
	/* compare exactly 7 bytes, so a line starting with NUL cannot match */
	while (pos >= 0 && (pos + 7 > len || memcmp(pdf + pos, "trailer", 7)))
		pos = prevline(pdf, len, pos);
	if (pos < 0)
		return -1;
	return nextline(pdf, len, pos);		/* skip trailer\n */
}
/*
 * The position of the last xref table.
 *
 * Walks backwards from the end of the file to the last line that begins
 * with "startxref", reads the decimal offset on the following line and
 * returns the offset of the line after the one found there (the first line
 * after the "xref" keyword).  Searching for the keyword, rather than
 * counting lines from the end, also handles a file whose final %%EOF has
 * no trailing newline.  Returns -1 if the keyword or a valid offset is
 * missing.
 */
static int pdf_xref(char *pdf, int len)
{
	int pos = prevline(pdf, len, len);
	int off;
	while (pos >= 0 && (pos + 9 > len || memcmp(pdf + pos, "startxref", 9)))
		pos = prevline(pdf, len, pos);
	if (pos < 0 || (pos = nextline(pdf, len, pos)) < 0)
		return -1;
	/* read startxref offset */
	if (sscanf(pdf + pos, "%d", &off) != 1 || off < 0 || off >= len)
		return -1;
	return nextline(pdf, len, off);		/* skip xref\n */
}
/*
 * Find a pdf object.
 *
 * Looks up object number obj with generation rev in the last xref table.
 * Each subsection starts with a "first count" line followed by count entry
 * lines of the form "offset generation type".  On a match the text after
 * the next "obj" keyword at the recorded offset is located, and the
 * position of the object's content (after white space) is returned.
 * Returns -1 if the table cannot be read, the entry is marked free ('f'),
 * or the object is not listed.
 */
int pdf_find(char *pdf, int len, int obj, int rev)
{
	int obj_beg, obj_cnt;
	int cur_rev, cur_pos;
	char cur_type;
	char *beg;
	int i, n;
	int pos = pdf_xref(pdf, len);
	if (pos < 0)
		return -1;
	/* the numbers after xref */
	while (pos < len && sscanf(pdf + pos, "%d %d", &obj_beg, &obj_cnt) == 2) {
		for (i = 0; i < obj_cnt; i++) {
			if ((pos = nextline(pdf, len, pos)) < 0)
				return -1;
			n = sscanf(pdf + pos, "%d %d %c", &cur_pos, &cur_rev, &cur_type);
			if (n < 2)
				return -1;
			if (obj_beg + i != obj || cur_rev != rev)
				continue;
			/* a free entry holds a free-list link, not an offset */
			if (n == 3 && cur_type == 'f')
				return -1;
			if (cur_pos < 0 || cur_pos >= len)
				return -1;
			if (!(beg = strstr(pdf + cur_pos, "obj")))
				return -1;
			pos = beg - pdf + 3;
			if (pos > len)
				return -1;
			pos += pdf_ws(pdf, len, pos);
			return pos;
		}
		/*
		 * Move off the last entry to the next subsection header;
		 * otherwise that entry is re-read as a header and, with a
		 * generation of 0, the loop never advances.
		 */
		if ((pos = nextline(pdf, len, pos)) < 0)
			return -1;
	}
	return -1;
}
/*
 * Read and dereference an indirect reference.
 *
 * Returns the position reported by pdf_find() for the reference at pos, or
 * -1 if pos is negative, there is no reference at pos, or the object is
 * not found.
 */
int pdf_ref(char *pdf, int len, int pos)
{
	int obj, rev;
	/* callers may pass the -1 of a failed lookup straight through */
	if (pos < 0)
		return -1;
	if (pdf_obj(pdf, len, pos, &obj, &rev))
		return -1;
	return pdf_find(pdf, len, obj, rev);
}
/*
 * Retrieve and dereference a dictionary entry.
 *
 * Looks up key in the dictionary at pos.  If the value is an indirect
 * reference, the position of the referenced object is returned (or -1 if
 * it cannot be found); otherwise the position of the value itself.
 * Returns -1 if the key is absent.
 */
int pdf_dval_val(char *pdf, int len, int pos, char *key)
{
	int val = pdf_dval(pdf, len, pos, key);
	if (val < 0)
		return -1;
	if (pdf_type(pdf, len, val) == 'r')
		return pdf_ref(pdf, len, val);
	return val;
}
/*
 * Retrieve a dictionary entry, which is an indirect reference.
 *
 * Looks up key in the dictionary at pos and dereferences its value with
 * pdf_ref().  Returns the position of the referenced object, or -1 if the
 * key is absent, the value is not a reference, or the object is not found.
 */
int pdf_dval_obj(char *pdf, int len, int pos, char *key)
{
	int val = pdf_dval(pdf, len, pos, key);
	if (val < 0)
		return -1;
	return pdf_ref(pdf, len, val);
}