refer: records with an R key are technical reports
[neatrefer.git] / refer.c
blobe9185e3bc1a1378737beeef5051d577d00d13b3b
1 /*
2 * NEATREFER - A REFER CLONE FOR NEATROFF
4 * Copyright (C) 2011-2017 Ali Gholami Rudi <ali at rudi dot ir>
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 #include <ctype.h>
19 #include <fcntl.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <unistd.h>
enum { NREFS = 1 << 14 };	/* capacity of refs[] and cites[] */
#define LEN(a)		(sizeof(a) / sizeof(*(a)))

/* one record of the refer database */
struct ref {
	char *keys[128];	/* field values, indexed by the field letter */
	char *auth[128];	/* author names, in the order they were read */
	int id;			/* index in cites[] once cited; db_ref() resets it to -1 */
	int nauth;		/* number of used entries in auth[] */
};

/* the refer database */
static struct ref refs[NREFS];	/* every record that was read */
static int refs_n;		/* number of used entries in refs[] */

/* the references cited so far, in the order of their first citation */
static struct ref *cites[NREFS];
static int cites_n;		/* number of used entries in cites[] */

static int inserted;		/* number of references inserted so far */

/* settings that main() fills in from the command line */
static int multiref;		/* -m: several labels per citation */
static int accumulate;		/* -e: do not insert references at citations */
static int initials;		/* -i: shorten authors' first names to initials */
static int refauth;		/* -a: author-year citations */
static int sortall;		/* -s: sort key; 'a' sorts by author last name */
static char *refmac;		/* -o: comma-separated citation macro names */

static FILE *refdb;		/* the database file while it is being read */

/* the label (L key) of a reference; NULL if it has none */
#define ref_label(ref)	((ref)->keys['L'])
/*
 * Read the next line of standard input.
 *
 * Returns a pointer to a static buffer that the next call overwrites,
 * or NULL once fgets() reports end of input or an error.  Lines longer
 * than the buffer are returned in pieces.
 */
static char *lnget(void)
{
	static char line[1024];

	return fgets(line, sizeof line, stdin);
}
/*
 * Write an output line to standard output (file descriptor 1).
 *
 * s: the bytes to write.
 * n: the number of bytes of s to write; if negative, s is written up to
 *    its terminating NUL.
 *
 * write() may accept fewer bytes than requested, so it is retried until
 * everything has been written.  Output is abandoned if write() fails or
 * makes no progress.
 */
static void lnput(char *s, int n)
{
	size_t left = n >= 0 ? (size_t) n : strlen(s);

	while (left > 0) {
		ssize_t done = write(1, s, left);

		if (done <= 0)
			break;
		s += done;
		left -= (size_t) done;
	}
}
63 /* the next refer database input line */
64 static char *dbget(void)
66 static char buf[1024];
67 return refdb ? fgets(buf, sizeof(buf), refdb) : NULL;
/*
 * Duplicate s up to, but not including, its first newline; the whole
 * string is copied if it contains none.
 *
 * Returns a NUL-terminated malloc()ed copy that the caller should free().
 * The program exits with an error message if memory cannot be allocated,
 * as callers store and use the result without checking it.
 */
static char *sdup(char *s)
{
	size_t n = strcspn(s, "\n");
	char *r = malloc(n + 1);

	if (!r) {
		fprintf(stderr, "refer: out of memory\n");
		exit(1);
	}
	memcpy(r, s, n);
	r[n] = '\0';
	return r;
}
81 /* format author names as J. Smith */
82 static char *ref_author(char *ref)
84 char *res;
85 char *out;
86 char *beg;
87 if (!initials)
88 return sdup(ref);
89 res = malloc(strlen(ref) + 32);
90 out = res;
91 while (1) {
92 while (*ref == ' ' || *ref == '.')
93 ref++;
94 if (*ref == '\0')
95 break;
96 beg = ref;
97 while (*ref && *ref != ' ' && *ref != '.')
98 ref++;
99 if (out != res)
100 *out++ = ' ';
101 if (islower((unsigned char) *beg) || *ref == '\0') {
102 while (beg < ref)
103 *out++ = *beg++;
104 } else { /* initials */
105 do {
106 *out++ = *beg++;
107 *out++ = '.';
108 while (beg < ref && *beg != '-')
109 beg++;
110 if (*beg == '-') /* handling J.-K. Smith */
111 *out++ = *beg++;
112 } while (beg < ref);
115 *out = '\0';
116 return res;
/*
 * Remove trailing spaces and newlines from s, in place.  Other
 * whitespace characters, such as tabs, are left alone.
 */
static void rstrip(char *s)
{
	size_t end = strlen(s);

	while (end > 0 && (s[end - 1] == ' ' || s[end - 1] == '\n'))
		end--;
	s[end] = '\0';
}
130 /* read a single refer record */
131 static void db_ref(struct ref *ref, char *ln)
133 do {
134 if (ln[0] == '%' && ln[1] >= 'A' && ln[1] <= 'Z') {
135 char *r = ln + 2;
136 while (isspace((unsigned char) *r))
137 r++;
138 rstrip(r);
139 if (ln[1] == 'A')
140 ref->auth[ref->nauth++] = ref_author(r);
141 else
142 ref->keys[(unsigned char) ln[1]] = sdup(r);
143 ref->id = -1;
145 } while ((ln = dbget()) && ln[0] != '\n');
148 /* parse a refer-style bib file and fill refs[] */
149 static int db_parse(void)
151 char *ln;
152 while ((ln = dbget()))
153 if (ln[0] != '\n')
154 db_ref(&refs[refs_n++], ln);
155 return 0;
/* the record fields that ref_ins() passes on as [X strings */
static char fields[] = "LTABERJDVNPITOH";

/* the fields for which ref_ins() also sets the [X number register */
static char fields_flag[] = "OP";

/* names of the reference kinds, indexed by the value of ref_kind() */
static char *kinds[] = {
	"Other",
	"Article",
	"Book",
	"In book",
	"Report",
};
162 static int ref_kind(struct ref *r)
164 if (r->keys['J'])
165 return 1;
166 if (r->keys['B'])
167 return 3;
168 if (r->keys['R'])
169 return 4;
170 if (r->keys['I'])
171 return 2;
172 return 0;
175 /* print the given reference */
176 static void ref_ins(struct ref *ref, int id)
178 char buf[1 << 12];
179 char *s = buf;
180 int kind = ref_kind(ref);
181 int j;
182 s += sprintf(s, ".ds [F %d\n", id);
183 s += sprintf(s, ".]-\n");
184 if (ref->nauth) {
185 s += sprintf(s, ".ds [A ");
186 for (j = 0; j < ref->nauth; j++)
187 s += sprintf(s, "%s%s", j ? ", " : "", ref->auth[j]);
188 s += sprintf(s, "\n");
190 for (j = 'B'; j <= 'Z'; j++) {
191 char *val = ref->keys[j];
192 if (!val || !strchr(fields, j))
193 continue;
194 s += sprintf(s, ".ds [%c %s\n", j, val ? val : "");
195 if (strchr(fields_flag, j))
196 s += sprintf(s, ".nr [%c 1\n", j);
198 s += sprintf(s, ".][ %d %s\n", kind, kinds[kind]);
199 lnput(buf, s - buf);
/*
 * Find the last name in an author name.
 *
 * The name is scanned word by word; words are separated by spaces, and a
 * backslash makes the following character (for instance a space) part of
 * the current word.  The result points at the last word, or at an earlier
 * word starting with a lowercase letter together with all that follows
 * it ("Ludwig van Beethoven" yields "van Beethoven").
 *
 * Returns a pointer into name; nothing is allocated or modified.
 */
static char *lastname(char *name)
{
	char *last = name;

	while (*name) {
		/* once a lowercase particle is found, the last name starts there */
		if (!islower((unsigned char) last[0]))
			last = name;
		while (*name && *name != ' ')
			/* a backslash at the very end escapes nothing; stay on the NUL */
			if (*name++ == '\\' && *name)
				name++;
		while (*name == ' ')
			name++;
	}
	return last;
}
217 static int refcmp(struct ref *r1, struct ref *r2)
219 if (!r2->nauth || (r1->keys['H'] && !r2->keys['H']))
220 return -1;
221 if (!r1->nauth || (!r1->keys['H'] && r2->keys['H']))
222 return 1;
223 return strcmp(lastname(r1->auth[0]), lastname(r2->auth[0]));
226 /* print all references */
227 static void ref_all(void)
229 int i, j;
230 struct ref **sorted;
231 sorted = malloc(cites_n * sizeof(sorted[0]));
232 memcpy(sorted, cites, cites_n * sizeof(sorted[0]));
233 if (sortall == 'a') {
234 for (i = 1; i < cites_n; i++) {
235 for (j = i - 1; j >= 0 && refcmp(cites[i], sorted[j]) < 0; j--)
236 sorted[j + 1] = sorted[j];
237 sorted[j + 1] = cites[i];
240 lnput(".]<\n", -1);
241 for (i = 0; i < cites_n; i++)
242 ref_ins(sorted[i], sorted[i]->id + 1);
243 lnput(".]>", -1);
244 free(sorted);
/*
 * Compare two ints for qsort().
 *
 * v1, v2: pointers to the ints to compare.
 *
 * Returns -1, 0 or 1 if *v1 is less than, equal to or greater than *v2.
 * The values are compared rather than subtracted, since the difference
 * of two ints may not fit in an int.
 */
static int intcmp(const void *v1, const void *v2)
{
	int a = *(const int *) v1;
	int b = *(const int *) v2;

	return (a > b) - (a < b);
}
252 /* the given label was referenced; add it to cites[] */
253 static int refer_seen(char *label)
255 int i;
256 for (i = 0; i < refs_n; i++)
257 if (ref_label(&refs[i]) && !strcmp(label, ref_label(&refs[i])))
258 break;
259 if (i == refs_n)
260 return -1;
261 if (refs[i].id < 0) {
262 refs[i].id = cites_n++;
263 cites[refs[i].id] = &refs[i];
265 return refs[i].id;
/*
 * Copy s to d, quoting it if necessary.
 *
 * A string that contains no space and does not start with a double quote
 * is copied unchanged.  Any other string is enclosed in double quotes,
 * with each double quote inside it doubled.
 *
 * d must have room for the result: at most 2 * strlen(s) + 3 bytes.
 */
static void refer_quote(char *d, char *s)
{
	int plain = s[0] != '"' && !strchr(s, ' ');

	if (plain) {
		strcpy(d, s);
		return;
	}
	*d++ = '"';
	for (; *s; s++) {
		if (*s == '"')
			*d++ = '"';
		*d++ = *s;
	}
	*d++ = '"';
	*d = '\0';
}
284 /* replace .[ .] macros with reference numbers */
285 static void refer_cite(char *s)
287 char msg[256];
288 char label[256];
289 int id[256];
290 int nid = 0;
291 int i = 0;
292 msg[0] = '\0';
293 while (!nid || multiref) {
294 char *r = label;
295 while (*s && strchr(" \t\n,", (unsigned char) *s))
296 s++;
297 while (*s && !strchr(" \t\n,]", (unsigned char) *s))
298 *r++ = *s++;
299 *r = '\0';
300 if (!strcmp("$LIST$", label)) {
301 ref_all();
302 break;
304 id[nid] = refer_seen(label);
305 if (id[nid] < 0)
306 fprintf(stderr, "refer: <%s> not found\n", label);
307 else
308 nid++;
309 if (!*s || *s == '\n' || *s == ']')
310 break;
312 if (!refauth) { /* numbered citations */
313 /* sort references for cleaner reference intervals */
314 qsort(id, nid, sizeof(id[0]), (void *) intcmp);
315 while (i < nid) {
316 int beg = i++;
317 /* reading reference intervals */
318 while (i < nid && id[i] == id[i - 1] + 1)
319 i++;
320 if (beg)
321 sprintf(msg + strlen(msg), ",");
322 if (beg == i - 1)
323 sprintf(msg + strlen(msg), "%d", id[beg] + 1);
324 else
325 sprintf(msg + strlen(msg), "%d%s%d",
326 id[beg] + 1, beg < i - 2 ? "\\-" : ",", id[i - 1] + 1);
328 } else if (nid) { /* year + authors citations */
329 struct ref *ref = cites[id[0]];
330 sprintf(msg, "%s %d", ref->keys['D'] ? ref->keys['D'] : "-", ref->nauth);
331 for (i = 0; i < ref->nauth; i++) {
332 sprintf(msg + strlen(msg), " ");
333 refer_quote(msg + strlen(msg), lastname(ref->auth[i]));
336 lnput(msg, -1);
337 if (!accumulate)
338 for (i = 0; i < nid; i++)
339 ref_ins(cites[id[i]], ++inserted);
/*
 * Return the number of characters of s before the first occurrence of
 * delim, or the length of s if delim does not occur in it.
 */
static int slen(char *s, int delim)
{
	int n = 0;

	while (s[n] && s[n] != (char) delim)
		n++;
	return n;
}
/*
 * Extract the name of the request at the beginning of a line.
 *
 * mac:    receives the NUL-terminated request name.
 * maclen: the size of mac in bytes.
 * s:      the input line; a request starts with a dot.
 *
 * The name runs up to the first space.  At most maclen - 1 characters are
 * stored, so that the terminating NUL always fits in mac.
 *
 * Returns 0 if s starts with a dot and the stored name is followed by a
 * space; returns 1 otherwise (no leading dot, no argument after the
 * name, or a name too long for mac).  mac is left untouched if s does not
 * start with a dot or maclen is not positive.
 */
static int refer_reqname(char *mac, int maclen, char *s)
{
	int i = 0;

	if (maclen <= 0 || *s++ != '.')
		return 1;
	while (i < maclen - 1 && *s && *s != ' ')
		mac[i++] = *s++;
	mac[i] = '\0';
	return *s != ' ';
}
/*
 * Extract the name of an inline macro call of the form \*[name args].
 *
 * mac:    receives the NUL-terminated macro name.
 * maclen: the size of mac in bytes.
 * s:      points at the backslash that may start the call.
 *
 * The name runs from after "\*[" up to the first space.  At most
 * maclen - 1 characters are stored, so that the terminating NUL always
 * fits in mac.
 *
 * Returns 0 if s starts with "\*[" and the stored name is followed by a
 * space; returns 1 otherwise (a different escape, no argument after the
 * name, or a name too long for mac).  mac is left untouched if s does not
 * start with "\*[" or maclen is not positive.
 */
static int refer_macname(char *mac, int maclen, char *s)
{
	int i = 0;

	if (maclen <= 0 || strncmp(s, "\\*[", 3) != 0)
		return 1;
	s += 3;
	while (i < maclen - 1 && *s && *s != ' ')
		mac[i++] = *s++;
	mac[i] = '\0';
	return *s != ' ';
}
374 /* return 1 if mac is a citation macro */
375 static int refer_refmac(char *mac)
377 char *s = refmac ? strstr(refmac, mac) : NULL;
378 if (!mac[0] || !s)
379 return 0;
380 return (s == refmac || s[-1] == ',') &&
381 (!s[strlen(mac)] || s[strlen(mac)] == ',');
/*
 * Handle a multi-line citation: .[ rudi17 .]
 *
 * ln is the line that starts with ".[".  What follows ".[" on that line
 * is written out without its newline, the next line is cited, and input
 * is skipped up to the line that starts with ".]", whose remainder is
 * written out.
 */
static void refer_block(char *ln)
{
	lnput(ln + 2, slen(ln + 2, '\n'));
	ln = lnget();
	if (!ln)
		return;
	refer_cite(ln);
	while (ln && !(ln[0] == '.' && ln[1] == ']'))
		ln = lnget();
	if (ln)
		lnput(ln + 2, -1);
}

/*
 * Handle a single line citation: .cite rudi17
 *
 * The request name and the spaces after it are written out as they are,
 * the argument is replaced by the citation, and the rest of the line
 * after the first argument is written out unchanged.
 */
static void refer_request(char *ln)
{
	int i = 1;

	while (ln[i] && ln[i] != ' ')
		i++;
	while (ln[i] == ' ')
		i++;
	lnput(ln, i);
	refer_cite(ln + i);
	while (ln[i] && ln[i] != ' ' && ln[i] != '\n')
		i++;
	lnput(ln + i, -1);
}

/*
 * Handle inline citations: \*[cite rudi17]
 *
 * The line is written out unchanged, except that the label of every call
 * to a citation macro that is closed by ']' is replaced by its citation.
 */
static void refer_inline(char *ln)
{
	char mac[256];
	char *done = ln;	/* start of the part not written yet */
	char *r = ln;

	while ((r = strchr(r, '\\'))) {
		r++;
		if (refer_macname(mac, sizeof(mac), r - 1) ||
				!refer_refmac(mac) || !strchr(r, ']'))
			continue;
		/* refer_macname() succeeded, so the name is followed by a space */
		r = strchr(r, ' ') + 1;
		lnput(done, r - done);
		refer_cite(r);
		while (*r && *r != ' ' && *r != ']')
			r++;
		done = r;
	}
	lnput(done, -1);
}

/*
 * Copy standard input to standard output, replacing citations.
 *
 * Each input line is a multi-line citation if it starts with ".[", a
 * single line citation if it is a request whose name is a citation
 * macro, and is scanned for inline citations otherwise.
 */
static void refer(void)
{
	char mac[256];
	char *ln;

	while ((ln = lnget())) {
		if (ln[0] == '.' && ln[1] == '[')
			refer_block(ln);
		else if (ln[0] == '.' && !refer_reqname(mac, sizeof(mac), ln) &&
				refer_refmac(mac))
			refer_request(ln);
		else
			refer_inline(ln);
	}
}
/* the help text that main() prints for unknown or malformed options */
static char *usage =
	"Usage neatrefer [options] <input >output\n"
	"Options:\n"
	"\t-p bib \tspecify the database file\n"
	"\t-e \taccumulate references\n"
	"\t-m \tmerge multiple references in a single .[/.] block\n"
	"\t-i \tinitials for authors' first and middle names\n"
	"\t-o xy \tcitation macro (\\*[xy label])\n"
	"\t-a \tuse author-year citation style\n"
	"\t-sa \tsort by author last names\n";
449 int main(int argc, char *argv[])
451 int i, j;
452 for (i = 1; i < argc; i++) {
453 switch (argv[i][0] == '-' ? argv[i][1] : 'h') {
454 case 'm':
455 multiref = 1;
456 break;
457 case 'e':
458 accumulate = 1;
459 break;
460 case 'p':
461 refdb = fopen(argv[i][2] ? argv[i] + 2 : argv[++i], "r");
462 if (refdb) {
463 db_parse();
464 fclose(refdb);
466 refdb = NULL;
467 break;
468 case 'o':
469 refmac = argv[i][2] ? argv[i] + 2 : argv[++i];
470 break;
471 case 'i':
472 initials = 1;
473 break;
474 case 'a':
475 refauth = 1;
476 break;
477 case 's':
478 sortall = (unsigned char) (argv[i][2] ? argv[i][2] : argv[++i][0]);
479 break;
480 default:
481 printf("%s", usage);
482 return 1;
485 if (refauth && multiref) {
486 fprintf(stderr, "refer: cannot use -m with -a\n");
487 return 1;
489 refer();
490 for (i = 0; i < refs_n; i++)
491 for (j = 0; j < LEN(refs[i].keys); j++)
492 if (refs[i].keys[j])
493 free(refs[i].keys[j]);
494 for (i = 0; i < refs_n; i++)
495 for (j = 0; j < LEN(refs[i].auth); j++)
496 if (refs[i].auth[j])
497 free(refs[i].auth[j]);
498 return 0;