2 * text-writer -- RTF-to-text translation writer code.
4 * Read RTF input, write text of document (text extraction).
6 * Wrapper must call WriterInit() once before processing any files,
7 * then set up input and call BeginFile() for each input file.
9 * This installs callbacks for the text and control token classes.
10 * The control class is necessary so that special characters such as
11 * \par, \tab, \sect, etc. can be converted.
13 * It's problematic what to do with text in headers and footers, and
14 * what to do about tables.
16 * This really is quite a stupid program; for instance, it could keep
17 * track of the current leader character and dump that out when a tab
18 * is encountered.
20 * 04 Feb 91 Paul DuBois dubois@primate.wisc.edu
22 * This software may be redistributed without restriction and used for
23 * any purpose whatsoever.
28 * - Updated for distribution 1.05.
30 * - Updated to compile under THINK C 6.0.
32 * - Added Mike Sendall's entries for Macintosh char map.
34 * - Uses charset map and output sequence map for character translation.
36 * - Updated for 1.10 distribution.
42 # include "rtf2text.h"
43 # include "charlist.h"
/*
 * Forward declarations of the writer's file-local routines.  They are
 * declared with empty (old-style) parameter lists, so the compiler does
 * not check argument types at the call sites.
 */
45 static void TextClass ();
46 static void ControlClass ();
47 static void Destination ();
48 static void SpecialChar ();
49 static void PutStdChar ();
50 static void PutLitChar ();
51 static void PutLitStr ();
/*
 * Output map, indexed by standard character code.  Each entry is the
 * output string for that character; PutStdChar treats a NULL entry as
 * "no output sequence in map".
 */
53 static char *outMap
[rtfSC_MaxChar
];
/*
 * Queue of output characters.  PutLitChar and PutLitStr add to it with
 * CHARLIST_Enqueue; RTFToBuffer drains it with CHARLIST_Dequeue.
 * NOTE(review): the meaning of the three initializer fields is defined
 * in charlist.h, which is not visible here.
 */
55 static CHARLIST charlist
= {0, NULL
, NULL
};
/*
 * RTFToBuffer -- move the queued output characters into a caller buffer.
 *
 * pBuffer      destination buffer supplied by the caller
 * nBufferSize  capacity of pBuffer
 *
 * If nBufferSize is smaller than charlist.nCount + 1, the function
 * returns charlist.nCount plus the number of '\n' characters in the
 * queue plus 1.  Otherwise characters are dequeued from charlist and
 * stored through pBuffer for as long as charlist.nCount is non-zero.
 *
 * NOTE(review): this listing omits several lines of the body (closing
 * braces, the advance of pBuffer, string termination and the final
 * return value), so the rest of the contract cannot be confirmed here.
 * NOTE(review): the size check tests only nCount + 1, while the size
 * reported back also counts one extra byte per '\n'.  Presumably each
 * newline is expanded on output; if so, a buffer that passes the check
 * could still be too small.  Verify against the full source.
 */
57 int RTFToBuffer(char* pBuffer
, int nBufferSize
);
58 int RTFToBuffer(char* pBuffer
, int nBufferSize
)
61 /* check if the buffer is big enough to hold all characters */
62 /* we require one more for the '\0' */
65 if(nBufferSize
< charlist
.nCount
+ 1) {
66 return charlist
.nCount
+ CHARLIST_CountChar(&charlist
, '\n') + 1;
69 while(charlist
.nCount
)
71 *pBuffer
= CHARLIST_Dequeue(&charlist
);
87 * Initialize the writer.
93 RTFReadOutputMap (outMap
,1);
100 /* install class callbacks */
102 RTFSetClassCallback (rtfText
, TextClass
);
103 RTFSetClassCallback (rtfControl
, ControlClass
);
110 * Write out a character. rtfMajor contains the input character, rtfMinor
111 * contains the corresponding standard character code.
113 * If the input character isn't in the charset map, try to print some
114 * representation of it.
122 if (rtfMinor
!= rtfSC_nothing
)
123 PutStdChar (rtfMinor
);
126 if (rtfMajor
< 128) /* in ASCII range */
127 sprintf (buf
, "[[%c]]", rtfMajor
);
129 sprintf (buf
, "[[\\'%02x]]", rtfMajor
);
151 * This function notices destinations that should be ignored
152 * and skips to their ends. This keeps, for instance, picture
153 * data from being considered as plain text.
163 case rtfFNContNotice
:
181 * These cases use the rtfSC_xxx codes instead of just writing
182 * out ' ', '-', '"', etc., so that the mapping for these characters
183 * can be controlled by the text-map file.
198 PutStdChar (rtfSC_space
); /* make sure cells are separated */
201 PutStdChar (rtfSC_nobrkspace
);
207 PutStdChar (rtfSC_nobrkhyphen
);
210 PutStdChar (rtfSC_bullet
);
213 PutStdChar (rtfSC_emdash
);
216 PutStdChar (rtfSC_endash
);
219 PutStdChar (rtfSC_quoteleft
);
222 PutStdChar (rtfSC_quoteright
);
225 PutStdChar (rtfSC_quotedblleft
);
228 PutStdChar (rtfSC_quotedblright
);
235 * Eventually this should keep track of the destination of the
236 * current state and only write text when in the initial state.
238 * If the output sequence is unspecified in the output map, write
239 * the character's standard name instead. This makes map deficiencies
240 * obvious and provides incentive to fix them. :-)
/*
 * PutStdChar -- produce the output sequence for a standard character.
 *
 * stdCode  standard character code, used directly as an index into outMap
 *
 * Looks up outMap[stdCode].  When the entry is NULL (no output sequence
 * in the map), a placeholder of the form "[[name]]" is formatted into
 * buf, with the name supplied by RTFStdCharName(stdCode).
 *
 * NOTE(review): this listing omits lines of the body.  The declaration
 * of buf, the end of the disabled rtfSC_nothing check and the statement
 * that finally writes the string are not visible; confirm them against
 * the full source.  No range check on stdCode is visible before the
 * outMap lookup, and sprintf is given no bound for buf.
 * NOTE(review): the earlier prototype declares this function static,
 * while this definition line does not repeat the keyword.
 */
243 void PutStdChar (int stdCode
)
246 char *oStr
= (char *) NULL
;
249 /* if (stdCode == rtfSC_nothing)
250 RTFPanic ("Unknown character code, logic error\n");
252 oStr
= outMap
[stdCode
];
253 if (oStr
== (char *) NULL
) /* no output sequence in map */
255 sprintf (buf
, "[[%s]]", RTFStdCharName (stdCode
));
/*
 * PutLitChar -- append one literal character to the output queue.
 *
 * c  character to emit; it is cast to char and enqueued on charlist.
 *
 * The disabled fputc call is presumably left over from a version that
 * wrote straight to an output stream.
 * NOTE(review): the braces of this function fall on lines omitted from
 * this listing.
 */
262 void PutLitChar (int c
)
264 CHARLIST_Enqueue(&charlist
, (char) c
);
265 /* fputc (c, ostream); */
/*
 * PutLitStr -- append the characters of a string to the output queue.
 *
 * s  string to emit
 *
 * NOTE(review): only the enqueue of *s is visible in this listing.  The
 * loop that presumably walks s up to its terminating '\0' falls on
 * omitted lines; confirm against the full source.  The disabled fputs
 * call is presumably left over from a stream-based version.
 */
269 static void PutLitStr (char *s
)
273 CHARLIST_Enqueue(&charlist
, *s
);
275 /* fputs (s, ostream); */