From 02849301298cc8272cd2ac60f3f9b3b98b3e4517 Mon Sep 17 00:00:00 2001 From: Phil Krylov Date: Thu, 17 Mar 2005 13:57:27 +0000 Subject: [PATCH] Replaced slow and outdated character set handling in RTF reader by Unicode/codepages support. Added charset->codepage conversion. --- dlls/riched20/editor.c | 2 + dlls/riched20/reader.c | 1497 ++++++------------------------------------------ dlls/riched20/rtf.h | 415 +------------- 3 files changed, 177 insertions(+), 1737 deletions(-) diff --git a/dlls/riched20/editor.c b/dlls/riched20/editor.c index 2b945dc762c..58da1b0bf71 100644 --- a/dlls/riched20/editor.c +++ b/dlls/riched20/editor.c @@ -399,6 +399,7 @@ void ME_RTFReadHook(RTF_Info *info) { if (info->formatStackTop < maxCharFormatStack) { info->formatStack[info->formatStackTop].cbSize = sizeof(info->formatStack[0]); memcpy(&info->formatStack[info->formatStackTop], &info->style->fmt, sizeof(CHARFORMAT2W)); + info->codePageStack[info->formatStackTop] = info->codePage; } info->formatStackTop++; break; @@ -411,6 +412,7 @@ void ME_RTFReadHook(RTF_Info *info) { s = ME_ApplyStyle(info->style, &info->formatStack[info->formatStackTop]); ME_ReleaseStyle(info->style); info->style = s; + info->codePage = info->codePageStack[info->formatStackTop]; break; } } diff --git a/dlls/riched20/reader.c b/dlls/riched20/reader.c index 40d4517c017..b047dd310d6 100644 --- a/dlls/riched20/reader.c +++ b/dlls/riched20/reader.c @@ -67,871 +67,12 @@ static void LookupInit (void); static void Lookup (RTF_Info *, char *); static int Hash (char*); -static void CharSetInit (RTF_Info *); -static void ReadCharSetMaps (RTF_Info *); static void RTFOutputUnicodeString( RTF_Info *info, WCHAR *str, int len ); +static void CharAttr(RTF_Info *info); +static void CharSet(RTF_Info *info); +static void DocAttr(RTF_Info *info); -/* - RTF ANSI character set (\ansi) general map - These are taken from the ISO-Latin-1 (ISO-8859-1) encodings, with - a few additions - - Field 1 is the standard character name which the character value in - field 2 maps onto. (It doesn't mean "to produce the character in field 1, - use the value in field 2.) - - The character value may be given either as a single character (which will be - converted to the ASCII value of the character), or in numeric format, either - in decimal or 0xyy as hex yy. Single or double quotes may be used to quote - characters.*/ - -int ansi_gen[] = -{ - rtfSC_formula ,0x06, - rtfSC_nobrkhyphen ,0x1e, - rtfSC_opthyphen ,0x1f, - rtfSC_space ,' ', - rtfSC_exclam ,'!', - rtfSC_quotedbl ,'"', - rtfSC_numbersign ,'#', - rtfSC_dollar ,'$', - rtfSC_percent ,'%', - rtfSC_ampersand ,'&', - rtfSC_quoteright ,'\'', - rtfSC_parenleft ,'(', - rtfSC_parenright ,')', - rtfSC_asterisk ,'*', - rtfSC_plus ,'+', - rtfSC_comma ,',', - rtfSC_hyphen ,'-', - rtfSC_period ,'.', - rtfSC_slash ,'/', - rtfSC_zero ,'0', - rtfSC_one ,'1', - rtfSC_two ,'2', - rtfSC_three ,'3', - rtfSC_four ,'4', - rtfSC_five ,'5', - rtfSC_six ,'6', - rtfSC_seven ,'7', - rtfSC_eight ,'8', - rtfSC_nine ,'9', - rtfSC_colon ,':', - rtfSC_semicolon ,';', - rtfSC_less ,'<', - rtfSC_equal ,'=', - rtfSC_greater ,'>', - rtfSC_question ,'?', - rtfSC_at ,'@', - rtfSC_A ,'A', - rtfSC_B ,'B', - rtfSC_C ,'C', - rtfSC_D ,'D', - rtfSC_E ,'E', - rtfSC_F ,'F', - rtfSC_G ,'G', - rtfSC_H ,'H', - rtfSC_I ,'I', - rtfSC_J ,'J', - rtfSC_K ,'K', - rtfSC_L ,'L', - rtfSC_M ,'M', - rtfSC_N ,'N', - rtfSC_O ,'O', - rtfSC_P ,'P', - rtfSC_Q ,'Q', - rtfSC_R ,'R', - rtfSC_S ,'S', - rtfSC_T ,'T', - rtfSC_U ,'U', - rtfSC_V ,'V', - rtfSC_W ,'W', - rtfSC_X ,'X', - rtfSC_Y ,'Y', - rtfSC_Z ,'Z', - rtfSC_bracketleft ,'[', - rtfSC_backslash ,'\\', - rtfSC_bracketright ,']', - rtfSC_asciicircum ,'^', - rtfSC_underscore ,'_', - rtfSC_quoteleft ,'`', - rtfSC_a ,'a', - rtfSC_b ,'b', - rtfSC_c ,'c', - rtfSC_d ,'d', - rtfSC_e ,'e', - rtfSC_f ,'f', - rtfSC_g ,'g', - rtfSC_h ,'h', - rtfSC_i ,'i', - rtfSC_j ,'j', - rtfSC_k ,'k', - rtfSC_l ,'l', - rtfSC_m ,'m', - rtfSC_n ,'n', - rtfSC_o ,'o', - rtfSC_p ,'p', - rtfSC_q ,'q', - rtfSC_r ,'r', - rtfSC_s ,'s', - rtfSC_t ,'t', - rtfSC_u ,'u', - rtfSC_v ,'v', - rtfSC_w ,'w', - rtfSC_x ,'x', - rtfSC_y ,'y', - rtfSC_z ,'z', - rtfSC_braceleft ,'{', - rtfSC_bar ,'|', - rtfSC_braceright ,'}', - rtfSC_asciitilde ,'~', - rtfSC_nobrkspace ,0xa0, - rtfSC_exclamdown ,0xa1, - rtfSC_cent ,0xa2, - rtfSC_sterling ,0xa3, - rtfSC_currency ,0xa4, - rtfSC_yen ,0xa5, - rtfSC_brokenbar ,0xa6, - rtfSC_section ,0xa7, - rtfSC_dieresis ,0xa8, - rtfSC_copyright ,0xa9, - rtfSC_ordfeminine ,0xaa, - rtfSC_guillemotleft ,0xab, - rtfSC_logicalnot ,0xac, - rtfSC_opthyphen ,0xad, - rtfSC_registered ,0xae, - rtfSC_macron ,0xaf, - rtfSC_degree ,0xb0, - rtfSC_plusminus ,0xb1, - rtfSC_twosuperior ,0xb2, - rtfSC_threesuperior ,0xb3, - rtfSC_acute ,0xb4, - rtfSC_mu ,0xb5, - rtfSC_paragraph ,0xb6, - rtfSC_periodcentered ,0xb7, - rtfSC_cedilla ,0xb8, - rtfSC_onesuperior ,0xb9, - rtfSC_ordmasculine ,0xba, - rtfSC_guillemotright ,0xbb, - rtfSC_onequarter ,0xbc, - rtfSC_onehalf ,0xbd, - rtfSC_threequarters ,0xbe, - rtfSC_questiondown ,0xbf, - rtfSC_Agrave ,0xc0, - rtfSC_Aacute ,0xc1, - rtfSC_Acircumflex ,0xc2, - rtfSC_Atilde ,0xc3, - rtfSC_Adieresis ,0xc4, - rtfSC_Aring ,0xc5, - rtfSC_AE ,0xc6, - rtfSC_Ccedilla ,0xc7, - rtfSC_Egrave ,0xc8, - rtfSC_Eacute ,0xc9, - rtfSC_Ecircumflex ,0xca, - rtfSC_Edieresis ,0xcb, - rtfSC_Igrave ,0xcc, - rtfSC_Iacute ,0xcd, - rtfSC_Icircumflex ,0xce, - rtfSC_Idieresis ,0xcf, - rtfSC_Eth ,0xd0, - rtfSC_Ntilde ,0xd1, - rtfSC_Ograve ,0xd2, - rtfSC_Oacute ,0xd3, - rtfSC_Ocircumflex ,0xd4, - rtfSC_Otilde ,0xd5, - rtfSC_Odieresis ,0xd6, - rtfSC_multiply ,0xd7, - rtfSC_Oslash ,0xd8, - rtfSC_Ugrave ,0xd9, - rtfSC_Uacute ,0xda, - rtfSC_Ucircumflex ,0xdb, - rtfSC_Udieresis ,0xdc, - rtfSC_Yacute ,0xdd, - rtfSC_Thorn ,0xde, - rtfSC_germandbls ,0xdf, - rtfSC_agrave ,0xe0, - rtfSC_aacute ,0xe1, - rtfSC_acircumflex ,0xe2, - rtfSC_atilde ,0xe3, - rtfSC_adieresis ,0xe4, - rtfSC_aring ,0xe5, - rtfSC_ae ,0xe6, - rtfSC_ccedilla ,0xe7, - rtfSC_egrave ,0xe8, - rtfSC_eacute ,0xe9, - rtfSC_ecircumflex ,0xea, - rtfSC_edieresis ,0xeb, - rtfSC_igrave ,0xec, - rtfSC_iacute ,0xed, - rtfSC_icircumflex ,0xee, - rtfSC_idieresis ,0xef, - rtfSC_eth ,0xf0, - rtfSC_ntilde ,0xf1, - rtfSC_ograve ,0xf2, - rtfSC_oacute ,0xf3, - rtfSC_ocircumflex ,0xf4, - rtfSC_otilde ,0xf5, - rtfSC_odieresis ,0xf6, - rtfSC_divide ,0xf7, - rtfSC_oslash ,0xf8, - rtfSC_ugrave ,0xf9, - rtfSC_uacute ,0xfa, - rtfSC_ucircumflex ,0xfb, - rtfSC_udieresis ,0xfc, - rtfSC_yacute ,0xfd, - rtfSC_thorn ,0xfe, - rtfSC_ydieresis ,0xff -}; - -/* - * RTF ANSI character set (\ansi) Symbol font map - * - * Field 1 is the standard character name which the character value in - * field 2 maps onto. (It doesn't mean "to produce the character in field 1, - * use the value in field 2.) - * - * The character value may be given either as a single character (which will be - * converted to the ASCII value of the character), or in numeric format, either - * in decimal or 0xyy as hex yy. Single or double quotes may be used to quote - * characters. - * - */ - -int ansi_sym[] = -{ - rtfSC_formula ,0x06, - rtfSC_nobrkhyphen ,0x1e, - rtfSC_opthyphen ,0x1f, - rtfSC_space ,' ', - rtfSC_exclam ,'!', - rtfSC_universal ,'"', - rtfSC_mathnumbersign ,'#', - rtfSC_existential ,'$', - rtfSC_percent ,'%', - rtfSC_ampersand ,'&', - rtfSC_suchthat ,'\\', - rtfSC_parenleft ,'(', - rtfSC_parenright ,')', - rtfSC_mathasterisk ,'*', - rtfSC_mathplus ,'+', - rtfSC_comma ,',', - rtfSC_mathminus ,'-', - rtfSC_period ,'.', - rtfSC_slash ,'/', - rtfSC_zero ,'0', - rtfSC_one ,'1', - rtfSC_two ,'2', - rtfSC_three ,'3', - rtfSC_four ,'4', - rtfSC_five ,'5', - rtfSC_six ,'6', - rtfSC_seven ,'7', - rtfSC_eight ,'8', - rtfSC_nine ,'9', - rtfSC_colon ,':', - rtfSC_semicolon ,';', - rtfSC_less ,'<', - rtfSC_mathequal ,'=', - rtfSC_greater ,'>', - rtfSC_question ,'?', - rtfSC_congruent ,'@', - rtfSC_Alpha ,'A', - rtfSC_Beta ,'B', - rtfSC_Chi ,'C', - rtfSC_Delta ,'D', - rtfSC_Epsilon ,'E', - rtfSC_Phi ,'F', - rtfSC_Gamma ,'G', - rtfSC_Eta ,'H', - rtfSC_Iota ,'I', - rtfSC_Kappa ,'K', - rtfSC_Lambda ,'L', - rtfSC_Mu ,'M', - rtfSC_Nu ,'N', - rtfSC_Omicron ,'O', - rtfSC_Pi ,'P', - rtfSC_Theta ,'Q', - rtfSC_Rho ,'R', - rtfSC_Sigma ,'S', - rtfSC_Tau ,'T', - rtfSC_Upsilon ,'U', - rtfSC_varsigma ,'V', - rtfSC_Omega ,'W', - rtfSC_Xi ,'X', - rtfSC_Psi ,'Y', - rtfSC_Zeta ,'Z', - rtfSC_bracketleft ,'[', - rtfSC_backslash ,'\\', - rtfSC_bracketright ,']', - rtfSC_asciicircum ,'^', - rtfSC_underscore ,'_', - rtfSC_quoteleft ,'`', - rtfSC_alpha ,'a', - rtfSC_beta ,'b', - rtfSC_chi ,'c', - rtfSC_delta ,'d', - rtfSC_epsilon ,'e', - rtfSC_phi ,'f', - rtfSC_gamma ,'g', - rtfSC_eta ,'h', - rtfSC_iota ,'i', - rtfSC_kappa ,'k', - rtfSC_lambda ,'l', - rtfSC_mu ,'m', - rtfSC_nu ,'n', - rtfSC_omicron ,'o', - rtfSC_pi ,'p', - rtfSC_theta ,'q', - rtfSC_rho ,'r', - rtfSC_sigma ,'s', - rtfSC_tau ,'t', - rtfSC_upsilon ,'u', - rtfSC_omega ,'w', - rtfSC_xi ,'x', - rtfSC_psi ,'y', - rtfSC_zeta ,'z', - rtfSC_braceleft ,'{', - rtfSC_bar ,'|', - rtfSC_braceright ,'}', - rtfSC_mathtilde ,'~' -}; - -/* - * Output sequence map for rtf2text - * - * Field 1 is the standard character name. Field 2 is the output sequence - * to produce for that character. - * - * The output sequence is simply a string of characters. If it contains - * whitespace, it may be quoted. If it contains quotes, it may be quoted - * with a different quote character. - * - * characters in ASCII range (32-127 - */ - -const char *text_map[] = { - "space" ," ", - "exclam" ,"!", - "quotedbl" ,"\"", - "numbersign" ,"#", - "dollar" ,"$", - "percent" ,"%", - "ampersand" ,"&", - "quoteright" ,"'", - "parenleft" ,"(", - "parenright" ,")", - "asterisk" ,"*", - "plus" ,"+", - "comma" ,",", - "hyphen" ,"-", - "period" ,".", - "slash" ,"/", - "zero" ,"0", - "one" ,"1", - "two" ,"2", - "three" ,"3", - "four" ,"4", - "five" ,"5", - "six" ,"6", - "seven" ,"7", - "eight" ,"8", - "nine" ,"9", - "colon" ,":", - "semicolon" ,";", - "less" ,"<", - "equal" ,"=", - "greater" ,">", - "question" ,"?", - "at" ,"@", - "A" ,"A", - "B" ,"B", - "C" ,"C", - "D" ,"D", - "E" ,"E", - "F" ,"F", - "G" ,"G", - "H" ,"H", - "I" ,"I", - "J" ,"J", - "K" ,"K", - "L" ,"L", - "M" ,"M", - "N" ,"N", - "O" ,"O", - "P" ,"P", - "Q" ,"Q", - "R" ,"R", - "S" ,"S", - "T" ,"T", - "U" ,"U", - "V" ,"V", - "W" ,"W", - "X" ,"X", - "Y" ,"Y", - "Z" ,"Z", - "bracketleft" ,"[", - "backslash" ,"\\", - "bracketright" ,"]", - "asciicircum" ,"^", - "underscore" ,"_", - "quoteleft" ,"`", - "a" ,"a", - "b" ,"b", - "c" ,"c", - "d" ,"d", - "e" ,"e", - "f" ,"f", - "g" ,"g", - "h" ,"h", - "i" ,"i", - "j" ,"j", - "k" ,"k", - "l" ,"l", - "m" ,"m", - "n" ,"n", - "o" ,"o", - "p" ,"p", - "q" ,"q", - "r" ,"r", - "s" ,"s", - "t" ,"t", - "u" ,"u", - "v" ,"v", - "w" ,"w", - "x" ,"x", - "y" ,"y", - "z" ,"z", - "braceleft" ,"{", - "bar" ,"|", - "braceright" ,"}", - "asciitilde" ,"~", - "AE" ,"AE", - "OE" ,"OE", - "acute" ,"'", - "ae" ,"ae", - "angleleft" ,"<", - "angleright" ,">", - "arrowboth" ,"<->", - "arrowdblboth" ,"<=>", - "arrowdblleft" ,"<=", - "arrowdblright" ,"=>", - "arrowleft" ,"<-", - "arrowright" ,"->", - "bullet" ,"o", - "cent" ,"cent", - "circumflex" ,"^", - "copyright" ,"(c)", - "copyrightsans" ,"(c)", - "degree" ,"deg.", - "divide" ,"/", - "dotlessi" ,"i", - "ellipsis" ,"...", - "emdash" ,"--", - "endash" ,"-", - "fi" ,"fi", - "fl" ,"fl", - "fraction" ,"/", - "germandbls" ,"ss", - "grave" ,"`", - "greaterequal" ,">=", - "guillemotleft" ,"<<", - "guillemotright" ,">>", - "guilsinglleft" ,"<", - "guilsinglright" ,">", - "lessequal" ,"<=", - "logicalnot" ,"~", - "mathasterisk" ,"*", - "mathequal" ,"=", - "mathminus" ,"-", - "mathnumbersign" ,"#", - "mathplus" ,"+", - "mathtilde" ,"~", - "minus" ,"-", - "mu" ,"u", - "multiply" ,"x", - "nobrkhyphen" ,"-", - "nobrkspace" ," ", - "notequal" ,"!=", - "oe" ,"oe", - "onehalf" ,"1/2", - "onequarter" ,"1/4", - "periodcentered" ,".", - "plusminus" ,"+/-", - "quotedblbase" ,",,", - "quotedblleft" ,"\"", - "quotedblright" ,"\"", - "quotesinglbase" ,",", - "registered" ,"reg.", - "registersans" ,"reg.", - "threequarters" ,"3/4", - "tilde" ,"~", - "trademark" ,"(TM)", - "trademarksans" ,"(TM)" -}; - -/* - * This array is used to map standard character names onto their numeric codes. - * The position of the name within the array is the code. - * stdcharnames.h is generated in the ../h directory. - */ - -const char *stdCharName[] = -{ - "nothing", - "space", - "exclam", - "quotedbl", - "numbersign", - "dollar", - "percent", - "ampersand", - "quoteright", - "parenleft", - "parenright", - "asterisk", - "plus", - "comma", - "hyphen", - "period", - "slash", - "zero", - "one", - "two", - "three", - "four", - "five", - "six", - "seven", - "eight", - "nine", - "colon", - "semicolon", - "less", - "equal", - "greater", - "question", - "at", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "bracketleft", - "backslash", - "bracketright", - "asciicircum", - "underscore", - "quoteleft", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "braceleft", - "bar", - "braceright", - "asciitilde", - "exclamdown", - "cent", - "sterling", - "fraction", - "yen", - "florin", - "section", - "currency", - "quotedblleft", - "guillemotleft", - "guilsinglleft", - "guilsinglright", - "fi", - "fl", - "endash", - "dagger", - "daggerdbl", - "periodcentered", - "paragraph", - "bullet", - "quotesinglbase", - "quotedblbase", - "quotedblright", - "guillemotright", - "ellipsis", - "perthousand", - "questiondown", - "grave", - "acute", - "circumflex", - "tilde", - "macron", - "breve", - "dotaccent", - "dieresis", - "ring", - "cedilla", - "hungarumlaut", - "ogonek", - "caron", - "emdash", - "AE", - "ordfeminine", - "Lslash", - "Oslash", - "OE", - "ordmasculine", - "ae", - "dotlessi", - "lslash", - "oslash", - "oe", - "germandbls", - "Aacute", - "Acircumflex", - "Adieresis", - "Agrave", - "Aring", - "Atilde", - "Ccedilla", - "Eacute", - "Ecircumflex", - "Edieresis", - "Egrave", - "Eth", - "Iacute", - "Icircumflex", - "Idieresis", - "Igrave", - "Ntilde", - "Oacute", - "Ocircumflex", - "Odieresis", - "Ograve", - "Otilde", - "Scaron", - "Thorn", - "Uacute", - "Ucircumflex", - "Udieresis", - "Ugrave", - "Yacute", - "Ydieresis", - "aacute", - "acircumflex", - "adieresis", - "agrave", - "aring", - "atilde", - "brokenbar", - "ccedilla", - "copyright", - "degree", - "divide", - "eacute", - "ecircumflex", - "edieresis", - "egrave", - "eth", - "iacute", - "icircumflex", - "idieresis", - "igrave", - "logicalnot", - "minus", - "multiply", - "ntilde", - "oacute", - "ocircumflex", - "odieresis", - "ograve", - "onehalf", - "onequarter", - "onesuperior", - "otilde", - "plusminus", - "registered", - "thorn", - "threequarters", - "threesuperior", - "trademark", - "twosuperior", - "uacute", - "ucircumflex", - "udieresis", - "ugrave", - "yacute", - "ydieresis", - "Alpha", - "Beta", - "Chi", - "Delta", - "Epsilon", - "Phi", - "Gamma", - "Eta", - "Iota", - "Kappa", - "Lambda", - "Mu", - "Nu", - "Omicron", - "Pi", - "Theta", - "Rho", - "Sigma", - "Tau", - "Upsilon", - "varUpsilon", - "Omega", - "Xi", - "Psi", - "Zeta", - "alpha", - "beta", - "chi", - "delta", - "epsilon", - "phi", - "varphi", - "gamma", - "eta", - "iota", - "kappa", - "lambda", - "mu", - "nu", - "omicron", - "pi", - "varpi", - "theta", - "vartheta", - "rho", - "sigma", - "varsigma", - "tau", - "upsilon", - "omega", - "xi", - "psi", - "zeta", - "nobrkspace", - "nobrkhyphen", - "lessequal", - "greaterequal", - "infinity", - "integral", - "notequal", - "radical", - "radicalex", - "approxequal", - "apple", - "partialdiff", - "opthyphen", - "formula", - "lozenge", - "universal", - "existential", - "suchthat", - "congruent", - "therefore", - "perpendicular", - "minute", - "club", - "diamond", - "heart", - "spade", - "arrowboth", - "arrowleft", - "arrowup", - "arrowright", - "arrowdown", - "second", - "proportional", - "equivalence", - "arrowvertex", - "arrowhorizex", - "carriagereturn", - "aleph", - "Ifraktur", - "Rfraktur", - "weierstrass", - "circlemultiply", - "circleplus", - "emptyset", - "intersection", - "union", - "propersuperset", - "reflexsuperset", - "notsubset", - "propersubset", - "reflexsubset", - "element", - "notelement", - "angle", - "gradient", - "product", - "logicaland", - "logicalor", - "arrowdblboth", - "arrowdblleft", - "arrowdblup", - "arrowdblright", - "arrowdbldown", - "angleleft", - "registersans", - "copyrightsans", - "trademarksans", - "angleright", - "mathplus", - "mathminus", - "mathasterisk", - "mathnumbersign", - "dotmath", - "mathequal", - "mathtilde", - (char *) NULL -}; int _RTFGetChar(RTF_Info *info) { @@ -1052,6 +193,10 @@ void RTFInit(RTF_Info *info) info->styleList = sp; } + info->ansiCodePage = 1252; /* Latin-1 */ + info->unicodeLength = 1; /* \uc1 is the default */ + info->codePage = info->ansiCodePage; + info->rtfClass = -1; info->pushedClass = -1; info->pushedChar = EOF; @@ -1060,9 +205,6 @@ void RTFInit(RTF_Info *info) info->rtfLinePos = 0; info->prevChar = EOF; info->bumpLine = 0; - - CharSetInit (info); - info->csTop = 0; } /* @@ -1296,13 +438,11 @@ int RTFPeekToken(RTF_Info *info) static void _RTFGetToken(RTF_Info *info) { - RTFFont *fp; - TRACE("\n"); if (info->rtfFormat == SF_TEXT) { info->rtfMajor = GetChar (info); - info->rtfMinor = rtfSC_nothing; + info->rtfMinor = 0; info->rtfParam = rtfNoParam; info->rtfTextBuf[info->rtfTextLen = 0] = '\0'; if (info->rtfMajor == EOF) @@ -1332,61 +472,76 @@ static void _RTFGetToken(RTF_Info *info) */ _RTFGetToken2 (info); - if (info->rtfClass == rtfText) /* map RTF char to standard code */ - info->rtfMinor = RTFMapChar (info, info->rtfMajor); - - /* - * If auto-charset stuff is activated, see if anything needs doing, - * like reading the charset maps or switching between them. - */ +} - if (info->autoCharSetFlags == 0) - return; - if ((info->autoCharSetFlags & rtfReadCharSet) - && RTFCheckCM (info, rtfControl, rtfCharSet)) - { - ReadCharSetMaps (info); - } - else if ((info->autoCharSetFlags & rtfSwitchCharSet) - && RTFCheckCMM (info, rtfControl, rtfCharAttr, rtfFontNum)) - { - if ((fp = RTFGetFont (info, info->rtfParam)) != (RTFFont *) NULL) +static WCHAR +RTFANSIToUnicode(RTF_Info *info, char c) +{ + WCHAR buffer[2] = { 0, 0 }; + + /* TODO: Probably caching codepage conversion tables would be faster... */ + MultiByteToWideChar(info->codePage, 0, &c, 1, buffer, 2); + return buffer[0]; +} + + +static int +RTFCharSetToCodePage(RTF_Info *info, int charset) +{ + switch (charset) + { + case ANSI_CHARSET: + case DEFAULT_CHARSET: + return 0; + case SYMBOL_CHARSET: + return CP_SYMBOL; + case MAC_CHARSET: + return CP_MACCP; + case SHIFTJIS_CHARSET: + return 932; + case HANGEUL_CHARSET: + return 949; + case JOHAB_CHARSET: + return 1361; + case GB2312_CHARSET: + return 936; + case CHINESEBIG5_CHARSET: + return 950; + case GREEK_CHARSET: + return 1253; + case TURKISH_CHARSET: + return 1254; + case VIETNAMESE_CHARSET: + return 1258; + case HEBREW_CHARSET: + return 1255; + case ARABIC_CHARSET: + return 1256; + case BALTIC_CHARSET: + return 1257; + case RUSSIAN_CHARSET: + return 1251; + case THAI_CHARSET: + return 874; + case EASTEUROPE_CHARSET: + return 1250; + case OEM_CHARSET: + return CP_OEMCP; + default: { - if (strncmp (fp->rtfFName, "Symbol", 6) == 0) - info->curCharSet = rtfCSSymbol; + CHARSETINFO csi; + DWORD n = (DWORD)charset; + + /* FIXME: TranslateCharsetInfo does not work as good as it + * should, so let's use it only when all else fails */ + if (!TranslateCharsetInfo(&n, &csi, TCI_SRCCHARSET)) + RTFMsg(info,"%s: unknown charset %u\n", __FUNCTION__, charset); else - info->curCharSet = rtfCSGeneral; - RTFSetCharSet (info, info->curCharSet); - } - } - else if ((info->autoCharSetFlags & rtfSwitchCharSet) && info->rtfClass == rtfGroup) - { - switch (info->rtfMajor) - { - case rtfBeginGroup: - if (info->csTop >= maxCSStack) - RTFPanic (info, "_RTFGetToken: stack overflow"); - info->csStack[info->csTop++] = info->curCharSet; - break; - case rtfEndGroup: - /* - * If stack top is 1 at this point, we are ending the - * group started by the initial {, which ends the - * RTF stream - */ - if (info->csTop <= 0) - RTFPanic (info,"_RTFGetToken: stack underflow"); - else if (info->csTop == 1) - info->rtfClass = rtfEOF; - else - { - info->curCharSet = info->csStack[--info->csTop]; - RTFSetCharSet (info, info->curCharSet); - } - break; + return csi.ciACP; } } + return 0; } @@ -1448,6 +603,10 @@ static void _RTFGetToken2(RTF_Info *info) else { info->rtfClass = rtfText; + + if (c & 0x80) + info->rtfMajor = RTFANSIToUnicode(info, c); + else info->rtfMajor = c; } return; @@ -1473,8 +632,7 @@ static void _RTFGetToken2(RTF_Info *info) { /* should do isxdigit check! */ info->rtfClass = rtfText; - info->rtfMajor = RTFCharToHex (c) * 16 - + RTFCharToHex (c2); + info->rtfMajor = RTFANSIToUnicode(info, RTFCharToHex (c) * 16 + RTFCharToHex (c2)); return; } /* early eof, whoops (class is rtfUnknown) */ @@ -1627,247 +785,6 @@ void RTFSetToken(RTF_Info *info, int class, int major, int minor, int param, con /* ---------------------------------------------------------------------- */ /* - * Routines to handle mapping of RTF character sets - * onto standard characters. - * - * RTFStdCharCode(name) given char name, produce numeric code - * RTFStdCharName(code) given char code, return name - * RTFMapChar(c) map input (RTF) char code to std code - * RTFSetCharSet(id) select given charset map - * RTFGetCharSet() get current charset map - * - * See ../h/README for more information about charset names and codes. - */ - - -/* - * Initialize charset stuff. - */ - -static void CharSetInit(RTF_Info *info) -{ - TRACE("\n"); - - info->autoCharSetFlags = (rtfReadCharSet | rtfSwitchCharSet); - RTFFree (info->genCharSetFile); - info->genCharSetFile = (char *) NULL; - info->haveGenCharSet = 0; - RTFFree (info->symCharSetFile); - info->symCharSetFile = (char *) NULL; - info->haveSymCharSet = 0; - info->curCharSet = rtfCSGeneral; - info->curCharCode = info->genCharCode; -} - - -/* - * Specify the name of a file to be read when auto-charset-file reading is - * done. - */ - -void RTFSetCharSetMap (RTF_Info *info, char *name, int csId) -{ - TRACE("\n"); - - if ((name = RTFStrSave (name)) == (char *) NULL) /* make copy */ - RTFPanic (info,"RTFSetCharSetMap: out of memory"); - switch (csId) - { - case rtfCSGeneral: - RTFFree (info->genCharSetFile); /* free any previous value */ - info->genCharSetFile = name; - break; - case rtfCSSymbol: - RTFFree (info->symCharSetFile); /* free any previous value */ - info->symCharSetFile = name; - break; - } -} - - -/* - * Do auto-charset-file reading. - * will always use the ansi charset no mater what the value - * of the rtfTextBuf is. - * - * TODO: add support for other charset in the future. - * - */ - -static void ReadCharSetMaps(RTF_Info *info) -{ - char buf[rtfBufSiz]; - - TRACE("\n"); - - if (info->genCharSetFile != (char *) NULL) - (void) strcpy (buf, info->genCharSetFile); - else - sprintf (buf, "%s-gen", &info->rtfTextBuf[1]); - if (RTFReadCharSetMap (info, rtfCSGeneral) == 0) - RTFPanic (info,"ReadCharSetMaps: Cannot read charset map %s", buf); - if (info->symCharSetFile != (char *) NULL) - (void) strcpy (buf, info->symCharSetFile); - else - sprintf (buf, "%s-sym", &info->rtfTextBuf[1]); - if (RTFReadCharSetMap (info, rtfCSSymbol) == 0) - RTFPanic (info,"ReadCharSetMaps: Cannot read charset map %s", buf); -} - - - -/* - * Convert a CharSetMap (character_name, character) into - * this form : array[character_ident] = character; - */ - -int RTFReadCharSetMap(RTF_Info *info, int csId) -{ - int *stdCodeArray; - unsigned int i; - - TRACE("\n"); - - switch (csId) - { - default: - return (0); /* illegal charset id */ - case rtfCSGeneral: - - info->haveGenCharSet = 1; - stdCodeArray = info->genCharCode; - for (i = 0; i < charSetSize; i++) - { - stdCodeArray[i] = rtfSC_nothing; - } - - for ( i = 0 ; i< sizeof(ansi_gen)/(sizeof(int));i+=2) - { - stdCodeArray[ ansi_gen[i+1] ] = ansi_gen[i]; - } - break; - - case rtfCSSymbol: - - info->haveSymCharSet = 1; - stdCodeArray = info->symCharCode; - for (i = 0; i < charSetSize; i++) - { - stdCodeArray[i] = rtfSC_nothing; - } - - for ( i = 0 ; i< sizeof(ansi_sym)/(sizeof(int));i+=2) - { - stdCodeArray[ ansi_sym[i+1] ] = ansi_sym[i]; - } - break; - } - - return (1); -} - - -/* - * Given a standard character name (a string), find its code (a number). - * Return -1 if name is unknown. - */ - -int RTFStdCharCode(RTF_Info *info, const char *name) -{ - int i; - - TRACE("\n"); - - for (i = 0; i < rtfSC_MaxChar; i++) - { - if (strcmp (name, stdCharName[i]) == 0) - return (i); - } - return (-1); -} - - -/* - * Given a standard character code (a number), find its name (a string). - * Return NULL if code is unknown. - */ - -const char *RTFStdCharName(RTF_Info *info, int code) -{ - if (code < 0 || code >= rtfSC_MaxChar) - return ((char *) NULL); - return (stdCharName[code]); -} - - -/* - * Given an RTF input character code, find standard character code. - * The translator should read the appropriate charset maps when it finds a - * charset control. However, the file might not contain one. In this - * case, no map will be available. When the first attempt is made to - * map a character under these circumstances, RTFMapChar() assumes ANSI - * and reads the map as necessary. - */ - -int RTFMapChar(RTF_Info *info, int c) -{ - TRACE("\n"); - - switch (info->curCharSet) - { - case rtfCSGeneral: - if (!info->haveGenCharSet) - { - if (RTFReadCharSetMap (info, rtfCSGeneral) == 0) - RTFPanic (info,"RTFMapChar: cannot read ansi-gen"); - } - break; - case rtfCSSymbol: - if (!info->haveSymCharSet) - { - if (RTFReadCharSetMap (info, rtfCSSymbol) == 0) - RTFPanic (info,"RTFMapChar: cannot read ansi-sym"); - } - break; - } - if (c < 0 || c >= charSetSize) - return (rtfSC_nothing); - return (info->curCharCode[c]); -} - - -/* - * Set the current character set. If csId is illegal, uses general charset. - */ - -void RTFSetCharSet(RTF_Info *info, int csId) -{ - TRACE("\n"); - - switch (csId) - { - default: /* use general if csId unknown */ - case rtfCSGeneral: - info->curCharCode = info->genCharCode; - info->curCharSet = csId; - break; - case rtfCSSymbol: - info->curCharCode = info->symCharCode; - info->curCharSet = csId; - break; - } -} - - -int RTFGetCharSet(RTF_Info *info) -{ - return (info->curCharSet); -} - - -/* ---------------------------------------------------------------------- */ - -/* * Special destination readers. They gobble the destination so the * writer doesn't have to deal with them. That's wrong for any * translator that wants to process any of these itself. In that @@ -1970,6 +887,8 @@ static void ReadFontTbl(RTF_Info *info) break; /* ignore unknown? */ case rtfFontCharSet: fp->rtfFCharSet = info->rtfParam; + if (!fp->rtfFCodePage) + fp->rtfFCodePage = RTFCharSetToCodePage(info, info->rtfParam); break; case rtfFontPitch: fp->rtfFPitch = info->rtfParam; @@ -2786,6 +1705,9 @@ static RTFKey rtfKey[] = { rtfDocAttr, rtfRTLDoc, "rtldoc", 0 }, { rtfDocAttr, rtfLTRDoc, "ltrdoc", 0 }, + { rtfDocAttr, rtfAnsiCodePage, "ansicpg", 0 }, + { rtfDocAttr, rtfUnicodeLength, "uc", 0 }, + /* * Style attributes */ @@ -3284,7 +2206,7 @@ typedef struct tagRTFHashTableEntry { RTFKey **value; } RTFHashTableEntry; -static RTFHashTableEntry rtfHashTable[RTF_KEY_COUNT]; +static RTFHashTableEntry rtfHashTable[RTF_KEY_COUNT * 2]; /* @@ -3298,12 +2220,12 @@ static void LookupInit(void) if (inited == 0) { - memset(rtfHashTable, 0, RTF_KEY_COUNT * sizeof(*rtfHashTable)); + memset(rtfHashTable, 0, RTF_KEY_COUNT * 2 * sizeof(*rtfHashTable)); for (rp = rtfKey; rp->rtfKStr != (char *) NULL; rp++) { int index; rp->rtfKHash = Hash ((char*)rp->rtfKStr); - index = rp->rtfKHash % RTF_KEY_COUNT; + index = rp->rtfKHash % (RTF_KEY_COUNT * 2); if (!rtfHashTable[index].count) rtfHashTable[index].value = (void *)RTFAlloc(sizeof(RTFKey *)); else @@ -3330,7 +2252,7 @@ static void Lookup(RTF_Info *info, char *s) TRACE("\n"); ++s; /* skip over the leading \ character */ hash = Hash (s); - entry = &rtfHashTable[hash % RTF_KEY_COUNT]; + entry = &rtfHashTable[hash % (RTF_KEY_COUNT * 2)]; for (i = 0; i < entry->count; i++) { rp = entry->value[i]; @@ -3460,51 +2382,6 @@ int RTFHexToChar(int i) /* ---------------------------------------------------------------------- */ /* - * RTFReadOutputMap() -- Read output translation map - */ - -/* - * Read in an array describing the relation between the standard character set - * and an RTF translator's corresponding output sequences. Each line consists - * of a standard character name and the output sequence for that character. - * - * outMap is an array of strings into which the sequences should be placed. - * It should be declared like this in the calling program: - * - * char *outMap[rtfSC_MaxChar]; - * - * reinit should be non-zero if outMap should be initialized - * zero otherwise. - * - */ - -int RTFReadOutputMap(RTF_Info *info, char *outMap[], int reinit) -{ - unsigned int i; - int stdCode; - - if (reinit) - { - for (i = 0; i < rtfSC_MaxChar; i++) - { - outMap[i] = (char *) NULL; - } - } - - for (i=0 ;i< sizeof(text_map)/sizeof(char*); i+=2) - { - const char *name = text_map[i]; - const char *seq = text_map[i+1]; - stdCode = RTFStdCharCode( info, name ); - outMap[stdCode] = (char*)seq; - } - - return (1); -} - -/* ---------------------------------------------------------------------- */ - -/* * Open a library file. */ @@ -3598,9 +2475,7 @@ static void TextClass (RTF_Info *info); static void ControlClass (RTF_Info *info); static void Destination (RTF_Info *info); static void SpecialChar (RTF_Info *info); -static void PutStdChar (RTF_Info *info, int stdCode); static void PutLitChar (RTF_Info *info, int c); -static void PutLitStr (RTF_Info *info, char *s); /* * Initialize the writer. @@ -3609,7 +2484,6 @@ static void PutLitStr (RTF_Info *info, char *s); void WriterInit (RTF_Info *info ) { - RTFReadOutputMap (info, info->outMap,1); } @@ -3635,40 +2509,6 @@ TextClass (RTF_Info *info) PutLitChar (info, info->rtfMajor); } -/* - * Write out a character. rtfMajor contains the input character, rtfMinor - * contains the corresponding standard character code. - * - * If the input character isn't in the charset map, try to print some - * representation of it. - * - * I'm not removing it, because it may be helpful if someone else decides - * to rewrite the character handler in a i18n-friendly way - */ -#if 0 -static void -TextClass_orig (RTF_Info *info) -{ - char buf[rtfBufSiz]; - - TRACE("\n"); - - if (info->rtfFormat == SF_TEXT) - PutLitChar (info, info->rtfMajor); - else if (info->rtfMinor != rtfSC_nothing) - PutStdChar (info, info->rtfMinor); - else - { - if (info->rtfMajor < 256) /* in ASCII range */ - PutLitChar(info, info->rtfMajor); - else { - sprintf (buf, "[[\\'%02x]]", info->rtfMajor); - PutLitStr (info, buf); - } - } -} -#endif - static void ControlClass (RTF_Info *info) @@ -3677,9 +2517,18 @@ ControlClass (RTF_Info *info) switch (info->rtfMajor) { + case rtfCharAttr: + CharAttr(info); + break; + case rtfCharSet: + CharSet(info); + break; case rtfDestination: Destination (info); break; + case rtfDocAttr: + DocAttr(info); + break; case rtfSpecialChar: SpecialChar (info); break; @@ -3687,6 +2536,38 @@ ControlClass (RTF_Info *info) } +static void +CharAttr(RTF_Info *info) +{ + switch (info->rtfMinor) + { + case rtfFontNum: + info->codePage = RTFGetFont(info, info->rtfParam)->rtfFCodePage; + break; + } +} + + +static void +CharSet(RTF_Info *info) +{ + switch (info->rtfMinor) + { + case rtfAnsiCharSet: + info->ansiCodePage = 1252; /* Latin-1 */ + break; + case rtfMacCharSet: + info->ansiCodePage = 10000; /* MacRoman */ + break; + case rtfPcCharSet: + info->ansiCodePage = 437; + break; + case rtfPcaCharSet: + info->ansiCodePage = 850; + break; + } +} + /* * This function notices destinations that aren't explicitly handled * and skips to their ends. This keeps, for instance, picture @@ -3701,13 +2582,23 @@ Destination (RTF_Info *info) RTFSkipGroup (info); } -/* - * The reason these use the rtfSC_xxx thingies instead of just writing - * out ' ', '-', '"', etc., is so that the mapping for these characters - * can be controlled by the text-map file. - */ -void SpecialChar (RTF_Info *info) +static void +DocAttr(RTF_Info *info) +{ + switch (info->rtfMinor) + { + case rtfAnsiCodePage: + info->ansiCodePage = info->rtfParam; + break; + case rtfUnicodeLength: + info->unicodeLength = info->rtfParam; + break; + } +} + + +static void SpecialChar (RTF_Info *info) { TRACE("\n"); @@ -3726,16 +2617,23 @@ void SpecialChar (RTF_Info *info) case rtfUnicode: { WCHAR buf[2]; + int i; + buf[0] = info->rtfParam; buf[1] = 0; RTFFlushOutputBuffer(info); RTFOutputUnicodeString(info, buf, 1); + /* After \u we must skip number of character tokens set by \ucN */ + for (i = 0; i < info->unicodeLength; i++) + { RTFGetToken(info); - if (info->rtfClass != rtfText && info->rtfMajor != '?') + if (info->rtfClass != rtfText) { - ERR("The character behind \\u is not a question mark, but (%d,%d,%d)\n", + ERR("The token behind \\u is not text, but (%d,%d,%d)\n", info->rtfClass, info->rtfMajor, info->rtfMinor); + RTFUngetToken(info); + } } break; } @@ -3747,99 +2645,50 @@ void SpecialChar (RTF_Info *info) PutLitChar (info, '\n'); break; case rtfCell: - PutStdChar (info, rtfSC_space); /* make sure cells are separated */ + PutLitChar (info, ' '); /* make sure cells are separated */ break; case rtfNoBrkSpace: - PutStdChar (info, rtfSC_nobrkspace); + PutLitChar (info, 0x00A0); break; case rtfTab: PutLitChar (info, '\t'); break; case rtfNoBrkHyphen: - PutStdChar (info, rtfSC_nobrkhyphen); + PutLitChar (info, 0x2011); break; case rtfBullet: - PutStdChar (info, rtfSC_bullet); + PutLitChar (info, 0x2022); break; case rtfEmDash: - PutStdChar (info, rtfSC_emdash); + PutLitChar (info, 0x2014); break; case rtfEnDash: - PutStdChar (info, rtfSC_endash); + PutLitChar (info, 0x2013); break; case rtfLQuote: - PutStdChar (info, rtfSC_quoteleft); + PutLitChar (info, 0x2018); break; case rtfRQuote: - PutStdChar (info, rtfSC_quoteright); + PutLitChar (info, 0x2019); break; case rtfLDblQuote: - PutStdChar (info, rtfSC_quotedblleft); + PutLitChar (info, 0x201C); break; case rtfRDblQuote: - PutStdChar (info, rtfSC_quotedblright); + PutLitChar (info, 0x201D); break; } } -/* - * Eventually this should keep track of the destination of the - * current state and only write text when in the initial state. - * - * If the output sequence is unspecified in the output map, write - * the character's standard name instead. This makes map deficiencies - * obvious and provides incentive to fix it. :-) - */ - -void PutStdChar (RTF_Info *info, int stdCode) -{ - - char *oStr = (char *) NULL; - char buf[rtfBufSiz]; - -/* if (stdCode == rtfSC_nothing) - RTFPanic ("Unknown character code, logic error\n"); -*/ - TRACE("\n"); - - oStr = info->outMap[stdCode]; - if (oStr == (char *) NULL) /* no output sequence in map */ - { - sprintf (buf, "[[%s]]", RTFStdCharName (info, stdCode)); - oStr = buf; - } - PutLitStr (info, oStr); -} - -void PutLitChar (RTF_Info *info, int c) +static void PutLitChar (RTF_Info *info, int c) { if( info->dwOutputCount >= ( sizeof info->OutputBuffer - 1 ) ) RTFFlushOutputBuffer( info ); info->OutputBuffer[info->dwOutputCount++] = c; } -void RTFOutputANSIStringOrig( RTF_Info *info, char *str, int len ) -{ - assert(str[len] == '\0'); - if (len) { - SendMessageA( info->hwndEdit, EM_REPLACESEL, FALSE, (LPARAM) str); - } -} - -void RTFOutputANSIString( RTF_Info *info, char *str, int len ) -{ - assert(str[len] == '\0'); - if (len) { - WCHAR *buf = ALLOC_N_OBJ(WCHAR, len); - - len = MultiByteToWideChar(CP_ACP, 0, str, len, buf, len); - ME_InsertTextFromCursor( info->editor, 0, buf, len, info->style ); - FREE_OBJ(buf); - } -} - -void RTFOutputUnicodeString( RTF_Info *info, WCHAR *str, int len ) +static void RTFOutputUnicodeString( RTF_Info *info, WCHAR *str, int len ) { assert(str[len] == '\0'); if (len) { @@ -3847,24 +2696,10 @@ void RTFOutputUnicodeString( RTF_Info *info, WCHAR *str, int len ) } } + void RTFFlushOutputBuffer( RTF_Info *info ) { info->OutputBuffer[info->dwOutputCount] = 0; - RTFOutputANSIString(info, info->OutputBuffer, info->dwOutputCount); + RTFOutputUnicodeString(info, info->OutputBuffer, info->dwOutputCount); info->dwOutputCount = 0; } - -static void PutLitStr (RTF_Info *info, char *str ) -{ - int len = strlen( str ); - - if( ( len + info->dwOutputCount + 1 ) > sizeof info->OutputBuffer ) - RTFFlushOutputBuffer( info ); - if( ( len + 1 ) >= sizeof info->OutputBuffer ) - { - RTFOutputANSIString(info, str, len); - return; - } - strcpy( &info->OutputBuffer[info->dwOutputCount], str ); - info->dwOutputCount += len; -} diff --git a/dlls/riched20/rtf.h b/dlls/riched20/rtf.h index ede410eedca..252be08e54c 100644 --- a/dlls/riched20/rtf.h +++ b/dlls/riched20/rtf.h @@ -11,365 +11,6 @@ #include "richedit.h" -/* The following defines are automatically generated. Do not edit. */ - - -/* These must be sequential beginning from zero */ - -#define rtfSC_nothing 0 -#define rtfSC_space 1 -#define rtfSC_exclam 2 -#define rtfSC_quotedbl 3 -#define rtfSC_numbersign 4 -#define rtfSC_dollar 5 -#define rtfSC_percent 6 -#define rtfSC_ampersand 7 -#define rtfSC_quoteright 8 -#define rtfSC_parenleft 9 -#define rtfSC_parenright 10 -#define rtfSC_asterisk 11 -#define rtfSC_plus 12 -#define rtfSC_comma 13 -#define rtfSC_hyphen 14 -#define rtfSC_period 15 -#define rtfSC_slash 16 -#define rtfSC_zero 17 -#define rtfSC_one 18 -#define rtfSC_two 19 -#define rtfSC_three 20 -#define rtfSC_four 21 -#define rtfSC_five 22 -#define rtfSC_six 23 -#define rtfSC_seven 24 -#define rtfSC_eight 25 -#define rtfSC_nine 26 -#define rtfSC_colon 27 -#define rtfSC_semicolon 28 -#define rtfSC_less 29 -#define rtfSC_equal 30 -#define rtfSC_greater 31 -#define rtfSC_question 32 -#define rtfSC_at 33 -#define rtfSC_A 34 -#define rtfSC_B 35 -#define rtfSC_C 36 -#define rtfSC_D 37 -#define rtfSC_E 38 -#define rtfSC_F 39 -#define rtfSC_G 40 -#define rtfSC_H 41 -#define rtfSC_I 42 -#define rtfSC_J 43 -#define rtfSC_K 44 -#define rtfSC_L 45 -#define rtfSC_M 46 -#define rtfSC_N 47 -#define rtfSC_O 48 -#define rtfSC_P 49 -#define rtfSC_Q 50 -#define rtfSC_R 51 -#define rtfSC_S 52 -#define rtfSC_T 53 -#define rtfSC_U 54 -#define rtfSC_V 55 -#define rtfSC_W 56 -#define rtfSC_X 57 -#define rtfSC_Y 58 -#define rtfSC_Z 59 -#define rtfSC_bracketleft 60 -#define rtfSC_backslash 61 -#define rtfSC_bracketright 62 -#define rtfSC_asciicircum 63 -#define rtfSC_underscore 64 -#define rtfSC_quoteleft 65 -#define rtfSC_a 66 -#define rtfSC_b 67 -#define rtfSC_c 68 -#define rtfSC_d 69 -#define rtfSC_e 70 -#define rtfSC_f 71 -#define rtfSC_g 72 -#define rtfSC_h 73 -#define rtfSC_i 74 -#define rtfSC_j 75 -#define rtfSC_k 76 -#define rtfSC_l 77 -#define rtfSC_m 78 -#define rtfSC_n 79 -#define rtfSC_o 80 -#define rtfSC_p 81 -#define rtfSC_q 82 -#define rtfSC_r 83 -#define rtfSC_s 84 -#define rtfSC_t 85 -#define rtfSC_u 86 -#define rtfSC_v 87 -#define rtfSC_w 88 -#define rtfSC_x 89 -#define rtfSC_y 90 -#define rtfSC_z 91 -#define rtfSC_braceleft 92 -#define rtfSC_bar 93 -#define rtfSC_braceright 94 -#define rtfSC_asciitilde 95 -#define rtfSC_exclamdown 96 -#define rtfSC_cent 97 -#define rtfSC_sterling 98 -#define rtfSC_fraction 99 -#define rtfSC_yen 100 -#define rtfSC_florin 101 -#define rtfSC_section 102 -#define rtfSC_currency 103 -#define rtfSC_quotedblleft 104 -#define rtfSC_guillemotleft 105 -#define rtfSC_guilsinglleft 106 -#define rtfSC_guilsinglright 107 -#define rtfSC_fi 108 -#define rtfSC_fl 109 -#define rtfSC_endash 110 -#define rtfSC_dagger 111 -#define rtfSC_daggerdbl 112 -#define rtfSC_periodcentered 113 -#define rtfSC_paragraph 114 -#define rtfSC_bullet 115 -#define rtfSC_quotesinglbase 116 -#define rtfSC_quotedblbase 117 -#define rtfSC_quotedblright 118 -#define rtfSC_guillemotright 119 -#define rtfSC_ellipsis 120 -#define rtfSC_perthousand 121 -#define rtfSC_questiondown 122 -#define rtfSC_grave 123 -#define rtfSC_acute 124 -#define rtfSC_circumflex 125 -#define rtfSC_tilde 126 -#define rtfSC_macron 127 -#define rtfSC_breve 128 -#define rtfSC_dotaccent 129 -#define rtfSC_dieresis 130 -#define rtfSC_ring 131 -#define rtfSC_cedilla 132 -#define rtfSC_hungarumlaut 133 -#define rtfSC_ogonek 134 -#define rtfSC_caron 135 -#define rtfSC_emdash 136 -#define rtfSC_AE 137 -#define rtfSC_ordfeminine 138 -#define rtfSC_Lslash 139 -#define rtfSC_Oslash 140 -#define rtfSC_OE 141 -#define rtfSC_ordmasculine 142 -#define rtfSC_ae 143 -#define rtfSC_dotlessi 144 -#define rtfSC_lslash 145 -#define rtfSC_oslash 146 -#define rtfSC_oe 147 -#define rtfSC_germandbls 148 -#define rtfSC_Aacute 149 -#define rtfSC_Acircumflex 150 -#define rtfSC_Adieresis 151 -#define rtfSC_Agrave 152 -#define rtfSC_Aring 153 -#define rtfSC_Atilde 154 -#define rtfSC_Ccedilla 155 -#define rtfSC_Eacute 156 -#define rtfSC_Ecircumflex 157 -#define rtfSC_Edieresis 158 -#define rtfSC_Egrave 159 -#define rtfSC_Eth 160 -#define rtfSC_Iacute 161 -#define rtfSC_Icircumflex 162 -#define rtfSC_Idieresis 163 -#define rtfSC_Igrave 164 -#define rtfSC_Ntilde 165 -#define rtfSC_Oacute 166 -#define rtfSC_Ocircumflex 167 -#define rtfSC_Odieresis 168 -#define rtfSC_Ograve 169 -#define rtfSC_Otilde 170 -#define rtfSC_Scaron 171 -#define rtfSC_Thorn 172 -#define rtfSC_Uacute 173 -#define rtfSC_Ucircumflex 174 -#define rtfSC_Udieresis 175 -#define rtfSC_Ugrave 176 -#define rtfSC_Yacute 177 -#define rtfSC_Ydieresis 178 -#define rtfSC_aacute 179 -#define rtfSC_acircumflex 180 -#define rtfSC_adieresis 181 -#define rtfSC_agrave 182 -#define rtfSC_aring 183 -#define rtfSC_atilde 184 -#define rtfSC_brokenbar 185 -#define rtfSC_ccedilla 186 -#define rtfSC_copyright 187 -#define rtfSC_degree 188 -#define rtfSC_divide 189 -#define rtfSC_eacute 190 -#define rtfSC_ecircumflex 191 -#define rtfSC_edieresis 192 -#define rtfSC_egrave 193 -#define rtfSC_eth 194 -#define rtfSC_iacute 195 -#define rtfSC_icircumflex 196 -#define rtfSC_idieresis 197 -#define rtfSC_igrave 198 -#define rtfSC_logicalnot 199 -#define rtfSC_minus 200 -#define rtfSC_multiply 201 -#define rtfSC_ntilde 202 -#define rtfSC_oacute 203 -#define rtfSC_ocircumflex 204 -#define rtfSC_odieresis 205 -#define rtfSC_ograve 206 -#define rtfSC_onehalf 207 -#define rtfSC_onequarter 208 -#define rtfSC_onesuperior 209 -#define rtfSC_otilde 210 -#define rtfSC_plusminus 211 -#define rtfSC_registered 212 -#define rtfSC_thorn 213 -#define rtfSC_threequarters 214 -#define rtfSC_threesuperior 215 -#define rtfSC_trademark 216 -#define rtfSC_twosuperior 217 -#define rtfSC_uacute 218 -#define rtfSC_ucircumflex 219 -#define rtfSC_udieresis 220 -#define rtfSC_ugrave 221 -#define rtfSC_yacute 222 -#define rtfSC_ydieresis 223 -#define rtfSC_Alpha 224 -#define rtfSC_Beta 225 -#define rtfSC_Chi 226 -#define rtfSC_Delta 227 -#define rtfSC_Epsilon 228 -#define rtfSC_Phi 229 -#define rtfSC_Gamma 230 -#define rtfSC_Eta 231 -#define rtfSC_Iota 232 -#define rtfSC_Kappa 233 -#define rtfSC_Lambda 234 -#define rtfSC_Mu 235 -#define rtfSC_Nu 236 -#define rtfSC_Omicron 237 -#define rtfSC_Pi 238 -#define rtfSC_Theta 239 -#define rtfSC_Rho 240 -#define rtfSC_Sigma 241 -#define rtfSC_Tau 242 -#define rtfSC_Upsilon 243 -#define rtfSC_varUpsilon 244 -#define rtfSC_Omega 245 -#define rtfSC_Xi 246 -#define rtfSC_Psi 247 -#define rtfSC_Zeta 248 -#define rtfSC_alpha 249 -#define rtfSC_beta 250 -#define rtfSC_chi 251 -#define rtfSC_delta 252 -#define rtfSC_epsilon 253 -#define rtfSC_phi 254 -#define rtfSC_varphi 255 -#define rtfSC_gamma 256 -#define rtfSC_eta 257 -#define rtfSC_iota 258 -#define rtfSC_kappa 259 -#define rtfSC_lambda 260 -#define rtfSC_mu 261 -#define rtfSC_nu 262 -#define rtfSC_omicron 263 -#define rtfSC_pi 264 -#define rtfSC_varpi 265 -#define rtfSC_theta 266 -#define rtfSC_vartheta 267 -#define rtfSC_rho 268 -#define rtfSC_sigma 269 -#define rtfSC_varsigma 270 -#define rtfSC_tau 271 -#define rtfSC_upsilon 272 -#define rtfSC_omega 273 -#define rtfSC_xi 274 -#define rtfSC_psi 275 -#define rtfSC_zeta 276 -#define rtfSC_nobrkspace 277 -#define rtfSC_nobrkhyphen 278 -#define rtfSC_lessequal 279 -#define rtfSC_greaterequal 280 -#define rtfSC_infinity 281 -#define rtfSC_integral 282 -#define rtfSC_notequal 283 -#define rtfSC_radical 284 -#define rtfSC_radicalex 285 -#define rtfSC_approxequal 286 -#define rtfSC_apple 287 -#define rtfSC_partialdiff 288 -#define rtfSC_opthyphen 289 -#define rtfSC_formula 290 -#define rtfSC_lozenge 291 -#define rtfSC_universal 292 -#define rtfSC_existential 293 -#define rtfSC_suchthat 294 -#define rtfSC_congruent 295 -#define rtfSC_therefore 296 -#define rtfSC_perpendicular 297 -#define rtfSC_minute 298 -#define rtfSC_club 299 -#define rtfSC_diamond 300 -#define rtfSC_heart 301 -#define rtfSC_spade 302 -#define rtfSC_arrowboth 303 -#define rtfSC_arrowleft 304 -#define rtfSC_arrowup 305 -#define rtfSC_arrowright 306 -#define rtfSC_arrowdown 307 -#define rtfSC_second 308 -#define rtfSC_proportional 309 -#define rtfSC_equivalence 310 -#define rtfSC_arrowvertex 311 -#define rtfSC_arrowhorizex 312 -#define rtfSC_carriagereturn 313 -#define rtfSC_aleph 314 -#define rtfSC_Ifraktur 315 -#define rtfSC_Rfraktur 316 -#define rtfSC_weierstrass 317 -#define rtfSC_circlemultiply 318 -#define rtfSC_circleplus 319 -#define rtfSC_emptyset 320 -#define rtfSC_intersection 321 -#define rtfSC_union 322 -#define rtfSC_propersuperset 323 -#define rtfSC_reflexsuperset 324 -#define rtfSC_notsubset 325 -#define rtfSC_propersubset 326 -#define rtfSC_reflexsubset 327 -#define rtfSC_element 328 -#define rtfSC_notelement 329 -#define rtfSC_angle 330 -#define rtfSC_gradient 331 -#define rtfSC_product 332 -#define rtfSC_logicaland 333 -#define rtfSC_logicalor 334 -#define rtfSC_arrowdblboth 335 -#define rtfSC_arrowdblleft 336 -#define rtfSC_arrowdblup 337 -#define rtfSC_arrowdblright 338 -#define rtfSC_arrowdbldown 339 -#define rtfSC_angleleft 340 -#define rtfSC_registersans 341 -#define rtfSC_copyrightsans 342 -#define rtfSC_trademarksans 343 -#define rtfSC_angleright 344 -#define rtfSC_mathplus 345 -#define rtfSC_mathminus 346 -#define rtfSC_mathasterisk 347 -#define rtfSC_mathnumbersign 348 -#define rtfSC_dotmath 349 -#define rtfSC_mathequal 350 -#define rtfSC_mathtilde 351 - -#define rtfSC_MaxChar 352 /* * rtf.h - RTF document processing stuff. Release 1.10. */ @@ -711,6 +352,8 @@ # define rtfAnnotProtected 75 /* new in 1.10 */ # define rtfRTLDoc 76 /* new in 1.10 */ # define rtfLTRDoc 77 /* new in 1.10 */ +# define rtfAnsiCodePage 78 +# define rtfUnicodeLength 79 # define rtfSectAttr 9 # define rtfSectDef 0 @@ -1439,9 +1082,14 @@ struct _RTF_Info { int prevChar; int bumpLine; + /* Document-wide attributes */ RTFFont *fontList; /* these lists MUST be */ RTFColor *colorList; /* initialized to NULL */ RTFStyle *styleList; + int ansiCodePage; /* ANSI codepage used in conversion to Unicode */ + int unicodeLength; /* The length of ANSI representation of Unicode characters */ + + int codePage; /* Current codepage for text conversion */ char *inputName; char *outputName; @@ -1457,41 +1105,6 @@ struct _RTF_Info { ME_TextEditor *editor; ME_Style *style; - /* - * These arrays are used to map RTF input character values onto the standard - * character names represented by the values. Input character values are - * used as indices into the arrays to produce standard character codes. - */ - - - char *genCharSetFile ; - int genCharCode[charSetSize]; /* general */ - int haveGenCharSet; - - char *symCharSetFile; - int symCharCode[charSetSize]; /* symbol */ - int haveSymCharSet; - - int curCharSet; - int *curCharCode; - - /* - * By default, the reader is configured to handle charset mapping invisibly, - * including reading the charset files and switching charset maps as necessary - * for Symbol font. - */ - - int autoCharSetFlags; - - /* - * Stack for keeping track of charset map on group begin/end. This is - * necessary because group termination reverts the font to the previous - * value, which may implicitly change it. - */ - - int csStack[maxCSStack]; - int csTop; - RTFFuncPtr ccb[rtfMaxClass]; /* class callbacks */ RTFFuncPtr dcb[rtfMaxDestination]; /* destination callbacks */ @@ -1502,12 +1115,11 @@ struct _RTF_Info { FILE *(*libFileOpen) (); - char *outMap[rtfSC_MaxChar]; - DWORD dwOutputCount; - char OutputBuffer[0x1000]; + WCHAR OutputBuffer[0x1000]; CHARFORMAT2W formatStack[maxCharFormatStack]; + int codePageStack[maxCharFormatStack]; int formatStackTop; }; @@ -1561,15 +1173,6 @@ void RTFSetPanicProc ( RTF_Info *, RTFFuncPtr); void RTFMsg (RTF_Info *, const char *fmt, ...); void RTFPanic (RTF_Info *, const char *fmt, ...); -int RTFReadOutputMap ( RTF_Info *, char *[], int); -int RTFReadCharSetMap ( RTF_Info *, int); -void RTFSetCharSetMap ( RTF_Info *, char *, int); -int RTFStdCharCode ( RTF_Info *, const char *); -const char *RTFStdCharName ( RTF_Info *, int); -int RTFMapChar ( RTF_Info *, int); -int RTFGetCharSet( RTF_Info * ); -void RTFSetCharSet( RTF_Info *, int); - void RTFSetOpenLibFileProc ( RTF_Info *, FILE *(*)()); FILE *RTFOpenLibFile ( RTF_Info *, char *, char *); -- 2.11.4.GIT