patches/unzip/unzip-6.0-alt-iconv-utf8-print.patch

   1 From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001
   2 From: Petr Stodulka <pstodulk@redhat.com>
   3 Date: Tue, 24 Nov 2015 17:56:11 +0100
   4 Subject: [PATCH] print correctly non-ascii filenames
   5
   6 ---
   7  extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
   8  unzpriv.h |   7 ++
   9  2 files changed, 233 insertions(+), 63 deletions(-)
  10
  11 diff --git a/extract.c b/extract.c
  12 index 0ee4e93..741b7e0 100644
  13 --- a/extract.c
  14 +++ b/extract.c
  15 @@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry)
  16  } /* end function set_deferred_symlink() */
  17  #endif /* SYMLINKS */
  18
  19 +/*
  20 + * If Unicode is supported, fnfilter() below assumes the wide-character
  21 + * functions it needs for its printability check exist, avoiding MBCS issues.
  22 + */
  23
  24 -
  25 +#ifndef UZ_FNFILTER_REPLACECHAR
  26 +        /* A convenient choice for the replacement of unprintable char codes is
  27 +         * the "single char wildcard", as this character is quite unlikely to
  28 +         * appear in filenames by itself.  The following default definition
  29 +         * sets the replacement char to a question mark as the most common
  30 +         * "single char wildcard"; this setting should be overridden in the
  31 +         * appropriate system-specific configuration header when needed.
  32 +         */
  33 +# define UZ_FNFILTER_REPLACECHAR      '?'
  34 +#endif
  35
  36  /*************************/
  37  /*  Function fnfilter()  */        /* here instead of in list.c for SFX */
  38 @@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
  39      extent size;
  40  {
  41  #ifndef NATIVE   /* ASCII:  filter ANSI escape codes, etc. */
  42 -    ZCONST uch *r=(ZCONST uch *)raw;
  43 +    ZCONST uch *r; // =(ZCONST uch *)raw;
  44      uch *s=space;
  45      uch *slim=NULL;
  46      uch *se=NULL;
  47      int have_overflow = FALSE;
  48
  49 -    if (size > 0) {
  50 -        slim = space + size
  51 -#ifdef _MBCS
  52 -                     - (MB_CUR_MAX - 1)
  53 -#endif
  54 -                     - 4;
  55 +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
  56 +/* If Unicode support is enabled, and we have multi-byte characters,
  57 + * then do the isprint() checks by first converting to wide characters
  58 + * and checking those.  This avoids our having to parse multi-byte
  59 + * characters for ourselves.  After the wide-char replacements have been
  60 + * made, the wide string is converted back to the local character set.
  61 + */
  62 +    wchar_t *wstring;    /* wchar_t version of raw */
  63 +    size_t wslen;        /* length of wstring */
  64 +    wchar_t *wostring;   /* wchar_t version of output string */
  65 +    size_t woslen;       /* length of wostring */
  66 +    char *newraw;        /* new raw */
  67 +
  68 +    /* 2012-11-06 SMS.
  69 +     * Changed to check the value returned by mbstowcs(), and bypass the
  70 +     * Unicode processing if it fails.  This seems to fix a problem
  71 +     * reported in the SourceForge forum, but it's not clear that we
  72 +     * should be doing any Unicode processing without some evidence that
  73 +     * the name actually is Unicode.  (Check bit 11 in the flags before
  74 +     * coming here?)
  75 +     * http://sourceforge.net/p/infozip/bugs/40/
  76 +     */
  77 +
  78 +    if (MB_CUR_MAX <= 1)
  79 +    {
  80 +        /* There's no point to converting multi-byte chars if there are
  81 +         * no multi-byte chars.
  82 +         */
  83 +        wslen = (size_t)-1;
  84      }
  85 -    while (*r) {
  86 -        if (size > 0 && s >= slim && se == NULL) {
  87 -            se = s;
  88 +    else
  89 +    {
  90 +        /* Get Unicode wide character count (for storage allocation). */
  91 +        wslen = mbstowcs( NULL, raw, 0);
  92 +    }
  93 +
  94 +    if (wslen != (size_t)-1)
  95 +    {
  96 +        /* Apparently valid Unicode.  Allocate wide-char storage. */
  97 +        wstring = (wchar_t *)malloc((wslen + 1) * sizeof(wchar_t));
  98 +        if (wstring == NULL) {
  99 +            strcpy( (char *)space, raw);
 100 +            return (char *)space;
 101          }
 102 -#ifdef QDOS
 103 -        if (qlflag & 2) {
 104 -            if (*r == '/' || *r == '.') {
 105 +        wostring = (wchar_t *)malloc(2 * (wslen + 1) * sizeof(wchar_t));
 106 +        if (wostring == NULL) {
 107 +            free(wstring);
 108 +            strcpy( (char *)space, raw);
 109 +            return (char *)space;
 110 +        }
 111 +
 112 +        /* Convert the multi-byte Unicode to wide chars. */
 113 +        wslen = mbstowcs(wstring, raw, wslen + 1);
 114 +
 115 +        /* Filter the wide-character string. */
 116 +        fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t)));
 117 +
 118 +        /* Convert filtered wide chars back to multi-byte. */
 119 +        woslen = wcstombs( NULL, wostring, 0);
 120 +        if ((newraw = malloc(woslen + 1)) == NULL) {
 121 +            free(wstring);
 122 +            free(wostring);
 123 +            strcpy( (char *)space, raw);
 124 +            return (char *)space;
 125 +        }
 126 +        woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1);
 127 +
 128 +        if (size > 0) {
 129 +            slim = space + size - 4;
 130 +        }
 131 +        r = (ZCONST uch *)newraw;
 132 +        while (*r) {
 133 +            if (size > 0 && s >= slim && se == NULL) {
 134 +                se = s;
 135 +            }
 136 +#  ifdef QDOS
 137 +            if (qlflag & 2) {
 138 +                if (*r == '/' || *r == '.') {
 139 +                    if (se != NULL && (s > (space + (size-3)))) {
 140 +                        have_overflow = TRUE;
 141 +                        break;
 142 +                    }
 143 +                    ++r;
 144 +                    *s++ = '_';
 145 +                    continue;
 146 +                }
 147 +            } else
 148 +#  endif
 149 +            {
 150                  if (se != NULL && (s > (space + (size-3)))) {
 151                      have_overflow = TRUE;
 152                      break;
 153                  }
 154 -                ++r;
 155 -                *s++ = '_';
 156 -                continue;
 157 +                *s++ = *r++;
 158              }
 159 -        } else
 160 +        }
 161 +        if (have_overflow) {
 162 +            strcpy((char *)se, "...");
 163 +        } else {
 164 +            *s = '\0';
 165 +        }
 166 +
 167 +        free(wstring);
 168 +        free(wostring);
 169 +        free(newraw);
 170 +    }
 171 +    else
 172 +# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
 173 +    {
 174 +        /* No Unicode support, or apparently invalid Unicode. */
 175 +        r = (ZCONST uch *)raw;
 176 +
 177 +        if (size > 0) {
 178 +            slim = space + size
 179 +#ifdef _MBCS
 180 +                         - (MB_CUR_MAX - 1)
 181 +#endif
 182 +                         - 4;
 183 +        }
 184 +        while (*r) {
 185 +            if (size > 0 && s >= slim && se == NULL) {
 186 +                se = s;
 187 +            }
 188 +#ifdef QDOS
 189 +            if (qlflag & 2) {
 190 +                if (*r == '/' || *r == '.') {
 191 +                    if (se != NULL && (s > (space + (size-3)))) {
 192 +                        have_overflow = TRUE;
 193 +                        break;
 194 +                    }
 195 +                    ++r;
 196 +                    *s++ = '_';
 197 +                    continue;
 198 +                }
 199 +            } else
 200  #endif
 201  #ifdef HAVE_WORKING_ISPRINT
 202 -# ifndef UZ_FNFILTER_REPLACECHAR
 203 -    /* A convenient choice for the replacement of unprintable char codes is
 204 -     * the "single char wildcard", as this character is quite unlikely to
 205 -     * appear in filenames by itself.  The following default definition
 206 -     * sets the replacement char to a question mark as the most common
 207 -     * "single char wildcard"; this setting should be overridden in the
 208 -     * appropiate system-specific configuration header when needed.
 209 -     */
 210 -#   define UZ_FNFILTER_REPLACECHAR      '?'
 211 -# endif
 212 -        if (!isprint(*r)) {
 213 +            if (!isprint(*r)) {
 214 +                if (*r < 32) {
 215 +                    /* ASCII control codes are escaped as "^{letter}". */
 216 +                    if (se != NULL && (s > (space + (size-4)))) {
 217 +                        have_overflow = TRUE;
 218 +                        break;
 219 +                    }
 220 +                    *s++ = '^', *s++ = (uch)(64 + *r++);
 221 +                } else {
 222 +                    /* Other unprintable codes are replaced by the
 223 +                     * placeholder character. */
 224 +                    if (se != NULL && (s > (space + (size-3)))) {
 225 +                        have_overflow = TRUE;
 226 +                        break;
 227 +                    }
 228 +                    *s++ = UZ_FNFILTER_REPLACECHAR;
 229 +                    INCSTR(r);
 230 +                }
 231 +#else /* !HAVE_WORKING_ISPRINT */
 232              if (*r < 32) {
 233                  /* ASCII control codes are escaped as "^{letter}". */
 234                  if (se != NULL && (s > (space + (size-4)))) {
 235 @@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
 236                      break;
 237                  }
 238                  *s++ = '^', *s++ = (uch)(64 + *r++);
 239 +#endif /* ?HAVE_WORKING_ISPRINT */
 240              } else {
 241 -                /* Other unprintable codes are replaced by the
 242 -                 * placeholder character. */
 243 +#ifdef _MBCS
 244 +                unsigned i = CLEN(r);
 245 +                if (se != NULL && (s > (space + (size-i-2)))) {
 246 +                    have_overflow = TRUE;
 247 +                    break;
 248 +                }
 249 +                for (; i > 0; i--)
 250 +                    *s++ = *r++;
 251 +#else
 252                  if (se != NULL && (s > (space + (size-3)))) {
 253                      have_overflow = TRUE;
 254                      break;
 255                  }
 256 -                *s++ = UZ_FNFILTER_REPLACECHAR;
 257 -                INCSTR(r);
 258 -            }
 259 -#else /* !HAVE_WORKING_ISPRINT */
 260 -        if (*r < 32) {
 261 -            /* ASCII control codes are escaped as "^{letter}". */
 262 -            if (se != NULL && (s > (space + (size-4)))) {
 263 -                have_overflow = TRUE;
 264 -                break;
 265 -            }
 266 -            *s++ = '^', *s++ = (uch)(64 + *r++);
 267 -#endif /* ?HAVE_WORKING_ISPRINT */
 268 -        } else {
 269 -#ifdef _MBCS
 270 -            unsigned i = CLEN(r);
 271 -            if (se != NULL && (s > (space + (size-i-2)))) {
 272 -                have_overflow = TRUE;
 273 -                break;
 274 -            }
 275 -            for (; i > 0; i--)
 276                  *s++ = *r++;
 277 -#else
 278 -            if (se != NULL && (s > (space + (size-3)))) {
 279 -                have_overflow = TRUE;
 280 -                break;
 281 -            }
 282 -            *s++ = *r++;
 283  #endif
 284 -         }
 285 -    }
 286 -    if (have_overflow) {
 287 -        strcpy((char *)se, "...");
 288 -    } else {
 289 -        *s = '\0';
 290 +             }
 291 +        }
 292 +        if (have_overflow) {
 293 +            strcpy((char *)se, "...");
 294 +        } else {
 295 +            *s = '\0';
 296 +        }
 297      }
 298
 299  #ifdef WINDLL
 300 @@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
 301  } /* end function fnfilter() */
 302
 303
 304 +#if defined( UNICODE_SUPPORT) && defined( _MBCS)
 305 +
 306 +/****************************/
 307 +/*  Function fnfilter[w]()  */  /* (Here instead of in list.c for SFX.) */
 308 +/****************************/
 309 +
 310 +/* fnfilterw() - Convert wide-character name to safely printable form. */
 311 +
 312 +/* Returns dst.  Output may need up to 2 wide chars per input char + NUL. */
 313 +
 314 +wchar_t *fnfilterw( src, dst, siz)
 315 +    ZCONST wchar_t *src;        /* NUL-terminated wide source string. */
 316 +    wchar_t *dst;               /* Output buffer; caller must size it. */
 317 +    extent siz;                 /* Size passed for dst; NOT checked (!). */
 318 +{
 319 +    wchar_t *dsx = dst;         /* Remember start of output for return. */
 320 +
 321 +    /* Filter the wide chars, one source character per iteration. */
 322 +    while (*src)
 323 +    {
 324 +        if (iswprint( *src))
 325 +        {
 326 +            /* Printable code.  Copy it unchanged. */
 327 +            *dst++ = *src;
 328 +        }
 329 +        else
 330 +        {
 331 +            /* Unprintable code.  Substitute something printable for it. */
 332 +            if (*src < 32)
 333 +            {
 334 +                /* Replace control code with "^{letter}" (code + 64). */
 335 +                *dst++ = (wchar_t)'^';
 336 +                *dst++ = (wchar_t)(64 + *src);
 337 +            }
 338 +            else
 339 +            {
 340 +                /* Replace other code with UZ_FNFILTER_REPLACECHAR. */
 341 +                *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR;
 342 +            }
 343 +        }
 344 +        src++;
 345 +    }
 346 +    *dst = (wchar_t)0;  /* NUL-terminate the destination string. */
 347 +    return dsx;
 348 +} /* fnfilterw(). */
 349 +
 350 +#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
 351
 352
 353  #ifdef SET_DIR_ATTRIB
 354 diff --git a/unzpriv.h b/unzpriv.h
 355 index 22d3923..e48a652 100644
 356 --- a/unzpriv.h
 357 +++ b/unzpriv.h
 358 @@ -1212,6 +1212,7 @@
 359  # ifdef UNICODE_WCHAR
 360  #  if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP))
 361  #   include <wchar.h>
 362 +#   include <wctype.h>
 363  #  endif
 364  # endif
 365  # ifndef _MBCS  /* no need to include <locale.h> twice, see below */
 366 @@ -2410,6 +2411,12 @@ int    memflush                  OF((__GPRO__ ZCONST uch *rawbuf, ulg size));
 367  char  *fnfilter                  OF((ZCONST char *raw, uch *space,
 368                                       extent size));
 369
 370 +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
 371 +wchar_t *fnfilterw               OF((ZCONST wchar_t *src, wchar_t *dst,
 372 +                                     extent siz));
 373 +#endif
 374 +
 375 +
 376  /*---------------------------------------------------------------------------
 377      Decompression functions:
 378    ---------------------------------------------------------------------------*/
 379 --
 380 2.4.3
 381