From: Giovanni Scafora <giovanni.archlinux.org>
Subject: unzip files encoded with non-latin, non-unicode file names
Last-Update: 2015-02-11

Upstream-Status: Backport
CVE: CVE-2015-1315

Updated 2015-02-11 by Marc Deslauriers <marc.deslauriers@canonical.com>
to fix buffer overflow in charset_to_intern()

Review fixes:
 - always NUL-terminate OEM_CP/ISO_CP (a charset name of MAX_CP_NAME or more
   characters used to leave them unterminated)
 - do not step past the end of an empty charset argument (-O "" / -I "")
 - the UNIX ZipInfoUsageLine3 takes the same single %s argument as the
   generic variant (stray %c conversions removed)
 - the -O error message no longer refers to -I

Signed-off-by: Marc Deslauriers <marc.deslauriers@canonical.com>

Index: unzip-6.0/unix/unix.c
===================================================================
--- unzip-6.0.orig/unix/unix.c 2015-02-11 08:46:43.675324290 -0500
+++ unzip-6.0/unix/unix.c 2015-02-11 09:18:04.902081319 -0500
@@ -30,6 +30,9 @@
 #define UNZIP_INTERNAL
 #include "unzip.h"
 
+#include <iconv.h>
+#include <langinfo.h>
+
 #ifdef SCO_XENIX
 # define SYSNDIR
 #else /* SCO Unix, AIX, DNIX, TI SysV, Coherent 4.x, ... */
@@ -1874,3 +1877,106 @@
     }
 }
 #endif /* QLZIP */
+
+
+typedef struct {
+    char *local_charset;
+    char *archive_charset;
+} CHARSET_MAP;
+
+/* A mapping of local <-> archive charsets used by default to convert filenames
+ * of DOS/Windows Zip archives. Currently very basic. */
+static CHARSET_MAP dos_charset_map[] = {
+    { "ANSI_X3.4-1968", "CP850" },
+    { "ISO-8859-1", "CP850" },
+    { "CP1252", "CP850" },
+    { "UTF-8", "CP866" },
+    { "KOI8-R", "CP866" },
+    { "KOI8-U", "CP866" },
+    { "ISO-8859-5", "CP866" }
+};
+
+char OEM_CP[MAX_CP_NAME] = "";
+char ISO_CP[MAX_CP_NAME] = "";
+
+/* Try to guess the default value of OEM_CP based on the current locale.
+ * ISO_CP is left alone for now. Nothing is changed when OEM_CP is already
+ * set or the locale codeset has no entry in dos_charset_map. */
+void init_conversion_charsets(void)
+{
+    const char *local_charset;
+    size_t i;
+
+    /* Make a guess only if OEM_CP not already set. */
+    if(*OEM_CP == '\0') {
+        local_charset = nl_langinfo(CODESET);
+        for(i = 0; i < sizeof(dos_charset_map)/sizeof(CHARSET_MAP); i++)
+            if(!strcasecmp(local_charset, dos_charset_map[i].local_charset)) {
+                strncpy(OEM_CP, dos_charset_map[i].archive_charset,
+                        sizeof(OEM_CP) - 1);
+                /* strncpy() does not NUL-terminate on truncation. */
+                OEM_CP[sizeof(OEM_CP) - 1] = '\0';
+                break;
+            }
+    }
+}
+
+/* Convert a string from one encoding to the current locale using iconv().
+ * Be as non-intrusive as possible. If an error is encountered during
+ * conversion just leave the string intact. The converted text is copied
+ * back into string; an empty from_charset means no conversion at all. */
+static void charset_to_intern(char *string, char *from_charset)
+{
+    iconv_t cd;
+    char *s,*d, *buf;
+    size_t slen, dlen, buflen;
+    const char *local_charset;
+
+    if(*from_charset == '\0')
+        return;
+
+    buf = NULL;
+    local_charset = nl_langinfo(CODESET);
+
+    if((cd = iconv_open(local_charset, from_charset)) == (iconv_t)-1)
+        return;
+
+    slen = strlen(string);
+    s = string;
+
+    /* Use the smaller of FILNAMSIZ and OUTBUFSIZ + 1 for the work buffer
+     * as this function also gets called with G.outbuf in fileio.c
+     */
+    buflen = FILNAMSIZ;
+    if (OUTBUFSIZ + 1 < FILNAMSIZ)
+    {
+        buflen = OUTBUFSIZ + 1;
+    }
+
+    d = buf = malloc(buflen);
+    if(!d)
+        goto cleanup;
+
+    memset(buf, 0, buflen);
+    dlen = buflen - 1;
+
+    if(iconv(cd, &s, &slen, &d, &dlen) == (size_t)-1)
+        goto cleanup;
+    strncpy(string, buf, buflen);
+
+    cleanup:
+    free(buf);
+    iconv_close(cd);
+}
+
+/* Convert a string from OEM_CP to the current locale charset. */
+inline void oem_intern(char *string)
+{
+    charset_to_intern(string, OEM_CP);
+}
+
+/* Convert a string from ISO_CP to the current locale charset. */
+inline void iso_intern(char *string)
+{
+    charset_to_intern(string, ISO_CP);
+}
Index: unzip-6.0/unix/unxcfg.h
===================================================================
--- unzip-6.0.orig/unix/unxcfg.h 2015-02-11 08:46:43.675324290 -0500
+++ unzip-6.0/unix/unxcfg.h 2015-02-11 08:46:43.671324260 -0500
@@ -228,4 +228,30 @@
 /* wild_dir, dirname, wildname, matchname[], dirnamelen, have_dirname, */
 /* and notfirstcall are used by do_wild(). */
 
+
+#define MAX_CP_NAME 25 /* size of OEM_CP and ISO_CP, terminating NUL included */
+
+#ifdef SETLOCALE
+# undef SETLOCALE
+#endif
+#define SETLOCALE(category, locale) setlocale(category, locale)
+#include <locale.h>
+
+#ifdef _ISO_INTERN
+# undef _ISO_INTERN
+#endif
+#define _ISO_INTERN(str1) iso_intern(str1)
+
+#ifdef _OEM_INTERN
+# undef _OEM_INTERN
+#endif
+#ifndef IZ_OEM2ISO_ARRAY
+# define IZ_OEM2ISO_ARRAY
+#endif
+#define _OEM_INTERN(str1) oem_intern(str1)
+
+void iso_intern(char *string);
+void oem_intern(char *string);
+void init_conversion_charsets(void);
+
 #endif /* !__unxcfg_h */
Index: unzip-6.0/unzip.c
===================================================================
--- unzip-6.0.orig/unzip.c 2015-02-11 08:46:43.675324290 -0500
+++ unzip-6.0/unzip.c 2015-02-11 08:46:43.675324290 -0500
@@ -327,11 +327,21 @@
  -2 just filenames but allow -h/-t/-z -l long Unix \"ls -l\" format\n\
  -v verbose, multi-page format\n";
 
+#ifndef UNIX
 static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
  -h print header line -t print totals for listed files or for all\n\
  -z print zipfile comment -T print file times in sortable decimal format\
 \n -C be case-insensitive %s\
  -x exclude filenames that follow from listing\n";
+#else /* UNIX */
+static ZCONST char Far ZipInfoUsageLine3[] = "miscellaneous options:\n\
+ -h print header line -t print totals for listed files or for all\n\
+ -z print zipfile comment -T print file times in sortable decimal format\
+\n -C be case-insensitive %s\
+ -x exclude filenames that follow from listing\n\
+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\
+ -I CHARSET specify a character encoding for UNIX and other archives\n";
+#endif /* !UNIX */
 #ifdef MORE
 static ZCONST char Far ZipInfoUsageLine4[] =
 " -M page output through built-in \"more\"\n";
@@ -664,6 +674,17 @@
  -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\
  -C match filenames case-insensitively -L make (some) names \
 lowercase\n %-42s -V retain VMS version numbers\n%s";
+#elif (defined UNIX)
+static ZCONST char Far UnzipUsageLine4[] = "\
+modifiers:\n\
+ -n never overwrite existing files -q quiet mode (-qq => quieter)\n\
+ -o overwrite files WITHOUT prompting -a auto-convert any text files\n\
+ -j junk paths (do not make directories) -aa treat ALL files as text\n\
+ -U use escapes for all non-ASCII Unicode -UU ignore any Unicode fields\n\
+ -C match filenames case-insensitively -L make (some) names \
+lowercase\n %-42s -V retain VMS version numbers\n%s\
+ -O CHARSET specify a character encoding for DOS, Windows and OS/2 archives\n\
+ -I CHARSET specify a character encoding for UNIX and other archives\n\n";
 #else /* !VMS */
 static ZCONST char Far UnzipUsageLine4[] = "\
 modifiers:\n\
@@ -802,6 +823,10 @@
 #endif /* UNICODE_SUPPORT */
 
 
+#ifdef UNIX
+    init_conversion_charsets();
+#endif
+
 #if (defined(__IBMC__) && defined(__DEBUG_ALLOC__))
     extern void DebugMalloc(void);
 
@@ -1335,6 +1360,11 @@
     argc = *pargc;
     argv = *pargv;
 
+#ifdef UNIX
+    extern char OEM_CP[MAX_CP_NAME];
+    extern char ISO_CP[MAX_CP_NAME];
+#endif
+
     while (++argv, (--argc > 0 && *argv != NULL && **argv == '-')) {
         s = *argv + 1;
         while ((c = *s++) != 0) { /* "!= 0": prevent Turbo C warning */
@@ -1516,6 +1546,35 @@
                     }
                     break;
 #endif /* MACOS */
+#ifdef UNIX
+                case ('I'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error: encodings can't be negated"));
+                        return(PK_PARAM);
+                    } else {
+                        if(*s) { /* Handle the -Icharset case */
+                            /* Assume that charsets can't start with a dash to spot arguments misuse */
+                            if(*s == '-') {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -I argument"));
+                                return(PK_PARAM);
+                            }
+                        } else { /* -I charset */
+                            ++argv;
+                            if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -I argument"));
+                                return(PK_PARAM);
+                            }
+                            s = *argv;
+                        }
+                        strncpy(ISO_CP, s, sizeof(ISO_CP) - 1);
+                        ISO_CP[sizeof(ISO_CP) - 1] = '\0'; /* strncpy() may not terminate */
+                        s += strlen(s); /* No params straight after charset name */
+                    }
+                    break;
+#endif /* ?UNIX */
                 case ('j'): /* junk pathnames/directory structure */
                     if (negative)
                         uO.jflag = FALSE, negative = 0;
@@ -1591,6 +1650,35 @@
                     } else
                         ++uO.overwrite_all;
                     break;
+#ifdef UNIX
+                case ('O'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error: encodings can't be negated"));
+                        return(PK_PARAM);
+                    } else {
+                        if(*s) { /* Handle the -Ocharset case */
+                            /* Assume that charsets can't start with a dash to spot arguments misuse */
+                            if(*s == '-') {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -O argument"));
+                                return(PK_PARAM);
+                            }
+                        } else { /* -O charset */
+                            ++argv;
+                            if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -O argument"));
+                                return(PK_PARAM);
+                            }
+                            s = *argv;
+                        }
+                        strncpy(OEM_CP, s, sizeof(OEM_CP) - 1);
+                        OEM_CP[sizeof(OEM_CP) - 1] = '\0'; /* strncpy() may not terminate */
+                        s += strlen(s); /* No params straight after charset name */
+                    }
+                    break;
+#endif /* ?UNIX */
                 case ('p'): /* pipes: extract to stdout, no messages */
                     if (negative) {
                         uO.cflag = FALSE;
Index: unzip-6.0/unzpriv.h
===================================================================
--- unzip-6.0.orig/unzpriv.h 2015-02-11 08:46:43.675324290 -0500
+++ unzip-6.0/unzpriv.h 2015-02-11 08:46:43.675324290 -0500
@@ -3008,7 +3008,7 @@
          !(((islochdr) || (isuxatt)) && \
            ((hostver) == 25 || (hostver) == 26 || (hostver) == 40))) || \
         (hostnum) == FS_HPFS_ || \
-        ((hostnum) == FS_NTFS_ && (hostver) == 50)) { \
+        ((hostnum) == FS_NTFS_ /* && (hostver) == 50 */ )) { \
         _OEM_INTERN((string)); \
     } else { \
         _ISO_INTERN((string)); \
Index: unzip-6.0/zipinfo.c
===================================================================
--- unzip-6.0.orig/zipinfo.c 2015-02-11 08:46:43.675324290 -0500
+++ unzip-6.0/zipinfo.c 2015-02-11 08:46:43.675324290 -0500
@@ -457,6 +457,10 @@
     int tflag_slm=TRUE, tflag_2v=FALSE;
     int explicit_h=FALSE, explicit_t=FALSE;
 
+#ifdef UNIX
+    extern char OEM_CP[MAX_CP_NAME];
+    extern char ISO_CP[MAX_CP_NAME];
+#endif
 
 #ifdef MACOS
     uO.lflag = LFLAG; /* reset default on each call */
@@ -501,6 +505,35 @@
                         uO.lflag = 0;
                     }
                     break;
+#ifdef UNIX
+                case ('I'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error: encodings can't be negated"));
+                        return(PK_PARAM);
+                    } else {
+                        if(*s) { /* Handle the -Icharset case */
+                            /* Assume that charsets can't start with a dash to spot arguments misuse */
+                            if(*s == '-') {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -I argument"));
+                                return(PK_PARAM);
+                            }
+                        } else { /* -I charset */
+                            ++argv;
+                            if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -I argument"));
+                                return(PK_PARAM);
+                            }
+                            s = *argv;
+                        }
+                        strncpy(ISO_CP, s, sizeof(ISO_CP) - 1);
+                        ISO_CP[sizeof(ISO_CP) - 1] = '\0'; /* strncpy() may not terminate */
+                        s += strlen(s); /* No params straight after charset name */
+                    }
+                    break;
+#endif /* ?UNIX */
                 case 'l': /* longer form of "ls -l" type listing */
                     if (negative)
                         uO.lflag = -2, negative = 0;
@@ -521,6 +554,35 @@
                     G.M_flag = TRUE;
                     break;
 #endif
+#ifdef UNIX
+                case ('O'):
+                    if (negative) {
+                        Info(slide, 0x401, ((char *)slide,
+                          "error: encodings can't be negated"));
+                        return(PK_PARAM);
+                    } else {
+                        if(*s) { /* Handle the -Ocharset case */
+                            /* Assume that charsets can't start with a dash to spot arguments misuse */
+                            if(*s == '-') {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -O argument"));
+                                return(PK_PARAM);
+                            }
+                        } else { /* -O charset */
+                            ++argv;
+                            if(!(--argc > 0 && *argv != NULL && **argv != '-')) {
+                                Info(slide, 0x401, ((char *)slide,
+                                  "error: a valid character encoding should follow the -O argument"));
+                                return(PK_PARAM);
+                            }
+                            s = *argv;
+                        }
+                        strncpy(OEM_CP, s, sizeof(OEM_CP) - 1);
+                        OEM_CP[sizeof(OEM_CP) - 1] = '\0'; /* strncpy() may not terminate */
+                        s += strlen(s); /* No params straight after charset name */
+                    }
+                    break;
+#endif /* ?UNIX */
                 case 's': /* default: shorter "ls -l" type listing */
                     if (negative)
                         uO.lflag = -2, negative = 0;