1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 /* 3 * Routines for dealing with .zip archives. 4 * 5 * Copyright (c) Meta Platforms, Inc. and affiliates. 6 */ 7 8 #include <errno.h> 9 #include <fcntl.h> 10 #include <stdint.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/mman.h> 14 #include <unistd.h> 15 16 #include "libbpf_internal.h" 17 #include "zip.h" 18 19 #pragma GCC diagnostic push 20 #pragma GCC diagnostic ignored "-Wpacked" 21 #pragma GCC diagnostic ignored "-Wattributes" 22 23 /* Specification of ZIP file format can be found here: 24 * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT 25 * For a high level overview of the structure of a ZIP file see 26 * sections 4.3.1 - 4.3.6. 27 * 28 * Data structures appearing in ZIP files do not contain any 29 * padding and they might be misaligned. To allow us to safely 30 * operate on pointers to such structures and their members, we 31 * declare the types as packed. 32 */ 33 34 #define END_OF_CD_RECORD_MAGIC 0x06054b50 35 36 /* See section 4.3.16 of the spec. */ 37 struct end_of_cd_record { 38 /* Magic value equal to END_OF_CD_RECORD_MAGIC */ 39 __u32 magic; 40 41 /* Number of the file containing this structure or 0xFFFF if ZIP64 archive. 42 * Zip archive might span multiple files (disks). 43 */ 44 __u16 this_disk; 45 46 /* Number of the file containing the beginning of the central directory or 47 * 0xFFFF if ZIP64 archive. 48 */ 49 __u16 cd_disk; 50 51 /* Number of central directory records on this disk or 0xFFFF if ZIP64 52 * archive. 53 */ 54 __u16 cd_records; 55 56 /* Number of central directory records on all disks or 0xFFFF if ZIP64 57 * archive. 58 */ 59 __u16 cd_records_total; 60 61 /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */ 62 __u32 cd_size; 63 64 /* Offset of the central directory from the beginning of the archive or 65 * 0xFFFFFFFF if ZIP64 archive. 66 */ 67 __u32 cd_offset; 68 69 /* Length of comment data following end of central directory record. */ 70 __u16 comment_length; 71 72 /* Up to 64k of arbitrary bytes. */ 73 /* uint8_t comment[comment_length] */ 74 } __attribute__((packed)); 75 76 #define CD_FILE_HEADER_MAGIC 0x02014b50 77 #define FLAG_ENCRYPTED (1 << 0) 78 #define FLAG_HAS_DATA_DESCRIPTOR (1 << 3) 79 80 /* See section 4.3.12 of the spec. */ 81 struct cd_file_header { 82 /* Magic value equal to CD_FILE_HEADER_MAGIC. */ 83 __u32 magic; 84 __u16 version; 85 /* Minimum zip version needed to extract the file. */ 86 __u16 min_version; 87 __u16 flags; 88 __u16 compression; 89 __u16 last_modified_time; 90 __u16 last_modified_date; 91 __u32 crc; 92 __u32 compressed_size; 93 __u32 uncompressed_size; 94 __u16 file_name_length; 95 __u16 extra_field_length; 96 __u16 file_comment_length; 97 /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */ 98 __u16 disk; 99 __u16 internal_attributes; 100 __u32 external_attributes; 101 /* Offset from the start of the disk containing the local file header to the 102 * start of the local file header. 103 */ 104 __u32 offset; 105 } __attribute__((packed)); 106 107 #define LOCAL_FILE_HEADER_MAGIC 0x04034b50 108 109 /* See section 4.3.7 of the spec. */ 110 struct local_file_header { 111 /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */ 112 __u32 magic; 113 /* Minimum zip version needed to extract the file. */ 114 __u16 min_version; 115 __u16 flags; 116 __u16 compression; 117 __u16 last_modified_time; 118 __u16 last_modified_date; 119 __u32 crc; 120 __u32 compressed_size; 121 __u32 uncompressed_size; 122 __u16 file_name_length; 123 __u16 extra_field_length; 124 } __attribute__((packed)); 125 126 #pragma GCC diagnostic pop 127 128 struct zip_archive { 129 void *data; 130 __u32 size; 131 __u32 cd_offset; 132 __u32 cd_records; 133 }; 134 135 static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size) 136 { 137 if (offset + size > archive->size || offset > offset + size) 138 return NULL; 139 140 return archive->data + offset; 141 } 142 143 /* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the 144 * archive uses features which are not supported. 145 */ 146 static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset) 147 { 148 __u16 comment_length, cd_records; 149 struct end_of_cd_record *eocd; 150 __u32 cd_offset, cd_size; 151 152 eocd = check_access(archive, offset, sizeof(*eocd)); 153 if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC) 154 return -EINVAL; 155 156 comment_length = eocd->comment_length; 157 if (offset + sizeof(*eocd) + comment_length != archive->size) 158 return -EINVAL; 159 160 cd_records = eocd->cd_records; 161 if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records) 162 /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */ 163 return -ENOTSUP; 164 165 cd_offset = eocd->cd_offset; 166 cd_size = eocd->cd_size; 167 if (!check_access(archive, cd_offset, cd_size)) 168 return -EINVAL; 169 170 archive->cd_offset = cd_offset; 171 archive->cd_records = cd_records; 172 return 0; 173 } 174 175 static int find_cd(struct zip_archive *archive) 176 { 177 int64_t limit, offset; 178 int rc = -EINVAL; 179 180 if (archive->size <= sizeof(struct end_of_cd_record)) 181 return -EINVAL; 182 183 /* Because the end of central directory ends with a variable length array of 184 * up to 0xFFFF bytes we can't know exactly where it starts and need to 185 * search for it at the end of the file, scanning the (limit, offset] range. 186 */ 187 offset = archive->size - sizeof(struct end_of_cd_record); 188 limit = (int64_t)offset - (1 << 16); 189 190 for (; offset >= 0 && offset > limit && rc != 0; offset--) { 191 rc = try_parse_end_of_cd(archive, offset); 192 if (rc == -ENOTSUP) 193 break; 194 } 195 return rc; 196 } 197 198 struct zip_archive *zip_archive_open(const char *path) 199 { 200 struct zip_archive *archive; 201 int err, fd; 202 off_t size; 203 void *data; 204 205 fd = open(path, O_RDONLY | O_CLOEXEC); 206 if (fd < 0) 207 return ERR_PTR(-errno); 208 209 size = lseek(fd, 0, SEEK_END); 210 if (size == (off_t)-1 || size > UINT32_MAX) { 211 close(fd); 212 return ERR_PTR(-EINVAL); 213 } 214 215 data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); 216 err = -errno; 217 close(fd); 218 219 if (data == MAP_FAILED) 220 return ERR_PTR(err); 221 222 archive = malloc(sizeof(*archive)); 223 if (!archive) { 224 munmap(data, size); 225 return ERR_PTR(-ENOMEM); 226 }; 227 228 archive->data = data; 229 archive->size = size; 230 231 err = find_cd(archive); 232 if (err) { 233 munmap(data, size); 234 free(archive); 235 return ERR_PTR(err); 236 } 237 238 return archive; 239 } 240 241 void zip_archive_close(struct zip_archive *archive) 242 { 243 munmap(archive->data, archive->size); 244 free(archive); 245 } 246 247 static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive, 248 __u32 offset) 249 { 250 struct local_file_header *lfh; 251 252 lfh = check_access(archive, offset, sizeof(*lfh)); 253 if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC) 254 return NULL; 255 256 return lfh; 257 } 258 259 static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out) 260 { 261 struct local_file_header *lfh; 262 __u32 compressed_size; 263 const char *name; 264 void *data; 265 266 lfh = local_file_header_at_offset(archive, offset); 267 if (!lfh) 268 return -EINVAL; 269 270 offset += sizeof(*lfh); 271 if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR)) 272 return -EINVAL; 273 274 name = check_access(archive, offset, lfh->file_name_length); 275 if (!name) 276 return -EINVAL; 277 278 offset += lfh->file_name_length; 279 if (!check_access(archive, offset, lfh->extra_field_length)) 280 return -EINVAL; 281 282 offset += lfh->extra_field_length; 283 compressed_size = lfh->compressed_size; 284 data = check_access(archive, offset, compressed_size); 285 if (!data) 286 return -EINVAL; 287 288 out->compression = lfh->compression; 289 out->name_length = lfh->file_name_length; 290 out->name = name; 291 out->data = data; 292 out->data_length = compressed_size; 293 out->data_offset = offset; 294 295 return 0; 296 } 297 298 int zip_archive_find_entry(struct zip_archive *archive, const char *file_name, 299 struct zip_entry *out) 300 { 301 size_t file_name_length = strlen(file_name); 302 __u32 i, offset = archive->cd_offset; 303 304 for (i = 0; i < archive->cd_records; ++i) { 305 __u16 cdfh_name_length, cdfh_flags; 306 struct cd_file_header *cdfh; 307 const char *cdfh_name; 308 309 cdfh = check_access(archive, offset, sizeof(*cdfh)); 310 if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC) 311 return -EINVAL; 312 313 offset += sizeof(*cdfh); 314 cdfh_name_length = cdfh->file_name_length; 315 cdfh_name = check_access(archive, offset, cdfh_name_length); 316 if (!cdfh_name) 317 return -EINVAL; 318 319 cdfh_flags = cdfh->flags; 320 if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && 321 (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 && 322 file_name_length == cdfh_name_length && 323 memcmp(file_name, archive->data + offset, file_name_length) == 0) { 324 return get_entry_at_offset(archive, cdfh->offset, out); 325 } 326 327 offset += cdfh_name_length; 328 offset += cdfh->extra_field_length; 329 offset += cdfh->file_comment_length; 330 } 331 332 return -ENOENT; 333 } 334