// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/*
 * Routines for dealing with .zip archives.
 *
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 */
71eebcb60SDaniel Müller
81eebcb60SDaniel Müller #include <errno.h>
91eebcb60SDaniel Müller #include <fcntl.h>
101eebcb60SDaniel Müller #include <stdint.h>
111eebcb60SDaniel Müller #include <stdlib.h>
121eebcb60SDaniel Müller #include <string.h>
131eebcb60SDaniel Müller #include <sys/mman.h>
141eebcb60SDaniel Müller #include <unistd.h>
151eebcb60SDaniel Müller
161eebcb60SDaniel Müller #include "libbpf_internal.h"
171eebcb60SDaniel Müller #include "zip.h"
181eebcb60SDaniel Müller
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpacked"
#pragma GCC diagnostic ignored "-Wattributes"

/* Specification of ZIP file format can be found here:
 * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
 * For a high level overview of the structure of a ZIP file see
 * sections 4.3.1 - 4.3.6.
 *
 * Data structures appearing in ZIP files do not contain any
 * padding and they might be misaligned. To allow us to safely
 * operate on pointers to such structures and their members, we
 * declare the types as packed.
 */

#define END_OF_CD_RECORD_MAGIC 0x06054b50

/* End of central directory record. See section 4.3.16 of the spec. */
struct end_of_cd_record {
	/* Magic value equal to END_OF_CD_RECORD_MAGIC. */
	__u32 magic;

	/* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
	 * Zip archive might span multiple files (disks).
	 */
	__u16 this_disk;

	/* Number of the file containing the beginning of the central directory or
	 * 0xFFFF if ZIP64 archive.
	 */
	__u16 cd_disk;

	/* Number of central directory records on this disk or 0xFFFF if ZIP64
	 * archive.
	 */
	__u16 cd_records;

	/* Number of central directory records on all disks or 0xFFFF if ZIP64
	 * archive.
	 */
	__u16 cd_records_total;

	/* Size of the central directory in bytes or 0xFFFFFFFF if ZIP64 archive. */
	__u32 cd_size;

	/* Offset of the central directory from the beginning of the archive or
	 * 0xFFFFFFFF if ZIP64 archive.
	 */
	__u32 cd_offset;

	/* Length of comment data following end of central directory record. */
	__u16 comment_length;

	/* Up to 64k of arbitrary bytes. */
	/* uint8_t comment[comment_length] */
} __attribute__((packed));
751eebcb60SDaniel Müller
#define CD_FILE_HEADER_MAGIC 0x02014b50
/* Bits of the general purpose "flags" field that this parser looks at. */
#define FLAG_ENCRYPTED (1 << 0)
#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)

/* Central directory file header. See section 4.3.12 of the spec.
 *
 * In the file, each header is followed by file_name_length bytes of file
 * name, extra_field_length bytes of extra data and file_comment_length bytes
 * of comment.
 */
struct cd_file_header {
	/* Magic value equal to CD_FILE_HEADER_MAGIC. */
	__u32 magic;
	__u16 version;
	/* Minimum zip version needed to extract the file. */
	__u16 min_version;
	/* General purpose bit flags, see FLAG_* above. */
	__u16 flags;
	__u16 compression;
	__u16 last_modified_time;
	__u16 last_modified_date;
	__u32 crc;
	__u32 compressed_size;
	__u32 uncompressed_size;
	__u16 file_name_length;
	__u16 extra_field_length;
	__u16 file_comment_length;
	/* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
	__u16 disk;
	__u16 internal_attributes;
	__u32 external_attributes;
	/* Offset from the start of the disk containing the local file header to the
	 * start of the local file header.
	 */
	__u32 offset;
} __attribute__((packed));
1061eebcb60SDaniel Müller
#define LOCAL_FILE_HEADER_MAGIC 0x04034b50

/* Local file header. See section 4.3.7 of the spec.
 *
 * In the file, each header is followed by file_name_length bytes of file
 * name, extra_field_length bytes of extra data and then the file data.
 */
struct local_file_header {
	/* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
	__u32 magic;
	/* Minimum zip version needed to extract the file. */
	__u16 min_version;
	/* General purpose bit flags. */
	__u16 flags;
	__u16 compression;
	__u16 last_modified_time;
	__u16 last_modified_date;
	__u32 crc;
	__u32 compressed_size;
	__u32 uncompressed_size;
	__u16 file_name_length;
	__u16 extra_field_length;
} __attribute__((packed));

#pragma GCC diagnostic pop
127*6cb9430bSDaniel Müller
/* State kept for an opened ZIP archive. */
struct zip_archive {
	/* Start of the read-only mapping of the archive file. */
	void *data;
	/* Size of the mapping (and of the archive file) in bytes. */
	__u32 size;
	/* Offset of the central directory from the start of the archive. */
	__u32 cd_offset;
	/* Number of records in the central directory. */
	__u32 cd_records;
};
1341eebcb60SDaniel Müller
check_access(struct zip_archive * archive,__u32 offset,__u32 size)1351eebcb60SDaniel Müller static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
1361eebcb60SDaniel Müller {
1371eebcb60SDaniel Müller if (offset + size > archive->size || offset > offset + size)
1381eebcb60SDaniel Müller return NULL;
1391eebcb60SDaniel Müller
1401eebcb60SDaniel Müller return archive->data + offset;
1411eebcb60SDaniel Müller }
1421eebcb60SDaniel Müller
1431eebcb60SDaniel Müller /* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
1441eebcb60SDaniel Müller * archive uses features which are not supported.
1451eebcb60SDaniel Müller */
try_parse_end_of_cd(struct zip_archive * archive,__u32 offset)1461eebcb60SDaniel Müller static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
1471eebcb60SDaniel Müller {
1481eebcb60SDaniel Müller __u16 comment_length, cd_records;
1491eebcb60SDaniel Müller struct end_of_cd_record *eocd;
1501eebcb60SDaniel Müller __u32 cd_offset, cd_size;
1511eebcb60SDaniel Müller
1521eebcb60SDaniel Müller eocd = check_access(archive, offset, sizeof(*eocd));
1531eebcb60SDaniel Müller if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
1541eebcb60SDaniel Müller return -EINVAL;
1551eebcb60SDaniel Müller
1561eebcb60SDaniel Müller comment_length = eocd->comment_length;
1571eebcb60SDaniel Müller if (offset + sizeof(*eocd) + comment_length != archive->size)
1581eebcb60SDaniel Müller return -EINVAL;
1591eebcb60SDaniel Müller
1601eebcb60SDaniel Müller cd_records = eocd->cd_records;
1611eebcb60SDaniel Müller if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
1621eebcb60SDaniel Müller /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
1631eebcb60SDaniel Müller return -ENOTSUP;
1641eebcb60SDaniel Müller
1651eebcb60SDaniel Müller cd_offset = eocd->cd_offset;
1661eebcb60SDaniel Müller cd_size = eocd->cd_size;
1671eebcb60SDaniel Müller if (!check_access(archive, cd_offset, cd_size))
1681eebcb60SDaniel Müller return -EINVAL;
1691eebcb60SDaniel Müller
1701eebcb60SDaniel Müller archive->cd_offset = cd_offset;
1711eebcb60SDaniel Müller archive->cd_records = cd_records;
1721eebcb60SDaniel Müller return 0;
1731eebcb60SDaniel Müller }
1741eebcb60SDaniel Müller
find_cd(struct zip_archive * archive)1751eebcb60SDaniel Müller static int find_cd(struct zip_archive *archive)
1761eebcb60SDaniel Müller {
1773ecde218SDaniel Müller int64_t limit, offset;
1781eebcb60SDaniel Müller int rc = -EINVAL;
1791eebcb60SDaniel Müller
1801eebcb60SDaniel Müller if (archive->size <= sizeof(struct end_of_cd_record))
1811eebcb60SDaniel Müller return -EINVAL;
1821eebcb60SDaniel Müller
1831eebcb60SDaniel Müller /* Because the end of central directory ends with a variable length array of
1841eebcb60SDaniel Müller * up to 0xFFFF bytes we can't know exactly where it starts and need to
1851eebcb60SDaniel Müller * search for it at the end of the file, scanning the (limit, offset] range.
1861eebcb60SDaniel Müller */
1871eebcb60SDaniel Müller offset = archive->size - sizeof(struct end_of_cd_record);
1881eebcb60SDaniel Müller limit = (int64_t)offset - (1 << 16);
1891eebcb60SDaniel Müller
1901eebcb60SDaniel Müller for (; offset >= 0 && offset > limit && rc != 0; offset--) {
1911eebcb60SDaniel Müller rc = try_parse_end_of_cd(archive, offset);
1921eebcb60SDaniel Müller if (rc == -ENOTSUP)
1931eebcb60SDaniel Müller break;
1941eebcb60SDaniel Müller }
1951eebcb60SDaniel Müller return rc;
1961eebcb60SDaniel Müller }
1971eebcb60SDaniel Müller
zip_archive_open(const char * path)1981eebcb60SDaniel Müller struct zip_archive *zip_archive_open(const char *path)
1991eebcb60SDaniel Müller {
2001eebcb60SDaniel Müller struct zip_archive *archive;
2011eebcb60SDaniel Müller int err, fd;
2021eebcb60SDaniel Müller off_t size;
2031eebcb60SDaniel Müller void *data;
2041eebcb60SDaniel Müller
2051eebcb60SDaniel Müller fd = open(path, O_RDONLY | O_CLOEXEC);
2061eebcb60SDaniel Müller if (fd < 0)
2071eebcb60SDaniel Müller return ERR_PTR(-errno);
2081eebcb60SDaniel Müller
2091eebcb60SDaniel Müller size = lseek(fd, 0, SEEK_END);
2101eebcb60SDaniel Müller if (size == (off_t)-1 || size > UINT32_MAX) {
2111eebcb60SDaniel Müller close(fd);
2121eebcb60SDaniel Müller return ERR_PTR(-EINVAL);
2131eebcb60SDaniel Müller }
2141eebcb60SDaniel Müller
2151eebcb60SDaniel Müller data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
2161eebcb60SDaniel Müller err = -errno;
2171eebcb60SDaniel Müller close(fd);
2181eebcb60SDaniel Müller
2191eebcb60SDaniel Müller if (data == MAP_FAILED)
2201eebcb60SDaniel Müller return ERR_PTR(err);
2211eebcb60SDaniel Müller
2221eebcb60SDaniel Müller archive = malloc(sizeof(*archive));
2231eebcb60SDaniel Müller if (!archive) {
2241eebcb60SDaniel Müller munmap(data, size);
2251eebcb60SDaniel Müller return ERR_PTR(-ENOMEM);
2261eebcb60SDaniel Müller };
2271eebcb60SDaniel Müller
2281eebcb60SDaniel Müller archive->data = data;
2291eebcb60SDaniel Müller archive->size = size;
2301eebcb60SDaniel Müller
2311eebcb60SDaniel Müller err = find_cd(archive);
2321eebcb60SDaniel Müller if (err) {
2331eebcb60SDaniel Müller munmap(data, size);
2341eebcb60SDaniel Müller free(archive);
2351eebcb60SDaniel Müller return ERR_PTR(err);
2361eebcb60SDaniel Müller }
2371eebcb60SDaniel Müller
2381eebcb60SDaniel Müller return archive;
2391eebcb60SDaniel Müller }
2401eebcb60SDaniel Müller
zip_archive_close(struct zip_archive * archive)2411eebcb60SDaniel Müller void zip_archive_close(struct zip_archive *archive)
2421eebcb60SDaniel Müller {
2431eebcb60SDaniel Müller munmap(archive->data, archive->size);
2441eebcb60SDaniel Müller free(archive);
2451eebcb60SDaniel Müller }
2461eebcb60SDaniel Müller
local_file_header_at_offset(struct zip_archive * archive,__u32 offset)2471eebcb60SDaniel Müller static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
2481eebcb60SDaniel Müller __u32 offset)
2491eebcb60SDaniel Müller {
2501eebcb60SDaniel Müller struct local_file_header *lfh;
2511eebcb60SDaniel Müller
2521eebcb60SDaniel Müller lfh = check_access(archive, offset, sizeof(*lfh));
2531eebcb60SDaniel Müller if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
2541eebcb60SDaniel Müller return NULL;
2551eebcb60SDaniel Müller
2561eebcb60SDaniel Müller return lfh;
2571eebcb60SDaniel Müller }
2581eebcb60SDaniel Müller
get_entry_at_offset(struct zip_archive * archive,__u32 offset,struct zip_entry * out)2591eebcb60SDaniel Müller static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
2601eebcb60SDaniel Müller {
2611eebcb60SDaniel Müller struct local_file_header *lfh;
2621eebcb60SDaniel Müller __u32 compressed_size;
2631eebcb60SDaniel Müller const char *name;
2641eebcb60SDaniel Müller void *data;
2651eebcb60SDaniel Müller
2661eebcb60SDaniel Müller lfh = local_file_header_at_offset(archive, offset);
2671eebcb60SDaniel Müller if (!lfh)
2681eebcb60SDaniel Müller return -EINVAL;
2691eebcb60SDaniel Müller
2701eebcb60SDaniel Müller offset += sizeof(*lfh);
2711eebcb60SDaniel Müller if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
2721eebcb60SDaniel Müller return -EINVAL;
2731eebcb60SDaniel Müller
2741eebcb60SDaniel Müller name = check_access(archive, offset, lfh->file_name_length);
2751eebcb60SDaniel Müller if (!name)
2761eebcb60SDaniel Müller return -EINVAL;
2771eebcb60SDaniel Müller
2781eebcb60SDaniel Müller offset += lfh->file_name_length;
2791eebcb60SDaniel Müller if (!check_access(archive, offset, lfh->extra_field_length))
2801eebcb60SDaniel Müller return -EINVAL;
2811eebcb60SDaniel Müller
2821eebcb60SDaniel Müller offset += lfh->extra_field_length;
2831eebcb60SDaniel Müller compressed_size = lfh->compressed_size;
2841eebcb60SDaniel Müller data = check_access(archive, offset, compressed_size);
2851eebcb60SDaniel Müller if (!data)
2861eebcb60SDaniel Müller return -EINVAL;
2871eebcb60SDaniel Müller
2881eebcb60SDaniel Müller out->compression = lfh->compression;
2891eebcb60SDaniel Müller out->name_length = lfh->file_name_length;
2901eebcb60SDaniel Müller out->name = name;
2911eebcb60SDaniel Müller out->data = data;
2921eebcb60SDaniel Müller out->data_length = compressed_size;
2931eebcb60SDaniel Müller out->data_offset = offset;
2941eebcb60SDaniel Müller
2951eebcb60SDaniel Müller return 0;
2961eebcb60SDaniel Müller }
2971eebcb60SDaniel Müller
zip_archive_find_entry(struct zip_archive * archive,const char * file_name,struct zip_entry * out)2981eebcb60SDaniel Müller int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
2991eebcb60SDaniel Müller struct zip_entry *out)
3001eebcb60SDaniel Müller {
3011eebcb60SDaniel Müller size_t file_name_length = strlen(file_name);
3021eebcb60SDaniel Müller __u32 i, offset = archive->cd_offset;
3031eebcb60SDaniel Müller
3041eebcb60SDaniel Müller for (i = 0; i < archive->cd_records; ++i) {
3051eebcb60SDaniel Müller __u16 cdfh_name_length, cdfh_flags;
3061eebcb60SDaniel Müller struct cd_file_header *cdfh;
3071eebcb60SDaniel Müller const char *cdfh_name;
3081eebcb60SDaniel Müller
3091eebcb60SDaniel Müller cdfh = check_access(archive, offset, sizeof(*cdfh));
3101eebcb60SDaniel Müller if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
3111eebcb60SDaniel Müller return -EINVAL;
3121eebcb60SDaniel Müller
3131eebcb60SDaniel Müller offset += sizeof(*cdfh);
3141eebcb60SDaniel Müller cdfh_name_length = cdfh->file_name_length;
3151eebcb60SDaniel Müller cdfh_name = check_access(archive, offset, cdfh_name_length);
3161eebcb60SDaniel Müller if (!cdfh_name)
3171eebcb60SDaniel Müller return -EINVAL;
3181eebcb60SDaniel Müller
3191eebcb60SDaniel Müller cdfh_flags = cdfh->flags;
3201eebcb60SDaniel Müller if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
3211eebcb60SDaniel Müller (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
3221eebcb60SDaniel Müller file_name_length == cdfh_name_length &&
3231eebcb60SDaniel Müller memcmp(file_name, archive->data + offset, file_name_length) == 0) {
3241eebcb60SDaniel Müller return get_entry_at_offset(archive, cdfh->offset, out);
3251eebcb60SDaniel Müller }
3261eebcb60SDaniel Müller
3271eebcb60SDaniel Müller offset += cdfh_name_length;
3281eebcb60SDaniel Müller offset += cdfh->extra_field_length;
3291eebcb60SDaniel Müller offset += cdfh->file_comment_length;
3301eebcb60SDaniel Müller }
3311eebcb60SDaniel Müller
3321eebcb60SDaniel Müller return -ENOENT;
3331eebcb60SDaniel Müller }
334