1*fe013be4SDimitry Andric //===-- ZipFile.cpp -------------------------------------------------------===//
2*fe013be4SDimitry Andric //
3*fe013be4SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*fe013be4SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*fe013be4SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*fe013be4SDimitry Andric //
7*fe013be4SDimitry Andric //===----------------------------------------------------------------------===//
8*fe013be4SDimitry Andric
9*fe013be4SDimitry Andric #include "lldb/Utility/ZipFile.h"
10*fe013be4SDimitry Andric #include "lldb/Utility/DataBuffer.h"
11*fe013be4SDimitry Andric #include "lldb/Utility/FileSpec.h"
12*fe013be4SDimitry Andric #include "llvm/Support/Endian.h"
13*fe013be4SDimitry Andric
14*fe013be4SDimitry Andric using namespace lldb_private;
15*fe013be4SDimitry Andric using namespace llvm::support;
16*fe013be4SDimitry Andric
17*fe013be4SDimitry Andric namespace {
18*fe013be4SDimitry Andric
19*fe013be4SDimitry Andric // Zip headers.
20*fe013be4SDimitry Andric // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
21*fe013be4SDimitry Andric
22*fe013be4SDimitry Andric // The end of central directory record.
23*fe013be4SDimitry Andric struct EocdRecord {
24*fe013be4SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x05, 0x06};
25*fe013be4SDimitry Andric char signature[sizeof(kSignature)];
26*fe013be4SDimitry Andric unaligned_uint16_t disks;
27*fe013be4SDimitry Andric unaligned_uint16_t cd_start_disk;
28*fe013be4SDimitry Andric unaligned_uint16_t cds_on_this_disk;
29*fe013be4SDimitry Andric unaligned_uint16_t cd_records;
30*fe013be4SDimitry Andric unaligned_uint32_t cd_size;
31*fe013be4SDimitry Andric unaligned_uint32_t cd_offset;
32*fe013be4SDimitry Andric unaligned_uint16_t comment_length;
33*fe013be4SDimitry Andric };
34*fe013be4SDimitry Andric
35*fe013be4SDimitry Andric // Logical find limit for the end of central directory record.
36*fe013be4SDimitry Andric const size_t kEocdRecordFindLimit =
37*fe013be4SDimitry Andric sizeof(EocdRecord) +
38*fe013be4SDimitry Andric std::numeric_limits<decltype(EocdRecord::comment_length)>::max();
39*fe013be4SDimitry Andric
40*fe013be4SDimitry Andric // Central directory record.
41*fe013be4SDimitry Andric struct CdRecord {
42*fe013be4SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x01, 0x02};
43*fe013be4SDimitry Andric char signature[sizeof(kSignature)];
44*fe013be4SDimitry Andric unaligned_uint16_t version_made_by;
45*fe013be4SDimitry Andric unaligned_uint16_t version_needed_to_extract;
46*fe013be4SDimitry Andric unaligned_uint16_t general_purpose_bit_flag;
47*fe013be4SDimitry Andric unaligned_uint16_t compression_method;
48*fe013be4SDimitry Andric unaligned_uint16_t last_modification_time;
49*fe013be4SDimitry Andric unaligned_uint16_t last_modification_date;
50*fe013be4SDimitry Andric unaligned_uint32_t crc32;
51*fe013be4SDimitry Andric unaligned_uint32_t compressed_size;
52*fe013be4SDimitry Andric unaligned_uint32_t uncompressed_size;
53*fe013be4SDimitry Andric unaligned_uint16_t file_name_length;
54*fe013be4SDimitry Andric unaligned_uint16_t extra_field_length;
55*fe013be4SDimitry Andric unaligned_uint16_t comment_length;
56*fe013be4SDimitry Andric unaligned_uint16_t file_start_disk;
57*fe013be4SDimitry Andric unaligned_uint16_t internal_file_attributes;
58*fe013be4SDimitry Andric unaligned_uint32_t external_file_attributes;
59*fe013be4SDimitry Andric unaligned_uint32_t local_file_header_offset;
60*fe013be4SDimitry Andric };
61*fe013be4SDimitry Andric // Immediately after CdRecord,
62*fe013be4SDimitry Andric // - file name (file_name_length)
63*fe013be4SDimitry Andric // - extra field (extra_field_length)
64*fe013be4SDimitry Andric // - comment (comment_length)
65*fe013be4SDimitry Andric
66*fe013be4SDimitry Andric // Local file header.
67*fe013be4SDimitry Andric struct LocalFileHeader {
68*fe013be4SDimitry Andric static constexpr char kSignature[] = {0x50, 0x4b, 0x03, 0x04};
69*fe013be4SDimitry Andric char signature[sizeof(kSignature)];
70*fe013be4SDimitry Andric unaligned_uint16_t version_needed_to_extract;
71*fe013be4SDimitry Andric unaligned_uint16_t general_purpose_bit_flag;
72*fe013be4SDimitry Andric unaligned_uint16_t compression_method;
73*fe013be4SDimitry Andric unaligned_uint16_t last_modification_time;
74*fe013be4SDimitry Andric unaligned_uint16_t last_modification_date;
75*fe013be4SDimitry Andric unaligned_uint32_t crc32;
76*fe013be4SDimitry Andric unaligned_uint32_t compressed_size;
77*fe013be4SDimitry Andric unaligned_uint32_t uncompressed_size;
78*fe013be4SDimitry Andric unaligned_uint16_t file_name_length;
79*fe013be4SDimitry Andric unaligned_uint16_t extra_field_length;
80*fe013be4SDimitry Andric };
81*fe013be4SDimitry Andric // Immediately after LocalFileHeader,
82*fe013be4SDimitry Andric // - file name (file_name_length)
83*fe013be4SDimitry Andric // - extra field (extra_field_length)
84*fe013be4SDimitry Andric // - file data (should be compressed_size == uncompressed_size, page aligned)
85*fe013be4SDimitry Andric
FindEocdRecord(lldb::DataBufferSP zip_data)86*fe013be4SDimitry Andric const EocdRecord *FindEocdRecord(lldb::DataBufferSP zip_data) {
87*fe013be4SDimitry Andric // Find backward the end of central directory record from the end of the zip
88*fe013be4SDimitry Andric // file to the find limit.
89*fe013be4SDimitry Andric const uint8_t *zip_data_end = zip_data->GetBytes() + zip_data->GetByteSize();
90*fe013be4SDimitry Andric const uint8_t *find_limit = zip_data_end - kEocdRecordFindLimit;
91*fe013be4SDimitry Andric const uint8_t *p = zip_data_end - sizeof(EocdRecord);
92*fe013be4SDimitry Andric for (; p >= zip_data->GetBytes() && p >= find_limit; p--) {
93*fe013be4SDimitry Andric auto eocd = reinterpret_cast<const EocdRecord *>(p);
94*fe013be4SDimitry Andric if (::memcmp(eocd->signature, EocdRecord::kSignature,
95*fe013be4SDimitry Andric sizeof(EocdRecord::kSignature)) == 0) {
96*fe013be4SDimitry Andric // Found the end of central directory. Sanity check the values.
97*fe013be4SDimitry Andric if (eocd->cd_records * sizeof(CdRecord) > eocd->cd_size ||
98*fe013be4SDimitry Andric zip_data->GetBytes() + eocd->cd_offset + eocd->cd_size > p)
99*fe013be4SDimitry Andric return nullptr;
100*fe013be4SDimitry Andric
101*fe013be4SDimitry Andric // This is a valid end of central directory record.
102*fe013be4SDimitry Andric return eocd;
103*fe013be4SDimitry Andric }
104*fe013be4SDimitry Andric }
105*fe013be4SDimitry Andric return nullptr;
106*fe013be4SDimitry Andric }
107*fe013be4SDimitry Andric
GetFile(lldb::DataBufferSP zip_data,uint32_t local_file_header_offset,lldb::offset_t & file_offset,lldb::offset_t & file_size)108*fe013be4SDimitry Andric bool GetFile(lldb::DataBufferSP zip_data, uint32_t local_file_header_offset,
109*fe013be4SDimitry Andric lldb::offset_t &file_offset, lldb::offset_t &file_size) {
110*fe013be4SDimitry Andric auto local_file_header = reinterpret_cast<const LocalFileHeader *>(
111*fe013be4SDimitry Andric zip_data->GetBytes() + local_file_header_offset);
112*fe013be4SDimitry Andric // The signature should match.
113*fe013be4SDimitry Andric if (::memcmp(local_file_header->signature, LocalFileHeader::kSignature,
114*fe013be4SDimitry Andric sizeof(LocalFileHeader::kSignature)) != 0)
115*fe013be4SDimitry Andric return false;
116*fe013be4SDimitry Andric
117*fe013be4SDimitry Andric auto file_data = reinterpret_cast<const uint8_t *>(local_file_header + 1) +
118*fe013be4SDimitry Andric local_file_header->file_name_length +
119*fe013be4SDimitry Andric local_file_header->extra_field_length;
120*fe013be4SDimitry Andric // File should be uncompressed.
121*fe013be4SDimitry Andric if (local_file_header->compressed_size !=
122*fe013be4SDimitry Andric local_file_header->uncompressed_size)
123*fe013be4SDimitry Andric return false;
124*fe013be4SDimitry Andric
125*fe013be4SDimitry Andric // This file is valid. Return the file offset and size.
126*fe013be4SDimitry Andric file_offset = file_data - zip_data->GetBytes();
127*fe013be4SDimitry Andric file_size = local_file_header->uncompressed_size;
128*fe013be4SDimitry Andric return true;
129*fe013be4SDimitry Andric }
130*fe013be4SDimitry Andric
FindFile(lldb::DataBufferSP zip_data,const EocdRecord * eocd,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)131*fe013be4SDimitry Andric bool FindFile(lldb::DataBufferSP zip_data, const EocdRecord *eocd,
132*fe013be4SDimitry Andric const llvm::StringRef file_path, lldb::offset_t &file_offset,
133*fe013be4SDimitry Andric lldb::offset_t &file_size) {
134*fe013be4SDimitry Andric // Find the file from the central directory records.
135*fe013be4SDimitry Andric auto cd = reinterpret_cast<const CdRecord *>(zip_data->GetBytes() +
136*fe013be4SDimitry Andric eocd->cd_offset);
137*fe013be4SDimitry Andric size_t cd_records = eocd->cd_records;
138*fe013be4SDimitry Andric for (size_t i = 0; i < cd_records; i++) {
139*fe013be4SDimitry Andric // The signature should match.
140*fe013be4SDimitry Andric if (::memcmp(cd->signature, CdRecord::kSignature,
141*fe013be4SDimitry Andric sizeof(CdRecord::kSignature)) != 0)
142*fe013be4SDimitry Andric return false;
143*fe013be4SDimitry Andric
144*fe013be4SDimitry Andric // Sanity check the file name values.
145*fe013be4SDimitry Andric auto file_name = reinterpret_cast<const char *>(cd + 1);
146*fe013be4SDimitry Andric size_t file_name_length = cd->file_name_length;
147*fe013be4SDimitry Andric if (file_name + file_name_length >= reinterpret_cast<const char *>(eocd) ||
148*fe013be4SDimitry Andric file_name_length == 0)
149*fe013be4SDimitry Andric return false;
150*fe013be4SDimitry Andric
151*fe013be4SDimitry Andric // Compare the file name.
152*fe013be4SDimitry Andric if (file_path == llvm::StringRef(file_name, file_name_length)) {
153*fe013be4SDimitry Andric // Found the file.
154*fe013be4SDimitry Andric return GetFile(zip_data, cd->local_file_header_offset, file_offset,
155*fe013be4SDimitry Andric file_size);
156*fe013be4SDimitry Andric } else {
157*fe013be4SDimitry Andric // Skip to the next central directory record.
158*fe013be4SDimitry Andric cd = reinterpret_cast<const CdRecord *>(
159*fe013be4SDimitry Andric reinterpret_cast<const char *>(cd) + sizeof(CdRecord) +
160*fe013be4SDimitry Andric cd->file_name_length + cd->extra_field_length + cd->comment_length);
161*fe013be4SDimitry Andric // Sanity check the pointer.
162*fe013be4SDimitry Andric if (reinterpret_cast<const char *>(cd) >=
163*fe013be4SDimitry Andric reinterpret_cast<const char *>(eocd))
164*fe013be4SDimitry Andric return false;
165*fe013be4SDimitry Andric }
166*fe013be4SDimitry Andric }
167*fe013be4SDimitry Andric
168*fe013be4SDimitry Andric return false;
169*fe013be4SDimitry Andric }
170*fe013be4SDimitry Andric
171*fe013be4SDimitry Andric } // end anonymous namespace
172*fe013be4SDimitry Andric
Find(lldb::DataBufferSP zip_data,const llvm::StringRef file_path,lldb::offset_t & file_offset,lldb::offset_t & file_size)173*fe013be4SDimitry Andric bool ZipFile::Find(lldb::DataBufferSP zip_data, const llvm::StringRef file_path,
174*fe013be4SDimitry Andric lldb::offset_t &file_offset, lldb::offset_t &file_size) {
175*fe013be4SDimitry Andric const EocdRecord *eocd = FindEocdRecord(zip_data);
176*fe013be4SDimitry Andric if (!eocd)
177*fe013be4SDimitry Andric return false;
178*fe013be4SDimitry Andric
179*fe013be4SDimitry Andric return FindFile(zip_data, eocd, file_path, file_offset, file_size);
180*fe013be4SDimitry Andric }
181