feat: initial Swift version
This commit is contained in:
366
src/Zip.swift
Normal file
366
src/Zip.swift
Normal file
@@ -0,0 +1,366 @@
|
||||
import Foundation
|
||||
import Compression // compression_decode_buffer
|
||||
import zlib // Z_DEFLATED, crc32
|
||||
import os // OSLog
|
||||
|
||||
/// Log handle shared by everything in this file (category "Zip").
///
/// The subsystem is the main bundle identifier. Processes without one
/// (for example plain command-line tools) fall back to the process name
/// instead of crashing on a force-unwrap during static initialisation.
private let log = OSLog(subsystem: Bundle.main.bundleIdentifier ?? ProcessInfo.processInfo.processName, category: "Zip")
|
||||
|
||||
|
||||
// MARK: - Helper to parse byte headers
|
||||
|
||||
/// Sequential reader that pulls fixed-width values and strings out of a `Data` blob.
///
/// Values are reinterpreted from the raw bytes in host byte order; no byte
/// swapping is performed.
private struct ByteScanner {
    private let data: Data
    /// Index (in `data`'s own index space) of the next unread byte.
    private var index: Int
    private let endIndex: Int

    /// - Parameters:
    ///   - data: Bytes to read from.
    ///   - start: Index of the first byte to read.
    init (_ data: Data, start: Int) {
        self.data = data
        self.index = start
        self.endIndex = data.endIndex
    }

    /// Read the next `MemoryLayout<T>.size` bytes as a value of type `T` and advance.
    ///
    /// Running past the end of the data is treated as a programmer error:
    /// a fault is logged and the process is terminated.
    mutating func scan<T>() -> T {
        let size = MemoryLayout<T>.size
        // Compare against the remaining byte count so the check itself cannot overflow.
        if index < data.startIndex || size > endIndex - index {
            os_log(.fault, log: log, "ByteScanner out of bounds")
            fatalError()
        }
        let newIndex = index + size
        // `Data` gives no alignment guarantee for its bytes, so copy them into
        // storage that is correctly aligned for `T` before loading.
        let scratch = UnsafeMutableRawPointer.allocate(byteCount: max(size, 1), alignment: MemoryLayout<T>.alignment)
        defer {
            scratch.deallocate()
        }
        data.copyBytes(to: scratch.assumingMemoryBound(to: UInt8.self), from: index ..< newIndex)
        index = newIndex
        return scratch.load(as: T.self)
    }

    /// Read `length` bytes as a UTF-8 string and advance.
    ///
    /// - Returns: The decoded string, or `""` when the bytes are not valid UTF-8
    ///   or when `length` bytes are not available. In the latter case the scanner
    ///   is moved to the end of the data.
    mutating func scanString(length: Int) -> String {
        guard length >= 0, index >= data.startIndex, length <= endIndex - index else {
            os_log(.error, log: log, "ByteScanner string out of bounds")
            index = endIndex
            return ""
        }
        let bytes = data.subdata(in: index ..< index + length)
        index += length
        return String(data: bytes, encoding: .utf8) ?? ""
    }
}
|
||||
|
||||
|
||||
// MARK: - ZIP Headers
|
||||
|
||||
// See http://en.wikipedia.org/wiki/ZIP_(file_format)#File_headers
|
||||
|
||||
/// Local file header: the fixed-size record stored directly in front of an entry's data.
///
/// Only the fixed `LENGTH` bytes are parsed. The file name and extra field that
/// follow the header are not read here; only their lengths are recorded.
private struct ZIP_LocalFile {
    /// Number of bytes consumed by the fixed part of the header.
    static let LENGTH: Int = 30

    /// Record signature, bytes 50 4B 03 04.
    let magicNumber: UInt32
    let versionNeededToExtract: UInt16
    let generalPurposeBitFlag: UInt16
    let compressionMethod: UInt16
    let fileLastModificationTime: UInt16
    let fileLastModificationDate: UInt16
    let CRC32: UInt32
    let compressedSize: UInt32
    let uncompressedSize: UInt32
    /// Byte length of the file name that follows the fixed header.
    let fileNameLength: UInt16
    /// Byte length of the extra field that follows the file name.
    let extraFieldLength: UInt16

    // Not parsed:
    // let fileName: String
    // Extra field

    /// Parse the header fields, in on-disk order, starting at `start` in `data`.
    ///
    /// - Parameters:
    ///   - data: Buffer containing at least `LENGTH` bytes from `start` on.
    ///   - start: Index of the first header byte.
    init(_ data: Data, start: Data.Index = 0) {
        var reader = ByteScanner(data, start: start)
        // The order of the reads below mirrors the field order of the record.
        self.magicNumber = reader.scan()
        self.versionNeededToExtract = reader.scan()
        self.generalPurposeBitFlag = reader.scan()
        self.compressionMethod = reader.scan()
        self.fileLastModificationTime = reader.scan()
        self.fileLastModificationDate = reader.scan()
        self.CRC32 = reader.scan()
        self.compressedSize = reader.scan()
        self.uncompressedSize = reader.scan()
        self.fileNameLength = reader.scan()
        self.extraFieldLength = reader.scan()
        // fileName = reader.scanString(length: Int(fileNameLength))
    }
}
|
||||
|
||||
/// Central directory file header: one record per archive entry.
///
/// The fixed `LENGTH` bytes are followed by the file name, which is parsed,
/// and by the extra field and file comment, which are not.
private struct ZIP_CDFH {
    /// Number of bytes consumed by the fixed part of the header.
    static let LENGTH: Int = 46

    /// Record signature, bytes 50 4B 01 02.
    let magicNumber: UInt32
    let versionMadeBy: UInt16
    let versionNeededToExtract: UInt16
    let generalPurposeBitFlag: UInt16
    let compressionMethod: UInt16
    let fileLastModificationTime: UInt16
    let fileLastModificationDate: UInt16
    let CRC32: UInt32
    let compressedSize: UInt32
    let uncompressedSize: UInt32
    /// Byte length of the file name that follows the fixed header.
    let fileNameLength: UInt16
    /// Byte length of the extra field that follows the file name.
    let extraFieldLength: UInt16
    /// Byte length of the comment that follows the extra field.
    let fileCommentLength: UInt16
    let diskNumberWhereFileStarts: UInt16
    let internalFileAttributes: UInt16
    let externalFileAttributes: UInt32
    let relativeOffsetOfLocalFileHeader: UInt32

    /// File name decoded as UTF-8; empty when decoding fails.
    let fileName: String
    // Not parsed:
    // Extra field
    // File comment

    /// Parse the header fields, in on-disk order, starting at `start` in `data`.
    ///
    /// - Parameters:
    ///   - data: Buffer holding the fixed header and the file name.
    ///   - start: Index of the first header byte.
    init(_ data: Data, start: Data.Index = 0) {
        var reader = ByteScanner(data, start: start)
        // The order of the reads below mirrors the field order of the record.
        self.magicNumber = reader.scan()
        self.versionMadeBy = reader.scan()
        self.versionNeededToExtract = reader.scan()
        self.generalPurposeBitFlag = reader.scan()
        self.compressionMethod = reader.scan()
        self.fileLastModificationTime = reader.scan()
        self.fileLastModificationDate = reader.scan()
        self.CRC32 = reader.scan()
        self.compressedSize = reader.scan()
        self.uncompressedSize = reader.scan()
        self.fileNameLength = reader.scan()
        self.extraFieldLength = reader.scan()
        self.fileCommentLength = reader.scan()
        self.diskNumberWhereFileStarts = reader.scan()
        self.internalFileAttributes = reader.scan()
        self.externalFileAttributes = reader.scan()
        self.relativeOffsetOfLocalFileHeader = reader.scan()
        // The name sits immediately behind the fixed-size fields.
        self.fileName = reader.scanString(length: Int(self.fileNameLength))
    }
}
|
||||
|
||||
/// End of central directory record: locates the central directory inside the archive.
///
/// Only the fixed `LENGTH` bytes are parsed; the trailing comment is not read.
private struct ZIP_EOCD {
    /// Number of bytes consumed by the fixed part of the record.
    static let LENGTH: Int = 22

    /// Record signature, bytes 50 4B 05 06.
    let magicNumber: UInt32
    let numberOfThisDisk: UInt16
    let diskWhereCentralDirectoryStarts: UInt16
    let numberOfCentralDirectoryRecordsOnThisDisk: UInt16
    let totalNumberOfCentralDirectoryRecords: UInt16
    /// Size of the central directory in bytes.
    let sizeOfCentralDirectory: UInt32
    /// File offset at which the central directory begins.
    let offsetOfStartOfCentralDirectory: UInt32
    /// Byte length of the comment that follows the record.
    let commentLength: UInt16
    // Not parsed:
    // Comment

    /// Parse the record fields, in on-disk order, starting at `start` in `data`.
    ///
    /// - Parameters:
    ///   - data: Buffer containing at least `LENGTH` bytes from `start` on.
    ///   - start: Index of the first record byte.
    init(_ data: Data, start: Data.Index = 0) {
        var reader = ByteScanner(data, start: start)
        // The order of the reads below mirrors the field order of the record.
        self.magicNumber = reader.scan()
        self.numberOfThisDisk = reader.scan()
        self.diskWhereCentralDirectoryStarts = reader.scan()
        self.numberOfCentralDirectoryRecordsOnThisDisk = reader.scan()
        self.totalNumberOfCentralDirectoryRecords = reader.scan()
        self.sizeOfCentralDirectory = reader.scan()
        self.offsetOfStartOfCentralDirectory = reader.scan()
        self.commentLength = reader.scan()
    }
}
|
||||
|
||||
|
||||
// MARK: - CRC32 check
|
||||
|
||||
extension Data {
    /// Compute the CRC-32 checksum of the bytes using zlib's `crc32`.
    ///
    /// - Returns: The checksum; `0` for empty data.
    func crc() -> UInt32 {
        return withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> UInt32 in
            // Documented way to obtain the initial CRC value.
            var checksum = crc32(0, nil, 0)
            // Empty data has no base address; its checksum is the initial value.
            guard let base = raw.bindMemory(to: UInt8.self).baseAddress else {
                return UInt32(truncatingIfNeeded: checksum)
            }
            // zlib takes a 32-bit length, so feed large buffers in chunks
            // instead of trapping on the integer conversion.
            var offset = 0
            while offset < raw.count {
                let chunk = Swift.min(raw.count - offset, Int(Int32.max))
                checksum = crc32(checksum, base + offset, UInt32(chunk))
                offset += chunk
            }
            return UInt32(truncatingIfNeeded: checksum)
        }
    }
}
|
||||
|
||||
|
||||
// MARK: - Unzip data
|
||||
|
||||
/// Extract a single archive entry into memory.
///
/// - Parameters:
///   - path: File system path of the ZIP archive.
///   - entry: Central directory entry describing the file to extract.
/// - Returns: The entry's contents, or `nil` when the archive cannot be opened,
///   the local header is truncated or has the wrong signature, decompression
///   fails, the CRC check fails, or the compression method is unsupported.
func unzipFileEntry(_ path: String, _ entry: ZipEntry) -> Data? {
    guard let fp = FileHandle(forReadingAtPath: path) else {
        return nil
    }
    defer {
        try? fp.close()
    }
    fp.seek(toFileOffset: UInt64(entry.offset))
    let headerData = fp.readData(ofLength: ZIP_LocalFile.LENGTH)
    // A short read would otherwise run the header parser past the end of the buffer.
    guard headerData.count == ZIP_LocalFile.LENGTH else {
        os_log(.error, log: log, "local file header is truncated")
        return nil
    }
    let file_record = ZIP_LocalFile(headerData)
    os_log(.debug, log: log, "header: %{public}@ vs %{public}@", String(describing: file_record), String(describing: entry))
    // Signature bytes 50 4B 03 04, interpreted as a little-endian integer.
    guard UInt32(littleEndian: file_record.magicNumber) == 0x04034b50 else {
        os_log(.error, log: log, "local file header has wrong signature")
        return nil
    }

    // central directory size and local file size may differ! use local file for ground truth
    let dataOffset = Int(entry.offset) + ZIP_LocalFile.LENGTH + Int(file_record.fileNameLength) + Int(file_record.extraFieldLength)
    fp.seek(toFileOffset: UInt64(dataOffset))
    let rawData = fp.readData(ofLength: Int(entry.sizeCompressed))

    if entry.method == Z_DEFLATED {
        guard let uncompressedData = decodeRawDeflateStream(rawData, expectedSize: Int(entry.sizeUncompressed)) else {
            os_log(.error, log: log, "decompression failed")
            return nil
        }
        // A stored CRC of 0 is treated as "not available" and skips the check.
        if file_record.CRC32 != 0, uncompressedData.crc() != file_record.CRC32 {
            os_log(.error, log: log, "CRC check failed (after uncompress)")
            return nil
        }
        return uncompressedData

    } else if entry.method == 0 {
        // Method 0: the data is stored without compression.
        if file_record.CRC32 != 0, rawData.crc() != file_record.CRC32 {
            os_log(.error, log: log, "CRC check failed (uncompressed data)")
            return nil
        }
        return rawData

    } else {
        os_log(.error, log: log, "unimplemented compression method: %{public}d", entry.method)
        return nil
    }
}

/// Decode a deflate stream with the Compression framework (`COMPRESSION_ZLIB`).
///
/// - Parameters:
///   - compressed: The compressed bytes exactly as read from the archive.
///   - expectedSize: Capacity of the output buffer; at most this many bytes are produced.
/// - Returns: The decoded bytes, empty when `expectedSize` is 0, or `nil` when
///   there is no input or the decoder produced no output.
private func decodeRawDeflateStream(_ compressed: Data, expectedSize: Int) -> Data? {
    guard expectedSize > 0 else {
        return Data()
    }
    guard !compressed.isEmpty else {
        return nil
    }
    let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: expectedSize)
    defer {
        buffer.deallocate()
    }
    let written = compressed.withUnsafeBytes { (raw: UnsafeRawBufferPointer) -> Int in
        guard let source = raw.bindMemory(to: UInt8.self).baseAddress else {
            return 0
        }
        // Pass the number of bytes actually available, never the advertised
        // size, so a short read cannot make the decoder read out of bounds.
        return compression_decode_buffer(buffer, expectedSize, source, raw.count, nil, COMPRESSION_ZLIB)
    }
    guard written > 0 else {
        return nil
    }
    return Data(bytes: buffer, count: written)
}
|
||||
|
||||
|
||||
// MARK: - List files
|
||||
|
||||
/// Read the central directory of the archive at `path`.
///
/// - Parameter path: File system path of the ZIP archive.
/// - Returns: All entries of the central directory. The result is empty when
///   the file cannot be opened, no end-of-central-directory record is found,
///   or that record reports a central directory size of 0.
private func listZip(_ path: String) -> [ZipEntry] {
    guard let handle = FileHandle(forReadingAtPath: path) else {
        return []
    }
    defer {
        try? handle.close()
    }

    if let eocd = findCentralDirectory(handle), eocd.sizeOfCentralDirectory > 0 {
        return listDirectoryEntries(handle, eocd)
    }
    return []
}
|
||||
|
||||
/// Find and parse the end-of-central-directory record at the tail of the file.
///
/// - Parameter fp: Open handle of the archive; its file offset is moved.
/// - Returns: The parsed record, or `nil` when no signature is found or the
///   record is truncated.
private func findCentralDirectory(_ fp: FileHandle) -> ZIP_EOCD? {
    // The record is 22 bytes plus a comment whose length is a 16-bit field,
    // so it always starts within the last 22 + 65535 bytes of the file.
    let searchWindow = UInt64(ZIP_EOCD.LENGTH + Int(UInt16.max))
    let eof = fp.seekToEndOfFile()
    // Unsigned arithmetic: compare before subtracting so small files do not underflow.
    fp.seek(toFileOffset: eof > searchWindow ? eof - searchWindow : 0)
    let data = fp.readDataToEndOfFile()

    let centralDirSignature: [UInt8] = [0x50, 0x4b, 0x05, 0x06]

    guard let range = data.lastRange(of: centralDirSignature) else {
        os_log(.error, log: log, "no zip end-header found!")
        return nil
    }
    // Parsing a partial record would run past the end of the buffer.
    guard data.endIndex - range.lowerBound >= ZIP_EOCD.LENGTH else {
        os_log(.error, log: log, "zip end-header is truncated")
        return nil
    }
    return ZIP_EOCD(data, start: range.lowerBound)
}
|
||||
|
||||
/// List all files and folders of the central directory.
///
/// - Parameters:
///   - fp: Open handle of the archive; its file offset is moved.
///   - centralDir: End-of-central-directory record giving offset and size of the directory.
/// - Returns: One entry per well-formed central directory record. Parsing stops
///   at the first record that has the wrong signature or does not fit in the
///   bytes that were read.
private func listDirectoryEntries(_ fp: FileHandle, _ centralDir: ZIP_EOCD) -> [ZipEntry] {
    fp.seek(toFileOffset: UInt64(centralDir.offsetOfStartOfCentralDirectory))
    let data = fp.readData(ofLength: Int(centralDir.sizeOfCentralDirectory))
    let total = data.count

    let recordSignature: [UInt8] = [0x50, 0x4b, 0x01, 0x02]
    // Byte offset of the 16-bit file name length inside the fixed header.
    let nameLengthOffset = 28

    var idx = 0
    var entries: [ZipEntry] = []

    while idx + ZIP_CDFH.LENGTH <= total {
        guard data[idx ..< idx + recordSignature.count].elementsEqual(recordSignature) else {
            os_log(.error, log: log, "unexpected signature in central directory")
            break
        }
        // Peek at the name length (little-endian) so the record is only parsed
        // when the name is fully contained in the buffer.
        let nameLength = Int(data[idx + nameLengthOffset]) | (Int(data[idx + nameLengthOffset + 1]) << 8)
        let nameStart = idx + ZIP_CDFH.LENGTH
        let nameEnd = nameStart + nameLength
        guard nameEnd <= total else {
            os_log(.error, log: log, "central directory record is truncated")
            break
        }

        let record = ZIP_CDFH(data, start: idx)
        // read filename; names that are not valid UTF-8 are decoded lossily
        // rather than crashing on a force-unwrap
        let nameBytes = data.subdata(in: nameStart ..< nameEnd)
        let filename = String(data: nameBytes, encoding: .utf8) ?? String(decoding: nameBytes, as: UTF8.self)
        entries.append(ZipEntry(filename, record))

        // update index; widen to Int before adding so the 16-bit sum cannot overflow
        idx = nameEnd + Int(record.extraFieldLength) + Int(record.fileCommentLength)
    }
    return entries
}
|
||||
|
||||
|
||||
// MARK: - ZipEntry
|
||||
|
||||
/// A single file or folder listed in the archive's central directory.
struct ZipEntry {
    /// Path of the entry inside the archive.
    let filepath: String
    /// Offset of the entry's local file header.
    let offset: UInt32
    /// Compression method as recorded in the central directory.
    let method: UInt16
    /// Size of the stored (compressed) data in bytes.
    let sizeCompressed: UInt32
    /// Size of the data after decompression in bytes.
    let sizeUncompressed: UInt32
    /// Byte length of the file name in the central directory record.
    let filenameLength: UInt16
    /// Byte length of the extra field in the central directory record.
    let extraFieldLength: UInt16
    /// CRC-32 checksum as recorded in the central directory.
    let CRC32: UInt32

    /// Build an entry from a central directory record.
    ///
    /// - Parameters:
    ///   - filename: Path of the entry inside the archive.
    ///   - record: Parsed central directory file header the values are copied from.
    fileprivate init(_ filename: String, _ record: ZIP_CDFH) {
        filepath = filename
        offset = record.relativeOffsetOfLocalFileHeader
        method = record.compressionMethod
        sizeCompressed = record.compressedSize
        sizeUncompressed = record.uncompressedSize
        filenameLength = record.fileNameLength
        extraFieldLength = record.extraFieldLength
        CRC32 = record.CRC32
    }
}
|
||||
|
||||
extension Array where Element == ZipEntry {
    /// Return entry with shortest possible path (thus ignoring deeper nested files).
    ///
    /// - Returns: The entry whose `filepath` has the fewest characters, the
    ///   first such entry on a tie, or `nil` for an empty array.
    func zipEntryWithShortestPath() -> ZipEntry? {
        // `min(by:)` only replaces the current candidate by a strictly smaller
        // one, so the first of several equally short paths wins.
        return self.min { $0.filepath.count < $1.filepath.count }
    }
}
|
||||
|
||||
|
||||
// MARK: - ZipFile
|
||||
|
||||
/// Read-only view of a ZIP archive on disk.
///
/// The central directory is read once on creation; file contents are read
/// from disk on demand.
struct ZipFile {
    private let pathToZipFile: String
    private let centralDirectory: [ZipEntry]

    /// Read the central directory of the archive at `path`.
    ///
    /// - Parameter path: File system path of the ZIP archive.
    init(_ path: String) {
        self.pathToZipFile = path
        self.centralDirectory = listZip(path)
    }

    // MARK: - public methods

    /// Return all entries whose path matches `path`.
    ///
    /// - Parameter path: Path inside the archive. `*` is a wildcard that matches
    ///   any run of characters (including none, and including `/`). A pattern
    ///   without `*` must match the entry path exactly.
    /// - Returns: Matching entries in central directory order.
    func filesMatching(_ path: String) -> [ZipEntry] {
        let parts = path.split(separator: "*", omittingEmptySubsequences: false)
        // No wildcard at all: only an exact match counts.
        guard parts.count > 1 else {
            return centralDirectory.filter { $0.filepath == path }
        }
        let head = parts[0]
        let tail = parts[parts.count - 1]
        // Empty segments come from leading, trailing or doubled wildcards and
        // constrain nothing; searching for an empty string would find nothing.
        let literals = parts.filter { !$0.isEmpty }

        return centralDirectory.filter { entry in
            let name = entry.filepath
            guard head.isEmpty || name.hasPrefix(head), tail.isEmpty || name.hasSuffix(tail) else {
                return false
            }
            // Every literal segment must occur in order, without overlapping.
            var cursor = name.startIndex
            for literal in literals {
                guard let found = name.range(of: literal, range: cursor ..< name.endIndex) else {
                    return false
                }
                cursor = found.upperBound
            }
            return true
        }
    }

    /// Unzip file directly into memory.
    ///
    /// When several entries match, the one with the shortest path is extracted.
    ///
    /// - Parameter filePath: File path inside zip file; may contain `*` wildcards.
    /// - Returns: The file contents, or `nil` when nothing matches or extraction fails.
    func unzipFile(_ filePath: String) -> Data? {
        if let matchingFile = self.filesMatching(filePath).zipEntryWithShortestPath() {
            os_log(.debug, log: log, "[unzip] %{public}@", matchingFile.filepath)
            return unzipFileEntry(pathToZipFile, matchingFile)
        }

        // There is a dir listing but no matching file.
        // This means there won't be anything to extract.
        os_log(.error, log: log, "cannot find '%{public}@' for unzip", filePath)
        return nil
    }

    /// Unzip file to filesystem.
    ///
    /// The output file is named after the last `/`-separated component of
    /// `filePath`. Nothing is written, and no error is thrown, when the file
    /// cannot be extracted.
    ///
    /// - Parameters:
    ///   - filePath: File path inside zip file.
    ///   - targetDir: Directory in which to unzip the file.
    /// - Throws: Any error raised while writing the output file.
    func unzipFile(_ filePath: String, toDir targetDir: String) throws {
        if let data = self.unzipFile(filePath) {
            // `components(separatedBy:)` never returns an empty array; the
            // fallback only replaces the force-unwrap.
            let filename = filePath.components(separatedBy: "/").last ?? filePath
            let outputPath = targetDir.appending("/" + filename)
            os_log(.debug, log: log, "[unzip] write to %{public}@", outputPath)
            try data.write(to: URL(fileURLWithPath: outputPath), options: .atomic)
        }
    }
}
|
||||
Reference in New Issue
Block a user