Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Use file modification times and sizes to speed up indexing #46

Merged
merged 2 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Sources/ReporterCore/Extensions/DataProtocol.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (c) 2024-2025 Jason Morley
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

import Foundation

// https://stackoverflow.com/questions/43241845/how-can-i-convert-data-into-types-like-doubles-ints-and-strings-in-swift#43244973

extension DataProtocol where Self: RangeReplaceableCollection {
    /// Creates a byte collection containing the in-memory bytes of `numeric`.
    ///
    /// The bytes are copied exactly as `withUnsafeBytes(of:)` exposes them; no
    /// byte-order conversion is performed.
    ///
    /// - Parameter numeric: The value whose bytes are copied.
    init<N: Numeric>(_ numeric: N) {
        self = withUnsafeBytes(of: numeric) { rawBytes in
            Self(rawBytes)
        }
    }
}

extension DataProtocol {
    /// Converts these bytes into a numeric value of the type requested by the caller.
    ///
    /// - Returns: The result of passing `self` to the `Numeric` initializer that
    ///   accepts a `DataProtocol` value.
    func value<N: Numeric>() -> N {
        return N(self)
    }
}
36 changes: 36 additions & 0 deletions Sources/ReporterCore/Extensions/Numeric.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright (c) 2024-2025 Jason Morley
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

import Foundation

// https://stackoverflow.com/questions/43241845/how-can-i-convert-data-into-types-like-doubles-ints-and-strings-in-swift#43244973

extension Numeric {
    /// This value's bytes wrapped in a `Data`.
    var data: Data {
        return Data(self)
    }

    /// This value's bytes as a plain byte array.
    var bytes: [UInt8] {
        return [UInt8](self)
    }
}

extension Numeric {
    /// Creates a value by copying the bytes of `data` over a zero-initialized instance.
    ///
    /// The value starts as `.zero` and the bytes of `data` are copied into its
    /// storage; no byte-order conversion is performed.
    ///
    /// - Parameter data: The bytes to copy into the new value.
    init<D: DataProtocol>(_ data: D) {
        var decoded = Self.zero
        _ = withUnsafeMutableBytes(of: &decoded) { storage in
            data.copyBytes(to: storage)
        }
        self = decoded
    }
}
29 changes: 29 additions & 0 deletions Sources/ReporterCore/Model/FileDetails.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright (c) 2024-2025 Jason Morley
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

import Foundation

/// Identifies a file by its relative path, content modification time, and size.
///
/// The type is `Hashable`, which allows it to be used as a dictionary key.
public struct FileDetails: Sendable, Hashable {

/// Path of the file relative to the folder it was found in.
public let relativePath: String
/// Content modification time of the file.
/// NOTE(review): presumably seconds since 1970 — confirm against the code that constructs this value.
public let contentModificationTime: TimeInterval
/// Size of the file.
/// NOTE(review): presumably in bytes — confirm against the code that constructs this value.
public let fileSize: Int

}
44 changes: 43 additions & 1 deletion Sources/ReporterCore/Model/Item.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,54 @@ import Foundation

public struct Item: Codable, Hashable, Sendable {

/// Keys used when encoding and decoding an item; each raw value equals its case name.
enum CodingKeys: String, CodingKey {
    case path, contentModificationTime, fileSize, checksum
}

/// Path of the file; exposed as `relativePath` through `fileDetails`.
public let path: String
/// Content modification time of the file, as a `TimeInterval`.
public let contentModificationTime: TimeInterval
/// Size of the file.
/// NOTE(review): presumably in bytes — confirm against the code that constructs this value.
public let fileSize: Int

/// The path, content modification time, and size of this item bundled as a `FileDetails`.
public var fileDetails: FileDetails {
    FileDetails(relativePath: path,
                contentModificationTime: contentModificationTime,
                fileSize: fileSize)
}

/// Checksum of the file's contents, or `nil` when none is available.
public let checksum: Data?

public init(path: String, checksum: Data?) {
/// Creates an item from its individual fields.
///
/// - Parameters:
///   - path: Path of the file.
///   - contentModificationTime: Content modification time of the file.
///   - fileSize: Size of the file.
///   - checksum: Checksum of the file's contents, or `nil` if none is available.
public init(
    path: String,
    contentModificationTime: TimeInterval,
    fileSize: Int,
    checksum: Data?
) {
    self.checksum = checksum
    self.fileSize = fileSize
    self.contentModificationTime = contentModificationTime
    self.path = path
}

/// Decodes an item from a keyed container.
///
/// `contentModificationTime` is stored as raw `Data` (see `encode(to:)`) and is
/// converted back to a `TimeInterval` by copying its bytes.
///
/// - Parameter decoder: The decoder to read from.
/// - Throws: Any error thrown while reading the keyed container or its values.
public init(from decoder: any Decoder) throws {
    let container = try decoder.container(keyedBy: CodingKeys.self)
    self.path = try container.decode(String.self, forKey: .path)
    let modificationTimeData = try container.decode(Data.self,
                                                    forKey: .contentModificationTime)
    self.contentModificationTime = TimeInterval(modificationTimeData)
    self.fileSize = try container.decode(Int.self, forKey: .fileSize)
    // `checksum` is declared `Data?` and `encode(to:)` encodes the optional directly,
    // so a `nil` checksum is written as a null value. A non-optional decode would
    // throw for such items; `decodeIfPresent` accepts both a null and a missing key.
    self.checksum = try container.decodeIfPresent(Data.self, forKey: .checksum)
}

/// Encodes this item into a keyed container.
///
/// - Parameter encoder: The encoder to write to.
/// - Throws: Any error thrown while writing the values.
public func encode(to encoder: any Encoder) throws {
    var container = encoder.container(keyedBy: CodingKeys.self)
    // The TimeInterval (aka Double) is round-tripped through Data so that there is
    // an extra copy, which avoids a misaligned read.
    let modificationTimeData = contentModificationTime.data
    try container.encode(modificationTimeData, forKey: .contentModificationTime)
    try container.encode(path, forKey: .path)
    try container.encode(fileSize, forKey: .fileSize)
    try container.encode(checksum, forKey: .checksum)
}

}
64 changes: 48 additions & 16 deletions Sources/ReporterCore/Reporter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@ import Crypto
import Stencil
import SwiftSMTP

/// Maps a file's details (relative path, modification time, size) to a checksum.
typealias Cache = [FileDetails: Data]

public class Reporter {

static func snapshot(folderURL: URL, console: Console) async throws -> Snapshot {
static func snapshot(folderURL: URL,
cache: Cache,
console: Console) async throws -> Snapshot {

let fileManager = FileManager.default

Expand All @@ -46,22 +50,31 @@ public class Reporter {
throw ReporterError.notDirectory
}

var files = [String]()

// TODO: Extract this into a custom enumerator?
// TODO: Check if I can get this directly from the enumerator?
var files: [FileDetails] = []
guard let enumerator = fileManager.enumerator(
at: folderURL,
includingPropertiesForKeys: [.isRegularFileKey],
options: [.skipsHiddenFiles, .skipsPackageDescendants]) else {
console.log("Failed to create enumerator")
throw ReporterError.failed
}

for case let fileURL as URL in enumerator {
do {
let fileAttributes = try fileURL.resourceValues(forKeys:[.isRegularFileKey])
let fileAttributes = try fileURL.resourceValues(forKeys: [
.isRegularFileKey,
.fileSizeKey,
.contentModificationDateKey,
.fileSizeKey
])
if fileAttributes.isRegularFile! {
files.append(try fileURL.path(relativeTo: folderURL,
percentEncoded: false))
files.append(FileDetails(
relativePath: try fileURL.path(relativeTo: folderURL,
percentEncoded: false),
contentModificationTime: fileAttributes.contentModificationDate!.timeIntervalSince1970,
fileSize: fileAttributes.fileSize!
))
}
} catch {
// TODO: Review these errors.
Expand All @@ -73,12 +86,18 @@ public class Reporter {
// Generate the hashes for the files concurrently.
let items = try await withThrowingTaskGroup(of: Item.self) { group in
let progress = Progress(totalUnitCount: Int64(files.count))
for relativePath in files {
for fileDetails in files {
group.addTask {
return try await Task {
let url = URL(fileURLWithPath: relativePath, relativeTo: folderURL)
let item = Item(path: relativePath,
checksum: try Self.checksum(url: url))
let url = URL(fileURLWithPath: fileDetails.relativePath,
relativeTo: folderURL)
let checksum = try (cache[fileDetails] ?? Self.checksum(url: url))
let item = Item(
path: fileDetails.relativePath,
contentModificationTime: fileDetails.contentModificationTime,
fileSize: fileDetails.fileSize,
checksum: checksum
)
progress.completedUnitCount += 1
console.progress(progress, message: folderURL.lastPathComponent)
return item
Expand All @@ -99,12 +118,12 @@ public class Reporter {
}

static func checksum(url: URL, bufferSize: Int = 4 * 1024 * 1024) throws -> Data {

let file = try FileHandle(forReadingFrom: url)
defer {
file.closeFile()
}

var md5 = Crypto.Insecure.MD5()
while autoreleasepool(invoking: {
let data = file.readData(ofLength: bufferSize)
Expand All @@ -119,7 +138,9 @@ public class Reporter {
return Data(md5.finalize())
}

static func report(configuration: Configuration, snapshotURL: URL, console: Console) async throws -> Report {
static func report(configuration: Configuration,
snapshotURL: URL,
console: Console) async throws -> Report {

// Load the snapshot if it exists.
console.log("Loading state...")
Expand All @@ -137,8 +158,17 @@ public class Reporter {
let url = URL(fileURLWithPath: folder.expandingTildeInPath, isDirectory: true)
console.log("Indexing '\(url.path)'...")

let items = oldState.snapshots[url]?.items ?? []
let cache = items.reduce(into: Cache()) { partialResult, item in
partialResult[item.fileDetails] = item.checksum
}

// let cache = Cache()

// Get the new snapshot.
let snapshot = try await Self.snapshot(folderURL: url, console: console)
let snapshot = try await Self.snapshot(folderURL: url,
cache: cache,
console: console)
newState.snapshots[url] = snapshot
}

Expand Down Expand Up @@ -169,7 +199,9 @@ public class Reporter {
let configuration = try Configuration(contentsOf: configurationURL)

// Generate the report.
let report = try await report(configuration: configuration, snapshotURL: snapshotURL, console: console)
let report = try await report(configuration: configuration,
snapshotURL: snapshotURL,
console: console)

// Return early if there are no outstanding changes.
if report.isEmpty {
Expand Down