From ae1dd869007ad88663e8b49bd6fcafada06d2d03 Mon Sep 17 00:00:00 2001 From: Austin Gill Date: Sat, 16 Nov 2024 17:41:34 -0600 Subject: [PATCH] WIP: Try brute forcing 128-bit hashes to trigger quine --- Cargo.lock | 1 + Cargo.toml | 1 + herostratus-quine/Cargo.toml | 1 + herostratus-quine/src/git.rs | 7 +- herostratus-quine/src/job.rs | 120 ++++++++++++++++++++++++++++++++++ herostratus-quine/src/main.rs | 85 +++++++++++++++++++----- 6 files changed, 193 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bbdc8f9..37ac841 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -434,6 +434,7 @@ dependencies = [ "clap", "color-eyre", "eyre", + "generic-array", "git2", "sha1", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 902950f..6941ae3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,6 +25,7 @@ color-eyre = "0.6.2" ctor = "0.2.7" directories = "5.0.1" eyre = "0.6.12" +generic-array = "0.14" # need compat with sha1 git2 = "0.19" inventory = "0.3.15" lazy_static = "1.4.0" diff --git a/herostratus-quine/Cargo.toml b/herostratus-quine/Cargo.toml index 79e3567..0b59222 100644 --- a/herostratus-quine/Cargo.toml +++ b/herostratus-quine/Cargo.toml @@ -10,6 +10,7 @@ edition.workspace = true clap.workspace = true color-eyre.workspace = true eyre.workspace = true +generic-array.workspace = true git2.workspace = true sha1.workspace = true tracing-subscriber.workspace = true diff --git a/herostratus-quine/src/git.rs b/herostratus-quine/src/git.rs index 3d6c872..d3fa30e 100644 --- a/herostratus-quine/src/git.rs +++ b/herostratus-quine/src/git.rs @@ -34,12 +34,7 @@ pub fn generate_initial_commit( let hash_placeholder = "X".repeat(prefix_length as usize); - let message = format!( - "Quine: {hash_placeholder}\n\ - \n\ - This commit was lovingly brute-forced to contain is own hash prefix\n\ - by herostratus-quine." 
- ); + let message = format!("Quine: {hash_placeholder}"); let oid = make_empty_commit(repo, &who, &message)?; let commit = repo.find_commit(oid)?; Ok(commit) diff --git a/herostratus-quine/src/job.rs b/herostratus-quine/src/job.rs index e69de29..33cbaab 100644 --- a/herostratus-quine/src/job.rs +++ b/herostratus-quine/src/job.rs @@ -0,0 +1,120 @@ +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use generic_array::GenericArray; +use sha1::{Digest, Sha1}; + +pub fn spawn_worker_thread( + w: usize, + worker_start: u128, + worker_end: u128, + prefix_length: u8, + raw_commit: String, + is_running: Arc, +) -> std::thread::JoinHandle> { + std::thread::Builder::new() + .name(format!("quine-{w}")) + .spawn(move || { + worker( + w, + worker_start, + worker_end, + prefix_length, + raw_commit, + is_running, + ) + }) + .expect("Failed to spawn worker thread") +} + +pub fn join_all( + mut handles: Vec>>, + is_running: Arc, +) -> Vec { + let mut results = Vec::new(); + + while !handles.is_empty() { + let (finished, unfinished): (Vec<_>, Vec<_>) = + handles.into_iter().partition(|h| h.is_finished()); + + for handle in finished { + // terminate after the first result + if let Some(result) = handle.join().expect("Worker thread panicked") { + results.push(result); + is_running.store(false, Ordering::SeqCst); + } + } + + handles = unfinished; + std::thread::sleep(std::time::Duration::from_millis(100)); + } + + results +} + +fn worker( + worker: usize, + worker_start: u128, + worker_end: u128, + prefix_length: u8, + mut raw_commit: String, + is_running: Arc, +) -> Option { + tracing::debug!("Worker {worker} processing chunk {worker_start:#x}..={worker_end:#x}"); + + let placeholder = "X".repeat(prefix_length as usize); + let offset = raw_commit + .find(&placeholder) + .expect("Failed to find XXXXX placeholder pattern"); + + let mut hasher = Sha1::new(); + let mut output_buffer: [u8; 20] = [0; 20]; + let output = GenericArray::from_mut_slice(&mut output_buffer); 
+ let prefix_length_bytes: usize = prefix_length as usize / 2; // prefix_length is in nibbles + let hex = "0123456789abcdef"; + for prefix in worker_start..=worker_end { + if !is_running.load(Ordering::SeqCst) { + break; + } + + // TODO: Give 10% progress reports + + let prefix_bytes: [u8; 16] = prefix.to_le_bytes(); + // hash printed as c63cf7 corresponds to byte array [0xc6, 0x3c, 0xf7] + for hash_idx in 0..prefix_length as usize { + let byte_idx = hash_idx / 2; + let prefix_nibble = if hash_idx % 2 == 0 { + // high nibble of prefix + prefix_bytes[byte_idx] >> 4 + } else { + // low nibble of prefix + prefix_bytes[byte_idx] & 0x0F + }; + let prefix_char = hex.as_bytes()[prefix_nibble as usize]; + + unsafe { + raw_commit.as_bytes_mut()[hash_idx + offset] = prefix_char; + } + } + // TODO: Do I need to add a UUID to each commit for more randomness? Or do I fall back on + // the whitespace trick from https://github.com/not-an-aardvark/lucky-commit ? + hasher.update(raw_commit.as_bytes()); + + // Finalize and reset the hasher, using preallocated output memory + hasher.finalize_into_reset(output); + + // let oid = git2::Oid::from_bytes(output).unwrap(); + // tracing::debug!("Worker {worker} attempting prefix {prefix:#x} found full hash {oid} from {raw_commit:?} {prefix_bytes:x?}"); + + // TODO: This doesn't account for nibble order. E.g., prefix 0x95A matches hash 5A090B, but + // it works as long as the prefix_length is even? 
+ if output.as_slice()[..=prefix_length_bytes] == prefix_bytes[..=prefix_length_bytes] { + // hack for pretty-printing + let oid = git2::Oid::from_bytes(output).unwrap(); + tracing::info!("Worker {worker} found prefix {prefix:#x} for full hash {oid}"); + return Some(prefix); + } + } + + None +} diff --git a/herostratus-quine/src/main.rs b/herostratus-quine/src/main.rs index 92ac289..8b6c248 100644 --- a/herostratus-quine/src/main.rs +++ b/herostratus-quine/src/main.rs @@ -3,6 +3,9 @@ mod git; mod job; use std::io::IsTerminal; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; +use std::time::Instant; use clap::Parser; use cli::Args; @@ -16,10 +19,6 @@ fn main() -> eyre::Result<()> { } let mut args = Args::parse(); - args.prefix_length = u8::min(args.prefix_length, 40); - if args.jobs == 0 { - args.jobs = std::thread::available_parallelism()?.into(); - } let filter = EnvFilter::builder() .with_default_directive(args.log_level.into()) @@ -31,7 +30,17 @@ fn main() -> eyre::Result<()> { .with_writer(std::io::stderr) .init(); - tracing::debug!("{args:?}"); + // prefix is in characters in the hash, where each one is a nibble + // this tool uses a u128 as the hash prefix, so don't allow anything that would overflow that. + const PREFIX_LIMIT: u8 = 128 / 4; + if args.prefix_length > PREFIX_LIMIT { + tracing::warn!("Hash prefixes larger than {PREFIX_LIMIT} nibbles aren't supported"); + } + let prefix_length = u8::min(args.prefix_length, PREFIX_LIMIT); + + if args.jobs == 0 { + args.jobs = std::thread::available_parallelism()?.into(); + } let repo = git2::Repository::discover(&args.repository).wrap_err("Failed to discovery repository")?; @@ -59,24 +68,68 @@ fn main() -> eyre::Result<()> { // the author and committer names and timestamps). Since *making* 16^7 commits would be too // expensive, we make the initial commit, grab the raw commit contents, and then brute force a // shit ton of hashes. 
- let commit = git::generate_initial_commit(&repo, args.prefix_length) + let commit = git::generate_initial_commit(&repo, prefix_length) .wrap_err("Failed to generate initial commit")?; // The commit hash is the SHA1 hash of this string let raw_commit = git::get_raw_commit(&commit); - - // Smoke test! - let verify_hash = git::sha1(&raw_commit); - if commit.id() != verify_hash { - let err = Err(eyre::eyre!("raw commit:\n{raw_commit}")) - .wrap_err(format!("actual hash: {}", commit.id())) - .wrap_err(format!("calculated hash: {}", verify_hash)) - .wrap_err("Failed to verify SHA1 hash of initial quine commit"); - return err; - } + debug_assert_eq!(git::sha1(&raw_commit), commit.id()); // Now we have the raw string being hashed, time to spin up a shit-ton of workers to brute // force different variations of it! + // + // Split up the range 0000000..FFFFFFF into N workers (0..16^prefix_length) + + // TODO: Contiguous u128 ranges don't result in contiguous hash prefix ranges, because the + // hashes are formatted like ABCD for [0xAB, 0xCD], so a u128 prefix like 0xABC would get + // formatted into a hash string like AB0C.
+ // + // I think I need to start over and throw out the u128 "optimization" + let min_prefix: u128 = 0; + let max_prefix: u128 = u128::MAX >> ((PREFIX_LIMIT - prefix_length) * 4); + let worker_chunk_size = max_prefix / args.jobs as u128; + debug_assert_eq!(max_prefix.trailing_ones(), prefix_length as u32 * 4); + tracing::debug!("Brute forcing the {prefix_length} nibble prefix range {min_prefix:#x}..={max_prefix:#x} with {worker_chunk_size:#x} bit chunks"); + + let start = Instant::now(); + + let mut worker_start; + let mut worker_end = 0; + let mut handles = Vec::new(); + let is_running = Arc::new(AtomicBool::new(true)); + for worker in 0..args.jobs { + // start and end are inclusive, so that we don't have overflow at the end with a full + // 128-bit prefix + worker_start = if worker == 0 { + min_prefix + } else { + worker_end + 1 + }; + worker_end = if worker == args.jobs - 1 { + max_prefix + } else { + worker_start + worker_chunk_size + }; + + let handle = job::spawn_worker_thread( + worker, + worker_start, + worker_end, + prefix_length, + raw_commit.clone(), + is_running.clone(), + ); + handles.push(handle); + } + + let results = job::join_all(handles, is_running); + tracing::info!("Workers finished after {:?}", start.elapsed()); + // tracing::debug!("Looking for {} {:x?}", commit.id(), commit.id().as_bytes()); + + if !results.is_empty() { + tracing::info!("Found results: {results:x?}"); + // TODO: Edit the commit with the calculated hash prefix, verify + } Ok(()) }