diff --git a/java/dev/enola/common/io/BUILD b/java/dev/enola/common/io/BUILD
index 5d2840e20..280d22fa0 100644
--- a/java/dev/enola/common/io/BUILD
+++ b/java/dev/enola/common/io/BUILD
@@ -32,6 +32,8 @@ java_library(
         "//java/dev/enola/data",
         # TODO Separate BUILD for IPFSResource
         "@enola_maven//:com_github_ipld_java_cid",
+        "@enola_maven//:com_github_multiformats_java_multihash",
+        "@enola_maven//:com_github_multiformats_java_multibase",
         "@enola_maven//:com_github_java_json_tools_uri_template",
         "@enola_maven//:com_google_auto_service_auto_service_annotations",
         "@enola_maven//:com_google_errorprone_error_prone_annotations",
@@ -56,6 +58,8 @@ junit_tests(
         "//test",
         "@enola_maven//:com_github_ipld_java_cid",
         "@enola_maven//:com_github_java_json_tools_uri_template",
+        "@enola_maven//:com_github_multiformats_java_multibase",
+        "@enola_maven//:com_github_multiformats_java_multihash",
         "@enola_maven//:com_google_auto_service_auto_service_annotations",
         "@enola_maven//:org_jspecify_jspecify",
     ],
diff --git a/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResource.java b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResource.java
new file mode 100644
index 000000000..3f717d0da
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResource.java
@@ -0,0 +1,113 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+import com.google.common.io.ByteSource;
+import com.google.common.io.CharSource;
+
+import dev.enola.common.io.iri.URIs;
+import dev.enola.common.io.resource.*;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.net.URI;
+
+/**
+ * A {@link Resource} wrapper which checks that the bytes of its delegate hash to an expected
+ * {@link MultihashWithMultibase} before it hands out {@link #byteSource()} or
+ * {@link #charSource()}, and throws {@link IntegrityViolationException} if they do not.
+ *
+ * <p>Note that the sources handed out read the delegate anew, after the check.
+ */
+public class IntegrityValidatingDelegatingResource extends DelegatingResource {
+
+    /**
+     * {@link ResourceProvider} which wraps the resources of another provider whenever the URI has
+     * an {@code integrity} query parameter.
+     */
+    public static class Provider implements ResourceProvider {
+        private final ResourceProvider delegatingResourceProvider;
+
+        public Provider(ResourceProvider delegatingResourceProvider) {
+            this.delegatingResourceProvider = delegatingResourceProvider;
+        }
+
+        @Override
+        public Resource getResource(URI uri) {
+            var original = delegatingResourceProvider.getResource(uri);
+            if (original == null) return null;
+            var integrity = URIs.getQueryMap(uri).get("integrity");
+            if (integrity == null) return original;
+            var multihash = MultihashWithMultibase.decode(integrity);
+            return new IntegrityValidatingDelegatingResource(original, multihash);
+        }
+    }
+
+    private final MultihashWithMultibase expectedHash;
+    private boolean validated = false;
+
+    public IntegrityValidatingDelegatingResource(
+            Resource delegate, MultihashWithMultibase expectedHash) {
+        super(delegate);
+        this.expectedHash = expectedHash;
+    }
+
+    @Override
+    public ByteSource byteSource() {
+        validate();
+        return delegate.byteSource();
+    }
+
+    @Override
+    public CharSource charSource() {
+        validate();
+        return delegate.charSource();
+    }
+
+    /**
+     * Hashes the delegate's bytes and compares the digest with {@code expectedHash}.
+     *
+     * <p>A successful check is remembered, so the delegate is only hashed again after a failure.
+     *
+     * @throws IntegrityViolationException if the computed hash differs from the expected one
+     * @throws UncheckedIOException if the delegate could not be read
+     */
+    private synchronized void validate() {
+        if (validated) return;
+
+        var hashFunction = Multihashes.toGuavaHashFunction(expectedHash.multihash());
+
+        byte[] actualBytes;
+        try {
+            // ByteSource#hash streams the content in chunks, so this neither reads byte by byte
+            // nor has to squeeze the (possibly larger than 2 GiB) size into an int.
+            actualBytes = delegate.byteSource().hash(hashFunction).asBytes();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+        var actualHash = expectedHash.copy(actualBytes);
+
+        // TODO It would be useful if Multihash had an equalsTo() method to avoid byte array copy
+        if (!expectedHash.equals(actualHash)) {
+            throw new IntegrityViolationException(
+                    "Expected " + expectedHash + " but got " + actualHash);
+        }
+
+        validated = true;
+    }
+}
diff --git a/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResourceTest.java b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResourceTest.java
new file mode 100644
index 000000000..51e3ce802
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResourceTest.java
@@ -0,0 +1,69 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import static dev.enola.common.context.testlib.SingletonRule.$;
+
+import static org.junit.Assert.assertThrows;
+
+import dev.enola.common.context.testlib.SingletonRule;
+import dev.enola.common.io.mediatype.MediaTypeProviders;
+import dev.enola.common.io.resource.*;
+
+import org.junit.Rule;
+import org.junit.Test;
+
+public class IntegrityValidatingDelegatingResourceTest {
+
+    ResourceProvider rp =
+            new IntegrityValidatingDelegatingResource.Provider(new ClasspathResource.Provider());
+
+    public @Rule SingletonRule r1 = $(MediaTypeProviders.set());
+
+    @Test
+    public void example() {
+        // This is useful "to see it", e.g. when adding new tests
+        // throw new RuntimeException(
+        // Multihashes.example(Multihash.Type.sha2_512, Multibase.Base.Base58BTC));
+    }
+
+    @Test
+    public void bad() {
+        IntegrityViolationException thrown =
+                assertThrows(
+                        IntegrityViolationException.class,
+                        () -> {
+                            check(
+                                    "z8VsnXyGnRwJpnrQXB8KcLstvgFYGZ2f5BCm3DVndcNZ8NswtkCqsut69e7yd1FKNtettjgy669GNVt8VSTGxkAiJaB");
+                        });
+        assertThat(thrown).hasMessageThat().contains("z8VsnXy"); // expected (from the URI)
+        assertThat(thrown).hasMessageThat().contains("z8Vw9J6"); // actual (same as in good)
+    }
+
+    @Test
+    public void good() {
+        check(
+                "z8Vw9J6ZbuvzUV7wuau1uws8hw2QTZUeFfgwdyre5LmC1yFUoR2b7WyR2M8CaDR9Z6A2FafkPjmETcLKetbBr5d2Qv7");
+    }
+
+    void check(String hash) {
+        rp.get("classpath:/test.png?integrity=" + hash).byteSource();
+    }
+}
diff --git a/java/dev/enola/common/io/hashbrown/IntegrityViolationException.java b/java/dev/enola/common/io/hashbrown/IntegrityViolationException.java
new file mode 100644
index 000000000..6838f6b03
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/IntegrityViolationException.java
@@ -0,0 +1,26 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+/** Signals that some content did not hash to the value which was expected for it. */
+public class IntegrityViolationException extends RuntimeException {
+
+    public IntegrityViolationException(String message) {
+        super(message);
+    }
+}
diff --git a/java/dev/enola/common/io/hashbrown/MultihashWithMultibase.java b/java/dev/enola/common/io/hashbrown/MultihashWithMultibase.java
new file mode 100644
index 000000000..c374c605d
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/MultihashWithMultibase.java
@@ -0,0 +1,96 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+import io.ipfs.multibase.Multibase;
+import io.ipfs.multihash.Multihash;
+
+import java.util.Objects;
+
+/**
+ * An alternative (wrapper, actually) over {@link io.ipfs.multihash.Multihash} which "remembers" its
+ * encoding base.
+ */
+public final class MultihashWithMultibase {
+    // TODO extends Multihash ?
+
+    private final Multibase.Base multibase;
+    private final Multihash multihash;
+
+    /**
+     * Decodes the text form of a Multihash, and remembers which base it was written in.
+     *
+     * <p>Text of length 46 which starts with "Qm" is treated as Base58BTC; for anything else, the
+     * first character selects the base.
+     *
+     * @param encoded the text to decode
+     * @return the decoded hash, together with its base
+     * @throws IllegalArgumentException if {@code encoded} is empty
+     */
+    public static MultihashWithMultibase decode(String encoded) {
+        if (encoded.isEmpty()) {
+            // Otherwise charAt(0) below would fail with a StringIndexOutOfBoundsException
+            throw new IllegalArgumentException("Empty Multihash");
+        }
+        Multibase.Base base;
+        if (encoded.length() == 46 && encoded.startsWith("Qm")) {
+            // TODO Base58BTC or Base58Flickr ?
+            base = Multibase.Base.Base58BTC;
+        } else {
+            base = Multibase.Base.lookup(encoded.charAt(0));
+        }
+        return new MultihashWithMultibase(base, Multihash.decode(encoded));
+    }
+
+    private MultihashWithMultibase(Multibase.Base multibase, Multihash multihash) {
+        this.multibase = multibase;
+        this.multihash = multihash;
+    }
+
+    // TODO Give this method a better name...
+    /**
+     * Creates a hash of the same type and base as this one, for other digest bytes.
+     *
+     * @param bytes the digest
+     * @return a new instance; this one is left as it is
+     */
+    public MultihashWithMultibase copy(byte[] bytes) {
+        var newMultihash = new Multihash(multihash.getType(), bytes);
+        return new MultihashWithMultibase(multibase, newMultihash);
+    }
+
+    public Multihash multihash() {
+        return multihash;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof MultihashWithMultibase other)) return false;
+        return multihash.equals(other.multihash) && multibase.equals(other.multibase);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(multihash, multibase);
+    }
+
+    @Override
+    public String toString() {
+        return Multihashes.toString(multihash, multibase);
+    }
+}
diff --git a/java/dev/enola/common/io/hashbrown/Multihashes.java b/java/dev/enola/common/io/hashbrown/Multihashes.java
new file mode 100644
index 000000000..f58199928
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/Multihashes.java
@@ -0,0 +1,57 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+import io.ipfs.multibase.Multibase;
+import io.ipfs.multihash.Multihash;
+
+/** Extension methods for {@link io.ipfs.multihash.Multihash}. */
+public final class Multihashes {
+
+    public static HashFunction toGuavaHashFunction(Multihash multihash) {
+        var type = multihash.getType();
+        return switch (type) {
+            // @Deprecated case md5 -> Hashing.md5();
+            // @Deprecated case sha1 -> Hashing.sha1();
+            case sha2_256 -> Hashing.sha256();
+            case sha2_512 -> Hashing.sha512();
+            // Not suitable, as it's tiny: case murmur3 -> Hashing.murmur3_32_fixed();
+
+            // TODO What about all other supported types?!
+            // See https://github.com/multiformats/java-multihash/issues/41 ...
+
+            default -> throw new IllegalArgumentException("Unsupported Multihash type: " + type);
+        };
+    }
+
+    public static String toString(Multihash multihash, Multibase.Base base) {
+        return Multibase.encode(base, multihash.toBytes());
+    }
+
+    public static String example(Multihash.Type type, Multibase.Base base) {
+        var bytes = new byte[type.length];
+        // The digest is left all zeros; this is only meant to show what an encoded hash looks like.
+        var multihash = new Multihash(type, bytes);
+        return toString(multihash, base);
+    }
+
+    private Multihashes() {}
+}
diff --git a/java/dev/enola/common/io/hashbrown/package-info.java b/java/dev/enola/common/io/hashbrown/package-info.java
new file mode 100644
index 000000000..21433e4d9
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/package-info.java
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2024-2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * hashbrown contains utilities related to hashing which are useful for
+ * integrity verifications and Content-addressable storage
+ * (CAS).
+ *
+ * <p>It's named after Hash browns (AKA
+ * "Rösti"), which the original author of this package (Michael Vorburger.ch) loves.
+ */
+@NullMarked
+package dev.enola.common.io.hashbrown;
+
+import org.jspecify.annotations.NullMarked;