diff --git a/java/dev/enola/common/io/BUILD b/java/dev/enola/common/io/BUILD
index 5d2840e20..280d22fa0 100644
--- a/java/dev/enola/common/io/BUILD
+++ b/java/dev/enola/common/io/BUILD
@@ -32,6 +32,8 @@ java_library(
         "//java/dev/enola/data",
         # TODO Separate BUILD for IPFSResource
         "@enola_maven//:com_github_ipld_java_cid",
+        "@enola_maven//:com_github_multiformats_java_multihash",
+        "@enola_maven//:com_github_multiformats_java_multibase",
         "@enola_maven//:com_github_java_json_tools_uri_template",
         "@enola_maven//:com_google_auto_service_auto_service_annotations",
         "@enola_maven//:com_google_errorprone_error_prone_annotations",
@@ -56,6 +58,8 @@ junit_tests(
         "//test",
         "@enola_maven//:com_github_ipld_java_cid",
         "@enola_maven//:com_github_java_json_tools_uri_template",
+        "@enola_maven//:com_github_multiformats_java_multibase",
+        "@enola_maven//:com_github_multiformats_java_multihash",
         "@enola_maven//:com_google_auto_service_auto_service_annotations",
         "@enola_maven//:org_jspecify_jspecify",
     ],
diff --git a/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResource.java b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResource.java
new file mode 100644
index 000000000..733b34c7d
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResource.java
@@ -0,0 +1,135 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+import com.google.common.io.ByteSource;
+import com.google.common.io.CharSource;
+
+import dev.enola.common.io.iri.URIs;
+import dev.enola.common.io.resource.*;
+
+import io.ipfs.multibase.Multibase;
+import io.ipfs.multihash.Multihash;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.net.URI;
+
+/**
+ * Resource which verifies the content of the Resource it delegates to against an expected
+ * {@link Multihash}, before handing out that content.
+ *
+ * <p>The verification is lazy; it happens on the first call to {@link #byteSource()} or {@link
+ * #charSource()}, and reads the entire content of the delegate. Once it has succeeded, it is not
+ * repeated. The sources which are returned are the delegate's own, so callers read the content
+ * from the delegate again after it was verified.
+ */
+public class IntegrityValidatingDelegatingResource extends DelegatingResource {
+
+    /**
+     * ResourceProvider which adds integrity validation to the resources of another provider, for
+     * URIs which have an {@code integrity} query parameter.
+     */
+    public static class Provider implements ResourceProvider {
+        private final ResourceProvider delegatingResourceProvider;
+
+        public Provider(ResourceProvider delegatingResourceProvider) {
+            this.delegatingResourceProvider = delegatingResourceProvider;
+        }
+
+        /**
+         * Gets the resource from the wrapped provider, and wraps it if integrity is requested.
+         *
+         * @param uri URI of the resource; the value of its {@code integrity} query parameter, if
+         *     any, is decoded by {@code Multihash.decode}
+         * @return null if the wrapped provider returned null; the resource of the wrapped provider
+         *     as-is if the URI has no {@code integrity} query parameter; else a resource which
+         *     validates the content against that hash
+         */
+        @Override
+        public Resource getResource(URI uri) {
+            var original = delegatingResourceProvider.getResource(uri);
+            if (original == null) return null;
+            var integrity = URIs.getQueryMap(uri).get("integrity");
+            if (integrity == null) return original;
+            var multihash = Multihash.decode(integrity);
+            return new IntegrityValidatingDelegatingResource(original, multihash);
+        }
+    }
+
+    private final Multihash expectedHash;
+    private boolean validated = false;
+
+    /**
+     * @param delegate the resource whose content is to be verified
+     * @param expectedHash the hash which the content of the delegate must have
+     */
+    public IntegrityValidatingDelegatingResource(Resource delegate, Multihash expectedHash) {
+        super(delegate);
+        this.expectedHash = expectedHash;
+    }
+
+    @Override
+    public ByteSource byteSource() {
+        validate();
+        return delegate.byteSource();
+    }
+
+    @Override
+    public CharSource charSource() {
+        validate();
+        return delegate.charSource();
+    }
+
+    /**
+     * Hashes the content of the delegate and compares it with the expected hash, unless that
+     * already succeeded before.
+     *
+     * @throws IntegrityViolationException if the content does not have the expected hash
+     * @throws UncheckedIOException if the content of the delegate could not be read
+     */
+    private synchronized void validate() {
+        if (validated) return;
+
+        var delegateByteSource = delegate.byteSource();
+        var hashFunction = Multihashes.toGuavaHashFunction(expectedHash);
+
+        byte[] actualBytes;
+        try {
+            // ByteSource#hash feeds the entire content into the hash function. The size of the
+            // content is intentionally not passed as a hint to newHasher(int); narrowing it
+            // with Math.toIntExact() throws an ArithmeticException from 2 GiB onwards.
+            actualBytes = delegateByteSource.hash(hashFunction).asBytes();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+        var actualMultihash = new Multihash(expectedHash.getType(), actualBytes);
+
+        // TODO It would be useful if Multihash had an equalsTo() method to avoid byte array copy
+        if (!expectedHash.equals(actualMultihash)) {
+            // TODO Fix that this loses the "original" Base from ?integrity=..
+            var expectedHashString = Multihashes.toString(expectedHash, Multibase.Base.Base64);
+            var actualMultihashString =
+                    Multihashes.toString(actualMultihash, Multibase.Base.Base64);
+            throw new IntegrityViolationException(
+                    "Expected " + expectedHashString + " but got " + actualMultihashString);
+        }
+
+        validated = true;
+    }
+}
diff --git a/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResourceTest.java b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResourceTest.java
new file mode 100644
index 000000000..bcbcbf9d4
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/IntegrityValidatingDelegatingResourceTest.java
@@ -0,0 +1,58 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+import static dev.enola.common.context.testlib.SingletonRule.$;
+
+import dev.enola.common.context.testlib.SingletonRule;
+import dev.enola.common.io.mediatype.MediaTypeProviders;
+import dev.enola.common.io.resource.*;
+
+import org.junit.Rule;
+import org.junit.Test;
+
+/**
+ * Tests that {@link IntegrityValidatingDelegatingResource} accepts or rejects the content of a
+ * classpath resource, depending on the {@code ?integrity=} query parameter of its URI.
+ */
+public class IntegrityValidatingDelegatingResourceTest {
+
+    // Classpath resources, with integrity validation added for URIs that request it
+    ResourceProvider rp =
+            new IntegrityValidatingDelegatingResource.Provider(new ClasspathResource.Provider());
+
+    // NOTE(review): presumably sets up the MediaTypeProviders singleton for each test; confirm
+    public @Rule SingletonRule r1 = $(MediaTypeProviders.set());
+
+    /**
+     * The integrity value here is a Multihash of type md5 whose digest is 16 zero bytes, in
+     * Multibase Base64 encoding. NOTE(review): presumably that is not the digest of test.png,
+     * which is why an IntegrityViolationException is expected; confirm against the resource.
+     */
+    @Test(expected = IntegrityViolationException.class)
+    public void bad() {
+        rp.get("classpath:/test.png?integrity=m1QEQAAAAAAAAAAAAAAAAAAAAAA").charSource();
+        // TODO Validate that IntegrityViolationException contains m1... and not fz...
+    }
+
+    /** Obtaining the byteSource triggers the validation; passes if that does not throw. */
+    @Test
+    public void good() {
+        rp.get("classpath:/test.png?integrity=m1QEQtoy0Os8CMvMKItSdcFkRow").byteSource();
+    }
+}
diff --git a/java/dev/enola/common/io/hashbrown/IntegrityViolationException.java b/java/dev/enola/common/io/hashbrown/IntegrityViolationException.java
new file mode 100644
index 000000000..6838f6b03
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/IntegrityViolationException.java
@@ -0,0 +1,25 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+/** Thrown when the content of a resource does not have the expected hash. */
+public class IntegrityViolationException extends RuntimeException {
+    public IntegrityViolationException(String message) {
+        super(message);
+    }
+}
diff --git a/java/dev/enola/common/io/hashbrown/Multihashes.java b/java/dev/enola/common/io/hashbrown/Multihashes.java
new file mode 100644
index 000000000..a980e7dd4
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/Multihashes.java
@@ -0,0 +1,61 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.enola.common.io.hashbrown;
+
+import com.google.common.hash.HashFunction;
+import com.google.common.hash.Hashing;
+
+import io.ipfs.multibase.Multibase;
+import io.ipfs.multihash.Multihash;
+
+/** Extension methods for {@link io.ipfs.multihash.Multihash}. */
+public final class Multihashes {
+
+    /**
+     * Returns the Guava hash function for the type of the given Multihash.
+     *
+     * @param multihash the Multihash of which (only) the type is used
+     * @return the Guava {@link HashFunction} which computes digests of that type
+     * @throws IllegalArgumentException if there is no Guava hash function for that type (yet)
+     */
+    public static HashFunction toGuavaHashFunction(Multihash multihash) {
+        var type = multihash.getType();
+        return switch (type) {
+            // NB: md5 and sha1 are not collision resistant; prefer sha2_256 for new hashes
+            case md5 -> Hashing.md5();
+            case sha1 -> Hashing.sha1();
+            case sha2_256 -> Hashing.sha256();
+            case sha2_512 -> Hashing.sha512();
+            // TODO Add all other supported types...
+            default -> throw new IllegalArgumentException("Unsupported Multihash type: " + type);
+        };
+    }
+
+    /**
+     * Encodes the given Multihash as text.
+     *
+     * @param multihash the Multihash to encode
+     * @param base the Multibase base to encode the bytes of the Multihash with
+     * @return the encoded text, as returned by {@code Multibase.encode}
+     */
+    public static String toString(Multihash multihash, Multibase.Base base) {
+        return Multibase.encode(base, multihash.toBytes());
+    }
+
+    private Multihashes() {}
+}
diff --git a/java/dev/enola/common/io/hashbrown/package-info.java b/java/dev/enola/common/io/hashbrown/package-info.java
new file mode 100644
index 000000000..21433e4d9
--- /dev/null
+++ b/java/dev/enola/common/io/hashbrown/package-info.java
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Copyright 2024-2025 The Enola Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * hashbrown contains utilities related to hashing which are useful for
+ * integrity verifications and Content-addressable storage
+ * (CAS).
+ *
+ * <p>It's named after Hash browns (AKA
+ * "Rösti"), which the original author of this package (Michael Vorburger.ch) loves.
+ */
+@NullMarked
+package dev.enola.common.io.hashbrown;
+
+import org.jspecify.annotations.NullMarked;