From e0e975266cc1a4dce320bdf2983b9de37487dae1 Mon Sep 17 00:00:00 2001
From: Eric Myhre <hash@exultant.us>
Date: Wed, 10 Mar 2021 09:59:32 +0100
Subject: [PATCH] Revive TestSmallerLengthHashID, and add a special case for
 identity multihash that rejects truncations.

See https://github.com/multiformats/go-multihash/pull/136#discussion_r587880523
for discussion.

This change means Sum behaves slightly differently for identity
multihashes than it does for any other multihash.  I'm not keeping
score on the number of ways identity multihash is weird anymore,
just documenting it and keeping tests passing.

The error message is lifted from the old `sumID` function verbatim.
---
 multihash.go |  2 +-
 sum.go       |  7 +++++++
 sum_test.go  | 27 +++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/multihash.go b/multihash.go
index 9eeb9ca..3d7a3b9 100644
--- a/multihash.go
+++ b/multihash.go
@@ -239,7 +239,7 @@ func Decode(buf []byte) (*DecodedMultihash, error) {
 // Encode a hash digest along with the specified function code.
 // Note: the length is derived from the length of the digest itself.
 //
-// The error return is legacy; it is always nil.
+// The error return is legacy; it is always nil.
 func Encode(buf []byte, code uint64) ([]byte, error) {
 	// FUTURE: this function always causes heap allocs... but when used, this value is almost always going to be appended to another buffer (either as part of CID creation, or etc) -- should this whole function be rethought and alternatives offered?
 	newBuf := make([]byte, varint.UvarintSize(code)+varint.UvarintSize(uint64(len(buf)))+len(buf))
diff --git a/sum.go b/sum.go
index 53c1805..c18deb1 100644
--- a/sum.go
+++ b/sum.go
@@ -2,6 +2,7 @@ package multihash
 
 import (
 	"errors"
+	"fmt"
 )
 
 // ErrSumNotSupported is returned when the Sum function code is not implemented
@@ -27,6 +28,7 @@ func Sum(data []byte, code uint64, length int) (Multihash, error) {
 	sum := hasher.Sum(nil)
 
 	// Deal with any truncation.
+	// Identity multihashes are the exception: they must not be truncated.
 	if length < 0 {
 		length = hasher.Size()
 	}
@@ -34,6 +36,11 @@ func Sum(data []byte, code uint64, length int) (Multihash, error) {
 		return nil, ErrLenTooLarge
 	}
 	if length >= 0 {
+		if code == IDENTITY {
+			if length != len(sum) {
+				return nil, fmt.Errorf("the length of the identity hash (%d) must be equal to the length of the data (%d)", length, len(sum))
+			}
+		}
 		sum = sum[:length]
 	}
 
diff --git a/sum_test.go b/sum_test.go
index c2e9c1b..512231c 100644
--- a/sum_test.go
+++ b/sum_test.go
@@ -123,6 +123,33 @@ func BenchmarkBlake2B(b *testing.B) {
 	}
 }
 
+// TestSmallerLengthHashID verifies that Sum, for an identity multihash, accepts
+// a length equal to len(data) or -1 and rejects every shorter length.
+func TestSmallerLengthHashID(t *testing.T) {
+	data := []byte("Identity hash input data.")
+	dataLength := len(data)
+
+	// Normal case: `length == len(data)`.
+	_, err := multihash.Sum(data, multihash.ID, dataLength)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Unconstrained length (-1): also allowed.
+	_, err = multihash.Sum(data, multihash.ID, -1)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Every truncated length (0 <= l < len(data)) must be rejected.
+	for l := dataLength - 1; l >= 0; l-- {
+		_, err = multihash.Sum(data, multihash.ID, l)
+		if err == nil {
+			t.Fatalf("identity hash of length %d smaller than data length %d didn't fail", l, dataLength)
+		}
+	}
+}
+
 func TestTooLargeLength(t *testing.T) {
 	_, err := multihash.Sum([]byte("test"), multihash.SHA2_256, 33)
 	if err != multihash.ErrLenTooLarge {