Fixed matmuls
jafioti committed Jan 21, 2024
1 parent 4219d8e commit b1c435b
Showing 4 changed files with 12 additions and 8 deletions.
11 changes: 8 additions & 3 deletions src/compilers/metal/fp16/matmul.rs
@@ -299,12 +299,12 @@ impl Compiler for MetalMatMulCompiler
         dims.swap(src2_shape.len() - 2, src2_shape.len() - 1);
         src2_shape.permute(&dims);
         // If src1 is padded or sliced, or batch dim isn't first, we need to make it contiguous
-        if (src1_shape
+        if src1_shape
             .indexes
             .iter()
             .take(src1_shape.len() - 2)
             .enumerate()
-            .any(|(a, b)| a != *b))
+            .any(|(a, b)| a != *b)
             || src1_shape.is_sliced()
             || src1_shape.is_padded()
         {
@@ -320,7 +320,12 @@ impl Compiler for MetalMatMulCompiler
             src1_shape = src1_shape.contiguous();
         }
         // If src2 is padded or sliced, or batch dim isn't first, we need to make it contiguous
-        if (src2_shape.len() == 3 && src2_shape.indexes[0] != 0)
+        if src2_shape
+            .indexes
+            .iter()
+            .take(src2_shape.len() - 2)
+            .enumerate()
+            .any(|(a, b)| a != *b)
             || src2_shape.is_sliced()
             || src2_shape.is_padded()
         {
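The src1 change only drops a pair of stray parentheses; the substantive fix is on src2, where the old test (`src2_shape.len() == 3 && src2_shape.indexes[0] != 0`) only caught a displaced first batch dim on 3-D tensors. The new predicate flags any leading (batch) dim that is out of identity order. A minimal standalone sketch of that predicate, with a plain `&[usize]` slice standing in for the `indexes` field of luminal's shape tracker (an assumption for illustration):

```rust
/// Returns true when the leading (batch) dims of `indexes` are not in
/// identity order, i.e. a permute moved them and the matmul kernel
/// cannot assume a plain row-major batch layout.
fn batch_dims_permuted(indexes: &[usize]) -> bool {
    let rank = indexes.len();
    indexes
        .iter()
        .take(rank.saturating_sub(2)) // skip the two matrix dims
        .enumerate()
        .any(|(pos, &idx)| pos != idx)
}

fn main() {
    // Batch dims in order: no contiguous copy needed.
    assert!(!batch_dims_permuted(&[0, 1, 2, 3]));
    // First batch dim displaced: caught by both old and new checks (3-D).
    assert!(batch_dims_permuted(&[1, 0, 2]));
    // 4-D case with a later batch dim displaced: only the new check catches this.
    assert!(batch_dims_permuted(&[0, 2, 1, 3]));
}
```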
2 changes: 1 addition & 1 deletion src/compilers/metal/fp16/tests.rs
@@ -812,7 +812,7 @@ fn test_transformer_encoder_block() {
     let d_a = d_dev.tensor_from_vec(a_data, (DConst::<2>, DConst::<32>));
     let d_b = d_model.forward(d_a);

-    assert_close(&b.data(), &d_b.as_vec());
+    assert_close_precision(&b.data(), &d_b.as_vec(), 2);
 }

 #[test]
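The test swaps `assert_close` for `assert_close_precision(..., 2)`, loosening the comparison to roughly two decimal places, since an fp16 Metal matmul path through a whole encoder block accumulates more rounding error than the fp32 reference. A hedged sketch of what a digits-of-precision assert could look like; this is a hypothetical stand-in, not necessarily how the repo's helper is implemented:

```rust
/// Asserts two float slices agree to `digits` decimal places.
/// Hypothetical stand-in for a test helper like `assert_close_precision`.
fn assert_close_precision(a: &[f32], b: &[f32], digits: i32) {
    assert_eq!(a.len(), b.len(), "length mismatch");
    let tol = 10f32.powi(-digits) / 2.0;
    for (i, (x, y)) in a.iter().zip(b).enumerate() {
        assert!(
            (x - y).abs() <= tol,
            "mismatch at {i}: {x} vs {y} (tol {tol})"
        );
    }
}

fn main() {
    // Agrees to 2 decimal places; would fail at 3.
    assert_close_precision(&[1.001, -0.502], &[1.003, -0.499], 2);
}
```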
2 changes: 1 addition & 1 deletion src/nn/transformer/attention.rs
@@ -177,7 +177,7 @@ impl<
         let tokens: GraphTensor<(B, S2, Const<V_DIM>)> = weights
             .matmul(values)
             .permute::<_, Axes4<0, 2, 1, 3>>()
-            .dyn_reshape(vec![B::const_size(), S2::const_size(), V_DIM.into()]);
+            .reshape();
         self.w_o.forward(tokens)
     }
 }
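Here the explicit size vector passed to `dyn_reshape` is dropped in favor of a bare `.reshape()`: the target shape is already pinned by the `GraphTensor<(B, S2, Const<V_DIM>)>` annotation on the binding, so it can presumably be inferred rather than spelled out at runtime. A toy illustration of the same type-driven pattern; `Tensor`, `Rank2`, and `Rank3` are hypothetical types for the sketch, not luminal's API:

```rust
use std::marker::PhantomData;

// Marker types standing in for type-level shapes.
struct Rank2;
struct Rank3;

/// A tensor whose shape is carried in its type parameter.
struct Tensor<Shape> {
    data: Vec<f32>,
    _shape: PhantomData<Shape>,
}

impl<S> Tensor<S> {
    /// Reinterpret the same buffer under a new type-level shape.
    /// The target shape is chosen by the caller's annotation,
    /// so no runtime size vector is needed.
    fn reshape<T>(self) -> Tensor<T> {
        Tensor { data: self.data, _shape: PhantomData }
    }
}

fn main() {
    let x = Tensor::<Rank2> { data: vec![0.0; 6], _shape: PhantomData };
    // The annotation alone drives the reshape, mirroring
    // `let tokens: GraphTensor<(B, S2, Const<V_DIM>)> = ....reshape();`
    let _tokens: Tensor<Rank3> = x.reshape();
}
```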
5 changes: 2 additions & 3 deletions src/nn/transformer/encoder.rs
@@ -61,9 +61,8 @@ impl<const DIM: usize, const FF: usize, const HEADS: usize, S: Dimension, B: Dim
     fn forward(&self, x: GraphTensor<(B, S, Const<DIM>)>) -> Self::Output {
         let y = self.attention.forward(x);
         let x = (x + y).layer_norm::<2>(1e-5);
-        // let y = self.ff.forward(x);
-        // (x + y).layer_norm::<2>(1e-5)
-        x
+        let y = self.ff.forward(x);
+        (x + y).layer_norm::<2>(1e-5)
     }
 }

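This change re-enables the previously commented-out feed-forward sublayer, restoring the standard post-norm encoder block: normalize(x + attention(x)), then normalize(x + ff(x)). A minimal sketch of that two-stage residual pattern over plain vectors; the `layer_norm` here normalizes a 1-D vector with the diff's 1e-5 epsilon and is a simplification of luminal's axis-wise `layer_norm::<2>`:

```rust
/// LayerNorm over a 1-D vector with epsilon 1e-5, as in the diff.
fn layer_norm(x: &[f32]) -> Vec<f32> {
    let n = x.len() as f32;
    let mean = x.iter().sum::<f32>() / n;
    let var = x.iter().map(|v| (v - mean).powi(2)).sum::<f32>() / n;
    let denom = (var + 1e-5).sqrt();
    x.iter().map(|v| (v - mean) / denom).collect()
}

/// Post-norm encoder block: residual + norm around each sublayer.
fn encoder_block(
    x: &[f32],
    attention: impl Fn(&[f32]) -> Vec<f32>,
    feed_forward: impl Fn(&[f32]) -> Vec<f32>,
) -> Vec<f32> {
    let y = attention(x);
    let x: Vec<f32> = x.iter().zip(&y).map(|(a, b)| a + b).collect();
    let x = layer_norm(&x);
    // The second half restored by this commit:
    let y = feed_forward(&x);
    let x: Vec<f32> = x.iter().zip(&y).map(|(a, b)| a + b).collect();
    layer_norm(&x)
}

fn main() {
    let out = encoder_block(
        &[1.0, 2.0, 3.0],
        |x| x.to_vec(),                          // identity "attention" for the demo
        |x| x.iter().map(|v| v * 0.5).collect(), // toy feed-forward
    );
    println!("{out:?}");
}
```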
