From 24716a47b1cc881955c4d8fa9a80944e10850dc7 Mon Sep 17 00:00:00 2001 From: Vesa Karvonen Date: Thu, 27 Jul 2023 17:37:24 +0300 Subject: [PATCH] Pad to cache aligned size based on architecture and OCaml version --- CHANGES.md | 7 ++++++ src/Multicore_magic.ml | 31 +++++++++++++++++------ src/Multicore_magic.mli | 20 +++++++++------ src/cache.1.ml | 1 + src/cache.16.ml | 1 + src/cache.32.ml | 1 + src/cache.8.ml | 1 + src/cache.mli | 1 + src/dune | 48 ++++++++++++++++++++++++++++++++++++ test/Multicore_magic_test.ml | 12 +++++++++ 10 files changed, 108 insertions(+), 15 deletions(-) create mode 100644 src/cache.1.ml create mode 100644 src/cache.16.ml create mode 100644 src/cache.32.ml create mode 100644 src/cache.8.ml create mode 100644 src/cache.mli diff --git a/CHANGES.md b/CHANGES.md index b5da380..d711074 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## 2.0.0 + +- Changed the semantics of `copy_as_padded` to not always copy and to not + guarantee that `length_of_padded_array*` works with it. These semantic changes + allow better use of the OCaml allocator to guarantee cache friendly alignment. + (@polytypic) + ## 1.0.1 - Ported the library to OCaml 4 (@polytypic) diff --git a/src/Multicore_magic.ml b/src/Multicore_magic.ml index 9d852fc..735310f 100644 --- a/src/Multicore_magic.ml +++ b/src/Multicore_magic.ml @@ -1,17 +1,32 @@ -let num_padding_words = 15 +let num_padding_words = Cache.words_per_cache_line - 1 let copy_as_padded (o : 'a) : 'a = let o = Obj.repr o in - let n = Obj.new_block (Obj.tag o) (Obj.size o + num_padding_words) in - for i = 0 to Obj.size o - 1 do - Obj.set_field n i (Obj.field o i) - done; - Obj.magic n + if Obj.is_block o then begin + let original_size = Obj.size o in + let padded_size = + if original_size <= num_padding_words then num_padding_words + else original_size + num_padding_words + in + if original_size <> padded_size then begin + let t = Obj.tag o in + if Sys.word_size = 64 && t != Obj.double_array_tag then begin + let n = Obj.new_block t padded_size in + Array.blit (Obj.magic o) 0 (Obj.magic n) 0 original_size; + Obj.magic n + end + else Obj.magic o + end + else Obj.magic o + end + else Obj.magic o let make_padded_array n x = let a = Array.make (n + num_padding_words) x in - if Obj.is_block (Obj.repr x) && Obj.tag (Obj.repr x) != Obj.double_tag then - Array.fill a n num_padding_words (Obj.magic ()); + if Obj.is_block (Obj.repr x) then + Array.fill a n num_padding_words + (if Obj.tag (Obj.repr x) == Obj.double_tag then Obj.magic 0.0 + else Obj.magic ()); a let[@inline] length_of_padded_array x = Array.length x - num_padding_words diff --git a/src/Multicore_magic.mli b/src/Multicore_magic.mli index 4ee6935..85d134b 100644 --- a/src/Multicore_magic.mli +++ b/src/Multicore_magic.mli @@ -7,8 +7,9 @@ (** {1 Helpers for using padding to avoid false sharing} *) val copy_as_padded : 'a -> 'a -(** Creates a shallow clone of the given object. The clone will have extra - padding words added after the last used word. +(** Depending on the object, either creates a shallow clone of it or returns it + as is. When cloned, the clone will have extra padding words added after the + last used word. This is designed to help avoid {{:https://en.wikipedia.org/wiki/False_sharing} false sharing}. False @@ -32,21 +33,26 @@ val copy_as_padded : 'a -> 'a } let padded_variant = Multicore_magic.copy_as_padded (Some 1) - - let padded_array = Multicore_magic.copy_as_padded [|3; 1; 4|] ]} - Padding changes the length of an array, see {!length_of_padded_array}. *) + Padding changes the length of an array. If you need to pad an array, use + {!make_padded_array}. *) val make_padded_array : int -> 'a -> 'a array (** Creates a padded array. The length of the returned array includes padding. Use {!length_of_padded_array} to get the unpadded length. *) val length_of_padded_array : 'a array -> int -(** Returns the length of a padded array without the padding. *) +(** Returns the length of an array created by {!make_padded_array} without the + padding. + + {b WARNING}: This is not guaranteed to work with {!copy_as_padded}. *) val length_of_padded_array_minus_1 : 'a array -> int -(** Returns the length of a padded array without the padding minus 1. *) +(** Returns the length of an array created by {!make_padded_array} without the + padding minus 1. + + {b WARNING}: This is not guaranteed to work with {!copy_as_padded}. *) (** {1 Missing [Atomic] operations} *) diff --git a/src/cache.1.ml b/src/cache.1.ml new file mode 100644 index 0000000..5de3a57 --- /dev/null +++ b/src/cache.1.ml @@ -0,0 +1 @@ +let words_per_cache_line = 1 diff --git a/src/cache.16.ml b/src/cache.16.ml new file mode 100644 index 0000000..58c08a7 --- /dev/null +++ b/src/cache.16.ml @@ -0,0 +1 @@ +let words_per_cache_line = 16 diff --git a/src/cache.32.ml b/src/cache.32.ml new file mode 100644 index 0000000..fc15674 --- /dev/null +++ b/src/cache.32.ml @@ -0,0 +1 @@ +let words_per_cache_line = 32 diff --git a/src/cache.8.ml b/src/cache.8.ml new file mode 100644 index 0000000..6cb39e4 --- /dev/null +++ b/src/cache.8.ml @@ -0,0 +1 @@ +let words_per_cache_line = 8 diff --git a/src/cache.mli b/src/cache.mli new file mode 100644 index 0000000..685c409 --- /dev/null +++ b/src/cache.mli @@ -0,0 +1 @@ +val words_per_cache_line : int diff --git a/src/dune b/src/dune index 62a2837..2f25888 100644 --- a/src/dune +++ b/src/dune @@ -1,3 +1,51 @@ (library (name Multicore_magic) (public_name multicore-magic)) + +(rule + (targets cache.ml) + (deps cache.1.ml) + (enabled_if + (< %{ocaml_version} 5.0.0)) + (action + (progn + (copy cache.1.ml cache.ml)))) + +(rule + (targets cache.ml) + (deps cache.8.ml) + (enabled_if + (and + (>= %{ocaml_version} 5.0.0) + (not + (or + (= %{architecture} arm64) + (= %{architecture} power) + (= %{architecture} s390x))))) + (action + (progn + (copy cache.8.ml cache.ml)))) + +(rule + (targets cache.ml) + (deps cache.16.ml) + (enabled_if + (and + (>= %{ocaml_version} 5.0.0) + (or + (= %{architecture} arm64) + (= %{architecture} power)))) + (action + (progn + (copy cache.16.ml cache.ml)))) + +(rule + (targets cache.ml) + (deps cache.32.ml) + (enabled_if + (and + (>= %{ocaml_version} 5.0.0) + (= %{architecture} s390x))) + (action + (progn + (copy cache.32.ml cache.ml)))) diff --git a/test/Multicore_magic_test.ml b/test/Multicore_magic_test.ml index e3f6d9b..8614664 100644 --- a/test/Multicore_magic_test.ml +++ b/test/Multicore_magic_test.ml @@ -1,3 +1,4 @@ +let can_pad_int () = assert (Multicore_magic.copy_as_padded 101 = 101) let can_pad_ref () = assert (!(Multicore_magic.copy_as_padded (ref 101)) = 101) let can_pad_atomic () = @@ -11,6 +12,14 @@ let can_pad_records () = let x = Multicore_magic.copy_as_padded { foo; bar; baz } in assert (x.foo = foo && x.bar == bar && x.baz == baz) +let can_pad_float_record () = + let open struct + type record = { foo : float; bar : float; baz : float } + end in + let foo = 4.2 and bar = 10.1 and baz = 9.6 in + let x = Multicore_magic.copy_as_padded { foo; bar; baz } in + assert (x.foo = foo && x.bar = bar && x.baz = baz) + let can_pad_variants () = let open struct type variant = Foo of int * int Atomic.t @@ -65,9 +74,12 @@ let fence () = let () = Alcotest.run "multicore-magic" [ + ("can pad int", [ Alcotest.test_case "" `Quick can_pad_int ]); ("can pad ref", [ Alcotest.test_case "" `Quick can_pad_ref ]); ("can pad atomic", [ Alcotest.test_case "" `Quick can_pad_atomic ]); ("can pad records", [ Alcotest.test_case "" `Quick can_pad_records ]); + ( "can pad float record", + [ Alcotest.test_case "" `Quick can_pad_float_record ] ); ("can pad variants", [ Alcotest.test_case "" `Quick can_pad_variants ]); ("can pad arrays", [ Alcotest.test_case "" `Quick can_pad_arrays ]); ( "padded array length",