merge from main. fix conflicts / fancy index breakage

Python-for-HPC · Feb 5, 2024 · 940c468 · 940c468
2 parents 44ba135 + 484ab3e
commit 940c468
Show file tree

Hide file tree

Showing 8 changed files with 750 additions and 163 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -39,9 +39,9 @@ jobs:
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
     - name: Test with pytest
       run: |
-        (ulimit -d 3000000; RAMBA_WORKERS=2 RAMBA_NUM_THREADS=1 timeout 600 pytest -v --timeout 90)
+        (ulimit -d 5000000; RAMBA_WORKERS=2 RAMBA_NUM_THREADS=1 timeout 600 pytest -v --timeout 90)
     - name: Test with pytest under MPI
       run: |
-        (ulimit -d 3000000; RAMBA_NUM_THREADS=1 mpiexec -n 2 timeout 600 pytest -v --timeout 90)
+        (ulimit -d 5000000; RAMBA_NUM_THREADS=1 mpiexec -n 2 timeout 600 pytest -v --timeout 90)
       shell: bash
 
diff --git a/README.md b/README.md
@@ -229,9 +229,9 @@ Current status of Ramba compatibility with NumPy APIs.  Key:  &#x1f7e2; works
 |            | tiling          | &#x1f534; not implemented |
 |            | insert/remove elements | &#x1f534; not implemented |
 |            | rearrange elements | &#x1f534; not implemented |
-|Index/slice | range slice     | &#x1f7e1; partial         | produces view like in numpy; steps > 1 and negative steps are supported
-|            | masked arrays   | &#x1f7e1; partial         | only in assignments / in-place operations / reductions;  see below for details
-|            | fancy indexing  | &#x1f534; not implemented |
+|Index/slice | range slice     | &#x1f7e2; works           | produces view like in numpy; steps > 1 and negative steps are supported
+|            | masked arrays   | &#x1f7e1; partial         | only in assignments / in-place operations / reductions;  see docs for details
+|            | fancy indexing  | &#x1f7e2; mostly works    | fancy/advanced indexing using an array of indices is very expensive in a distributed context;  see docs for details/limitations.
 |            | index routines  | &#x1f534; not implemented | ("where" partly works)
 |Math        | arithmetic operations | &#x1f7e2; works     | +, -, +=, //, etc. 
 |            | comparisons     | &#x1f7e2; works           | 

diff --git a/docs/index.md b/docs/index.md
@@ -65,8 +65,14 @@ corresponding elements of the array should be updated;  other elements remain un
 
 (b)  When the masked array appears in any other expression, an output 1D array is constructed, containing all of the elements for which the mask is True.  The output is always 1D and a copy, regardless of the dimensionality of the original array.  
 
-Ramba currently only supports the first use case.  
+Ramba currently only supports the first use case.  Mixing mask indexing with slices, fancy indexing, etc., is not supported.  
 
+### Fancy / Advanced Indexing
+Ramba now supports fancy/advanced indexing of an array with an array of index values.  Although this works, in a distributed context this is a very expensive operation.  When indexing for read, the result is always a copy, and may require significant communication between nodes.  The result array will attempt to match the distribution of the indexing array, or use a clean default distribution if there is no distributed index.  When setting the array using advanced indexing, a view is used.  If more than one term refers to the same element of the array, then the result is unpredictable (due to parallel execution); this is unlike NumPy, where the "last" value set wins.  
+
+Mixing advanced indexing on an axis with simple indexing, slices, "None", ellipses, etc. on others is also supported.  Supplying index arrays for multiple axes is supported (as long as the arrays can broadcast together, as in NumPy).  However, in the current implementation, at most one of the index arrays can be a distributed Ramba array -- the others must be nondistributed NumPy arrays.  Note that the precise rules used in NumPy when mixing these indexing types are a bit arcane.  Ramba tries to match these, but the position of the dimensions corresponding to the index arrays in the output shape may differ from NumPy when mixing broadcasting of indexing arrays, None, and slices in the same operation.  
+
+Lists and tuples can be used for fancy indexing -- these will be converted to NumPy arrays first.  "Ragged" arrays built from lists of lists with different lengths are not supported (and are deprecated in NumPy).  
 
 ## Ramba's Distribution Friendly APIs
 

diff --git a/ramba/__init__.py b/ramba/__init__.py
@@ -16,3 +16,4 @@
 
 import ramba.random as random
 import ramba.tests as tests
+from numpy import bool_, byte, ubyte, short, ushort, intc, uintc, int_, uint, longlong, ulonglong, half, float16, single, double, longdouble, int8, int16, int32, int64, float32, float64, csingle, cdouble, clongdouble, iinfo
diff --git a/ramba/ramba.py b/ramba/ramba.py
diff --git a/ramba/random/random.py b/ramba/random/random.py
@@ -70,6 +70,21 @@ def default_rng():
     return Generator()
 
 
+def randint(low, high=None, size=None, dtype=np.int64, **kwargs):
+    if size is None:
+        return np.random.randint(low, high, size, dtype)
+    else:
+        def impl(bcontainer, dim_lens, starts):
+            for i in numba.pndindex(dim_lens):
+                bcontainer[i] = np.random.randint(low, high)
+
+        return ramba.init_array(
+            size,
+            ramba.Filler(impl, mode=ramba.Filler.WHOLE_ARRAY_INPLACE, do_compile=True),
+            **kwargs
+        )
+
+
 def random(size=None, **kwargs):
     if size is None:
         return np.random.random()

diff --git a/ramba/shardview_array.py b/ramba/shardview_array.py
@@ -847,6 +847,16 @@ def distribution_to_divisions(dist):
         ret[i][1] = _stop(d) - 1
     return ret
 
+@numba.njit(fastmath=fastmath, cache=True)
+def distribution_like(dist):
+    svl = [
+        shardview( size = _size(dist[i]), index_start = _index_start(dist[i]))
+        for i in range(dist.shape[0])
+    ]
+    ret = np.empty( (dist.shape[0], svl[0].shape[0], svl[0].shape[1]), dtype=ramba_dist_dtype)
+    for i, sv in enumerate(svl):
+        ret[i] = sv
+    return ret
 
 dist_cache={}
 def default_distribution(size, dims_do_not_distribute=[], dist_dims=None):
@@ -859,15 +869,15 @@ def default_distribution(size, dims_do_not_distribute=[], dist_dims=None):
     num_dim = len(size)
     if isinstance(dist_dims, int):
         dist_dims = [dist_dims]
-    if isinstance(dist_dims, list):
+    if isinstance(dist_dims, (list, tuple)):
         assert dims_do_not_distribute is None or len(dims_do_not_distribute) == 0
         dims_do_not_distribute = [i for i in range(num_dim) if i not in dist_dims]
     starts = np.zeros(num_dim, dtype=np.int64)
     ends = np.array(list(size), dtype=np.int64)
     # the ends are inclusive, not one past the last index
     ends -= 1
     divisions = np.empty((num_workers, 2, num_dim), dtype=np.int64)
-    if do_not_distribute(size):
+    if dist_dims is None and do_not_distribute(size):
         make_uni_dist(divisions, 0, starts, ends)
     else:
         compute_regular_schedule(

diff --git a/ramba/tests/test_distributed_array.py b/ramba/tests/test_distributed_array.py
@@ -546,6 +546,60 @@ def impl(app, i, j, k, l):
                         if random.uniform(0, 1) < percent:
                             run_both(impl, i, j, k, l)
 
+    def test_3Dx1D(self):
+        def impl(app, i, j, k):
+            X = app.fromfunction(lambda x, y, z: x + y + z, (i, j, k))
+            theta = app.fromfunction(lambda x: x, (k,), dtype=X.dtype)
+            res = X @ theta
+            return res
+
+        run_both(impl, 15, 7, 9)
+
+    def test_1Dx3D(self):
+        def impl(app, i, j, k):
+            X = app.fromfunction(lambda x: x, (k,))
+            theta = app.fromfunction(lambda x, y, z: x + y + z, (i, k, j), dtype=X.dtype)
+            res = X @ theta
+            return res
+
+        run_both(impl, 12, 17, 6)
+
+    def test_5Dx3D(self):
+        def impl(app, a, b, c, i, j, k):
+            X = app.fromfunction(lambda v, w, x, y, z: v+w+x+y+z, (a, b, c, i, k))
+            theta = app.fromfunction(lambda x, y, z: x+y+z, (c, k, j), dtype=X.dtype)
+            res = X @ theta
+            return res
+
+        run_both(impl, 5, 2, 3, 4, 5, 7)
+
+    def test_dot_3Dx1D(self):
+        def impl(app, i, j, k):
+            X = app.fromfunction(lambda x, y, z: x + y + z, (i, j, k))
+            theta = app.fromfunction(lambda x: x, (k,), dtype=X.dtype)
+            res = app.dot(X, theta)
+            return res
+
+        run_both(impl, 15, 7, 9)
+
+    def test_dot_1Dx3D(self):
+        def impl(app, i, j, k):
+            X = app.fromfunction(lambda x: x, (k,))
+            theta = app.fromfunction(lambda x, y, z: x + y + z, (i, k, j), dtype=X.dtype)
+            res = app.dot(X, theta)
+            return res
+
+        run_both(impl, 12, 17, 6)
+
+    def test_dot_5Dx3D(self):
+        def impl(app, a, b, c, d, i, j, k):
+            X = app.fromfunction(lambda v, w, x, y, z: v+w+x+y+z, (a, b, c, i, k))
+            theta = app.fromfunction(lambda x, y, z: x+y+z, (d, k, j), dtype=X.dtype)
+            res = app.dot(X,theta)
+            return res
+
+        run_both(impl, 5, 2, 3, 6, 4, 5, 7)
+
 
 
 class TestBasic:
@@ -812,6 +866,51 @@ def impl(app):
 
         run_both(impl)
 
+    def test_fancy_indexing1(self):
+        # Test advanced indexing -- sizes
+        def impl(app):
+            a = app.ones((11,21,31,41),dtype=int)
+            b = a[7,5,[2,6,1],3:6]
+            c = a[None, [[3, 4, 7]], 4, [[3],[2],[7],[1]]]
+            d = a[None, [[2, 3, 1]], 4, None, [[1],[7]], 4:9]
+            return (b.shape, c.shape, d.shape)
+
+        run_both(impl)
+
+    def test_fancy_indexing2(self):
+        # Test advanced indexing -- gather/scatter values
+        def impl(app):
+            a = app.arange(500)
+            b = a[::7]
+            c = app.fromfunction(lambda i,j: (i+j)%70, (50,20), dtype=int)
+            d = b[c]
+            return d
+
+        run_both(impl)
+
+    def test_fancy_indexing3(self):
+        # Test advanced indexing -- setitem to scalar
+        def impl(app):
+            a = app.arange(500)
+            b = a[::2]
+            c = app.fromfunction(lambda i,j: i+j*100, (50,3), dtype=int)
+            b[c] = 1
+            return a
+
+        run_both(impl)
+
+    def test_fancy_indexing4(self):
+        # Test advanced indexing -- setitem to dist array with mismatched distribution
+        def impl(app):
+            a = app.arange(500)
+            b = a[::2]
+            c = app.fromfunction(lambda i,j: i+j*100, (50,3), dtype=int)
+            d = app.fromfunction(lambda i,j: (i-j), (50,100), dtype=int)
+            b[c] = d[:,48:51]
+            return a
+
+        run_both(impl)
+
     def test_smap1(self):
         a = ramba.arange(100)
         b = ramba.smap("lambda x: 3*x-7", a)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -16,3 +16,4 @@

		import ramba.random as random
		import ramba.tests as tests
		from numpy import bool_, byte, ubyte, short, ushort, intc, uintc, int_, uint, longlong, ulonglong, half, float16, single, double, longdouble, int8, int16, int32, int64, float32, float64, csingle, cdouble, clongdouble, iinfo