Skip to content

Commit

Permalink
Optimize squareform's square matrix construction
Browse files Browse the repository at this point in the history
Avoid breaking chunks into smaller pieces than necessary by taking
advantage of symmetry. Namely, pieces of rows also correspond to
pieces of columns. Starting from the bottom-right corner, where
these pieces are simple singleton 1-D arrays, concatenate them together
into larger pieces. Use a 2-D singleton zero array as a seed for this
concatenation to grow from. By doing this, we avoid further subselection
from the 1-D array beyond the necessary selection of pieces.
  • Loading branch information
jakirkham committed Oct 9, 2017
1 parent 307e016 commit 7a4c412
Showing 1 changed file with 14 additions and 18 deletions.
32 changes: 14 additions & 18 deletions dask_distance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,27 +216,23 @@ def squareform(X, force="no"):

X_tri = []
j1 = 0
for j2 in _pycompat.irange(d - 1, -1, -1):
for j2 in _pycompat.irange(d - 1, 0, -1):
X_tri.append(X[j1:j1 + j2])
j1 += j2

z = dask.array.zeros((1,), dtype=X.dtype, chunks=(1,))

result = []
for i in range(d):
col_i = []

for j in range(i):
i_j = i - j
col_i.append(X_tri[j][i_j - 1:i_j])
col_i.append(z)
col_i.append(X_tri[i])

result.append(dask.array.concatenate([
a for a in col_i if a.size
]))

result = dask.array.stack(result)
z = dask.array.zeros((1, 1), dtype=X.dtype, chunks=(1, 1))

result = z
for i in _pycompat.irange(d - 2, -1, -1):
X_tri_i = X_tri[i]
result = result.rechunk(2 * X_tri_i.chunks)
result = dask.array.concatenate(
[
dask.array.concatenate([z, X_tri_i[None]], axis=1),
dask.array.concatenate([X_tri_i[:, None], result], axis=1)
],
axis=0
)
elif conv == "tovec":
result = [
X[i, i + 1:] for i in range(0, len(X) - 1)
Expand Down

0 comments on commit 7a4c412

Please sign in to comment.