Added momentum to named_arrays.optimize.minimum_gradient_descent()
byrdie authored Aug 26, 2024
1 parent 3900e80 commit baaab8f
Showing 6 changed files with 53 additions and 12 deletions.
docs/refs.bib (10 changes: 8 additions & 2 deletions)
@@ -11,5 +11,11 @@ @article{Eriksson1990
  URL = {https://doi.org/10.1080/0025570X.1990.11977515},
  eprint = {https://doi.org/10.1080/0025570X.1990.11977515}
}

+@article{Goh2017,
+  author = {Goh, Gabriel},
+  title = {Why Momentum Really Works},
+  journal = {Distill},
+  year = {2017},
+  url = {http://distill.pub/2017/momentum},
+  doi = {10.23915/distill.00006}
+}
named_arrays/_scalars/scalar_named_array_functions.py (2 changes: 1 addition & 1 deletion)
@@ -917,7 +917,7 @@ def optimize_root_newton(
        if callback is not None:
            callback(i, x, f, converged)

-       converged |= np.abs(f) < max_abs_error
+       converged = np.abs(f) < max_abs_error

        if np.all(converged):
            return x
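Note on the change above: switching from |= to = alters the semantics of the convergence mask. With |=, an element stays flagged as converged once it has ever met the tolerance; with =, the mask is recomputed from the current residual on every iteration, so an element that drifts back out of tolerance is checked again. A minimal NumPy sketch of the difference (illustrative only, not code from named_arrays):

import numpy as np

# Illustrative sketch (not named_arrays code): the two mask-update styles.
residuals = [
    np.array([0.5, 0.05]),  # second element is within tolerance here ...
    np.array([0.5, 0.20]),  # ... but drifts back out on the next iteration
]
max_abs_error = 0.1

sticky = np.zeros(2, dtype=bool)
current = np.zeros(2, dtype=bool)
for f in residuals:
    sticky |= np.abs(f) < max_abs_error   # old behavior: once True, stays True
    current = np.abs(f) < max_abs_error   # new behavior: re-evaluated every iteration

print(sticky)   # [False  True]
print(current)  # [False False]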
named_arrays/_vectors/tests/test_vectors.py (7 changes: 5 additions & 2 deletions)
@@ -597,8 +597,11 @@ class TestOptimizeRoot(
    @pytest.mark.parametrize(
        argnames="function,expected",
        argvalues=[
-           (lambda x: (np.square(na.value(x) - shift_horizontal) + shift_vertical).length, shift_horizontal)
-           for shift_horizontal in [20,]
+           (
+               lambda x: (np.square((na.value(x) - shift_horizontal).length) + shift_vertical) * u.ph,
+               shift_horizontal,
+           )
+           for shift_horizontal in [2,]
            for shift_vertical in [1,]
        ]
    )
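The updated test objective is a shifted paraboloid, |x - shift_horizontal|^2 + shift_vertical (scaled to photon units), whose minimizer is shift_horizontal regardless of the vertical offset. A plain-NumPy sanity check of that expectation (hypothetical, independent of the named_arrays test machinery):

import numpy as np

# Hypothetical scalar check: (x - a)**2 + b is minimized at x = a for any offset b.
shift_horizontal = 2.0
shift_vertical = 1.0

def objective(x):
    return np.square(x - shift_horizontal) + shift_vertical

x = np.linspace(-10, 10, 20001)
x_best = x[np.argmin(objective(x))]
print(x_best)  # 2.0, the expected minimizer used in the parametrization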
named_arrays/_vectors/vector_named_array_functions.py (10 changes: 7 additions & 3 deletions)
@@ -450,7 +450,7 @@ def optimize_root_newton(
        if callback is not None:
            callback(i, x, f, converged)

-       converged |= np.abs(f) < max_abs_error
+       converged = np.abs(f) < max_abs_error

        if np.all(converged):
            return x
@@ -518,6 +518,7 @@ def optimize_minimum_gradient_descent(
    function: Callable[[na.AbstractVectorArray], na.AbstractScalar],
    guess: na.AbstractVectorArray,
    step_size: float | na.AbstractScalar,
+   momentum: float | na.AbstractScalar,
    gradient: None | Callable[[na.AbstractVectorArray], na.AbstractScalar],
    min_gradient: na.ScalarLike,
    max_iterations: int,
@@ -547,6 +548,7 @@ def optimize_minimum_gradient_descent(
    converged = na.broadcast_to(0 * na.value(x), shape=shape).astype(bool)

    x = na.broadcast_to(x, shape).astype(float)
+   z = 0

    for i in range(max_iterations):

@@ -555,12 +557,14 @@

        grad = gradient(x)

-       converged |= np.abs(grad) < min_gradient
+       converged = np.abs(grad) < min_gradient

        if np.all(converged):
            return x

-       correction = step_size * grad
+       z = momentum * z + grad
+
+       correction = step_size * z

        x = x - correction

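With the new accumulator z, the inner loop above becomes gradient descent with momentum: each step applies the learning rate to a running, exponentially decaying sum of past gradients rather than to the current gradient alone. A standalone sketch of the same loop structure on a scalar quadratic (the objective, step_size, momentum, and tolerance here are illustrative assumptions, not the library code):

import numpy as np

# Standalone paraphrase of the patched loop (not named_arrays itself):
# z accumulates past gradients, and each step uses z instead of the raw gradient.
def gradient(x):
    return 2.0 * (x - 3.0)           # gradient of f(x) = (x - 3)**2

x = 0.0                              # initial guess
z = 0.0                              # momentum accumulator, starts at zero
step_size = 0.1
momentum = 0.5
min_gradient = 1e-8

for i in range(1000):
    grad = gradient(x)
    if np.abs(grad) < min_gradient:  # convergence check on the gradient
        break
    z = momentum * z + grad          # accumulate the gradient history
    x = x - step_size * z            # step along the accumulated direction

print(x)  # ~3.0, the minimizer of (x - 3)**2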
named_arrays/optimize.py (33 changes: 30 additions & 3 deletions)
@@ -142,12 +142,13 @@ def minimum_gradient_descent(
    function: Callable[[InputT], OutputT],
    guess: InputT,
    step_size: None | InputT = None,
+   momentum: float | OutputT = 0,
    gradient: None | Callable[[InputT], InputT] = None,
    min_gradient: None | InputT = None,
    max_iterations: int = 1000,
    callback: None | Callable[[int, InputT, OutputT, na.AbstractArray], None] = None,
) -> InputT:
-   """
+   r"""
    Find the local minimum of the given function using the
    `gradient descent <https://en.wikipedia.org/wiki/Gradient_descent>`_ method.
@@ -161,7 +162,12 @@
        The learning rate for the gradient descent algorithm.
        This should have the same units as ``x / gradient(x)``.
        If :obj:`None` (the default), this takes the value
-       ``0.1 * na.unit(x / gradient(x))``.
+       ``0.01 * na.unit(x / gradient(x))``.
+   momentum
+       The momentum constant, :math:`\beta`, for the gradient descent algorithm.
+       Should be a dimensionless number between zero and one.
+       Defaults to zero, which is equivalent to vanilla gradient descent with
+       no momentum.
    gradient
        The gradient of `function`.
        If :obj:`None` (the default), the gradient is computed using
@@ -180,6 +186,26 @@
        ``x`` is the current guess, ``f`` is the current function value,
        and ``converged`` is an array storing the convergence state for every
        minimum being computed.
+
+   Notes
+   -----
+   This function uses the update rules described in :cite:t:`Goh2017`,
+
+   .. math::
+       :label: momentum-equation
+
+       z_{k + 1} = \beta z_k + \nabla f(x_k)
+
+   .. math::
+       :label: gradient-descent
+
+       x_{k + 1} = x_k - \alpha z_{k + 1},
+
+   where :math:`x_k` is the current guess for iteration :math:`k`,
+   :math:`f` is the objective function,
+   :math:`\alpha` is the learning rate,
+   and :math:`\beta` is the momentum constant.
    """

    x = guess
@@ -191,7 +217,7 @@
    unit_grad = unit_f / unit_x

    if step_size is None:
-       step_size = 0.1 * (unit_x / unit_grad)
+       step_size = 0.01 * (unit_x / unit_grad)

    if gradient is None:
        def gradient(x: float | na.AbstractScalar | na.AbstractVectorArray):
@@ -209,6 +235,7 @@ def gradient(x: float | na.AbstractScalar | na.AbstractVectorArray):
        function=function,
        guess=guess,
        step_size=step_size,
+       momentum=momentum,
        gradient=gradient,
        min_gradient=min_gradient,
        max_iterations=max_iterations,
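As Goh (2017) argues, the benefit of the momentum term shows up on poorly conditioned problems, where plain gradient descent is forced into tiny steps by the steep direction while crawling along the shallow one. A rough, self-contained comparison on an ill-conditioned quadratic (the constants below are illustrative assumptions and the code is independent of named_arrays):

import numpy as np

# Self-contained comparison (not named_arrays code): minimize the
# ill-conditioned quadratic f(x, y) = x**2 + 100 * y**2 with and without momentum.
def gradient(w):
    return np.array([2.0, 200.0]) * w

def descend(momentum, step_size=0.001, min_gradient=1e-6, max_iterations=100_000):
    w = np.array([1.0, 1.0])
    z = np.zeros_like(w)
    for i in range(max_iterations):
        grad = gradient(w)
        if np.all(np.abs(grad) < min_gradient):
            return i              # iterations needed to converge
        z = momentum * z + grad   # same accumulator update as the patch
        w = w - step_size * z
    return max_iterations

print(descend(momentum=0.0))  # plain gradient descent
print(descend(momentum=0.9))  # same step size, with momentum

With the same step size, the momentum run drives the gradient below the tolerance in far fewer iterations, which is the motivation for exposing the new parameter.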
named_arrays/tests/test_core.py (3 changes: 2 additions & 1 deletion)
@@ -1469,9 +1469,10 @@ def callback(i, x, f, c):
            function=function,
            guess=array,
            callback=callback,
+           momentum=0.5,
        )

-       assert np.allclose(na.value(result), expected)
+       assert np.allclose(result, expected * na.unit_normalized(array))
        assert out is result

    @pytest.mark.parametrize(
